diff --git a/paddle/legacy/api/Arguments.cpp b/paddle/legacy/api/Arguments.cpp deleted file mode 100644 index 7bb5a6f75b9a8ab800fc74c6cc01c0b104ccdd5e..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Arguments.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PaddleAPI.h" -#include "PaddleAPIPrivate.h" - -#include "paddle/legacy/parameter/Argument.h" - -size_t Arguments::getSlotNum() const { return m->outputs.size(); } - -Arguments* Arguments::createArguments(size_t slotNum) { - auto args = new Arguments(); - args->m->outputs.resize(slotNum); - return args; -} - -void Arguments::resize(size_t slotNum) { m->outputs.resize(slotNum); } - -Arguments::Arguments() : m(new ArgumentsPrivate()) {} - -Arguments::~Arguments() { delete m; } - -Arguments* Arguments::createByPaddleArgumentVector(void* ptr) { - auto p = (std::vector*)(ptr); - auto args = new Arguments(); - args->m->outputs = *p; - return args; -} - -Arguments* Arguments::createByPaddleArgument(const void* ptr) { - auto p = (paddle::Argument*)(ptr); - auto args = new Arguments(); - args->m->outputs.push_back(*p); - return args; -} - -Matrix* Arguments::getSlotValue(size_t idx) const throw(RangeError) { - auto& a = m->getArg(idx); - return Matrix::createByPaddleMatrixPtr(&a.value); -} - -Matrix* Arguments::getSlotGrad(size_t idx) const throw(RangeError) { - auto& a = m->getArg(idx); - return 
Matrix::createByPaddleMatrixPtr(&a.grad); -} - -IVector* Arguments::getSlotIds(size_t idx) const throw(RangeError) { - auto& a = m->getArg(idx); - return IVector::createByPaddleVectorPtr(&a.ids); -} - -Matrix* Arguments::getSlotIn(size_t idx) const throw(RangeError) { - auto& a = m->getArg(idx); - return Matrix::createByPaddleMatrixPtr(&a.in); -} - -void Arguments::setSlotValue(size_t idx, Matrix* mat) throw(RangeError) { - auto& a = m->getArg(idx); - a.value = m->cast(mat->getSharedPtr()); -} - -void Arguments::setSlotGrad(size_t idx, Matrix* mat) throw(RangeError) { - auto& a = m->getArg(idx); - a.grad = m->cast(mat->getSharedPtr()); -} - -void Arguments::setSlotIn(size_t idx, Matrix* mat) throw(RangeError) { - auto& a = m->getArg(idx); - a.in = m->cast(mat->getSharedPtr()); -} - -void Arguments::setSlotIds(size_t idx, IVector* vec) throw(RangeError) { - auto& a = m->getArg(idx); - auto& v = m->cast(vec->getSharedPtr()); - a.ids = v; -} - -template -static inline void doCopyFromSafely(std::shared_ptr& dest, - std::shared_ptr& src) { - if (src) { - if (dest) { - dest->copyFrom(*src); - } else { - dest = src; - } - } -} - -IVector* Arguments::getSlotSequenceStartPositions(size_t idx) const - throw(RangeError) { - auto& a = m->getArg(idx); - if (a.sequenceStartPositions) { - return IVector::createByPaddleVectorPtr( - &a.sequenceStartPositions->getMutableVector(false)); - } else { - return nullptr; - } -} - -IVector* Arguments::getSlotSubSequenceStartPositions(size_t idx) const - throw(RangeError) { - auto& a = m->getArg(idx); - if (a.subSequenceStartPositions) { - return IVector::createByPaddleVectorPtr( - &a.subSequenceStartPositions->getMutableVector(false)); - } else { - return nullptr; - } -} - -void Arguments::setSlotSequenceStartPositions(size_t idx, - IVector* vec) throw(RangeError) { - auto& a = m->getArg(idx); - auto& v = m->cast(vec->getSharedPtr()); - a.sequenceStartPositions = std::make_shared(v); -} - -void Arguments::setSlotSubSequenceStartPositions( - 
size_t idx, IVector* vec) throw(RangeError) { - auto& a = m->getArg(idx); - auto& v = m->cast(vec->getSharedPtr()); - a.subSequenceStartPositions = std::make_shared(v); -} - -IVector* Arguments::getSlotSequenceDim(size_t idx) const throw(RangeError) { - auto& a = m->getArg(idx); - return IVector::createByPaddleVectorPtr(&a.cpuSequenceDims); -} - -void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) { - auto& a = m->getArg(idx); - a.cpuSequenceDims = m->cast(vec->getSharedPtr()); -} - -float Arguments::sum() const { return paddle::Argument::sum(m->outputs); } - -int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) { - auto& a = m->getArg(idx); - return a.getBatchSize(); -} - -void Arguments::setSlotFrameHeight(size_t idx, size_t h) throw(RangeError) { - auto& a = m->getArg(idx); - a.setFrameHeight(h); -} - -void Arguments::setSlotFrameWidth(size_t idx, size_t w) throw(RangeError) { - auto& a = m->getArg(idx); - a.setFrameWidth(w); -} - -size_t Arguments::getSlotFrameHeight(size_t idx) const throw(RangeError) { - auto& a = m->getArg(idx); - return a.getFrameHeight(); -} - -size_t Arguments::getSlotFrameWidth(size_t idx) const throw(RangeError) { - auto& a = m->getArg(idx); - return a.getFrameWidth(); -} - -void* Arguments::getInternalArgumentsPtr() const { return &m->outputs; } diff --git a/paddle/legacy/api/CMakeLists.txt b/paddle/legacy/api/CMakeLists.txt deleted file mode 100644 index 06e1f5d5f0884efabfcdf917ca5c35d94ad5dce9..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/CMakeLists.txt +++ /dev/null @@ -1,120 +0,0 @@ -set(API_SOURCES - Arguments.cpp - ConfigParser.cpp - Evaluator.cpp - GradientMachine.cpp - Matrix.cpp - Parameter.cpp - ParameterOptimizer.cpp - ParameterUpdater.cpp - SequenceGenerator.cpp - Trainer.cpp - Util.cpp - Vector.cpp) -set(API_HEADER - PaddleAPI.h - Internal.h) - -add_library(paddle_api STATIC ${API_SOURCES}) -add_dependencies(paddle_api paddle_proto paddle_trainer_lib) - 
-INCLUDE(${SWIG_USE_FILE}) -INCLUDE_DIRECTORIES(${PADDLE_SOURCE_DIR}/paddle) - -FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py) - -SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) - -SET(SWIG_NEED_FLAGS - -ftls-model=global-dynamic - -Wno-parentheses-equality - -Wno-self-assign - -Wno-maybe-uninitialized - -Wno-missing-field-initializers) - FOREACH(flag ${SWIG_NEED_FLAGS}) - safe_set_cxxflag(SWIG_CXX_FLAGS ${flag}) -ENDFOREACH() - -SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR}) -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SWIG_CXX_FLAGS}") - -SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS - paddle_parameter - paddle_function - paddle_math - paddle_utils - paddle_gserver - paddle_pserver - paddle_api - paddle_cuda - paddle_trainer_lib - paddle_network - paddle_proto - ${external_project_dependencies} - ${RDMA_LIBS} -) - -IF(APPLE) - SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load -framework CoreFoundation -framework Security") -ELSE(APPLE) - SET(START_GROUP "-Xlinker -start-group") - SET(END_GROUP "-Xlinker -end-group") - SET(ARCHIVE_START "-Wl,--whole-archive") - SET(ARCHIVE_END "-Wl,--no-whole-archive") -ENDIF(APPLE) - -SWIG_ADD_MODULE(swig_paddle python Paddle.i) -SWIG_LINK_LIBRARIES(swig_paddle - ${MACOS_LD_FLAGS} - ${START_GROUP} - ${ARCHIVE_START} - paddle_gserver - paddle_function - ${METRIC_LIBS} - ${ARCHIVE_END} - paddle_pserver - paddle_trainer_lib - paddle_network - paddle_parameter - paddle_optimizer - paddle_math - paddle_utils - paddle_proto - paddle_cuda - paddle_api - ${CMAKE_DL_LIBS} - ${EXTERNAL_LIBS} - ${CMAKE_THREAD_LIBS_INIT} - ${RDMA_LD_FLAGS} - ${START_END} -) - -add_custom_command(OUTPUT ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/py_paddle - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PADDLE_BINARY_DIR}/python/py_paddle - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so 
${PADDLE_BINARY_DIR}/python/py_paddle - COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_BINARY_DIR}/.timestamp - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle - DEPENDS _swig_paddle -) - -# TODO(yuyang18) : make wheel name calculated by cmake -add_custom_target(python_api_wheel ALL DEPENDS ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so) - -if(WITH_TESTING) - IF(NOT PY_PIP_FOUND) - SET(PIP_SOURCES_DIR ${PYTHON_SOURCES_DIR}/pip) - ExternalProject_Add(pip - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY https://github.com/pypa/pip.git - GIT_TAG 9.0.1 - PREFIX ${PIP_SOURCES_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - BUILD_IN_SOURCE 1 - #DEPENDS python setuptools python_api_wheel - ) - ENDIF() - add_subdirectory(test) -endif() diff --git a/paddle/legacy/api/ConfigParser.cpp b/paddle/legacy/api/ConfigParser.cpp deleted file mode 100644 index 016d6da4e2e4ce888527fe9b61a163056d7729eb..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/ConfigParser.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "PaddleAPI.h" -#include "PaddleAPIPrivate.h" -#include "paddle/legacy/trainer/Trainer.h" - -struct ParameterConfigPrivate { - paddle::ParameterPtr parameter; - paddle::ParameterConfig config; - - inline paddle::ParameterConfig* getConfigPtr() { - if (parameter != nullptr) { - auto& conf = parameter->getConfig(); - return const_cast(&conf); - } else { - return &config; - } - } -}; - -TrainerConfig::TrainerConfig() : m(new TrainerConfigPrivate()) {} - -TrainerConfig::~TrainerConfig() { delete m; } - -TrainerConfig* TrainerConfig::createFromTrainerConfigFile( - const std::string& confPath) { - LOG(INFO) << "load trainer config from " << confPath; - auto conf = std::make_shared(confPath); - auto retv = new TrainerConfig(); - retv->m->conf = conf; - return retv; -} - -TrainerConfig* TrainerConfig::createFromProtoString(const std::string& str) { - auto retv = new TrainerConfig(); - paddle::TrainerConfig trainerConfigProto; - auto conf = std::make_shared(trainerConfigProto); - CHECK(conf->getMutableConfig().ParseFromString(str)); - retv->m->conf = conf; - return retv; -} - -ModelConfig::ModelConfig() : m(new ModelConfigPrivate()) {} - -ModelConfig::~ModelConfig() { delete m; } - -ModelConfig* TrainerConfig::getModelConfig() const { - auto retv = new ModelConfig(); - retv->m->conf = m->conf; - return retv; -} - -ParameterConfig::ParameterConfig() : m(new ParameterConfigPrivate()) {} - -ParameterConfig::~ParameterConfig() { delete m; } - -ParameterConfig* ParameterConfig::createParameterConfigFromParameterSharedPtr( - void* ptr) { - auto& p = *(paddle::ParameterPtr*)(ptr); - if (p != nullptr) { - auto conf = new ParameterConfig(); - conf->m->parameter = p; - return conf; - } else { - return nullptr; - } -} - -ParameterConfig* ParameterConfig::createParameterConfigFromParameterPtr( - void* ptr) { - auto& p = *(paddle::Parameter*)(ptr); - auto conf = new ParameterConfig(); - conf->m->config = p.getConfig(); - return conf; -} - -std::string 
ParameterConfig::toProtoString() const { - return m->getConfigPtr()->SerializeAsString(); -} - -void* ParameterConfig::getRawPtr() { return m->getConfigPtr(); } - -OptimizationConfig::OptimizationConfig() : m(new OptimizationConfigPrivate()) {} - -OptimizationConfig::~OptimizationConfig() { delete m; } - -std::string OptimizationConfig::toProtoString() { - return m->getConfig().SerializeAsString(); -} - -OptimizationConfig* TrainerConfig::getOptimizationConfig() const { - auto opt_config = new OptimizationConfig(); - opt_config->m->trainer_config = m->conf; - return opt_config; -} - -OptimizationConfig* OptimizationConfig::createFromProtoString( - const std::string& str) { - auto conf = new OptimizationConfig(); - conf->m->config.ParseFromString(str); - return conf; -} diff --git a/paddle/legacy/api/Evaluator.cpp b/paddle/legacy/api/Evaluator.cpp deleted file mode 100644 index c4aac47cbec5876117b09b5598f87a1a7e8bc6c3..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Evaluator.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#include -#include "PaddleAPI.h" -#include "PaddleAPIPrivate.h" - -Evaluator::Evaluator() : m(new EvaluatorPrivate()) {} -Evaluator::~Evaluator() { delete m; } - -void Evaluator::start() { m->rawPtr->start(); } - -void Evaluator::finish() { m->rawPtr->finish(); } - -std::string Evaluator::toString() { - std::ostringstream sout; - m->rawPtr->printStats(sout); - return sout.str(); -} - -std::vector Evaluator::getNames() const { - std::vector retv; - m->rawPtr->getNames(&retv); - return retv; -} - -double Evaluator::getValue(const std::string name) const { - paddle::Error err; - double v = m->rawPtr->getValue(name, &err); - if (!err.isOK()) { - throw std::runtime_error(err.msg()); - } - return v; -} diff --git a/paddle/legacy/api/GradientMachine.cpp b/paddle/legacy/api/GradientMachine.cpp deleted file mode 100644 index 5ad2fe11a4c668a318f76492f57091f386183986..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/GradientMachine.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "PaddleAPI.h" -#include "PaddleAPIPrivate.h" - -#include "Internal.h" -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" - -std::vector GradientMachine::defaultParamTypes = { - PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM}; - -GradientMachine::GradientMachine() : m(new GradientMachinePrivate()) {} - -GradientMachine::~GradientMachine() { delete m; } - -GradientMachine* GradientMachine::createFromPaddleModelPtr( - const void* confPtr, - GradientMatchineCreateMode mode, - const std::vector& types) { - auto& conf = *(const paddle::ModelConfig*)(confPtr); - std::vector realTypes; - staticCastVector(&realTypes, types); - auto machineRawPtr = paddle::GradientMachine::create(conf, mode, realTypes); - auto machinePtr = std::shared_ptr(machineRawPtr); - if (machinePtr != nullptr) { - auto machine = new GradientMachine(); - machine->m->machine = machinePtr; - return machine; - } else { - return nullptr; - } -} - -GradientMachine* GradientMachine::createByConfigProtoStr( - const std::string& protoStr, - GradientMatchineCreateMode mode, - const std::vector& types) { - paddle::ModelConfig conf; - conf.ParseFromString(protoStr); - if (conf.IsInitialized()) { - return GradientMachine::createFromPaddleModelPtr(&conf, mode, types); - } else { - return nullptr; - } -} - -GradientMachine* GradientMachine::createByModelConfig( - ModelConfig* conf, - GradientMatchineCreateMode mode, - const std::vector& types) { - auto confPtr = &conf->m->conf->getModelConfig(); - return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types); -} - -void GradientMachine::start() { m->machine->start(); } - -void GradientMachine::finish() { m->machine->finish(); } - -void GradientMachine::onPassEnd() { m->machine->onPassEnd(); } - -void GradientMachine::prefetch(const Arguments& inArgs) { - auto& in = - m->cast>(inArgs.getInternalArgumentsPtr()); - m->machine->prefetch(in); -} - -void GradientMachine::forward(const Arguments& inArgs, - Arguments* outArgs, 
- PassType passType) { - auto& in = - m->cast>(inArgs.getInternalArgumentsPtr()); - auto& out = m->cast>( - outArgs->getInternalArgumentsPtr()); - paddle::PassType pt = (paddle::PassType)(passType); - m->machine->forward(in, &out, pt); -} - -UpdateCallback::~UpdateCallback() {} - -void UpdateCallback::apply(Parameter* p) { - // UNUSED(p); -} - -class UpdateCallbackWrapper { - public: - explicit UpdateCallbackWrapper(const UpdateCallback& callback) - : callback(const_cast(callback)) {} - - void operator()(paddle::Parameter* param) { - auto p = Parameter::createFromRawPtr(¶m); - // @TODO Use Stack variable instead. - callback.apply(p); - delete p; - } - - private: - UpdateCallback& callback; -}; - -void GradientMachine::backward(const UpdateCallback& callback) { - m->machine->backward(UpdateCallbackWrapper(callback)); -} - -void GradientMachine::forwardBackward(const Arguments& inArgs, - Arguments* outArgs, - PassType passType, - const UpdateCallback& callback) { - auto& in = - m->cast>(inArgs.getInternalArgumentsPtr()); - auto& out = m->cast>( - outArgs->getInternalArgumentsPtr()); - paddle::PassType pt = (paddle::PassType)(passType); - m->machine->forwardBackward(in, &out, pt, UpdateCallbackWrapper(callback)); -} - -void GradientMachine::loadParameters(const std::string& path) { - m->machine->loadParameters(path); -} - -size_t GradientMachine::getParameterSize() const { - return m->machine->getParameters().size(); -} - -Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { - auto params = m->machine->getParameters(); - if (i < params.size()) { - return Parameter::createFromSharedPtr(&m->machine->getParameters()[i]); - } else { - throw RangeError(); - } -} - -size_t GradientMachine::getNonStaticParameterSize() const { - return m->machine->getNonStaticParameters().size(); -} - -Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) { - auto params = m->machine->getNonStaticParameters(); - if (i < params.size()) { - return 
Parameter::createFromSharedPtr( - &m->machine->getNonStaticParameters()[i]); - } else { - throw RangeError(); - } -} - -void GradientMachine::randParameters() { m->machine->randParameters(); } - -Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const - throw(UnsupportError) { - auto nn = m->machine; - if (nn) { - auto arg = nn->getLayerOutput(layerName); - return Arguments::createByPaddleArgument(&arg); - } else { - throw UnsupportError(); - } -} - -SequenceGenerator* GradientMachine::asSequenceGenerator( - const std::vector& dict, - size_t begin_id, - size_t end_id, - size_t max_length, - size_t beam_size) { - SequenceGenerator* r = - SequenceGenerator::createByGradientMachineSharedPtr(&m->machine); - r->setDict(dict); - r->setBos(begin_id); - r->setEos(end_id); - r->setMaxLength(max_length); - r->setBeamSize(beam_size); - return r; -} - -Evaluator* GradientMachine::makeEvaluator() { - auto ev = new Evaluator(); - ev->m->rawPtr = m->machine->makeEvaluator(); - return ev; -} - -void GradientMachine::eval(Evaluator* evaluator) { - m->machine->eval(evaluator->m->rawPtr); -} diff --git a/paddle/legacy/api/Internal.h b/paddle/legacy/api/Internal.h deleted file mode 100644 index 2195cc6739d2066303ce3462f35b839dbd44474a..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Internal.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "PaddleAPI.h" - -#include -#include - -template -void staticCastVector(std::vector* dest, const std::vector& src) { - dest->resize(src.size()); - std::transform(src.begin(), src.end(), dest->begin(), [](T1 t) { - return static_cast(t); - }); -} diff --git a/paddle/legacy/api/Matrix.cpp b/paddle/legacy/api/Matrix.cpp deleted file mode 100644 index 8862d0ea92c92a2608b49c6b1315badae9e9fd98..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Matrix.cpp +++ /dev/null @@ -1,317 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/math/Matrix.h" -#include -#include -#include "PaddleAPI.h" -#include "paddle/legacy/math/CpuSparseMatrix.h" -#include "paddle/legacy/math/SparseMatrix.h" - -struct MatrixPrivate { - std::shared_ptr mat; -}; - -Matrix::Matrix() : m(new MatrixPrivate()) {} - -Matrix* Matrix::createByPaddleMatrixPtr(void* sharedPtr) { - auto* mat = reinterpret_cast(sharedPtr); - if ((*mat) != nullptr) { - auto m = new Matrix(); - m->m->mat = *mat; - return m; - } else { - return nullptr; - } -} - -Matrix* Matrix::createZero(size_t height, size_t width, bool useGpu) { - auto m = new Matrix(); - m->m->mat = paddle::Matrix::create(height, width, useGpu); - m->m->mat->zero(); - return m; -} - -Matrix* Matrix::createDense(const std::vector& data, - size_t height, - size_t width, - bool useGpu) { - auto m = new Matrix(); - m->m->mat = paddle::Matrix::create(height, width, useGpu); - m->m->mat->copyFrom(data.data(), data.size()); - return m; -} - -Matrix* Matrix::createDenseFromNumpy(float* data, - int dim1, - int dim2, - bool copy, - bool useGpu) throw(UnsupportError) { - if (useGpu) { - /// Gpu mode only supports copy=True - if (!copy) { - throw UnsupportError("Gpu mode only supports copy=True"); - } - return Matrix::createGpuDenseFromNumpy(data, dim1, dim2); - } else { - return Matrix::createCpuDenseFromNumpy(data, dim1, dim2, copy); - } -} - -Matrix* Matrix::createCpuDenseFromNumpy(float* data, - int dim1, - int dim2, - bool copy) { - auto m = new Matrix(); - if (copy) { - m->m->mat = paddle::Matrix::create(dim1, dim2); - m->m->mat->copyFrom(data, dim1 * dim2); - } else { - m->m->mat = paddle::Matrix::create(data, dim1, dim2, false); - } - return m; -} - -Matrix* Matrix::createGpuDenseFromNumpy(float* data, int dim1, int dim2) { - auto m = new Matrix(); - m->m->mat = paddle::Matrix::create(dim1, dim2, false, true); - m->m->mat->copyFrom(data, dim1 * dim2); - return m; -} - -Matrix* Matrix::createSparse(size_t height, - size_t width, - size_t nnz, - bool 
isNonVal, - bool isTrans, - bool useGpu) { - auto m = new Matrix(); - m->m->mat = paddle::Matrix::createSparseMatrix( - height, - width, - nnz, - isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE, - isTrans, - useGpu); - return m; -} - -Matrix::~Matrix() { delete m; } - -size_t Matrix::getHeight() const { return m->mat->getHeight(); } - -size_t Matrix::getWidth() const { return m->mat->getWidth(); } - -float Matrix::get(size_t x, size_t y) const throw(RangeError) { - if (x > this->getWidth() || y > this->getHeight()) { - RangeError e; - throw e; - } - return m->mat->getElement(x, y); -} - -void Matrix::set(size_t x, size_t y, float val) throw(RangeError, - UnsupportError) { - if (x > this->getWidth() || y > this->getHeight()) { - RangeError e; - throw e; - } - auto rawMat = m->mat.get(); - if (auto cDenseMat = dynamic_cast(rawMat)) { - *(cDenseMat->getData() + x + y * cDenseMat->getWidth()) = val; - } else { - UnsupportError e; - throw e; - } -} - -bool Matrix::isSparse() const { - auto raw_mat = m->mat.get(); - return dynamic_cast(raw_mat) != nullptr || - dynamic_cast(raw_mat) != nullptr; -} - -SparseValueType Matrix::getSparseValueType() const throw(UnsupportError) { - auto cpuSparseMat = - std::dynamic_pointer_cast(m->mat); - if (cpuSparseMat != nullptr) { - return (SparseValueType)cpuSparseMat->getValueType(); - } else { - auto gpuSparseMat = - std::dynamic_pointer_cast(m->mat); - if (gpuSparseMat != nullptr) { - return (SparseValueType)gpuSparseMat->getValueType(); - } else { - UnsupportError e; - throw e; - } - } -} - -SparseFormatType Matrix::getSparseFormat() const throw(UnsupportError) { - auto cpuSparseMat = - std::dynamic_pointer_cast(m->mat); - if (cpuSparseMat != nullptr) { - return (SparseFormatType)cpuSparseMat->getFormat(); - } else { - auto gpuSparseMat = - std::dynamic_pointer_cast(m->mat); - if (gpuSparseMat != nullptr) { - return SPARSE_CSR; - } else { - UnsupportError e; - throw e; - } - } -} - -IntArray Matrix::getSparseRowCols(size_t i) const 
- throw(UnsupportError, RangeError) { - auto cpuSparseMat = - std::dynamic_pointer_cast(m->mat); - if (cpuSparseMat != nullptr && - cpuSparseMat->getFormat() == paddle::SPARSE_CSR) { - if (i < cpuSparseMat->getHeight()) { - // cpuSparseMat->print(std::cout); - size_t len = cpuSparseMat->getColNum(i); - return IntArray(cpuSparseMat->getRowCols(i), len); - } else { - RangeError e; - throw e; - } - } else { - UnsupportError e; - throw e; - } -} - -IntWithFloatArray Matrix::getSparseRowColsVal(size_t i) const - throw(UnsupportError, RangeError) { - auto cpuSparseMat = - std::dynamic_pointer_cast(m->mat); - if (cpuSparseMat != nullptr && - cpuSparseMat->getValueType() == paddle::FLOAT_VALUE) { - if (i < cpuSparseMat->getHeight()) { - return IntWithFloatArray(cpuSparseMat->getRowValues(i), - cpuSparseMat->getRowCols(i), - cpuSparseMat->getColNum(i)); - } else { - RangeError e; - throw e; - } - } else { - UnsupportError e; - throw e; - } -} - -FloatArray Matrix::getData() const { - auto rawMat = m->mat.get(); - if (dynamic_cast(rawMat->getMemoryHandle().get())) { - // is gpu. 
then copy data - float* data = rawMat->getData(); - size_t len = rawMat->getElementCnt(); - float* cpuData = new float[len]; - hl_memcpy_device2host(cpuData, data, len * sizeof(float)); - FloatArray ret_val(cpuData, len); - ret_val.needFree = true; - return ret_val; - } else { - FloatArray ret_val(rawMat->getData(), rawMat->getElementCnt()); - return ret_val; - } -} - -void Matrix::sparseCopyFrom( - const std::vector& rows, - const std::vector& cols, - const std::vector& vals) throw(UnsupportError) { - auto cpuSparseMat = - std::dynamic_pointer_cast(m->mat); - if (cpuSparseMat != nullptr) { - // LOG(INFO) <<"RowSize = "<isSparse()) { - throw UnsupportError(); - } else { - *dim1 = m->mat->getHeight(); - *dim2 = m->mat->getWidth(); - *view_m_data = new float[(*dim1) * (*dim2)]; - if (auto cpuMat = dynamic_cast(m->mat.get())) { - auto src = cpuMat->getData(); - auto dest = *view_m_data; - std::memcpy(dest, src, sizeof(paddle::real) * (*dim1) * (*dim2)); - } else if (auto gpuMat = dynamic_cast(m->mat.get())) { - auto src = gpuMat->getData(); - auto dest = *view_m_data; - hl_memcpy_device2host( - dest, src, sizeof(paddle::real) * (*dim1) * (*dim2)); - } else { - LOG(WARNING) << "Unexpected Situation"; - throw UnsupportError(); - } - } -} - -void Matrix::copyFromNumpyMat(float* data, - int dim1, - int dim2) throw(UnsupportError, RangeError) { - if (isSparse()) { - throw UnsupportError(); - } else { - if (this->getHeight() == (size_t)dim1 && this->getWidth() == (size_t)dim2) { - if (m->mat->getData() != data) { - m->mat->copyFrom(data, dim1 * dim2); - } - } else { - throw RangeError(); - } - } -} - -bool Matrix::isGpu() const { - auto rawPtr = m->mat.get(); - return dynamic_cast(rawPtr) != nullptr || - dynamic_cast(rawPtr) != nullptr; -} diff --git a/paddle/legacy/api/Paddle.i b/paddle/legacy/api/Paddle.i deleted file mode 100644 index 7a1456a5c065821caa54fbf4a10f7ceda08780c0..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Paddle.i +++ /dev/null @@ 
-1,202 +0,0 @@ -%module(directors="1") swig_paddle -%include "std_string.i" -%{ -#define SWIG_FILE_WITH_INIT -#include "legacy/api/PaddleAPI.h" -%} - -%include "exception.i" -%typemap(throws) UnsupportError %{ - SWIG_exception(SWIG_RuntimeError, $1.what()); - SWIG_fail; -%} - -%include "std_vector.i" -%include "std_pair.i" -#ifdef SWIGPYTHON -%include "numpy.i" -#endif - -%init %{ -#ifdef SWIGPYTHON -import_array(); -#endif -%} - - -namespace std { -%template(vector_int) vector; -%template(vector_uint) vector; -%template(vector_float) vector; -%template(vector_string) vector; -%template(vector_vec_star) vector; -} -#ifdef SWIGPYTHON -%typemap(in) (int argc, char** argv) { - int i = 0; - if (!PyList_Check($input)) { - PyErr_SetString(PyExc_ValueError, "Expecting a list"); - return NULL; - } - $1 = PyList_Size($input); - $2 = (char **) malloc(($1+1)*sizeof(char *)); - for (i = 0; i < $1; i++) { - PyObject *s = PyList_GetItem($input,i); - if (!PyString_Check(s)) { - free($2); - PyErr_SetString(PyExc_ValueError, "List items must be strings"); - return NULL; - } - $2[i] = PyString_AsString(s); - } - $2[i] = 0; -} -%typemap(freearg) (int argc, char** argv) { - if ($2) free($2); -} - -%typemap(out) FloatArray { - $result = PyList_New($1.length); - for (size_t i=0; i<$1.length; ++i) { - PyList_SetItem($result, i, PyFloat_FromDouble($1.buf[i])); - } - if($1.needFree) { - delete [] $1.buf; - } -} - -%typemap(out) IntArray { - $result = PyList_New($1.length); - for (size_t i=0; i<$1.length; ++i) { - PyList_SetItem($result, i, PyInt_FromLong($1.buf[i])); - } - if ($1.needFree) { - delete [] $1.buf; - } -} - -%typemap(out) IntWithFloatArray { - $result = PyList_New($1.length); - for (size_t i=0; i<$1.length; ++i) { - PyList_SetItem($result, i, PyTuple_Pack(2, - PyInt_FromLong($1.idxBuf[i]), - PyFloat_FromDouble($1.valBuf[i]) - )); - } - if ($1.needFree) { - delete [] $1.idxBuf; - delete [] $1.valBuf; - } -} - - -%rename(__getitem__) IVector::get; -%rename(__setitem__) 
IVector::set; -%rename(__len__) IVector::getSize; -%rename(__getitem__) Vector::get; -%rename(__setitem__) Vector::set; -%rename(__len__) Vector::getSize; -%rename(__len__) Parameter::getSize; -%rename(__call__) ParameterTraverseCallback::apply; -%rename(__repr__) Evaluator::toString; - -%apply (float* INPLACE_ARRAY2, int DIM1, int DIM2) { - (float* data, int dim1, int dim2) -} - -%apply (float** ARGOUTVIEW_ARRAY2, int* DIM1, int* DIM2) { - (float** view_data, int* dim1, int* dim2) -} - -%apply (float** ARGOUTVIEWM_ARRAY2, int* DIM1, int* DIM2) { - (float** view_m_data, int* dim1, int* dim2) -} - -%apply (int** ARGOUTVIEWM_ARRAY1, int* DIM1) { - (int** view_m_data, int* dim1) -} - -%apply (int* INPLACE_ARRAY1, int DIM1) { - (int* data, int dim) -} - -%apply (int** ARGOUTVIEW_ARRAY1, int* DIM1) { - (int** view_data, int* dim1) -} - -%apply (float* INPLACE_ARRAY1, int DIM1) { - (float* data, int dim) -} - -%apply (float** ARGOUTVIEW_ARRAY1, int* DIM1) { - (float** view_data, int* dim1) -} - -%apply (float** ARGOUTVIEWM_ARRAY1, int* DIM1) { - (float** view_m_data, int* dim1) -} - -#endif -// The below functions internally create object by "new", so it should use -// use SWIG to handle gc. There are hints for SWIG to handle GC. 
-%newobject Matrix::createZero; -%newobject Matrix::createSparse; -%newobject Matrix::createDense; -%newobject Matrix::createDenseFromNumpy; -%newobject Matrix::createCpuDenseFromNumpy; -%newobject Matrix::createGpuDenseFromNumpy; -%newobject Vector::createZero; -%newobject Vector::create; -%newobject Vector::createVectorFromNumpy; -%newobject Vector::createCpuVectorFromNumpy; -%newobject Vector::createGpuVectorFromNumpy; -%newobject IVector::createZero; -%newobject IVector::create; -%newobject IVector::createVectorFromNumpy; -%newobject IVector::createCpuVectorFromNumpy; -%newobject IVector::createGpuVectorFromNumpy; -%newobject Trainer::createByCommandLine; -%newobject Trainer::getForwardOutput; -%newobject Trainer::getLayerOutput; -%newobject Arguments::getSlotValue; -%newobject Arguments::getSlotIds; -%newobject Arguments::getSlotIn; -%newobject Arguments::getSlotSequenceStartPositions; -%newobject Arguments::getSlotSequenceDim; -%newobject Arguments::createArguments; -%newobject GradientMachine::createByConfigProtoStr; -%newobject GradientMachine::createByModelConfig; -%newobject GradientMachine::asSequenceGenerator; -%newobject GradientMachine::getParameter; -%newobject GradientMachine::getLayerOutput; -%newobject GradientMachine::makeEvaluator; -%newobject TrainerConfig::createFromTrainerConfigFile; -%newobject TrainerConfig::getModelConfig; -%newobject TrainerConfig::getOptimizationConfig; -%newobject Parameter::getBuf; -%newobject Parameter::getConfig; -%newobject ParameterOptimizer::create; -%newobject ParameterOptimizer::needSpecialTraversal; -%newobject ParameterUpdater::createLocalUpdater; -%newobject ParameterUpdater::createRemoteUpdater; -%newobject ParameterUpdater::createNewRemoteUpdater; - -%feature("director") UpdateCallback; -%feature("autodoc", 1); // To generate method stub, for code hint in ide - -// Ignore many private class, and method cannot be handled by swig. 
-%ignore MatrixPrivate; -%ignore TrainerPrivate; -%ignore IVector::operator[]; -%ignore ArgumentsPrivate; -%ignore GradientMachinePrivate; -%ignore TrainerConfigPrivate; -%ignore ModelConfigPrivate; -%ignore ParameterPrivate; -%ignore SequenceGeneratorPrivate; -%ignore VectorPrivate; -%ignore ParameterConfigPrivate; -%ignore OptimizationConfigPrivate; -%ignore ParameterTraverseCallbackPrivate; -%include "legacy/utils/GlobalConstants.h" -%include "legacy/api/PaddleAPI.h" diff --git a/paddle/legacy/api/PaddleAPI.h b/paddle/legacy/api/PaddleAPI.h deleted file mode 100644 index 475984a3d57ebc25d5d071c33b7e6562ac78c503..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/PaddleAPI.h +++ /dev/null @@ -1,1054 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/GlobalConstants.h" - -/// Import PaddlePaddle's enumeration into global namespace. -using namespace paddle::enumeration_wrapper; // NOLINT - -/** - * @brief Initialize paddle. - * - * In python, this method should be invoked as - * @code - * import sys - * import paddle - * paddle.initPaddle(sys.argv) - * or you can change arguments as any list of str. 
- * @endcode - */ -void initPaddle(int argc, char** argv); - -/// Return FLAGS_use_gpu -bool isUsingGpu(); - -/// Set the Flags_use_gpu to the given parameter -void setUseGpu(bool useGpu); - -/// Return true if this py_paddle is compiled in GPU Version -bool isGpuVersion(); - -/// Return FLAGS_trainer_count -int getTrainerCount(); - -/// The Error of IO Operation. Such as file not found, etc. -class IOError {}; - -/// Out of range error -class RangeError {}; - -/// Not support Error, such as access GPU memory directly, etc. -class UnsupportError : public std::runtime_error { - public: - UnsupportError() : std::runtime_error(" ") {} - explicit UnsupportError(const std::string& message) - : std::runtime_error(message) {} -}; - -/// This type will map to python's list of float. -struct FloatArray { - const float* buf; - const size_t length; - bool needFree; // true if the buf is dynamic alloced. - FloatArray(const float* b, const size_t l); -}; - -/// This type will map to python's list of int -struct IntArray { - const int* buf; - const size_t length; - bool needFree; - IntArray(const int* b, const size_t l, bool f = false); -}; - -/// This type will map to python's list of (int, float) -struct IntWithFloatArray { - const float* valBuf; - const int* idxBuf; - const size_t length; - bool needFree; - IntWithFloatArray(const float* v, const int* i, size_t l, bool f = false); -}; - -enum SparseValueType { SPARSE_NON_VALUE = 0, SPARSE_VALUE = 1 }; - -enum SparseFormatType { SPARSE_CSR = 0, SPARSE_CSC = 1 }; - -/** - * In Python, -1UL is hard to write. So define a const value used by python - * side. - */ -const size_t NO_SPARSE_ID = -1UL; - -struct MatrixPrivate; -class Matrix { - Matrix(); // User Cannot Create Matrix. - DISABLE_COPY(Matrix); - static Matrix* createByPaddleMatrixPtr(void* sharedPtr); - - public: - virtual ~Matrix(); - - /** - * Create A Matrix with height,width, which is filled by zero. 
- */ - static Matrix* createZero(size_t height, - size_t width, - bool useGpu = isUsingGpu()); - - /** - * Create Sparse Matrix. - * - * After create sparse, sparseCopyFrom can be used to fill matrix. - * - * @param nnz Number of non zero values. - * - * @note the default sparse type is SPARSE_CSR. - */ - static Matrix* createSparse(size_t height, - size_t width, - size_t nnz, - bool isNonVal = true, - bool trans = false, - bool useGpu = isUsingGpu()); - - /** - * Create Dense Matrix. - * - * @param data list of float should be passed in python. - * @note the value will be copy into a new matrix. - */ - static Matrix* createDense(const std::vector& data, - size_t height, - size_t width, - bool useGpu = isUsingGpu()); - - static Matrix* createDenseFromNumpy( - float* data, - int dim1, - int dim2, - bool copy = true, - bool useGpu = isUsingGpu()) throw(UnsupportError); - - /** - * Create Cpu Dense Matrix from numpy matrix, dtype=float32 - * - * @param data a numpy matrix. - * @param dim1 dimension of data. - * @param dim2 dimension of data. - * @param copy true if copy into a new matrix, false will create - * matrix inplace. copy = false should be used with extreme - * care because Matrix will share the memory with the given - * numpy array. If the numpy array object is no longer valid, - * the memory space will not be usable. - */ - static Matrix* createCpuDenseFromNumpy(float* data, - int dim1, - int dim2, - bool copy = true); - - /// Create Gpu Dense Matrix from numpy matrix, dtype=float32 - static Matrix* createGpuDenseFromNumpy(float* data, int dim1, int dim2); - - /** - * Cast to numpy matrix. - * - * @note This method take no parameter in python. - * @note This method in python will return a numpy matrix, not void. - * @note Only CpuDenseMatrix is supported. 
- * - * Example: - * @code - * import paddle - * m = paddle.Matrix.createZero(10,2) - * numpy_mat = m.toNumpyMat() - * @endcode - */ - void toNumpyMatInplace(float** view_data, - int* dim1, - int* dim2) throw(UnsupportError); - - /// Copy To numpy mat. - void copyToNumpyMat(float** view_m_data, - int* dim1, - int* dim2) throw(UnsupportError); - - /// Copy From Numpy Mat - void copyFromNumpyMat(float* data, int dim1, int dim2) throw(UnsupportError, - RangeError); - - /// return true if this matrix is sparse. - bool isSparse() const; - - SparseValueType getSparseValueType() const throw(UnsupportError); - - SparseFormatType getSparseFormat() const throw(UnsupportError); - - IntArray getSparseRowCols(size_t i) const throw(UnsupportError, RangeError); - - IntWithFloatArray getSparseRowColsVal(size_t i) const - throw(UnsupportError, RangeError); - - size_t getHeight() const; - - size_t getWidth() const; - - float get(size_t x, size_t y) const throw(RangeError); - - void set(size_t x, size_t y, float val) throw(RangeError, UnsupportError); - - /// return type is list of float - FloatArray getData() const; - - /** - * Copy from rows, cols, values. - * - * if sparse_nonvalue, the values should be [] - */ - void sparseCopyFrom(const std::vector& rows, - const std::vector& cols, - const std::vector& values = - std::vector()) throw(UnsupportError); - - bool isGpu() const; - - private: - void* getSharedPtr() const; - - MatrixPrivate* m; - friend class Trainer; - friend class GradientMachine; - friend class Arguments; -}; - -struct VectorPrivate; -class Vector { - DISABLE_COPY(Vector); - Vector(); - static Vector* createByPaddleVectorPtr(void* ptr); - - void* getSharedPtr(); - - public: - ~Vector(); - - /// Create Vector filled with zero. - static Vector* createZero(size_t sz, bool useGpu = isUsingGpu()); - - /** - * Create Vector from list of float. - * - * It will create a new vector, and copy data into it. 
- */ - static Vector* create(const std::vector& data, - bool useGpu = isUsingGpu()); - - static Vector* createVectorFromNumpy( - float* data, - int dim, - bool copy = true, - bool useGpu = isUsingGpu()) throw(UnsupportError); - /** - * Create Cpu Vector from numpy array, which dtype=float32 - * - * If copy is false, it will create vector inplace. - */ - static Vector* createCpuVectorFromNumpy(float* data, - int dim, - bool copy = true); - - /// Create Gpu Vector from numpy array, which dtype=float32 - static Vector* createGpuVectorFromNumpy(float* data, int dim); - - /** - * copy from another vector - * throw(RangeError) if size of src vector is different from size of this - * vector - */ - void copyFrom(Vector* src) throw(RangeError); - - /// Cast to numpy array inplace. - void toNumpyArrayInplace(float** view_data, int* dim1) throw(UnsupportError); - - /// Copy to numpy array. - void copyToNumpyArray(float** view_m_data, int* dim1); - - /// Copy from numpy array. - void copyFromNumpyArray(float* data, int dim); - - /// __getitem__ in python - float get(const size_t idx) const throw(RangeError, UnsupportError); - - /// __setitem__ in python - void set(const size_t idx, float val) throw(RangeError, UnsupportError); - - /// Return is GPU vector or not. - bool isGpu() const; - - /// Return a list of float, the memory is alloced and copied. - FloatArray getData() const; - - /// __len__ in python - size_t getSize() const; - - private: - VectorPrivate* m; - - private: - friend class Parameter; - friend class ParameterOptimizer; - friend struct ParameterTraverseCallbackPrivate; -}; - -struct IVectorPrivate; -class IVector { - IVector(); - DISABLE_COPY(IVector); - static IVector* createByPaddleVectorPtr(void* ptr); - - public: - /// Create IVector filled with zero - static IVector* createZero(size_t sz, bool useGpu = isUsingGpu()); - - /** - * Create IVector from list of int. - * It will create a new vector, and copy data into it. 
- */ - static IVector* create(const std::vector& data, - bool useGpu = isUsingGpu()); - - static IVector* createVectorFromNumpy( - int* data, - int dim, - bool copy = true, - bool useGpu = isUsingGpu()) throw(UnsupportError); - - /** - * Create Cpu IVector from numpy array, which dtype=int32 - * - * If copy is false, it will create vector inplace - */ - static IVector* createCpuVectorFromNumpy(int* data, - int dim, - bool copy = true); - /** - * Create Gpu IVector from numpy array, which dtype=int32 - */ - static IVector* createGpuVectorFromNumpy(int* data, int dim); - - /// Cast to numpy array inplace. - void toNumpyArrayInplace(int** view_data, int* dim1) throw(UnsupportError); - - /// Copy to numpy array. - void copyToNumpyArray(int** view_m_data, int* dim1); - - /// Copy from numpy array. - void copyFromNumpyArray(int* data, int dim); - - virtual ~IVector(); - - /// Return a list of int, the memory is alloced and copied. - IntArray getData() const; - - /// This method will map to python [] method. - int& operator[](const size_t idx) throw(RangeError, UnsupportError); - - const int& operator[](const size_t idx) const - throw(RangeError, UnsupportError); - - inline int get(const size_t idx) const throw(RangeError, UnsupportError) { - return (*this)[idx]; - } - - inline void set(const size_t idx, int val) throw(RangeError, UnsupportError) { - (*this)[idx] = val; - } - - /// Return true if it is gpu vector. - bool isGpu() const; - - /// This method will map to python __len__(); - size_t getSize() const; - - private: - void* getSharedPtr() const; - - friend class Arguments; - IVectorPrivate* m; -}; - -struct ArgumentsPrivate; - -/// The Arguments is actual a std::vector in paddle. -class Arguments { - private: - Arguments(); // Internal Create. - DISABLE_COPY(Arguments); - - public: - /** - * Create a arguments with size. - * Note that it can be zero. 
- */ - static Arguments* createArguments(size_t slotNum); - - void resize(size_t slotNum); - - virtual ~Arguments(); - - /** - * Return the slot number that aguments contains. - * - * It is actually the vector's size - */ - size_t getSlotNum() const; - - /** - * The get functions of Arguments - * - * the param idx is the slot id - */ - Matrix* getSlotValue(size_t idx) const throw(RangeError); - Matrix* getSlotGrad(size_t idx) const throw(RangeError); - IVector* getSlotIds(size_t idx) const throw(RangeError); - Matrix* getSlotIn(size_t idx) const throw(RangeError); - IVector* getSlotSequenceStartPositions(size_t idx) const throw(RangeError); - IVector* getSlotSubSequenceStartPositions(size_t idx) const throw(RangeError); - IVector* getSlotSequenceDim(size_t idx) const throw(RangeError); - // End Of get functions of Arguments - - int64_t getBatchSize(size_t idx = 0) const throw(RangeError); - - /** - * The set functions of Arguments. - * - * The param idx is the slot id. - * The other param is the input Matrix or vector. - */ - void setSlotValue(size_t idx, Matrix* mat) throw(RangeError); - void setSlotGrad(size_t idx, Matrix* mat) throw(RangeError); - void setSlotIn(size_t idx, Matrix* mat) throw(RangeError); - void setSlotIds(size_t idx, IVector* vec) throw(RangeError); - void setSlotSequenceStartPositions(size_t idx, - IVector* vec) throw(RangeError); - void setSlotSubSequenceStartPositions(size_t idx, - IVector* vec) throw(RangeError); - void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError); - - /** - * Set the frame height of the idx-th Argument. - * - * @param ids The index of which Argument. - * @param h The height value. - */ - void setSlotFrameHeight(size_t idx, size_t h) throw(RangeError); - - /** - * Set the frame height of the idx-th Argument. - * - * @param ids The index of which Argument. - * @param h The height value. 
- */ - void setSlotFrameWidth(size_t idx, size_t w) throw(RangeError); - - size_t getSlotFrameHeight(size_t idx = 0) const throw(RangeError); - size_t getSlotFrameWidth(size_t idx = 0) const throw(RangeError); - - float sum() const; - - private: - static Arguments* createByPaddleArgumentVector(void* ptr); - static Arguments* createByPaddleArgument(const void* ptr); - void* getInternalArgumentsPtr() const; - - private: - ArgumentsPrivate* m; - friend class Trainer; - friend class GradientMachine; - friend class SequenceGenerator; -}; - -enum GradientMatchineCreateMode { - CREATE_MODE_NORMAL = paddle::GradientMachine::kNormal, - CREATE_MODE_SGD_SPARSE_CPU_TRAINING = - paddle::GradientMachine::kSgdSparseCpuTraining, - CREATE_MODE_TESTING = paddle::GradientMachine::kTesting -}; - -struct ParameterConfigPrivate; -class ParameterConfig { - DISABLE_COPY(ParameterConfig); - ParameterConfig(); - - /** - * Internal methods - */ - static ParameterConfig* createParameterConfigFromParameterSharedPtr( - void* ptr); - static ParameterConfig* createParameterConfigFromParameterPtr(void* ptr); - void* getRawPtr(); - - public: - ~ParameterConfig(); - - /** - * return proto buf string. - */ - std::string toProtoString() const; - - private: - ParameterConfigPrivate* m; - - private: - friend class Parameter; - friend class ParameterOptimizer; - friend struct ParameterTraverseCallbackPrivate; -}; - -struct OptimizationConfigPrivate; -class OptimizationConfig { - DISABLE_COPY(OptimizationConfig); - OptimizationConfig(); - - public: - static OptimizationConfig* createFromProtoString(const std::string& str); - ~OptimizationConfig(); - - /** - * return protobuf string. 
- */ - std::string toProtoString(); - - private: - OptimizationConfigPrivate* m; - - friend class TrainerConfig; - friend class ParameterOptimizer; - friend class ParameterUpdater; - friend class Trainer; -}; - -struct ParameterPrivate; -class Parameter { - private: - Parameter(); - DISABLE_COPY(Parameter); - - public: - virtual ~Parameter(); - - /** - * get parameter name - */ - std::string getName() const; - - /** - * get buf in Parameter - */ - Vector* getBuf(ParameterType type); - - /** - * get id - */ - size_t getID() const; - - ParameterConfig* getConfig(); - void setValueUpdated(); - - bool save(const std::string& filename) const; - - bool load(const std::string& filename) const; - - size_t getSize() const; - - private: - static Parameter* createFromRawPtr(void* ptr); - static Parameter* createFromSharedPtr(void* ptr); - - private: - ParameterPrivate* m; - friend class UpdateCallbackWrapper; - friend class GradientMachine; - friend class ParameterUpdater; -}; - -struct ModelConfigPrivate; -/** - * You can only get model config from TrainerConfig. - * - * It is used by GradientMachine. - */ -class ModelConfig { - private: - ModelConfig(); - DISABLE_COPY(ModelConfig); - - public: - virtual ~ModelConfig(); - - private: - ModelConfigPrivate* m; - friend class TrainerConfig; - friend struct TrainerConfigPrivate; - friend class GradientMachine; -}; - -struct TrainerConfigPrivate; -/** - * To get TrainerConfig from file. - * - * It is used by GradientMachine. - */ -class TrainerConfig { - private: - TrainerConfig(); - DISABLE_COPY(TrainerConfig); - - public: - virtual ~TrainerConfig(); - - static TrainerConfig* createFromTrainerConfigFile( - const std::string& configPath); - static TrainerConfig* createFromProtoString(const std::string& str); - - ModelConfig* getModelConfig() const; - - OptimizationConfig* getOptimizationConfig() const; - - private: - TrainerConfigPrivate* m; - friend class Trainer; -}; - -/** - * The callback in backword. 
- * - * You can inherit this class in python. - * - * @code - * class UpdateCallbackInPython(paddle.UpdateCallback): - * def __init__(self): - * paddle.UpdateCallback.__init__(self) - * - * def apply(self, param): - * assert isinstance(param, paddle.Parameter) - * @endcode - */ -class UpdateCallback { - public: - virtual ~UpdateCallback(); - virtual void apply(Parameter* p); -}; - -struct ParameterTraverseCallbackPrivate; -class ParameterTraverseCallback { - DISABLE_COPY(ParameterTraverseCallback); - ParameterTraverseCallback(); - - public: - ~ParameterTraverseCallback(); - - void apply(const std::vector& vecs, - const ParameterConfig& config, - size_t sparseId); - - private: - ParameterTraverseCallbackPrivate* m; - friend class ParameterOptimizer; -}; - -/** - * The ParameterOptimizer Wrapper Class. - * - * Basically same as common/ParameterOptimizer.h - */ -struct ParameterOptimizerPrivate; -class ParameterOptimizer { - DISABLE_COPY(ParameterOptimizer); - ParameterOptimizer(); - - public: - static ParameterOptimizer* create(OptimizationConfig* config); - - ~ParameterOptimizer(); - - void init(size_t numRows, const ParameterConfig* config); - - void startPass(); - - void finishPass(); - - void startBatch(size_t numSamplesProcessed); - - void finishBatch(); - - void update(const std::vector& vecs, - const ParameterConfig& conf, - size_t sparseId = NO_SPARSE_ID); - - std::vector getParameterTypes() const; - - ParameterTraverseCallback* needSpecialTraversal( - const ParameterConfig& config) const; - - private: - ParameterOptimizerPrivate* m; -}; - -class SequenceGenerator; -class Evaluator; -struct GradientMachinePrivate; -class GradientMachine { - private: - GradientMachine(); - DISABLE_COPY(GradientMachine); - - public: - virtual ~GradientMachine(); - - /** - * Create By ProtoStr. - * - * The ProtoStr can be generate by python's protobuf code. 
- */ - static GradientMachine* createByConfigProtoStr( - const std::string& protoStr, - GradientMatchineCreateMode mode = CREATE_MODE_NORMAL, - const std::vector& parameterTypes = defaultParamTypes); - - /** - * Create by ModelConfig object. - * - * To get ModelConfig, you can get TrainerConfig from config file, then get - * model config by TrainerConfig - */ - static GradientMachine* createByModelConfig( - ModelConfig* conf, - GradientMatchineCreateMode mode = CREATE_MODE_NORMAL, - const std::vector& parameterTypes = defaultParamTypes); - - /** - * @brief finish - */ - void finish(); - - void start(); - - /** - * Prefetch row ids of sparse parameter. - */ - void prefetch(const Arguments& inArgs); - - /** - * Do some thing when train pass ended. - */ - void onPassEnd(); - - /** - * The forward stage of GradientMachine. - * - * @note the outArgs could be zero length arguemnts. - * @note THIS METHOD IS VERY USEFULL FOR PREDICT FROM TRAINED MODEL. - */ - void forward(const Arguments& inArgs, Arguments* outArgs, PassType passType); - - /** - * The backward stage of GradientMachine. - * - * @note Currently the ParameterUpdater is not wrapped in SWIG, so backward - * cannot actually train a network. But you can write a update callback to - * change the parameter or implement a ParameterUpdater in python side. - */ - void backward(const UpdateCallback& callback = UpdateCallback()); - - /** - * Combine forward/backward - */ - void forwardBackward(const Arguments& inArgs, - Arguments* outArgs, - PassType passType, - const UpdateCallback& callback = UpdateCallback()); - - void loadParameters(const std::string& path); - - size_t getParameterSize() const; - Parameter* getParameter(size_t i) throw(RangeError); - - size_t getNonStaticParameterSize() const; - Parameter* getNonStaticParameter(size_t i) throw(RangeError); - - void randParameters(); - - Arguments* getLayerOutput(const std::string& layerName) const - throw(UnsupportError); - - /** - * Create a sequence generator. 
- * - * @note It just like a paddle_gen_sequence. - */ - SequenceGenerator* asSequenceGenerator( - const std::vector& dict = std::vector(), - size_t begin_id = 0UL, - size_t end_id = 0UL, - size_t max_length = 100UL, - size_t beam_size = -1UL); - - Evaluator* makeEvaluator(); - - void eval(Evaluator* evaluator); - - private: - GradientMachinePrivate* m; - - static GradientMachine* createFromPaddleModelPtr( - const void* confPtr, - GradientMatchineCreateMode mode, - const std::vector& types); - - // Not to use c++ 11 init-list, so we use static var as function default arg. - static std::vector defaultParamTypes; - friend class Trainer; - friend class ParameterUpdater; -}; - -struct ParameterUpdaterPrivate; -class ParameterUpdater { - private: - ParameterUpdater(); - - public: - static ParameterUpdater* createLocalUpdater(OptimizationConfig* config); - static ParameterUpdater* createRemoteUpdater(OptimizationConfig* config, - int passCount, - bool useSparseUpdater); - static ParameterUpdater* createNewRemoteUpdater( - OptimizationConfig* config, - const std::string pserverSpec, - const bool useEtcd) throw(UnsupportError); - ~ParameterUpdater(); - - /** - * @brief initialize Parameter Updater by GradientMachine. - * @param gm - */ - void init(const GradientMachine& gm); - - /** - * @brief begin of a training/testing of one pass. - */ - void startPass(); - - /** - * @brief end of a traning/testing of one pass. - */ - void finishPass(); - - /** - * @brief begin of a training/testing of one batch. - * @param data batch's size - * @return PassType, mostly will be training. - */ - PassType startBatch(size_t batchSize); - - /** - * @brief end of a traning/testing of one batch - * @param cost current batch cost. - */ - void finishBatch(float cost); - - /** - * @brief update a parameter (by local optimizer or by cluster pserver) - * @param param - */ - void update(Parameter* param); - - /** - * @breif only get required sparse rows by default. 
- * @param fullSize: get full matrix parameter if *fullSize* set - * @param apply: get PARAMETER_APPLY on pserver if *apply* set - */ - void getParametersRemote(bool fullSize = false, bool apply = false); - - /** - * @brief restore the average parameter. - * @note It is only used in AverageOptimizer. Restore will get the current - * PARAMETER_VALUE back. - */ - void restore(); - - /** - * @brief apply. Store the average parameter. - * @note It is only used in AverageOptimizer. Apply will store the current - * PARAMETER_VALUE to buffer, calcaualte current Average Parameter, and save - * it to PARAMETER_VALUE. - */ - void apply(); - - /** - * @brief catchUpWith The Regularization will be delayed in many situations( - * pserver, local sparse). Catch Up means catch the regularization up, apply - * regularization to all params. - */ - void catchUpWith(); - - private: - ParameterUpdaterPrivate* m; -}; - -struct EvaluatorPrivate; -class Evaluator { - private: - Evaluator(); - DISABLE_COPY(Evaluator); - - public: - ~Evaluator(); - - /** - * @brief begin an evaluate stage. - */ - void start(); - - /** - * @brief end an evaluate stage. - */ - void finish(); - - /** - * @brief toString will get a evaluate result. - * - * __repr__ method in python - */ - std::string toString(); - - std::vector getNames() const; - - double getValue(const std::string name) const; - - private: - EvaluatorPrivate* m; - - friend class GradientMachine; -}; - -struct TrainerPrivate; -class Trainer { - private: - TrainerPrivate* m; - Trainer(); - Trainer(TrainerConfig* optConfig, GradientMachine* gm); - DISABLE_COPY(Trainer); - - public: - virtual ~Trainer(); - - /// Create A Trainer By TrainerConfig. using paddle command line. - static Trainer* createByCommandLine() throw(IOError); - - static Trainer* create(TrainerConfig* optConfig, - GradientMachine* gm) throw(IOError); - - /// Start training - void startTrain(); - - /// Finish training - void finishTrain(); - - /// Start a pass. 
- void startTrainPass(); - - /// Finish a pass - void finishTrainPass(); - - /** - * Train one batch, - * - * @return true if all batch finished. - */ - bool trainOneBatch(size_t batchSize); - - void trainOneDataBatch(size_t batchSize, const Arguments& args); - - void startTestPeriod(); - void testOneDataBatch(size_t batchSize, const Arguments& args); - void finishTestPeriod(); - - void forwardOneBatch(size_t batchSize); - - Arguments* getForwardOutput(); - - Arguments* getLayerOutput(const std::string& layerName) const; -}; - -/// the N-Best results generated from one input sequence. -class ISequenceResults { - public: - virtual ~ISequenceResults(); - - /// Number of result. - virtual size_t getSize() const = 0; - - /** - * Get sentence from dictionary. - * - * @param id the index of result. - * @param split if true, the return sentence will be splited with ' ' by - * each word. Default is false. - */ - virtual std::string getSentence(size_t id, bool split = false) const - throw(RangeError) = 0; - virtual std::vector getSequence(size_t id) const throw(RangeError) = 0; - virtual float getScore(size_t id) const throw(RangeError) = 0; -}; - -struct SequenceGeneratorPrivate; -class SequenceGenerator { - DISABLE_COPY(SequenceGenerator); - SequenceGenerator(); - - public: - virtual ~SequenceGenerator(); - - /** - * Generate Sequence by input. - * - * @note The inArgs is just one sequence of data. - * @note The return will get a N-best generate result by inArgs. - * Sort by score. 
- */ - ISequenceResults* generateSequence(const Arguments& inArgs) const; - - void setDict(const std::vector& dict); - void setBos(size_t bos); - void setEos(size_t eos); - void setMaxLength(size_t maxlength); - void setBeamSize(size_t beamSize); - - private: - static SequenceGenerator* createByGradientMachineSharedPtr(void* ptr); - friend class GradientMachine; - - private: - SequenceGeneratorPrivate* m; -}; diff --git a/paddle/legacy/api/PaddleAPIPrivate.h b/paddle/legacy/api/PaddleAPIPrivate.h deleted file mode 100644 index 3ee192c31d597c4b4575e4a53a4aece09e642831..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/PaddleAPIPrivate.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#pragma once -#include -#include "PaddleAPI.h" -#include "paddle/legacy/gserver/evaluators/Evaluator.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" -#include "paddle/legacy/parameter/ParameterUpdaterBase.h" -#include "paddle/legacy/trainer/TrainerConfigHelper.h" - -struct GradientMachinePrivate { - std::shared_ptr machine; - - template - inline T& cast(void* ptr) { - return *(T*)(ptr); - } -}; - -struct OptimizationConfigPrivate { - std::shared_ptr trainer_config; - paddle::OptimizationConfig config; - - const paddle::OptimizationConfig& getConfig() { - if (trainer_config != nullptr) { - return trainer_config->getOptConfig(); - } else { - return config; - } - } -}; - -struct TrainerConfigPrivate { - std::shared_ptr conf; - TrainerConfigPrivate() {} -}; - -struct ModelConfigPrivate { - std::shared_ptr conf; -}; - -struct ArgumentsPrivate { - std::vector outputs; - - inline paddle::Argument& getArg(size_t idx) throw(RangeError) { - if (idx < outputs.size()) { - return outputs[idx]; - } else { - RangeError e; - throw e; - } - } - - template - std::shared_ptr& cast(void* rawPtr) const { - return *(std::shared_ptr*)(rawPtr); - } -}; - -struct ParameterUpdaterPrivate { - std::unique_ptr updater; -}; - -struct ParameterPrivate { - std::shared_ptr sharedPtr; - paddle::Parameter* rawPtr; // rawPtr only used in ParameterUpdater, - // in other situation sharedPtr should - // contains value. 
- - ParameterPrivate() : sharedPtr(nullptr), rawPtr(nullptr) {} - - paddle::Parameter* getPtr() { - if (sharedPtr) { - return sharedPtr.get(); - } else { - return rawPtr; - } - } -}; - -struct EvaluatorPrivate { - paddle::Evaluator* rawPtr; - - EvaluatorPrivate() : rawPtr(nullptr) {} - ~EvaluatorPrivate() { delete rawPtr; } -}; diff --git a/paddle/legacy/api/Parameter.cpp b/paddle/legacy/api/Parameter.cpp deleted file mode 100644 index f05740eb750cccd8cfb6cbc826a04585ec06822e..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Parameter.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/parameter/Parameter.h" -#include "PaddleAPI.h" -#include "PaddleAPIPrivate.h" - -Parameter::Parameter() : m(new ParameterPrivate()) {} - -Parameter::~Parameter() { delete m; } - -Parameter* Parameter::createFromRawPtr(void* ptr) { - auto p = new Parameter(); - p->m->rawPtr = *static_cast(ptr); - return p; -} - -Parameter* Parameter::createFromSharedPtr(void* ptr) { - auto& p = *(paddle::ParameterPtr*)(ptr); - if (p == nullptr) { - return nullptr; - } else { - auto retParam = new Parameter(); - retParam->m->sharedPtr = p; - return retParam; - } -} - -std::string Parameter::getName() const { return m->getPtr()->getName(); } - -Vector* Parameter::getBuf(ParameterType type) { - auto buf = m->getPtr()->getBuf(type); - return Vector::createByPaddleVectorPtr(&buf); -} - -ParameterConfig* Parameter::getConfig() { - if (m->sharedPtr) { - return ParameterConfig::createParameterConfigFromParameterSharedPtr( - &m->sharedPtr); - } else { - return ParameterConfig::createParameterConfigFromParameterPtr(m->rawPtr); - } -} - -size_t Parameter::getID() const { return m->getPtr()->getID(); } - -void Parameter::setValueUpdated() { m->getPtr()->setValueUpdated(); } - -bool Parameter::save(const std::string& filename) const { - return m->getPtr()->save(filename); -} - -bool Parameter::load(const std::string& filename) const { - return m->getPtr()->load(filename); -} - -size_t Parameter::getSize() const { return m->getPtr()->getSize(); } diff --git a/paddle/legacy/api/ParameterOptimizer.cpp b/paddle/legacy/api/ParameterOptimizer.cpp deleted file mode 100644 index 477d9dae44362f9073639093c3c4d1cf0ac12044..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/ParameterOptimizer.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/parameter/ParameterOptimizer.h" -#include -#include "Internal.h" -#include "PaddleAPI.h" -#include "PaddleAPIPrivate.h" - -struct ParameterOptimizerPrivate { - std::unique_ptr optimizer; -}; - -struct ParameterTraverseCallbackPrivate { - paddle::ParameterOptimizer::TraverseCallback callback; - - ParameterTraverseCallbackPrivate() {} - - ParameterTraverseCallbackPrivate( - const paddle::ParameterOptimizer::TraverseCallback& callback) - : callback(callback) {} - - void apply(const std::vector& vecs, - const ParameterConfig& conf, - size_t sparseId) { - std::vector real_vecs; - real_vecs.resize(vecs.size()); - std::transform(vecs.begin(), vecs.end(), real_vecs.begin(), [](Vector* v) { - if (v) { - return *(paddle::VectorPtr*)(v->getSharedPtr()); - } else { - return paddle::VectorPtr(); - } - }); - - paddle::ParameterConfig& real_conf = - *(paddle::ParameterConfig*)(const_cast(conf) - .getRawPtr()); - callback(real_vecs.data(), real_conf, sparseId); - } -}; - -ParameterOptimizer::ParameterOptimizer() : m(new ParameterOptimizerPrivate()) {} - -ParameterOptimizer::~ParameterOptimizer() { delete m; } - -ParameterOptimizer* ParameterOptimizer::create(OptimizationConfig* config) { - CHECK(config != nullptr); - auto retOptimizer = new ParameterOptimizer(); - retOptimizer->m->optimizer.reset( - paddle::ParameterOptimizer::create(config->m->getConfig(), false)); - return retOptimizer; -} - -void ParameterOptimizer::init(size_t numRows, const ParameterConfig* config) { - auto& conf = *(paddle::ParameterConfig*)(const_cast(config) - ->getRawPtr()); - 
m->optimizer->init(numRows, &conf); -} - -void ParameterOptimizer::startPass() { m->optimizer->startPass(); } - -void ParameterOptimizer::finishPass() { m->optimizer->finishPass(); } - -void ParameterOptimizer::startBatch(size_t numSamplesProcessed) { - constexpr size_t high_1 = 1UL << (sizeof(size_t) * 8 - 1); - CHECK_EQ(numSamplesProcessed & high_1, 0UL); // Safely cast. - m->optimizer->startBatch((int64_t)numSamplesProcessed); -} - -void ParameterOptimizer::finishBatch() { m->optimizer->finishBatch(); } - -void ParameterOptimizer::update(const std::vector& vecs, - const ParameterConfig& conf, - size_t sparseId) { - ParameterTraverseCallbackPrivate invoker( - [&](const paddle::VectorPtr _vecs[], - const paddle::ParameterConfig& config, - size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); }); - invoker.apply(vecs, conf, sparseId); -} - -std::vector ParameterOptimizer::getParameterTypes() const { - std::vector returnValue; - staticCastVector(&returnValue, m->optimizer->getParameterTypes()); - return returnValue; -} - -ParameterTraverseCallback::ParameterTraverseCallback() - : m(new ParameterTraverseCallbackPrivate()) {} - -ParameterTraverseCallback::~ParameterTraverseCallback() { delete m; } - -void ParameterTraverseCallback::apply(const std::vector& vecs, - const ParameterConfig& conf, - size_t sparseId) { - m->apply(vecs, conf, sparseId); -} - -ParameterTraverseCallback* ParameterOptimizer::needSpecialTraversal( - const ParameterConfig& config) const { - auto& param_config = - *(paddle::ParameterConfig*)const_cast(config) - .getRawPtr(); - auto callback = m->optimizer->needSpecialTraversal(param_config); - if (callback) { - auto retCallback = new ParameterTraverseCallback(); - retCallback->m->callback = callback; - return retCallback; - } else { - return nullptr; - } -} diff --git a/paddle/legacy/api/ParameterUpdater.cpp b/paddle/legacy/api/ParameterUpdater.cpp deleted file mode 100644 index 
44af3f4635f2bda07d0079faff0bbc1ec7ed3954..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/ParameterUpdater.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PaddleAPI.h" - -#include "PaddleAPIPrivate.h" -#ifndef PADDLE_WITHOUT_GOLANG -#include "paddle/legacy/trainer/NewRemoteParameterUpdater.h" -#endif -#include "paddle/legacy/trainer/RemoteParameterUpdater.h" -#include "paddle/legacy/trainer/ThreadParameterUpdater.h" - -ParameterUpdater::ParameterUpdater() : m(new ParameterUpdaterPrivate()) {} - -ParameterUpdater *ParameterUpdater::createLocalUpdater( - OptimizationConfig *config) { - auto updater = new ParameterUpdater(); - updater->m->updater.reset( - new paddle::SgdThreadUpdater(config->m->getConfig())); - return updater; -} - -ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( - OptimizationConfig *config, - const std::string pserverSpec, - const bool useEtcd) throw(UnsupportError) { -#ifndef PADDLE_WITHOUT_GOLANG - auto updater = new ParameterUpdater(); - updater->m->updater.reset(new paddle::NewRemoteParameterUpdater( - config->m->getConfig(), pserverSpec, useEtcd)); - return updater; -#else - throw UnsupportError("not compiled with WITH_GOLANG"); -#endif -} - -ParameterUpdater *ParameterUpdater::createRemoteUpdater( - OptimizationConfig *config, int passCount, bool useSparseUpdater) { - auto updater = new ParameterUpdater(); - auto 
remoteUpdater = new paddle::RemoteParameterUpdater( - config->m->getConfig(), passCount, nullptr); - if (useSparseUpdater) { - std::unique_ptr remoteUpdaterPtr(remoteUpdater); - auto sparseRemoteUpdater = - new paddle::SparseRemoteParameterUpdaterComposite( - config->m->getConfig(), - passCount, - false, - std::move(remoteUpdaterPtr)); - updater->m->updater.reset(sparseRemoteUpdater); - } else { - updater->m->updater.reset(remoteUpdater); - } - return updater; -} - -ParameterUpdater::~ParameterUpdater() { delete m; } - -void ParameterUpdater::init(const GradientMachine &gm) { - m->updater->init(gm.m->machine->getNonStaticParameters()); -} - -void ParameterUpdater::startPass() { m->updater->startPass(); } - -void ParameterUpdater::finishPass() { m->updater->finishPass(); } - -PassType ParameterUpdater::startBatch(size_t batchSize) { - return m->updater->startBatch((int64_t)batchSize); -} - -void ParameterUpdater::finishBatch(float cost) { - m->updater->finishBatch(cost); -} - -void ParameterUpdater::update(Parameter *param) { - auto paddleParam = param->m->getPtr(); - m->updater->update(paddleParam); -} - -void ParameterUpdater::getParametersRemote(bool fullSize, bool apply) { - m->updater->getParametersRemote(fullSize, apply); -} - -void ParameterUpdater::restore() { m->updater->restore(); } - -void ParameterUpdater::apply() { m->updater->apply(); } - -void ParameterUpdater::catchUpWith() { m->updater->catchUpWith(); } diff --git a/paddle/legacy/api/SequenceGenerator.cpp b/paddle/legacy/api/SequenceGenerator.cpp deleted file mode 100644 index 2a73228f6d4770d9be31defd7a5dc217fc5c21f2..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/SequenceGenerator.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include "PaddleAPI.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" -#include "paddle/legacy/parameter/Argument.h" -#include "paddle/legacy/utils/Flags.h" - -// used to represent partial sequence -struct Path { - std::vector ids; - float logProb; - paddle::MachineState machineState; - - Path() { logProb = 0; } - - Path(std::vector& ids, float logProb, paddle::MachineState& machineState) - : ids(ids), logProb(logProb), machineState(machineState) {} - - bool operator<(const Path& other) const { return (logProb > other.logProb); } -}; - -// Return top k (k == beam_size) optimal paths using beam search. The last -// element of inArgs is the Argument of feedback. 
gradMachine has MaxIdLayer -// as output and outArgs thus stores top k labels and their probabilities per -// position -static void findNBest(paddle::GradientMachine* gradMachine, - std::vector& inArgs, - std::vector& finalPaths, - size_t bos_id, - size_t eos_id, - size_t max_length) { - std::vector paths; - Path emptyPath; - paths.push_back(emptyPath); - finalPaths.clear(); - gradMachine->resetState(); - paddle::Argument feedback = inArgs.back(); - feedback.ids->setElement(0, (int)(bos_id)); - float minFinalPathLogProb = 0; - size_t beam = 0; - int id; - std::vector outArgs; - while (true) { // iterate over each generated word - std::vector newPaths; - paddle::MachineState machineState; - for (size_t j = 0; j < paths.size(); j++) { - Path& path = paths[j]; - if (path.machineState.size() > 0) { - gradMachine->setState(path.machineState); - feedback.ids->setElement(0, path.ids.back()); - } - gradMachine->forward(inArgs, &outArgs, paddle::PASS_TEST); - gradMachine->getState(machineState); - beam = outArgs[0].ids->getSize(); - for (size_t k = 0; k < beam; k++) { - id = outArgs[0].ids->getElement(k); - float prob = outArgs[0].in->getElement(0, k); - std::vector nids(path.ids); - nids.push_back(id); - float newLogProb = path.logProb + log(prob); - Path newPath(nids, newLogProb, machineState); - if (id == (int)eos_id || nids.size() >= max_length) { - finalPaths.push_back(newPath); - if (minFinalPathLogProb > newPath.logProb) { - minFinalPathLogProb = newPath.logProb; - } - } else { - newPaths.push_back(newPath); - } - } - } - - if (newPaths.size() == 0) { - break; - } - std::nth_element(newPaths.begin(), - newPaths.begin() + std::min(beam, newPaths.size()), - newPaths.end()); - if (newPaths.size() > beam) { - newPaths.resize(beam); - } - // pathA < pathB means pathA.logProb > pathB.logProb - float maxPathLogProb = - std::min_element(newPaths.begin(), newPaths.end())->logProb; - if (finalPaths.size() >= beam && minFinalPathLogProb >= maxPathLogProb) { - break; - } - paths 
= newPaths; - } // end while - - std::partial_sort(finalPaths.begin(), - finalPaths.begin() + std::min(beam, finalPaths.size()), - finalPaths.end()); - if (finalPaths.size() > beam) { - finalPaths.resize(beam); - } -} - -struct SequenceGeneratorPrivate { - std::shared_ptr machine; - std::shared_ptr> dict; - size_t beginPos; - size_t endPos; - size_t maxLength; - - paddle::Argument feedback; - - template - inline T& cast(void* ptr) { - return *(T*)(ptr); - } - - inline void findNBest(std::vector& inArgs, - std::vector& path) { - ::findNBest(machine.get(), inArgs, path, beginPos, endPos, maxLength); - } - - SequenceGeneratorPrivate() - : dict(std::make_shared>()), - beginPos(0UL), - endPos(0UL), - maxLength(0UL), - feedback(__create_feedback__()) {} - - private: - static paddle::Argument __create_feedback__() { - paddle::Argument feedback; - feedback.ids = paddle::IVector::create(/* size= */ 1, FLAGS_use_gpu); - - feedback.sequenceStartPositions = - paddle::ICpuGpuVector::create(/* size= */ 2, /* useGpu= */ false); - feedback.sequenceStartPositions->getMutableData(false)[0] = 0; - feedback.sequenceStartPositions->getMutableData(false)[1] = 1; - return feedback; - } -}; - -SequenceGenerator::SequenceGenerator() : m(new SequenceGeneratorPrivate()) {} - -SequenceGenerator::~SequenceGenerator() { delete m; } - -class PathSequenceResults : public ISequenceResults { - // ISequenceResults interface - public: - PathSequenceResults(const std::shared_ptr>& path, - const std::shared_ptr>& dict) - : path_(path), dict_(dict) {} - - size_t getSize() const { return path_->size(); } - std::string getSentence(size_t id, bool split) const throw(RangeError) { - if (id < getSize()) { - Path& p = (*path_)[id]; - std::ostringstream sout; - std::transform(p.ids.begin(), - p.ids.end(), - std::ostream_iterator(sout, split ? 
" " : ""), - [&](int id) { return (*dict_)[id]; }); - return sout.str(); - } else { - RangeError e; - throw e; - } - } - std::vector getSequence(size_t id) const throw(RangeError) { - if (id < getSize()) { - Path& p = (*path_)[id]; - return p.ids; - } else { - RangeError e; - throw e; - } - } - float getScore(size_t id) const throw(RangeError) { - if (id < getSize()) { - Path& p = (*path_)[id]; - return p.logProb; - } else { - RangeError e; - throw e; - } - } - - private: - std::shared_ptr> path_; - std::shared_ptr> dict_; -}; - -ISequenceResults* SequenceGenerator::generateSequence( - const Arguments& inArgs) const { - auto& in_args = - m->cast>(inArgs.getInternalArgumentsPtr()); - for (auto& arg : in_args) { - arg.sequenceStartPositions = m->feedback.sequenceStartPositions; - } - in_args.push_back(m->feedback); - auto path = std::make_shared>(); - m->findNBest(in_args, *path); - return new PathSequenceResults(path, m->dict); -} - -SequenceGenerator* SequenceGenerator::createByGradientMachineSharedPtr( - void* ptr) { - SequenceGenerator* r = new SequenceGenerator(); - r->m->machine = r->m->cast>(ptr); - return r; -} - -void SequenceGenerator::setDict(const std::vector& dict) { - *m->dict = dict; -} - -void SequenceGenerator::setBos(size_t bos) { m->beginPos = bos; } - -void SequenceGenerator::setEos(size_t eos) { m->endPos = eos; } - -void SequenceGenerator::setMaxLength(size_t maxLength) { - m->maxLength = maxLength; -} - -void SequenceGenerator::setBeamSize(size_t beamSize) { - if (beamSize != -1UL) { - FLAGS_beam_size = beamSize; - } -} - -ISequenceResults::~ISequenceResults() {} diff --git a/paddle/legacy/api/Trainer.cpp b/paddle/legacy/api/Trainer.cpp deleted file mode 100644 index e7c607201b0b946a6d6b2f3da35356e2c4e5e15e..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Trainer.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PaddleAPI.h" -#include "PaddleAPIPrivate.h" - -#include -#include -#include - -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/legacy/trainer/ParamUtil.h" -#include "paddle/legacy/trainer/Trainer.h" -#include "paddle/legacy/trainer/TrainerInternal.h" -#include "paddle/legacy/utils/Flags.h" - -using paddle::real; - -DECLARE_string(config); -DECLARE_string(init_model_path); -DECLARE_int32(start_pass); - -struct TrainerPrivate : public paddle::Trainer { - bool _trainOneBatch(size_t batchSize); - bool forwardOneBatch(size_t batchSize); - void forwardOneDataBatch(const std::vector& inArgs); - void setBatchSize(size_t batchSize); - std::vector& getForwardOutput(); - - void startTestPeriod(); - void finishTestPeriod(); - void testOneDataBatch(const paddle::DataBatch& dataBatch); - TrainerPrivate() : paddle::Trainer() {} -}; - -Trainer::Trainer() : m(new TrainerPrivate()) { - auto conf = paddle::TrainerConfigHelper::createFromFlags(); - if (conf != nullptr) { - m->init(conf); - } -} - -Trainer::~Trainer() { delete m; } - -Trainer* Trainer::createByCommandLine() throw(IOError) { - auto retv = new Trainer(); - if (retv->m->getConfig().IsInitialized()) { - return retv; - } else { - throw IOError(); - } -} - -Trainer::Trainer(TrainerConfig* config, GradientMachine* gm) - : m(new TrainerPrivate()) { - m->init(config->m->conf, /* testing= */ false, gm ? 
gm->m->machine : nullptr); -} - -Trainer* Trainer::create(TrainerConfig* config, - GradientMachine* gm) throw(IOError) { - auto retv = new Trainer(config, gm); - if (retv->m->getConfig().IsInitialized()) { - return retv; - } else { - retv->m->getConfig().CheckInitialized(); - throw IOError(); - } -} - -void Trainer::startTrain() { m->startTrain(); } - -void Trainer::finishTrain() { m->finishTrain(); } - -void Trainer::startTrainPass() { m->startTrainPass(); } - -void Trainer::finishTrainPass() { m->finishTrainPass(); } - -void Trainer::trainOneDataBatch(size_t batchSize, const Arguments& inArgs) { - paddle::DataBatch dataBatch; - dataBatch.getStreams() = inArgs.m->outputs; - dataBatch.setSize(batchSize); - m->trainOneDataBatch(dataBatch); -} - -bool Trainer::trainOneBatch(size_t batchSize) { - return m->_trainOneBatch(batchSize); -} - -bool TrainerPrivate::_trainOneBatch(size_t batchSize) { - paddle::DataBatch dataBatch; - CHECK(dataProvider_) << "data_provider is not specified"; - int num = dataProvider_->getNextBatch(batchSize, &dataBatch); - if (num == 0) { - return false; - } - trainOneDataBatch(dataBatch); - return false; -} - -void TrainerPrivate::startTestPeriod() { - if (!tester_) { - createTester(); - } - tester_->startTestPeriod(); -} - -void Trainer::startTestPeriod() { m->startTestPeriod(); } - -void TrainerPrivate::testOneDataBatch(const paddle::DataBatch& dataBatch) { - tester_->testOneDataBatch(dataBatch, &forwardOutput_); -} - -void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) { - paddle::DataBatch dataBatch; - dataBatch.getStreams() = args.m->outputs; - dataBatch.setSize(batchSize); - m->testOneDataBatch(dataBatch); -} - -void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); } -void Trainer::finishTestPeriod() { m->finishTestPeriod(); } - -Arguments* Trainer::getLayerOutput(const std::string& layerName) const { - auto nn = this->m->getGradientMachine(); - CHECK(nn) << "trainerInternal_.getGradientMachine() is 
not NeuralNetwork"; - auto arg = nn->getLayerOutput(layerName); - return Arguments::createByPaddleArgument(&arg); -} - -void Trainer::forwardOneBatch(size_t batchSize) { - m->forwardOneBatch(batchSize); -} - -bool TrainerPrivate::forwardOneBatch(size_t batchSize) { - CHECK(dataProvider_) << "data_provider is not specified"; - paddle::DataBatch dataBatch; - int num = dataProvider_->getNextBatch(batchSize, &dataBatch); - if (num == 0) { - return false; - } - - forwardOneDataBatch(dataBatch.getStreams()); - return true; -} - -void TrainerPrivate::forwardOneDataBatch( - const std::vector& inArgs) { - std::vector& outArgs = forwardOutput_; - - if (config_->getOptConfig().use_sparse_remote_updater()) { - trainerInternal_.getGradientMachine()->prefetch(inArgs); - trainerInternal_.getParameterUpdater()->getParametersRemote(); - } - trainerInternal_.getGradientMachine()->forward( - inArgs, &outArgs, paddle::PASS_TEST); -} - -Arguments* Trainer::getForwardOutput() { - return Arguments::createByPaddleArgumentVector(&m->getForwardOutput()); -} - -std::vector& TrainerPrivate::getForwardOutput() { - return forwardOutput_; -} diff --git a/paddle/legacy/api/Util.cpp b/paddle/legacy/api/Util.cpp deleted file mode 100644 index b458c4d90ecc7333066f887dcbc93c4da5c43853..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Util.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "PaddleAPI.h" - -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/PythonUtil.h" -#include "paddle/legacy/utils/Util.h" - -#include -#include -#include - -void initPaddle(int argc, char** argv) { - paddle::initMain(argc, argv); - paddle::initPython(argc, argv); - feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW); -} - -FloatArray::FloatArray(const float* b, const size_t l) - : buf(b), length(l), needFree(false) {} - -IntArray::IntArray(const int* b, const size_t l, bool f) - : buf(b), length(l), needFree(f) {} - -IntWithFloatArray::IntWithFloatArray(const float* v, - const int* i, - size_t l, - bool f) - : valBuf(v), idxBuf(i), length(l), needFree(f) {} - -bool isUsingGpu() { return FLAGS_use_gpu; } - -void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; } - -bool isGpuVersion() { -#ifndef PADDLE_WITH_CUDA - return false; -#else - return true; -#endif -} - -int getTrainerCount() { return FLAGS_trainer_count; } - -static_assert(NUM_PARAMETER_TYPES == paddle::NUM_PARAMETER_TYPES, - "The Parameter Type should be same in core/api and core/common"); diff --git a/paddle/legacy/api/Vector.cpp b/paddle/legacy/api/Vector.cpp deleted file mode 100644 index 73b6d3a15d6d0ddc80a17846604d9500d8f7e4e3..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/Vector.cpp +++ /dev/null @@ -1,304 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PaddleAPI.h" - -#include "paddle/legacy/math/Vector.h" - -#include - -struct IVectorPrivate { - paddle::IVectorPtr vec; -}; - -IVector::IVector() : m(new IVectorPrivate()) {} - -IVector* IVector::createZero(size_t sz, bool useGpu) { - auto v = new IVector(); - v->m->vec = paddle::IVector::create(sz, useGpu); - v->m->vec->zeroMem(); - return v; -} - -IVector* IVector::create(const std::vector& data, bool useGpu) { - auto v = new IVector(); - v->m->vec = paddle::IVector::create(data.size(), useGpu); - v->m->vec->copyFrom(data.data(), data.size()); - return v; -} - -IVector* IVector::createVectorFromNumpy(int* data, - int dim, - bool copy, - bool useGpu) throw(UnsupportError) { - if (useGpu) { - /// if use gpu only copy=true is supported - if (!copy) { - throw UnsupportError("Gpu mode only supports copy=True"); - } - return IVector::createGpuVectorFromNumpy(data, dim); - } else { - return IVector::createCpuVectorFromNumpy(data, dim, copy); - } -} - -IVector* IVector::createCpuVectorFromNumpy(int* data, int dim, bool copy) { - auto v = new IVector(); - if (copy) { - v->m->vec = paddle::IVector::create(dim, false); - v->m->vec->copyFrom(data, dim); - } else { - v->m->vec = paddle::IVector::create(data, dim, false); - } - return v; -} - -IVector* IVector::createGpuVectorFromNumpy(int* data, int dim) { - auto v = new IVector(); - v->m->vec = paddle::IVector::create(dim, true); - v->m->vec->copyFrom(data, dim); - return v; -} - -bool IVector::isGpu() const { - return dynamic_cast(m->vec.get()) != nullptr; -} - -IntArray IVector::getData() const { - if (this->isGpu()) { - int* src = m->vec->getData(); - size_t len = m->vec->getSize(); - int* dest = new int[len]; - hl_memcpy_device2host(dest, src, len * sizeof(int)); - return IntArray(dest, len, true); - } else { - return IntArray(m->vec->getData(), m->vec->getSize()); - } -} - -int& IVector::operator[](const 
size_t idx) throw(RangeError, UnsupportError) { - if (this->isGpu()) { - UnsupportError e; - throw e; - } else { - if (idx >= m->vec->getSize()) { - RangeError e; - throw e; - } - } - return m->vec->getData()[idx]; -} - -const int& IVector::operator[](const size_t idx) const - throw(RangeError, UnsupportError) { - return (*const_cast(this))[idx]; -} - -IVector* IVector::createByPaddleVectorPtr(void* ptr) { - auto* p = (paddle::IVectorPtr*)ptr; - if ((*p) != nullptr) { - IVector* vec = new IVector(); - vec->m->vec = *p; - return vec; - } else { - return nullptr; - } -} - -IVector::~IVector() { delete m; } - -void* IVector::getSharedPtr() const { return &m->vec; } - -size_t IVector::getSize() const { return m->vec->getSize(); } - -void IVector::toNumpyArrayInplace(int** data, int* dim1) throw(UnsupportError) { - auto v = std::dynamic_pointer_cast(m->vec); - if (v) { - *data = v->getData(); - *dim1 = v->getSize(); - } else { - throw UnsupportError(); - } -} - -void IVector::copyToNumpyArray(int** view_m_data, int* dim1) { - *dim1 = m->vec->getSize(); - *view_m_data = new int[*dim1]; - if (auto cpuVec = dynamic_cast(m->vec.get())) { - std::memcpy(*view_m_data, cpuVec->getData(), sizeof(int) * (*dim1)); - } else if (auto gpuVec = dynamic_cast(m->vec.get())) { - hl_memcpy_device2host( - *view_m_data, gpuVec->getData(), sizeof(int) * (*dim1)); - } else { - LOG(INFO) << "Unexpected situation"; - } -} - -void IVector::copyFromNumpyArray(int* data, int dim) { - m->vec->resize(dim); - m->vec->copyFrom(data, dim); -} - -struct VectorPrivate { - paddle::VectorPtr vec; - - void safeAccessData(const size_t idx, - const std::function& func) const - throw(RangeError, UnsupportError) { - auto cpuVec = std::dynamic_pointer_cast(vec); - if (cpuVec != nullptr) { - if (idx < vec->getSize()) { - func(vec->getData()[idx]); - } else { - throw RangeError(); - } - } else { - throw UnsupportError(); - } - } -}; - -Vector::Vector() : m(new VectorPrivate()) {} - -Vector::~Vector() { delete m; } 
- -Vector* Vector::createZero(size_t sz, bool useGpu) { - auto retVec = new Vector(); - retVec->m->vec = paddle::Vector::create(sz, useGpu); - retVec->m->vec->zero(); - return retVec; -} - -Vector* Vector::create(const std::vector& data, bool useGpu) { - auto retVec = new Vector(); - retVec->m->vec = paddle::Vector::create(data.size(), useGpu); - retVec->m->vec->copyFrom(data.data(), data.size()); - return retVec; -} - -Vector* Vector::createByPaddleVectorPtr(void* ptr) { - auto& v = *(paddle::VectorPtr*)(ptr); - if (v == nullptr) { - return nullptr; - } else { - auto retVec = new Vector(); - retVec->m->vec = v; - return retVec; - } -} - -Vector* Vector::createVectorFromNumpy(float* data, - int dim, - bool copy, - bool useGpu) throw(UnsupportError) { - if (useGpu) { - /// if use gpu only copy=True is supported - if (!copy) { - throw UnsupportError("Gpu mode only supports copy=True"); - } - return Vector::createGpuVectorFromNumpy(data, dim); - } else { - return Vector::createCpuVectorFromNumpy(data, dim, copy); - } -} - -Vector* Vector::createCpuVectorFromNumpy(float* data, int dim, bool copy) { - CHECK_GT(dim, 0); - auto retVec = new Vector(); - if (copy) { - retVec->m->vec = paddle::Vector::create((size_t)dim, false); - retVec->m->vec->copyFrom(data, dim); - } else { - retVec->m->vec = paddle::Vector::create(data, (size_t)dim, false); - } - return retVec; -} - -Vector* Vector::createGpuVectorFromNumpy(float* data, int dim) { - CHECK_GT(dim, 0); - auto retVec = new Vector(); - retVec->m->vec = paddle::Vector::create((size_t)dim, true); - retVec->m->vec->copyFrom(data, (size_t)dim); - return retVec; -} - -void Vector::toNumpyArrayInplace(float** view_data, - int* dim1) throw(UnsupportError) { - auto v = std::dynamic_pointer_cast(m->vec); - if (v != nullptr) { - *view_data = v->getData(); - *dim1 = (int)v->getSize(); - } else { - throw UnsupportError(); - } -} - -void Vector::copyToNumpyArray(float** view_m_data, int* dim1) { - *dim1 = m->vec->getSize(); - 
*view_m_data = new float[*dim1]; - if (auto cpuVec = dynamic_cast(m->vec.get())) { - std::memcpy(*view_m_data, cpuVec->getData(), sizeof(float) * (*dim1)); - } else if (auto gpuVec = dynamic_cast(m->vec.get())) { - hl_memcpy_device2host( - *view_m_data, gpuVec->getData(), sizeof(float) * (*dim1)); - } else { - LOG(INFO) << "Unexpected situation"; - } -} - -void Vector::copyFromNumpyArray(float* data, int dim) { - m->vec->resize(dim); - m->vec->copyFrom(data, dim); -} - -FloatArray Vector::getData() const { - if (this->isGpu()) { - float* src = m->vec->getData(); - size_t len = m->vec->getSize(); - float* dest = new float[len]; - hl_memcpy_device2host(dest, src, len * sizeof(float)); - FloatArray ret_val(dest, len); - ret_val.needFree = true; - return ret_val; - } else { - FloatArray ret_val(m->vec->getData(), m->vec->getSize()); - return ret_val; - } -} - -void Vector::copyFrom(Vector* src) throw(RangeError) { - if (src->m->vec->getSize() != m->vec->getSize()) { - throw RangeError(); - } - m->vec->copyFrom(*src->m->vec); -} - -bool Vector::isGpu() const { - return std::dynamic_pointer_cast(m->vec) != nullptr; -} - -float Vector::get(const size_t idx) const throw(RangeError, UnsupportError) { - float r; - m->safeAccessData(idx, [&](float& o) { r = o; }); - return r; -} - -void Vector::set(const size_t idx, float val) throw(RangeError, - UnsupportError) { - m->safeAccessData(idx, [&](float& o) { o = val; }); -} - -size_t Vector::getSize() const { return m->vec->getSize(); } - -void* Vector::getSharedPtr() { return &m->vec; } diff --git a/paddle/legacy/api/__init__.py b/paddle/legacy/api/__init__.py deleted file mode 100644 index f662d6826321eb840739382558f76327d27b5847..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/paddle/legacy/api/numpy.i b/paddle/legacy/api/numpy.i deleted file mode 100644 index 2ddc11de7a40d11a78e2d242f8b4badc9f629f12..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/numpy.i +++ /dev/null @@ -1,3161 +0,0 @@ -/* -*- C -*- (not really, but good for syntax highlighting) */ - -/* - * Copyright (c) 2005-2015, NumPy Developers. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * * Neither the name of the NumPy Developers nor the names of any - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef SWIGPYTHON - -%{ -#ifndef SWIG_FILE_WITH_INIT -#define NO_IMPORT_ARRAY -#endif -#include "stdio.h" -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include -%} - -/**********************************************************************/ - -%fragment("NumPy_Backward_Compatibility", "header") -{ -%#if NPY_API_VERSION < 0x00000007 -%#define NPY_ARRAY_DEFAULT NPY_DEFAULT -%#define NPY_ARRAY_FARRAY NPY_FARRAY -%#define NPY_FORTRANORDER NPY_FORTRAN -%#endif -} - -/**********************************************************************/ - -/* The following code originally appeared in - * enthought/kiva/agg/src/numeric.i written by Eric Jones. It was - * translated from C++ to C by John Hunter. Bill Spotz has modified - * it to fix some minor bugs, upgrade from Numeric to numpy (all - * versions), add some comments and functionality, and convert from - * direct code insertion to SWIG fragments. - */ - -%fragment("NumPy_Macros", "header") -{ -/* Macros to extract array attributes. 
- */ -%#if NPY_API_VERSION < 0x00000007 -%#define is_array(a) ((a) && PyArray_Check((PyArrayObject*)a)) -%#define array_type(a) (int)(PyArray_TYPE((PyArrayObject*)a)) -%#define array_numdims(a) (((PyArrayObject*)a)->nd) -%#define array_dimensions(a) (((PyArrayObject*)a)->dimensions) -%#define array_size(a,i) (((PyArrayObject*)a)->dimensions[i]) -%#define array_strides(a) (((PyArrayObject*)a)->strides) -%#define array_stride(a,i) (((PyArrayObject*)a)->strides[i]) -%#define array_data(a) (((PyArrayObject*)a)->data) -%#define array_descr(a) (((PyArrayObject*)a)->descr) -%#define array_flags(a) (((PyArrayObject*)a)->flags) -%#define array_enableflags(a,f) (((PyArrayObject*)a)->flags) = f -%#else -%#define is_array(a) ((a) && PyArray_Check(a)) -%#define array_type(a) PyArray_TYPE((PyArrayObject*)a) -%#define array_numdims(a) PyArray_NDIM((PyArrayObject*)a) -%#define array_dimensions(a) PyArray_DIMS((PyArrayObject*)a) -%#define array_strides(a) PyArray_STRIDES((PyArrayObject*)a) -%#define array_stride(a,i) PyArray_STRIDE((PyArrayObject*)a,i) -%#define array_size(a,i) PyArray_DIM((PyArrayObject*)a,i) -%#define array_data(a) PyArray_DATA((PyArrayObject*)a) -%#define array_descr(a) PyArray_DESCR((PyArrayObject*)a) -%#define array_flags(a) PyArray_FLAGS((PyArrayObject*)a) -%#define array_enableflags(a,f) PyArray_ENABLEFLAGS((PyArrayObject*)a,f) -%#endif -%#define array_is_contiguous(a) (PyArray_ISCONTIGUOUS((PyArrayObject*)a)) -%#define array_is_native(a) (PyArray_ISNOTSWAPPED((PyArrayObject*)a)) -%#define array_is_fortran(a) (PyArray_ISFORTRAN((PyArrayObject*)a)) -} - -/**********************************************************************/ - -%fragment("NumPy_Utilities", - "header") -{ - /* Given a PyObject, return a string describing its type. 
- */ - const char* pytype_string(PyObject* py_obj) - { - if (py_obj == NULL ) return "C NULL value"; - if (py_obj == Py_None ) return "Python None" ; - if (PyCallable_Check(py_obj)) return "callable" ; - if (PyString_Check( py_obj)) return "string" ; - if (PyInt_Check( py_obj)) return "int" ; - if (PyFloat_Check( py_obj)) return "float" ; - if (PyDict_Check( py_obj)) return "dict" ; - if (PyList_Check( py_obj)) return "list" ; - if (PyTuple_Check( py_obj)) return "tuple" ; -%#if PY_MAJOR_VERSION < 3 - if (PyFile_Check( py_obj)) return "file" ; - if (PyModule_Check( py_obj)) return "module" ; - if (PyInstance_Check(py_obj)) return "instance" ; -%#endif - - return "unknown type"; - } - - /* Given a NumPy typecode, return a string describing the type. - */ - const char* typecode_string(int typecode) - { - static const char* type_names[25] = {"bool", - "byte", - "unsigned byte", - "short", - "unsigned short", - "int", - "unsigned int", - "long", - "unsigned long", - "long long", - "unsigned long long", - "float", - "double", - "long double", - "complex float", - "complex double", - "complex long double", - "object", - "string", - "unicode", - "void", - "ntypes", - "notype", - "char", - "unknown"}; - return typecode < 24 ? type_names[typecode] : type_names[24]; - } - - /* Make sure input has correct numpy type. This now just calls - PyArray_EquivTypenums(). 
- */ - int type_match(int actual_type, - int desired_type) - { - return PyArray_EquivTypenums(actual_type, desired_type); - } - -%#ifdef SWIGPY_USE_CAPSULE - void free_cap(PyObject * cap) - { - void* array = (void*) PyCapsule_GetPointer(cap,SWIGPY_CAPSULE_NAME); - if (array != NULL) free(array); - } -%#endif - - -} - -/**********************************************************************/ - -%fragment("NumPy_Object_to_Array", - "header", - fragment="NumPy_Backward_Compatibility", - fragment="NumPy_Macros", - fragment="NumPy_Utilities") -{ - /* Given a PyObject pointer, cast it to a PyArrayObject pointer if - * legal. If not, set the python error string appropriately and - * return NULL. - */ - PyArrayObject* obj_to_array_no_conversion(PyObject* input, - int typecode) - { - PyArrayObject* ary = NULL; - if (is_array(input) && (typecode == NPY_NOTYPE || - PyArray_EquivTypenums(array_type(input), typecode))) - { - ary = (PyArrayObject*) input; - } - else if is_array(input) - { - const char* desired_type = typecode_string(typecode); - const char* actual_type = typecode_string(array_type(input)); - PyErr_Format(PyExc_TypeError, - "Array of type '%s' required. Array of type '%s' given", - desired_type, actual_type); - ary = NULL; - } - else - { - const char* desired_type = typecode_string(typecode); - const char* actual_type = pytype_string(input); - PyErr_Format(PyExc_TypeError, - "Array of type '%s' required. A '%s' was given", - desired_type, - actual_type); - ary = NULL; - } - return ary; - } - - /* Convert the given PyObject to a NumPy array with the given - * typecode. On success, return a valid PyArrayObject* with the - * correct type. On failure, the python error string will be set and - * the routine returns NULL. 
- */ - PyArrayObject* obj_to_array_allow_conversion(PyObject* input, - int typecode, - int* is_new_object) - { - PyArrayObject* ary = NULL; - PyObject* py_obj; - if (is_array(input) && (typecode == NPY_NOTYPE || - PyArray_EquivTypenums(array_type(input),typecode))) - { - ary = (PyArrayObject*) input; - *is_new_object = 0; - } - else - { - py_obj = PyArray_FROMANY(input, typecode, 0, 0, NPY_ARRAY_DEFAULT); - /* If NULL, PyArray_FromObject will have set python error value.*/ - ary = (PyArrayObject*) py_obj; - *is_new_object = 1; - } - return ary; - } - - /* Given a PyArrayObject, check to see if it is contiguous. If so, - * return the input pointer and flag it as not a new object. If it is - * not contiguous, create a new PyArrayObject using the original data, - * flag it as a new object and return the pointer. - */ - PyArrayObject* make_contiguous(PyArrayObject* ary, - int* is_new_object, - int min_dims, - int max_dims) - { - PyArrayObject* result; - if (array_is_contiguous(ary)) - { - result = ary; - *is_new_object = 0; - } - else - { - result = (PyArrayObject*) PyArray_ContiguousFromObject((PyObject*)ary, - array_type(ary), - min_dims, - max_dims); - *is_new_object = 1; - } - return result; - } - - /* Given a PyArrayObject, check to see if it is Fortran-contiguous. - * If so, return the input pointer, but do not flag it as not a new - * object. If it is not Fortran-contiguous, create a new - * PyArrayObject using the original data, flag it as a new object - * and return the pointer. - */ - PyArrayObject* make_fortran(PyArrayObject* ary, - int* is_new_object) - { - PyArrayObject* result; - if (array_is_fortran(ary)) - { - result = ary; - *is_new_object = 0; - } - else - { - Py_INCREF(array_descr(ary)); - result = (PyArrayObject*) PyArray_FromArray(ary, - array_descr(ary), - NPY_FORTRANORDER); - *is_new_object = 1; - } - return result; - } - - /* Convert a given PyObject to a contiguous PyArrayObject of the - * specified type. 
If the input object is not a contiguous - * PyArrayObject, a new one will be created and the new object flag - * will be set. - */ - PyArrayObject* obj_to_array_contiguous_allow_conversion(PyObject* input, - int typecode, - int* is_new_object) - { - int is_new1 = 0; - int is_new2 = 0; - PyArrayObject* ary2; - PyArrayObject* ary1 = obj_to_array_allow_conversion(input, - typecode, - &is_new1); - if (ary1) - { - ary2 = make_contiguous(ary1, &is_new2, 0, 0); - if ( is_new1 && is_new2) - { - Py_DECREF(ary1); - } - ary1 = ary2; - } - *is_new_object = is_new1 || is_new2; - return ary1; - } - - /* Convert a given PyObject to a Fortran-ordered PyArrayObject of the - * specified type. If the input object is not a Fortran-ordered - * PyArrayObject, a new one will be created and the new object flag - * will be set. - */ - PyArrayObject* obj_to_array_fortran_allow_conversion(PyObject* input, - int typecode, - int* is_new_object) - { - int is_new1 = 0; - int is_new2 = 0; - PyArrayObject* ary2; - PyArrayObject* ary1 = obj_to_array_allow_conversion(input, - typecode, - &is_new1); - if (ary1) - { - ary2 = make_fortran(ary1, &is_new2); - if (is_new1 && is_new2) - { - Py_DECREF(ary1); - } - ary1 = ary2; - } - *is_new_object = is_new1 || is_new2; - return ary1; - } -} /* end fragment */ - -/**********************************************************************/ - -%fragment("NumPy_Array_Requirements", - "header", - fragment="NumPy_Backward_Compatibility", - fragment="NumPy_Macros") -{ - /* Test whether a python object is contiguous. If array is - * contiguous, return 1. Otherwise, set the python error string and - * return 0. - */ - int require_contiguous(PyArrayObject* ary) - { - int contiguous = 1; - if (!array_is_contiguous(ary)) - { - PyErr_SetString(PyExc_TypeError, - "Array must be contiguous. A non-contiguous array was given"); - contiguous = 0; - } - return contiguous; - } - - /* Test whether a python object is (C_ or F_) contiguous. If array is - * contiguous, return 1. 
Otherwise, set the python error string and - * return 0. - */ - int require_c_or_f_contiguous(PyArrayObject* ary) - { - int contiguous = 1; - if (!(array_is_contiguous(ary) || array_is_fortran(ary))) - { - PyErr_SetString(PyExc_TypeError, - "Array must be contiguous (C_ or F_). A non-contiguous array was given"); - contiguous = 0; - } - return contiguous; - } - - /* Require that a numpy array is not byte-swapped. If the array is - * not byte-swapped, return 1. Otherwise, set the python error string - * and return 0. - */ - int require_native(PyArrayObject* ary) - { - int native = 1; - if (!array_is_native(ary)) - { - PyErr_SetString(PyExc_TypeError, - "Array must have native byteorder. " - "A byte-swapped array was given"); - native = 0; - } - return native; - } - - /* Require the given PyArrayObject to have a specified number of - * dimensions. If the array has the specified number of dimensions, - * return 1. Otherwise, set the python error string and return 0. - */ - int require_dimensions(PyArrayObject* ary, - int exact_dimensions) - { - int success = 1; - if (array_numdims(ary) != exact_dimensions) - { - PyErr_Format(PyExc_TypeError, - "Array must have %d dimensions. Given array has %d dimensions", - exact_dimensions, - array_numdims(ary)); - success = 0; - } - return success; - } - - /* Require the given PyArrayObject to have one of a list of specified - * number of dimensions. If the array has one of the specified number - * of dimensions, return 1. Otherwise, set the python error string - * and return 0. 
- */ - int require_dimensions_n(PyArrayObject* ary, - int* exact_dimensions, - int n) - { - int success = 0; - int i; - char dims_str[255] = ""; - char s[255]; - for (i = 0; i < n && !success; i++) - { - if (array_numdims(ary) == exact_dimensions[i]) - { - success = 1; - } - } - if (!success) - { - for (i = 0; i < n-1; i++) - { - sprintf(s, "%d, ", exact_dimensions[i]); - strcat(dims_str,s); - } - sprintf(s, " or %d", exact_dimensions[n-1]); - strcat(dims_str,s); - PyErr_Format(PyExc_TypeError, - "Array must have %s dimensions. Given array has %d dimensions", - dims_str, - array_numdims(ary)); - } - return success; - } - - /* Require the given PyArrayObject to have a specified shape. If the - * array has the specified shape, return 1. Otherwise, set the python - * error string and return 0. - */ - int require_size(PyArrayObject* ary, - npy_intp* size, - int n) - { - int i; - int success = 1; - int len; - char desired_dims[255] = "["; - char s[255]; - char actual_dims[255] = "["; - for(i=0; i < n;i++) - { - if (size[i] != -1 && size[i] != array_size(ary,i)) - { - success = 0; - } - } - if (!success) - { - for (i = 0; i < n; i++) - { - if (size[i] == -1) - { - sprintf(s, "*,"); - } - else - { - sprintf(s, "%ld,", (long int)size[i]); - } - strcat(desired_dims,s); - } - len = strlen(desired_dims); - desired_dims[len-1] = ']'; - for (i = 0; i < n; i++) - { - sprintf(s, "%ld,", (long int)array_size(ary,i)); - strcat(actual_dims,s); - } - len = strlen(actual_dims); - actual_dims[len-1] = ']'; - PyErr_Format(PyExc_TypeError, - "Array must have shape of %s. Given array has shape of %s", - desired_dims, - actual_dims); - } - return success; - } - - /* Require the given PyArrayObject to to be Fortran ordered. If the - * the PyArrayObject is already Fortran ordered, do nothing. Else, - * set the Fortran ordering flag and recompute the strides. 
- */ - int require_fortran(PyArrayObject* ary) - { - int success = 1; - int nd = array_numdims(ary); - int i; - npy_intp * strides = array_strides(ary); - if (array_is_fortran(ary)) return success; - /* Set the Fortran ordered flag */ - array_enableflags(ary,NPY_ARRAY_FARRAY); - /* Recompute the strides */ - strides[0] = strides[nd-1]; - for (i=1; i < nd; ++i) - strides[i] = strides[i-1] * array_size(ary,i-1); - return success; - } -} - -/* Combine all NumPy fragments into one for convenience */ -%fragment("NumPy_Fragments", - "header", - fragment="NumPy_Backward_Compatibility", - fragment="NumPy_Macros", - fragment="NumPy_Utilities", - fragment="NumPy_Object_to_Array", - fragment="NumPy_Array_Requirements") -{ -} - -/* End John Hunter translation (with modifications by Bill Spotz) - */ - -/* %numpy_typemaps() macro - * - * This macro defines a family of 75 typemaps that allow C arguments - * of the form - * - * 1. (DATA_TYPE IN_ARRAY1[ANY]) - * 2. (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1) - * 3. (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1) - * - * 4. (DATA_TYPE IN_ARRAY2[ANY][ANY]) - * 5. (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) - * 6. (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2) - * 7. (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) - * 8. (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2) - * - * 9. (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY]) - * 10. (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) - * 11. (DATA_TYPE** IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) - * 12. (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3) - * 13. (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) - * 14. (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3) - * - * 15. (DATA_TYPE IN_ARRAY4[ANY][ANY][ANY][ANY]) - * 16. (DATA_TYPE* IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4) - * 17. 
(DATA_TYPE** IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4) - * 18. (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, , DIM_TYPE DIM4, DATA_TYPE* IN_ARRAY4) - * 19. (DATA_TYPE* IN_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4) - * 20. (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* IN_FARRAY4) - * - * 21. (DATA_TYPE INPLACE_ARRAY1[ANY]) - * 22. (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1) - * 23. (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1) - * - * 24. (DATA_TYPE INPLACE_ARRAY2[ANY][ANY]) - * 25. (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) - * 26. (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2) - * 27. (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) - * 28. (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2) - * - * 29. (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY]) - * 30. (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) - * 31. (DATA_TYPE** INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) - * 32. (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3) - * 33. (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3) - * 34. (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3) - * - * 35. (DATA_TYPE INPLACE_ARRAY4[ANY][ANY][ANY][ANY]) - * 36. (DATA_TYPE* INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4) - * 37. (DATA_TYPE** INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4) - * 38. (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* INPLACE_ARRAY4) - * 39. (DATA_TYPE* INPLACE_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4) - * 40. (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* INPLACE_FARRAY4) - * - * 41. (DATA_TYPE ARGOUT_ARRAY1[ANY]) - * 42. (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1) - * 43. (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1) - * - * 44. 
(DATA_TYPE ARGOUT_ARRAY2[ANY][ANY]) - * - * 45. (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY]) - * - * 46. (DATA_TYPE ARGOUT_ARRAY4[ANY][ANY][ANY][ANY]) - * - * 47. (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1) - * 48. (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1) - * - * 49. (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) - * 50. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2) - * 51. (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) - * 52. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2) - * - * 53. (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) - * 54. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3) - * 55. (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) - * 56. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_FARRAY3) - * - * 57. (DATA_TYPE** ARGOUTVIEW_ARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) - * 58. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEW_ARRAY4) - * 59. (DATA_TYPE** ARGOUTVIEW_FARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) - * 60. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEW_FARRAY4) - * - * 61. (DATA_TYPE** ARGOUTVIEWM_ARRAY1, DIM_TYPE* DIM1) - * 62. (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEWM_ARRAY1) - * - * 63. (DATA_TYPE** ARGOUTVIEWM_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) - * 64. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEWM_ARRAY2) - * 65. (DATA_TYPE** ARGOUTVIEWM_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) - * 66. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEWM_FARRAY2) - * - * 67. (DATA_TYPE** ARGOUTVIEWM_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) - * 68. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEWM_ARRAY3) - * 69. 
(DATA_TYPE** ARGOUTVIEWM_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) - * 70. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEWM_FARRAY3) - * - * 71. (DATA_TYPE** ARGOUTVIEWM_ARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) - * 72. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEWM_ARRAY4) - * 73. (DATA_TYPE** ARGOUTVIEWM_FARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) - * 74. (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEWM_FARRAY4) - * - * 75. (DATA_TYPE* INPLACE_ARRAY_FLAT, DIM_TYPE DIM_FLAT) - * - * where "DATA_TYPE" is any type supported by the NumPy module, and - * "DIM_TYPE" is any int-like type suitable for specifying dimensions. - * The difference between "ARRAY" typemaps and "FARRAY" typemaps is - * that the "FARRAY" typemaps expect Fortran ordering of - * multidimensional arrays. In python, the dimensions will not need - * to be specified (except for the "DATA_TYPE* ARGOUT_ARRAY1" - * typemaps). The IN_ARRAYs can be a numpy array or any sequence that - * can be converted to a numpy array of the specified type. The - * INPLACE_ARRAYs must be numpy arrays of the appropriate type. The - * ARGOUT_ARRAYs will be returned as new numpy arrays of the - * appropriate type. - * - * These typemaps can be applied to existing functions using the - * %apply directive. 
For example: - * - * %apply (double* IN_ARRAY1, int DIM1) {(double* series, int length)}; - * double prod(double* series, int length); - * - * %apply (int DIM1, int DIM2, double* INPLACE_ARRAY2) - * {(int rows, int cols, double* matrix )}; - * void floor(int rows, int cols, double* matrix, double f); - * - * %apply (double IN_ARRAY3[ANY][ANY][ANY]) - * {(double tensor[2][2][2] )}; - * %apply (double ARGOUT_ARRAY3[ANY][ANY][ANY]) - * {(double low[2][2][2] )}; - * %apply (double ARGOUT_ARRAY3[ANY][ANY][ANY]) - * {(double upp[2][2][2] )}; - * void luSplit(double tensor[2][2][2], - * double low[2][2][2], - * double upp[2][2][2] ); - * - * or directly with - * - * double prod(double* IN_ARRAY1, int DIM1); - * - * void floor(int DIM1, int DIM2, double* INPLACE_ARRAY2, double f); - * - * void luSplit(double IN_ARRAY3[ANY][ANY][ANY], - * double ARGOUT_ARRAY3[ANY][ANY][ANY], - * double ARGOUT_ARRAY3[ANY][ANY][ANY]); - */ - -%define %numpy_typemaps(DATA_TYPE, DATA_TYPECODE, DIM_TYPE) - -/************************/ -/* Input Array Typemaps */ -/************************/ - -/* Typemap suite for (DATA_TYPE IN_ARRAY1[ANY]) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DATA_TYPE IN_ARRAY1[ANY]) -{ - $1 = is_array($input) || PySequence_Check($input); -} -%typemap(in, - fragment="NumPy_Fragments") - (DATA_TYPE IN_ARRAY1[ANY]) - (PyArrayObject* array=NULL, int is_new_object=0) -{ - npy_intp size[1] = { $1_dim0 }; - array = obj_to_array_contiguous_allow_conversion($input, - DATA_TYPECODE, - &is_new_object); - if (!array || !require_dimensions(array, 1) || - !require_size(array, size, 1)) SWIG_fail; - $1 = ($1_ltype) array_data(array); -} -%typemap(freearg) - (DATA_TYPE IN_ARRAY1[ANY]) -{ - if (is_new_object$argnum && array$argnum) - { Py_DECREF(array$argnum); } -} - -/* Typemap suite for (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1) -{ - $1 = 
is_array($input) || PySequence_Check($input); -} -%typemap(in, - fragment="NumPy_Fragments") - (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1) - (PyArrayObject* array=NULL, int is_new_object=0) -{ - npy_intp size[1] = { -1 }; - array = obj_to_array_contiguous_allow_conversion($input, - DATA_TYPECODE, - &is_new_object); - if (!array || !require_dimensions(array, 1) || - !require_size(array, size, 1)) SWIG_fail; - $1 = (DATA_TYPE*) array_data(array); - $2 = (DIM_TYPE) array_size(array,0); -} -%typemap(freearg) - (DATA_TYPE* IN_ARRAY1, DIM_TYPE DIM1) -{ - if (is_new_object$argnum && array$argnum) - { Py_DECREF(array$argnum); } -} - -/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1) -{ - $1 = is_array($input) || PySequence_Check($input); -} -%typemap(in, - fragment="NumPy_Fragments") - (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1) - (PyArrayObject* array=NULL, int is_new_object=0) -{ - npy_intp size[1] = {-1}; - array = obj_to_array_contiguous_allow_conversion($input, - DATA_TYPECODE, - &is_new_object); - if (!array || !require_dimensions(array, 1) || - !require_size(array, size, 1)) SWIG_fail; - $1 = (DIM_TYPE) array_size(array,0); - $2 = (DATA_TYPE*) array_data(array); -} -%typemap(freearg) - (DIM_TYPE DIM1, DATA_TYPE* IN_ARRAY1) -{ - if (is_new_object$argnum && array$argnum) - { Py_DECREF(array$argnum); } -} - -/* Typemap suite for (DATA_TYPE IN_ARRAY2[ANY][ANY]) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DATA_TYPE IN_ARRAY2[ANY][ANY]) -{ - $1 = is_array($input) || PySequence_Check($input); -} -%typemap(in, - fragment="NumPy_Fragments") - (DATA_TYPE IN_ARRAY2[ANY][ANY]) - (PyArrayObject* array=NULL, int is_new_object=0) -{ - npy_intp size[2] = { $1_dim0, $1_dim1 }; - array = obj_to_array_contiguous_allow_conversion($input, - DATA_TYPECODE, - &is_new_object); - if (!array || !require_dimensions(array, 2) || - !require_size(array, 
size, 2)) SWIG_fail; - $1 = ($1_ltype) array_data(array); -} -%typemap(freearg) - (DATA_TYPE IN_ARRAY2[ANY][ANY]) -{ - if (is_new_object$argnum && array$argnum) - { Py_DECREF(array$argnum); } -} - -/* Typemap suite for (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) -{ - $1 = is_array($input) || PySequence_Check($input); -} -%typemap(in, - fragment="NumPy_Fragments") - (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) - (PyArrayObject* array=NULL, int is_new_object=0) -{ - npy_intp size[2] = { -1, -1 }; - array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE, - &is_new_object); - if (!array || !require_dimensions(array, 2) || - !require_size(array, size, 2)) SWIG_fail; - $1 = (DATA_TYPE*) array_data(array); - $2 = (DIM_TYPE) array_size(array,0); - $3 = (DIM_TYPE) array_size(array,1); -} -%typemap(freearg) - (DATA_TYPE* IN_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) -{ - if (is_new_object$argnum && array$argnum) - { Py_DECREF(array$argnum); } -} - -/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2) -{ - $1 = is_array($input) || PySequence_Check($input); -} -%typemap(in, - fragment="NumPy_Fragments") - (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2) - (PyArrayObject* array=NULL, int is_new_object=0) -{ - npy_intp size[2] = { -1, -1 }; - array = obj_to_array_contiguous_allow_conversion($input, - DATA_TYPECODE, - &is_new_object); - if (!array || !require_dimensions(array, 2) || - !require_size(array, size, 2)) SWIG_fail; - $1 = (DIM_TYPE) array_size(array,0); - $2 = (DIM_TYPE) array_size(array,1); - $3 = (DATA_TYPE*) array_data(array); -} -%typemap(freearg) - (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_ARRAY2) -{ - if (is_new_object$argnum && array$argnum) - { 
Py_DECREF(array$argnum); } -} - -/* Typemap suite for (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) -{ - $1 = is_array($input) || PySequence_Check($input); -} -%typemap(in, - fragment="NumPy_Fragments") - (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) - (PyArrayObject* array=NULL, int is_new_object=0) -{ - npy_intp size[2] = { -1, -1 }; - array = obj_to_array_fortran_allow_conversion($input, - DATA_TYPECODE, - &is_new_object); - if (!array || !require_dimensions(array, 2) || - !require_size(array, size, 2) || !require_fortran(array)) SWIG_fail; - $1 = (DATA_TYPE*) array_data(array); - $2 = (DIM_TYPE) array_size(array,0); - $3 = (DIM_TYPE) array_size(array,1); -} -%typemap(freearg) - (DATA_TYPE* IN_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2) -{ - if (is_new_object$argnum && array$argnum) - { Py_DECREF(array$argnum); } -} - -/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2) -{ - $1 = is_array($input) || PySequence_Check($input); -} -%typemap(in, - fragment="NumPy_Fragments") - (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2) - (PyArrayObject* array=NULL, int is_new_object=0) -{ - npy_intp size[2] = { -1, -1 }; - array = obj_to_array_fortran_allow_conversion($input, - DATA_TYPECODE, - &is_new_object); - if (!array || !require_dimensions(array, 2) || - !require_size(array, size, 2) || !require_fortran(array)) SWIG_fail; - $1 = (DIM_TYPE) array_size(array,0); - $2 = (DIM_TYPE) array_size(array,1); - $3 = (DATA_TYPE*) array_data(array); -} -%typemap(freearg) - (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* IN_FARRAY2) -{ - if (is_new_object$argnum && array$argnum) - { Py_DECREF(array$argnum); } -} - -/* Typemap suite for (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY]) - */ 
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
-{
-  /* Overload dispatch: accept NumPy arrays and any Python sequence. */
-  $1 = is_array($input) || PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  /* The required extents come from the C array declaration itself. */
-  npy_intp size[3] = { $1_dim0, $1_dim1, $1_dim2 };
-  array = obj_to_array_contiguous_allow_conversion($input,
-                                                   DATA_TYPECODE,
-                                                   &is_new_object);
-  if (!array || !require_dimensions(array, 3) ||
-      !require_size(array, size, 3)) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-%typemap(freearg)
-  (DATA_TYPE IN_ARRAY3[ANY][ANY][ANY])
-{
-  /* Only drop the array if the "in" typemap reported a new object. */
-  if (is_new_object$argnum && array$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3)
- *
- * Passes the array data pointer followed by its three extents.
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  $1 = is_array($input) || PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  /* NOTE(review): -1 presumably means "any extent" to require_size --
-     confirm against the helper's definition. */
-  npy_intp size[3] = { -1, -1, -1 };
-  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
-                                                   &is_new_object);
-  if (!array || !require_dimensions(array, 3) ||
-      !require_size(array, size, 3)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-  $4 = (DIM_TYPE) array_size(array,2);
-}
-%typemap(freearg)
-  (DATA_TYPE* IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  if (is_new_object$argnum && array$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DATA_TYPE** IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3)
- *
- * The input is a Python sequence of 2-D arrays that must all have the same
- * shape.  DIM1 is the sequence length; DIM2 and DIM3 are the extents of the
- * first item.  The C function receives a table of per-item data pointers.
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE** IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  /* any sequence is accepted here; items are validated in the "in" typemap */
-  $1 = PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE** IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-  (DATA_TYPE** array=NULL, PyArrayObject** object_array=NULL, int* is_new_object_array=NULL)
-{
-  npy_intp size[2] = { -1, -1 };
-  PyArrayObject* temp_array;
-  Py_ssize_t i;
-  Py_ssize_t length;
-  int is_new_object;
-
-  /* Length of the sequence.  PySequence_Size matches the PySequence_Check
-     typecheck above; PyList_Size returned -1 for non-list sequences and the
-     negative count was then handed to malloc/calloc. */
-  length = PySequence_Size($input);
-  if (length < 0) SWIG_fail;
-  $2 = (DIM_TYPE) length;
-
-  /* the arrays */
-  array = (DATA_TYPE **)malloc($2*sizeof(DATA_TYPE *));
-  object_array = (PyArrayObject **)calloc($2,sizeof(PyArrayObject *));
-  is_new_object_array = (int *)calloc($2,sizeof(int));
-
-  if (array == NULL || object_array == NULL || is_new_object_array == NULL)
-  {
-    SWIG_fail;
-  }
-
-  for (i=0; i<$2; i++)
-  {
-    /* NOTE(review): PySequence_GetItem returns a new reference that is not
-       released here or in freearg; this looks like a per-item reference
-       leak -- confirm the conversion helper's ownership rules. */
-    temp_array = obj_to_array_contiguous_allow_conversion(PySequence_GetItem($input,i), DATA_TYPECODE, &is_new_object);
-
-    /* the new array must be stored so that it can be destroyed in freearg */
-    object_array[i] = temp_array;
-    is_new_object_array[i] = is_new_object;
-
-    if (!temp_array || !require_dimensions(temp_array, 2)) SWIG_fail;
-
-    /* store the size of the first array in the list, then use that for comparison. */
-    if (i == 0)
-    {
-      size[0] = array_size(temp_array,0);
-      size[1] = array_size(temp_array,1);
-    }
-
-    if (!require_size(temp_array, size, 2)) SWIG_fail;
-
-    array[i] = (DATA_TYPE*) array_data(temp_array);
-  }
-
-  $1 = (DATA_TYPE**) array;
-  $3 = (DIM_TYPE) size[0];
-  $4 = (DIM_TYPE) size[1];
-}
-%typemap(freearg)
-  (DATA_TYPE** IN_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  Py_ssize_t i;
-
-  if (array$argnum!=NULL) free(array$argnum);
-
-  /*freeing the individual arrays if needed */
-  if (object_array$argnum!=NULL)
-  {
-    if (is_new_object_array$argnum!=NULL)
-    {
-      for (i=0; i<$2; i++)
-      {
-        if (object_array$argnum[i] != NULL && is_new_object_array$argnum[i])
-          { Py_DECREF(object_array$argnum[i]); }
-      }
-      free(is_new_object_array$argnum);
-    }
-    free(object_array$argnum);
-  }
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
- *                    DATA_TYPE* IN_ARRAY3)
- *
- * Same as the data-first suite, with the three extents passed first.
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
-{
-  $1 = is_array($input) || PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  npy_intp size[3] = { -1, -1, -1 };
-  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
-                                                   &is_new_object);
-  if (!array || !require_dimensions(array, 3) ||
-      !require_size(array, size, 3)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DIM_TYPE) array_size(array,2);
-  $4 = (DATA_TYPE*) array_data(array);
-}
-%typemap(freearg)
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_ARRAY3)
-{
-  if (is_new_object$argnum && array$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3)
- *
- * Fortran-ordered variant: converts with
- * obj_to_array_fortran_allow_conversion and additionally calls
- * require_fortran.
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  $1 = is_array($input) || PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  npy_intp size[3] = { -1, -1, -1 };
-  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE,
-                                                &is_new_object);
-  /* Logical || (was bitwise |): stop at the first failed check so that
-     require_fortran is not called after require_size has already failed,
-     matching the DIM-first IN_FARRAY3 suite below. */
-  if (!array || !require_dimensions(array, 3) ||
-      !require_size(array, size, 3) || !require_fortran(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-  $4 = (DIM_TYPE) array_size(array,2);
-}
-%typemap(freearg)
-  (DATA_TYPE* IN_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  if (is_new_object$argnum && array$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
- *                    DATA_TYPE* IN_FARRAY3)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
-{
-  $1 = is_array($input) || PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  npy_intp size[3] = { -1, -1, -1 };
-  array = obj_to_array_fortran_allow_conversion($input,
-                                                DATA_TYPECODE,
-                                                &is_new_object);
-  if (!array || !require_dimensions(array, 3) ||
-      !require_size(array, size, 3) || !require_fortran(array)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DIM_TYPE) array_size(array,2);
-  $4 = (DATA_TYPE*) array_data(array);
-}
-%typemap(freearg)
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* IN_FARRAY3)
-{
-  if (is_new_object$argnum && array$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DATA_TYPE IN_ARRAY4[ANY][ANY][ANY][ANY])
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE IN_ARRAY4[ANY][ANY][ANY][ANY])
-{
-  /* Dispatch on NumPy arrays as well as arbitrary Python sequences. */
-  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE IN_ARRAY4[ANY][ANY][ANY][ANY])
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  /* Extents demanded by the fixed-size C array declaration. */
-  npy_intp size[4] = { $1_dim0, $1_dim1, $1_dim2 , $1_dim3};
-  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
-                                                   &is_new_object);
-  /* Bail out at the first check that does not hold. */
-  if (!array) SWIG_fail;
-  if (!require_dimensions(array, 4)) SWIG_fail;
-  if (!require_size(array, size, 4)) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-%typemap(freearg)
-  (DATA_TYPE IN_ARRAY4[ANY][ANY][ANY][ANY])
-{
-  /* Release the array only when the "in" typemap flagged it as new. */
-  if (array$argnum && is_new_object$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DATA_TYPE* IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3, DIM_TYPE DIM4)
- *
- * Hands the C function the data pointer followed by the four extents.
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  $1 = (is_array($input) || PySequence_Check($input)) ? 1 : 0;
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  npy_intp size[4] = { -1, -1, -1, -1 };
-  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
-                                                   &is_new_object);
-  if (!array) SWIG_fail;
-  if (!require_dimensions(array, 4)) SWIG_fail;
-  if (!require_size(array, size, 4)) SWIG_fail;
-  /* Extents first, then the data pointer; the assignments are independent. */
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-  $4 = (DIM_TYPE) array_size(array,2);
-  $5 = (DIM_TYPE) array_size(array,3);
-  $1 = (DATA_TYPE*) array_data(array);
-}
-%typemap(freearg)
-  (DATA_TYPE* IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  if (array$argnum && is_new_object$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DATA_TYPE** IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3, DIM_TYPE DIM4)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE** IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  /* any sequence is accepted here; items are validated in the "in" typemap */
-  $1 = PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE** IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-  (DATA_TYPE** array=NULL, PyArrayObject** object_array=NULL, int* is_new_object_array=NULL)
-{
-  npy_intp size[3] = { -1, -1, -1 };
-  PyArrayObject* temp_array;
-  Py_ssize_t i;
-  Py_ssize_t length;
-  int is_new_object;
-
-  /* Length of the sequence.  PySequence_Size matches the PySequence_Check
-     typecheck above; PyList_Size returned -1 for non-list sequences and the
-     negative count was then handed to malloc/calloc. */
-  length = PySequence_Size($input);
-  if (length < 0) SWIG_fail;
-  $2 = (DIM_TYPE) length;
-
-  /* the arrays */
-  array = (DATA_TYPE **)malloc($2*sizeof(DATA_TYPE *));
-  object_array = (PyArrayObject **)calloc($2,sizeof(PyArrayObject *));
-  is_new_object_array = (int *)calloc($2,sizeof(int));
-
-  if (array == NULL || object_array == NULL || is_new_object_array == NULL)
-  {
-    SWIG_fail;
-  }
-
-  for (i=0; i<$2; i++)
-  {
-    /* NOTE(review): PySequence_GetItem returns a new reference that is not
-       released here or in freearg; this looks like a per-item reference
-       leak -- confirm the conversion helper's ownership rules. */
-    temp_array = obj_to_array_contiguous_allow_conversion(PySequence_GetItem($input,i), DATA_TYPECODE, &is_new_object);
-
-    /* the new array must be stored so that it can be destroyed in freearg */
-    object_array[i] = temp_array;
-    is_new_object_array[i] = is_new_object;
-
-    if (!temp_array || !require_dimensions(temp_array, 3)) SWIG_fail;
-
-    /* store the size of the first array in the list, then use that for comparison. */
-    if (i == 0)
-    {
-      size[0] = array_size(temp_array,0);
-      size[1] = array_size(temp_array,1);
-      size[2] = array_size(temp_array,2);
-    }
-
-    if (!require_size(temp_array, size, 3)) SWIG_fail;
-
-    array[i] = (DATA_TYPE*) array_data(temp_array);
-  }
-
-  $1 = (DATA_TYPE**) array;
-  $3 = (DIM_TYPE) size[0];
-  $4 = (DIM_TYPE) size[1];
-  $5 = (DIM_TYPE) size[2];
-}
-%typemap(freearg)
-  (DATA_TYPE** IN_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  Py_ssize_t i;
-
-  if (array$argnum!=NULL) free(array$argnum);
-
-  /*freeing the individual arrays if needed */
-  if (object_array$argnum!=NULL)
-  {
-    if (is_new_object_array$argnum!=NULL)
-    {
-      for (i=0; i<$2; i++)
-      {
-        if (object_array$argnum[i] != NULL && is_new_object_array$argnum[i])
-          { Py_DECREF(object_array$argnum[i]); }
-      }
-      free(is_new_object_array$argnum);
-    }
-    free(object_array$argnum);
-  }
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4,
- *                    DATA_TYPE* IN_ARRAY4)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* IN_ARRAY4)
-{
-  $1 = is_array($input) || PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* IN_ARRAY4)
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  npy_intp size[4] = { -1, -1, -1 , -1};
-  array = obj_to_array_contiguous_allow_conversion($input, DATA_TYPECODE,
-                                                   &is_new_object);
-  if (!array || !require_dimensions(array, 4) ||
-      !require_size(array, size, 4)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DIM_TYPE) array_size(array,2);
-  $4 = (DIM_TYPE) array_size(array,3);
-  $5 = (DATA_TYPE*) array_data(array);
-}
-%typemap(freearg)
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* IN_ARRAY4)
-{
-  if (is_new_object$argnum && array$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DATA_TYPE* IN_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3, DIM_TYPE DIM4)
- *
- * Fortran-ordered variant: converts with
- * obj_to_array_fortran_allow_conversion and additionally calls
- * require_fortran.
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* IN_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  $1 = is_array($input) || PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* IN_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  npy_intp size[4] = { -1, -1, -1, -1 };
-  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE,
-                                                &is_new_object);
-  /* Logical || (was bitwise |): stop at the first failed check so that
-     require_fortran is not called after require_size has already failed,
-     matching the DIM-first IN_FARRAY4 suite below. */
-  if (!array || !require_dimensions(array, 4) ||
-      !require_size(array, size, 4) || !require_fortran(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-  $4 = (DIM_TYPE) array_size(array,2);
-  $5 = (DIM_TYPE) array_size(array,3);
-}
-%typemap(freearg)
-  (DATA_TYPE* IN_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  if (is_new_object$argnum && array$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4,
- *                    DATA_TYPE* IN_FARRAY4)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* IN_FARRAY4)
-{
-  $1 = is_array($input) || PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* IN_FARRAY4)
-  (PyArrayObject* array=NULL, int is_new_object=0)
-{
-  npy_intp size[4] = { -1, -1, -1 , -1 };
-  array = obj_to_array_fortran_allow_conversion($input, DATA_TYPECODE,
-                                                &is_new_object);
-  if (!array || !require_dimensions(array, 4) ||
-      !require_size(array, size, 4) || !require_fortran(array)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DIM_TYPE) array_size(array,2);
-  $4 = (DIM_TYPE) array_size(array,3);
-  $5 = (DATA_TYPE*) array_data(array);
-}
-%typemap(freearg)
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* IN_FARRAY4)
-{
-  if (is_new_object$argnum && array$argnum)
-    { Py_DECREF(array$argnum); }
-}
-
-/***************************/
-/* In-Place Array Typemaps */
-/***************************/
-
-/* The in-place suites obtain the array with obj_to_array_no_conversion and
- * declare no freearg typemap (apart from the DATA_TYPE** variants, which
- * free their pointer tables).  Their typechecks require an existing array
- * whose type number is equivalent to DATA_TYPECODE.
- */
-
-/* Typemap suite for (DATA_TYPE INPLACE_ARRAY1[ANY])
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE INPLACE_ARRAY1[ANY])
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE INPLACE_ARRAY1[ANY])
-  (PyArrayObject* array=NULL)
-{
-  npy_intp size[1] = { $1_dim0 };
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,1) || !require_size(array, size, 1) ||
-      !require_contiguous(array) || !require_native(array)) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* INPLACE_ARRAY1, DIM_TYPE DIM1)
-  (PyArrayObject* array=NULL, int i=1)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,1) || !require_contiguous(array)
-      || !require_native(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  /* DIM1 is the product of the extents over all dimensions. */
-  $2 = 1;
-  for (i=0; i < array_numdims(array); ++i) $2 *= array_size(array,i);
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DATA_TYPE* INPLACE_ARRAY1)
-  (PyArrayObject* array=NULL, int i=0)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,1) || !require_contiguous(array)
-      || !require_native(array)) SWIG_fail;
-  /* DIM1 is the product of the extents over all dimensions. */
-  $1 = 1;
-  for (i=0; i < array_numdims(array); ++i) $1 *= array_size(array,i);
-  $2 = (DATA_TYPE*) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE INPLACE_ARRAY2[ANY][ANY])
-  (PyArrayObject* array=NULL)
-{
-  npy_intp size[2] = { $1_dim0, $1_dim1 };
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,2) || !require_size(array, size, 2) ||
-      !require_contiguous(array) || !require_native(array)) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* INPLACE_ARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,2) || !require_contiguous(array)
-      || !require_native(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_ARRAY2)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,2) || !require_contiguous(array) ||
-      !require_native(array)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DATA_TYPE*) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* INPLACE_FARRAY2, DIM_TYPE DIM1, DIM_TYPE DIM2)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,2) || !require_contiguous(array)
-      || !require_native(array) || !require_fortran(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DATA_TYPE* INPLACE_FARRAY2)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,2) || !require_contiguous(array) ||
-      !require_native(array) || !require_fortran(array)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DATA_TYPE*) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE INPLACE_ARRAY3[ANY][ANY][ANY])
-  (PyArrayObject* array=NULL)
-{
-  npy_intp size[3] = { $1_dim0, $1_dim1, $1_dim2 };
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,3) || !require_size(array, size, 3) ||
-      !require_contiguous(array) || !require_native(array)) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,3) || !require_contiguous(array) ||
-      !require_native(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-  $4 = (DIM_TYPE) array_size(array,2);
-}
-
-/* Typemap suite for (DATA_TYPE** INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3)
- *
- * The input is a Python sequence of 2-D arrays that must all have the same
- * shape.  DIM1 is the sequence length; DIM2 and DIM3 are the extents of the
- * first item.
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE** INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  $1 = PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE** INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-  (DATA_TYPE** array=NULL, PyArrayObject** object_array=NULL)
-{
-  npy_intp size[2] = { -1, -1 };
-  PyArrayObject* temp_array;
-  Py_ssize_t i;
-  Py_ssize_t length;
-
-  /* Length of the sequence.  PySequence_Size matches the PySequence_Check
-     typecheck above; PyList_Size returned -1 for non-list sequences and the
-     negative count was then handed to malloc/calloc. */
-  length = PySequence_Size($input);
-  if (length < 0) SWIG_fail;
-  $2 = (DIM_TYPE) length;
-
-  /* the arrays */
-  array = (DATA_TYPE **)malloc($2*sizeof(DATA_TYPE *));
-  object_array = (PyArrayObject **)calloc($2,sizeof(PyArrayObject *));
-
-  if (array == NULL || object_array == NULL)
-  {
-    SWIG_fail;
-  }
-
-  for (i=0; i<$2; i++)
-  {
-    /* NOTE(review): PySequence_GetItem returns a new reference that is never
-       released (freearg below only frees the pointer tables); this looks
-       like a per-item reference leak -- confirm before changing. */
-    temp_array = obj_to_array_no_conversion(PySequence_GetItem($input,i), DATA_TYPECODE);
-
-    /* remember the array object next to its data pointer; freearg frees
-       only the two tables, not the arrays themselves */
-    object_array[i] = temp_array;
-
-    if ( !temp_array || !require_dimensions(temp_array, 2) ||
-      !require_contiguous(temp_array) ||
-      !require_native(temp_array) ||
-      !PyArray_EquivTypenums(array_type(temp_array), DATA_TYPECODE)
-    ) SWIG_fail;
-
-    /* store the size of the first array in the list, then use that for comparison. */
-    if (i == 0)
-    {
-      size[0] = array_size(temp_array,0);
-      size[1] = array_size(temp_array,1);
-    }
-
-    if (!require_size(temp_array, size, 2)) SWIG_fail;
-
-    array[i] = (DATA_TYPE*) array_data(temp_array);
-  }
-
-  $1 = (DATA_TYPE**) array;
-  $3 = (DIM_TYPE) size[0];
-  $4 = (DIM_TYPE) size[1];
-}
-%typemap(freearg)
-  (DATA_TYPE** INPLACE_ARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  if (array$argnum!=NULL) free(array$argnum);
-  if (object_array$argnum!=NULL) free(object_array$argnum);
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
- *                    DATA_TYPE* INPLACE_ARRAY3)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_ARRAY3)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,3) || !require_contiguous(array)
-      || !require_native(array)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DIM_TYPE) array_size(array,2);
-  $4 = (DATA_TYPE*) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* INPLACE_FARRAY3, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,3) || !require_contiguous(array) ||
-      !require_native(array) || !require_fortran(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-  $4 = (DIM_TYPE) array_size(array,2);
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3,
- *                    DATA_TYPE* INPLACE_FARRAY3)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DATA_TYPE* INPLACE_FARRAY3)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,3) || !require_contiguous(array)
-      || !require_native(array) || !require_fortran(array)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DIM_TYPE) array_size(array,2);
-  $4 = (DATA_TYPE*) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE INPLACE_ARRAY4[ANY][ANY][ANY][ANY])
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE INPLACE_ARRAY4[ANY][ANY][ANY][ANY])
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE INPLACE_ARRAY4[ANY][ANY][ANY][ANY])
-  (PyArrayObject* array=NULL)
-{
-  npy_intp size[4] = { $1_dim0, $1_dim1, $1_dim2 , $1_dim3 };
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,4) || !require_size(array, size, 4) ||
-      !require_contiguous(array) || !require_native(array)) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3, DIM_TYPE DIM4)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,4) || !require_contiguous(array) ||
-      !require_native(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-  $4 = (DIM_TYPE) array_size(array,2);
-  $5 = (DIM_TYPE) array_size(array,3);
-}
-
-/* Typemap suite for (DATA_TYPE** INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3, DIM_TYPE DIM4)
- *
- * The input is a Python sequence of 3-D arrays that must all have the same
- * shape.  DIM1 is the sequence length; DIM2..DIM4 are the extents of the
- * first item.
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE** INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  $1 = PySequence_Check($input);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE** INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-  (DATA_TYPE** array=NULL, PyArrayObject** object_array=NULL)
-{
-  npy_intp size[3] = { -1, -1, -1 };
-  PyArrayObject* temp_array;
-  Py_ssize_t i;
-  Py_ssize_t length;
-
-  /* Length of the sequence.  PySequence_Size matches the PySequence_Check
-     typecheck above; PyList_Size returned -1 for non-list sequences and the
-     negative count was then handed to malloc/calloc. */
-  length = PySequence_Size($input);
-  if (length < 0) SWIG_fail;
-  $2 = (DIM_TYPE) length;
-
-  /* the arrays */
-  array = (DATA_TYPE **)malloc($2*sizeof(DATA_TYPE *));
-  object_array = (PyArrayObject **)calloc($2,sizeof(PyArrayObject *));
-
-  if (array == NULL || object_array == NULL)
-  {
-    SWIG_fail;
-  }
-
-  for (i=0; i<$2; i++)
-  {
-    /* NOTE(review): PySequence_GetItem returns a new reference that is never
-       released (freearg below only frees the pointer tables); this looks
-       like a per-item reference leak -- confirm before changing. */
-    temp_array = obj_to_array_no_conversion(PySequence_GetItem($input,i), DATA_TYPECODE);
-
-    /* remember the array object next to its data pointer; freearg frees
-       only the two tables, not the arrays themselves */
-    object_array[i] = temp_array;
-
-    if ( !temp_array || !require_dimensions(temp_array, 3) ||
-      !require_contiguous(temp_array) ||
-      !require_native(temp_array) ||
-      !PyArray_EquivTypenums(array_type(temp_array), DATA_TYPECODE)
-    ) SWIG_fail;
-
-    /* store the size of the first array in the list, then use that for comparison. */
-    if (i == 0)
-    {
-      size[0] = array_size(temp_array,0);
-      size[1] = array_size(temp_array,1);
-      size[2] = array_size(temp_array,2);
-    }
-
-    if (!require_size(temp_array, size, 3)) SWIG_fail;
-
-    array[i] = (DATA_TYPE*) array_data(temp_array);
-  }
-
-  $1 = (DATA_TYPE**) array;
-  $3 = (DIM_TYPE) size[0];
-  $4 = (DIM_TYPE) size[1];
-  $5 = (DIM_TYPE) size[2];
-}
-%typemap(freearg)
-  (DATA_TYPE** INPLACE_ARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  if (array$argnum!=NULL) free(array$argnum);
-  if (object_array$argnum!=NULL) free(object_array$argnum);
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4,
- *                    DATA_TYPE* INPLACE_ARRAY4)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* INPLACE_ARRAY4)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* INPLACE_ARRAY4)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,4) || !require_contiguous(array)
-      || !require_native(array)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DIM_TYPE) array_size(array,2);
-  $4 = (DIM_TYPE) array_size(array,3);
-  $5 = (DATA_TYPE*) array_data(array);
-}
-
-/* Typemap suite for (DATA_TYPE* INPLACE_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2,
- *                    DIM_TYPE DIM3, DIM_TYPE DIM4)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DATA_TYPE* INPLACE_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* INPLACE_FARRAY4, DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,4) || !require_contiguous(array) ||
-      !require_native(array) || !require_fortran(array)) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-  $2 = (DIM_TYPE) array_size(array,0);
-  $3 = (DIM_TYPE) array_size(array,1);
-  $4 = (DIM_TYPE) array_size(array,2);
-  $5 = (DIM_TYPE) array_size(array,3);
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4,
- *                    DATA_TYPE* INPLACE_FARRAY4)
- */
-%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY,
-           fragment="NumPy_Macros")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* INPLACE_FARRAY4)
-{
-  $1 = is_array($input) && PyArray_EquivTypenums(array_type($input),
-                                                 DATA_TYPECODE);
-}
-%typemap(in,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DIM_TYPE DIM2, DIM_TYPE DIM3, DIM_TYPE DIM4, DATA_TYPE* INPLACE_FARRAY4)
-  (PyArrayObject* array=NULL)
-{
-  array = obj_to_array_no_conversion($input, DATA_TYPECODE);
-  if (!array || !require_dimensions(array,4) || !require_contiguous(array)
-      || !require_native(array) || !require_fortran(array)) SWIG_fail;
-  $1 = (DIM_TYPE) array_size(array,0);
-  $2 = (DIM_TYPE) array_size(array,1);
-  $3 = (DIM_TYPE) array_size(array,2);
-  $4 = (DIM_TYPE) array_size(array,3);
-  $5 = (DATA_TYPE*) array_data(array);
-}
-
-/*************************/
-/* Argout Array Typemaps */
-/*************************/
-
-/* The argout suites allocate a new array with PyArray_SimpleNew in the "in"
- * typemap, pass its data pointer to the C function, and append the array to
- * $result in the "argout" typemap.
- */
-
-/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY1[ANY])
- */
-%typemap(in,numinputs=0,
-         fragment="NumPy_Backward_Compatibility,NumPy_Macros")
-  (DATA_TYPE ARGOUT_ARRAY1[ANY])
-  (PyObject* array = NULL)
-{
-  /* No Python argument is consumed; the extent comes from the declaration. */
-  npy_intp dims[1] = { $1_dim0 };
-  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
-  if (!array) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-%typemap(argout)
-  (DATA_TYPE ARGOUT_ARRAY1[ANY])
-{
-  $result = SWIG_Python_AppendOutput($result,(PyObject*)array$argnum);
-}
-
-/* Typemap suite for (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
- *
- * The single Python argument is the requested length of the output array.
- */
-%typemap(in,numinputs=1,
-         fragment="NumPy_Fragments")
-  (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
-  (PyObject* array = NULL)
-{
-  npy_intp dims[1];
-  if (!PyInt_Check($input))
-  {
-    const char* typestring = pytype_string($input);
-    PyErr_Format(PyExc_TypeError,
-                 "Int dimension expected.  '%s' given.",
-                 typestring);
-    SWIG_fail;
-  }
-  $2 = (DIM_TYPE) PyInt_AsLong($input);
-  dims[0] = (npy_intp) $2;
-  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
-  if (!array) SWIG_fail;
-  $1 = (DATA_TYPE*) array_data(array);
-}
-%typemap(argout)
-  (DATA_TYPE* ARGOUT_ARRAY1, DIM_TYPE DIM1)
-{
-  $result = SWIG_Python_AppendOutput($result,(PyObject*)array$argnum);
-}
-
-/* Typemap suite for (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
- */
-%typemap(in,numinputs=1,
-         fragment="NumPy_Fragments")
-  (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
-  (PyObject* array = NULL)
-{
-  npy_intp dims[1];
-  if (!PyInt_Check($input))
-  {
-    const char* typestring = pytype_string($input);
-    PyErr_Format(PyExc_TypeError,
-                 "Int dimension expected.  '%s' given.",
-                 typestring);
-    SWIG_fail;
-  }
-  $1 = (DIM_TYPE) PyInt_AsLong($input);
-  dims[0] = (npy_intp) $1;
-  array = PyArray_SimpleNew(1, dims, DATA_TYPECODE);
-  if (!array) SWIG_fail;
-  $2 = (DATA_TYPE*) array_data(array);
-}
-%typemap(argout)
-  (DIM_TYPE DIM1, DATA_TYPE* ARGOUT_ARRAY1)
-{
-  $result = SWIG_Python_AppendOutput($result,(PyObject*)array$argnum);
-}
-
-/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
- */
-%typemap(in,numinputs=0,
-         fragment="NumPy_Backward_Compatibility,NumPy_Macros")
-  (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
-  (PyObject* array = NULL)
-{
-  npy_intp dims[2] = { $1_dim0, $1_dim1 };
-  array = PyArray_SimpleNew(2, dims, DATA_TYPECODE);
-  if (!array) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-%typemap(argout)
-  (DATA_TYPE ARGOUT_ARRAY2[ANY][ANY])
-{
-  $result = SWIG_Python_AppendOutput($result,(PyObject*)array$argnum);
-}
-
-/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
- */
-%typemap(in,numinputs=0,
-         fragment="NumPy_Backward_Compatibility,NumPy_Macros")
-  (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
-  (PyObject* array = NULL)
-{
-  npy_intp dims[3] = { $1_dim0, $1_dim1, $1_dim2 };
-  array = PyArray_SimpleNew(3, dims, DATA_TYPECODE);
-  if (!array) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-%typemap(argout)
-  (DATA_TYPE ARGOUT_ARRAY3[ANY][ANY][ANY])
-{
-  $result = SWIG_Python_AppendOutput($result,(PyObject*)array$argnum);
-}
-
-/* Typemap suite for (DATA_TYPE ARGOUT_ARRAY4[ANY][ANY][ANY][ANY])
- */
-%typemap(in,numinputs=0,
-         fragment="NumPy_Backward_Compatibility,NumPy_Macros")
-  (DATA_TYPE ARGOUT_ARRAY4[ANY][ANY][ANY][ANY])
-  (PyObject* array = NULL)
-{
-  npy_intp dims[4] = { $1_dim0, $1_dim1, $1_dim2, $1_dim3 };
-  array = PyArray_SimpleNew(4, dims, DATA_TYPECODE);
-  if (!array) SWIG_fail;
-  $1 = ($1_ltype) array_data(array);
-}
-%typemap(argout)
-  (DATA_TYPE ARGOUT_ARRAY4[ANY][ANY][ANY][ANY])
-{
-  $result = SWIG_Python_AppendOutput($result,(PyObject*)array$argnum);
-}
-
-/*****************************/ -/* Argoutview Array Typemaps */ -/*****************************/ - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim_temp) -{ - $1 = &data_temp; - $2 = &dim_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility") - (DATA_TYPE** ARGOUTVIEW_ARRAY1, DIM_TYPE* DIM1) -{ - npy_intp dims[1] = { *$2 }; - PyObject* obj = PyArray_SimpleNewFromData(1, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DATA_TYPE** ARGOUTVIEW_ARRAY1) - (DIM_TYPE dim_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim_temp; - $2 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility") - (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEW_ARRAY1) -{ - npy_intp dims[1] = { *$1 }; - PyObject* obj = PyArray_SimpleNewFromData(1, dims, DATA_TYPECODE, (void*)(*$2)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility") - (DATA_TYPE** ARGOUTVIEW_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) -{ - npy_intp dims[2] = { *$2, *$3 }; - PyObject* obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - $result = 
SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DATA_TYPE** ARGOUTVIEW_ARRAY2) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_ARRAY2) -{ - npy_intp dims[2] = { *$1, *$2 }; - PyObject* obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$3)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements") - (DATA_TYPE** ARGOUTVIEW_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) -{ - npy_intp dims[2] = { *$2, *$3 }; - PyObject* obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DATA_TYPE** ARGOUTVIEW_FARRAY2) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEW_FARRAY2) -{ - npy_intp dims[2] = { 
*$1, *$2 }; - PyObject* obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$3)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility") - (DATA_TYPE** ARGOUTVIEW_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) -{ - npy_intp dims[3] = { *$2, *$3, *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, - DATA_TYPE** ARGOUTVIEW_ARRAY3) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DATA_TYPE* data_temp = NULL) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_ARRAY3) -{ - npy_intp dims[3] = { *$1, *$2, *$3 }; - PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** 
ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements") - (DATA_TYPE** ARGOUTVIEW_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) -{ - npy_intp dims[3] = { *$2, *$3, *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, - DATA_TYPE** ARGOUTVIEW_FARRAY3) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DATA_TYPE** ARGOUTVIEW_FARRAY3) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEW_FARRAY3) -{ - npy_intp dims[3] = { *$1, *$2, *$3 }; - PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_ARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3, DIM_TYPE* DIM4) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEW_ARRAY4, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; - $5 = 
&dim4_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility") - (DATA_TYPE** ARGOUTVIEW_ARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) -{ - npy_intp dims[4] = { *$2, *$3, *$4 , *$5 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, - DATA_TYPE** ARGOUTVIEW_ARRAY4) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 , DATA_TYPE** ARGOUTVIEW_ARRAY4) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &dim4_temp; - $5 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEW_ARRAY4) -{ - npy_intp dims[4] = { *$1, *$2, *$3 , *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$5)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEW_FARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3, DIM_TYPE* DIM4) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEW_FARRAY4, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; - $5 = &dim4_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements") - (DATA_TYPE** ARGOUTVIEW_FARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) -{ - npy_intp dims[4] 
= { *$2, *$3, *$4 , *$5 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, - DATA_TYPE** ARGOUTVIEW_FARRAY4) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 , DATA_TYPE** ARGOUTVIEW_FARRAY4) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &dim4_temp; - $5 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEW_FARRAY4) -{ - npy_intp dims[4] = { *$1, *$2, *$3 , *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$5)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - $result = SWIG_Python_AppendOutput($result,obj); -} - -/*************************************/ -/* Managed Argoutview Array Typemaps */ -/*************************************/ - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_ARRAY1, DIM_TYPE* DIM1) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_ARRAY1, DIM_TYPE* DIM1 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim_temp) -{ - $1 = &data_temp; - $2 = &dim_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_ARRAY1, DIM_TYPE* DIM1) -{ - npy_intp dims[1] = { *$2 }; - PyObject* obj = PyArray_SimpleNewFromData(1, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, 
free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEWM_ARRAY1) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DATA_TYPE** ARGOUTVIEWM_ARRAY1) - (DIM_TYPE dim_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim_temp; - $2 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DIM_TYPE* DIM1, DATA_TYPE** ARGOUTVIEWM_ARRAY1) -{ - npy_intp dims[1] = { *$1 }; - PyObject* obj = PyArray_SimpleNewFromData(1, dims, DATA_TYPECODE, (void*)(*$2)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_ARRAY2, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_ARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) -{ - npy_intp dims[2] = { *$2, *$3 }; - PyObject* obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = 
PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEWM_ARRAY2) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DATA_TYPE** ARGOUTVIEWM_ARRAY2) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEWM_ARRAY2) -{ - npy_intp dims[2] = { *$1, *$2 }; - PyObject* obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$3)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_FARRAY2, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_FARRAY2, DIM_TYPE* DIM1, DIM_TYPE* DIM2) -{ - npy_intp dims[2] = { *$2, *$3 }; - PyObject* obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - -%#ifdef 
SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEWM_FARRAY2) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DATA_TYPE** ARGOUTVIEWM_FARRAY2) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements,NumPy_Utilities") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DATA_TYPE** ARGOUTVIEWM_FARRAY2) -{ - npy_intp dims[2] = { *$1, *$2 }; - PyObject* obj = PyArray_SimpleNewFromData(2, dims, DATA_TYPECODE, (void*)(*$3)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_ARRAY3, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_ARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) -{ - 
npy_intp dims[3] = { *$2, *$3, *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, - DATA_TYPE** ARGOUTVIEWM_ARRAY3) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DATA_TYPE** ARGOUTVIEWM_ARRAY3) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEWM_ARRAY3) -{ - npy_intp dims[3] = { *$1, *$2, *$3 }; - PyObject* obj= PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_FARRAY3, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp) -{ - 
$1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_FARRAY3, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) -{ - npy_intp dims[3] = { *$2, *$3, *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, - DATA_TYPE** ARGOUTVIEWM_FARRAY3) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DATA_TYPE** ARGOUTVIEWM_FARRAY3) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements,NumPy_Utilities") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DATA_TYPE** ARGOUTVIEWM_FARRAY3) -{ - npy_intp dims[3] = { *$1, *$2, *$3 }; - PyObject* obj = PyArray_SimpleNewFromData(3, dims, DATA_TYPECODE, (void*)(*$4)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = 
SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_ARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3, DIM_TYPE* DIM4) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_ARRAY4, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; - $5 = &dim4_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_ARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) -{ - npy_intp dims[4] = { *$2, *$3, *$4 , *$5 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, - DATA_TYPE** ARGOUTVIEWM_ARRAY4) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 , DATA_TYPE** ARGOUTVIEWM_ARRAY4) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &dim4_temp; - $5 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEWM_ARRAY4) -{ - npy_intp dims[4] = { *$1, *$2, *$3 , *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(4, 
dims, DATA_TYPECODE, (void*)(*$5)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_FARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3, DIM_TYPE* DIM4) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_FARRAY4, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; - $5 = &dim4_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_FARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3) -{ - npy_intp dims[4] = { *$2, *$3, *$4 , *$5 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, - DATA_TYPE** ARGOUTVIEWM_FARRAY4) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 , DATA_TYPE** ARGOUTVIEWM_FARRAY4) - (DIM_TYPE 
dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &dim4_temp; - $5 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements,NumPy_Utilities") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEWM_FARRAY4) -{ - npy_intp dims[4] = { *$1, *$2, *$3 , *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$5)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_ARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3, DIM_TYPE* DIM4) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_ARRAY4, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; - $5 = &dim4_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_ARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) -{ - npy_intp dims[4] = { *$2, *$3, *$4 , *$5 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = 
PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, - DATA_TYPE** ARGOUTVIEWM_ARRAY4) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 , DATA_TYPE** ARGOUTVIEWM_ARRAY4) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &dim4_temp; - $5 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Utilities") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEWM_ARRAY4) -{ - npy_intp dims[4] = { *$1, *$2, *$3 , *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$5)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DATA_TYPE** ARGOUTVIEWM_FARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, - DIM_TYPE* DIM3, DIM_TYPE* DIM4) - */ -%typemap(in,numinputs=0) - (DATA_TYPE** ARGOUTVIEWM_FARRAY4, DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 ) - (DATA_TYPE* data_temp = NULL , DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp) -{ - $1 = &data_temp; - $2 = &dim1_temp; - $3 = &dim2_temp; - $4 = &dim3_temp; - $5 = &dim4_temp; -} -%typemap(argout, - 
fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements,NumPy_Utilities") - (DATA_TYPE** ARGOUTVIEWM_FARRAY4, DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4) -{ - npy_intp dims[4] = { *$2, *$3, *$4 , *$5 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$1)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - $result = SWIG_Python_AppendOutput($result,obj); -} - -/* Typemap suite for (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, - DATA_TYPE** ARGOUTVIEWM_FARRAY4) - */ -%typemap(in,numinputs=0) - (DIM_TYPE* DIM1 , DIM_TYPE* DIM2 , DIM_TYPE* DIM3 , DIM_TYPE* DIM4 , DATA_TYPE** ARGOUTVIEWM_FARRAY4) - (DIM_TYPE dim1_temp, DIM_TYPE dim2_temp, DIM_TYPE dim3_temp, DIM_TYPE dim4_temp, DATA_TYPE* data_temp = NULL ) -{ - $1 = &dim1_temp; - $2 = &dim2_temp; - $3 = &dim3_temp; - $4 = &dim4_temp; - $5 = &data_temp; -} -%typemap(argout, - fragment="NumPy_Backward_Compatibility,NumPy_Array_Requirements,NumPy_Utilities") - (DIM_TYPE* DIM1, DIM_TYPE* DIM2, DIM_TYPE* DIM3, DIM_TYPE* DIM4, DATA_TYPE** ARGOUTVIEWM_FARRAY4) -{ - npy_intp dims[4] = { *$1, *$2, *$3 , *$4 }; - PyObject* obj = PyArray_SimpleNewFromData(4, dims, DATA_TYPECODE, (void*)(*$5)); - PyArrayObject* array = (PyArrayObject*) obj; - - if (!array || !require_fortran(array)) SWIG_fail; - -%#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); -%#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); -%#endif - -%#if NPY_API_VERSION < 0x00000007 - PyArray_BASE(array) = cap; -%#else - PyArray_SetBaseObject(array,cap); -%#endif - - 
$result = SWIG_Python_AppendOutput($result,obj); -} - -/**************************************/ -/* In-Place Array Typemap - flattened */ -/**************************************/ - -/* Typemap suite for (DATA_TYPE* INPLACE_ARRAY_FLAT, DIM_TYPE DIM_FLAT) - */ -%typecheck(SWIG_TYPECHECK_DOUBLE_ARRAY, - fragment="NumPy_Macros") - (DATA_TYPE* INPLACE_ARRAY_FLAT, DIM_TYPE DIM_FLAT) -{ - $1 = is_array($input) && PyArray_EquivTypenums(array_type($input), - DATA_TYPECODE); -} -%typemap(in, - fragment="NumPy_Fragments") - (DATA_TYPE* INPLACE_ARRAY_FLAT, DIM_TYPE DIM_FLAT) - (PyArrayObject* array=NULL, int i=1) -{ - array = obj_to_array_no_conversion($input, DATA_TYPECODE); - if (!array || !require_c_or_f_contiguous(array) - || !require_native(array)) SWIG_fail; - $1 = (DATA_TYPE*) array_data(array); - $2 = 1; - for (i=0; i < array_numdims(array); ++i) $2 *= array_size(array,i); -} - -%enddef /* %numpy_typemaps() macro */ -/* *************************************************************** */ - -/* Concrete instances of the %numpy_typemaps() macro: Each invocation - * below applies all of the typemaps above to the specified data type. 
- */ -%numpy_typemaps(signed char , NPY_BYTE , int) -%numpy_typemaps(unsigned char , NPY_UBYTE , int) -%numpy_typemaps(short , NPY_SHORT , int) -%numpy_typemaps(unsigned short , NPY_USHORT , int) -%numpy_typemaps(int , NPY_INT , int) -%numpy_typemaps(unsigned int , NPY_UINT , int) -%numpy_typemaps(long , NPY_LONG , int) -%numpy_typemaps(unsigned long , NPY_ULONG , int) -%numpy_typemaps(long long , NPY_LONGLONG , int) -%numpy_typemaps(unsigned long long, NPY_ULONGLONG, int) -%numpy_typemaps(float , NPY_FLOAT , int) -%numpy_typemaps(double , NPY_DOUBLE , int) - -/* *************************************************************** - * The follow macro expansion does not work, because C++ bool is 4 - * bytes and NPY_BOOL is 1 byte - * - * %numpy_typemaps(bool, NPY_BOOL, int) - */ - -/* *************************************************************** - * On my Mac, I get the following warning for this macro expansion: - * 'swig/python detected a memory leak of type 'long double *', no destructor found.' 
- * - * %numpy_typemaps(long double, NPY_LONGDOUBLE, int) - */ - -#ifdef __cplusplus - -%include - -%numpy_typemaps(std::complex, NPY_CFLOAT , int) -%numpy_typemaps(std::complex, NPY_CDOUBLE, int) - -#endif - -#endif /* SWIGPYTHON */ diff --git a/paddle/legacy/api/test/.gitignore b/paddle/legacy/api/test/.gitignore deleted file mode 100644 index b7948824a1eab119140dd9bea20276c303fe4af1..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.w0 -*.wbias diff --git a/paddle/legacy/api/test/CMakeLists.txt b/paddle/legacy/api/test/CMakeLists.txt deleted file mode 100644 index 13cb79129cc2272d215cdb475fb146b37266699e..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/testTrain.py - COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/*.py ${CMAKE_CURRENT_BINARY_DIR} -) -add_custom_target(copy_api_test ALL DEPENDS testTrain.py) - -py_test(testTrain SRCS testTrain.py) -py_test(testMatrix SRCS testMatrix.py) -py_test(testVector SRCS testVector.py) -py_test(testTrainer SRCS testTrainer.py) -py_test(testArguments SRCS testArguments.py) -py_test(testGradientMachine SRCS testGradientMachine.py) diff --git a/paddle/legacy/api/test/testArguments.py b/paddle/legacy/api/test/testArguments.py deleted file mode 100644 index 4d40ffec9a030bf756a515266b2c33915fcc4e10..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/testArguments.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from py_paddle import swig_paddle -import numpy as np -import unittest - - -class TestArguments(unittest.TestCase): - def test_load_arguments(self): - m = swig_paddle.Matrix.createDense([4, 2, 4, 3, 9, 5], 2, 3) - args = swig_paddle.Arguments.createArguments(1) - args.setSlotValue(0, m) - - self.assertAlmostEqual(27.0, args.sum()) - - mat = args.getSlotValue(0) - assert isinstance(mat, swig_paddle.Matrix) - np_mat = mat.toNumpyMatInplace() - # The matrix unittest is in testMatrix.py - self.assertEqual(np_mat.shape, (2, 3)) - - args.setSlotIds(0, swig_paddle.IVector.create([1, 2, 3, 4, 5, 6])) - iv = args.getSlotIds(0) - assert isinstance(iv, swig_paddle.IVector) - np_arr = iv.toNumpyArrayInplace() - self.assertEqual(np_arr.shape, (6, )) - - def test_arguments_shape(self): - h, w = 4, 6 - v = np.random.rand(2, h * w) - m = swig_paddle.Matrix.createDense(v.flatten(), 2, h * w) - args = swig_paddle.Arguments.createArguments(1) - args.setSlotValue(0, m) - args.setSlotFrameHeight(0, h) - args.setSlotFrameWidth(0, w) - self.assertEqual(args.getSlotFrameHeight(), h) - self.assertEqual(args.getSlotFrameWidth(), w) - - -if __name__ == '__main__': - swig_paddle.initPaddle("--use_gpu=0") - unittest.main() diff --git a/paddle/legacy/api/test/testGradientMachine.py b/paddle/legacy/api/test/testGradientMachine.py deleted file mode 100644 index 4b705f66eccd267f326fe0662a17b33a09fda982..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/testGradientMachine.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from py_paddle import swig_paddle -import paddle.proto.ParameterConfig_pb2 -import util -import unittest -import numpy - - -class TestGradientMachine(unittest.TestCase): - def test_create_gradient_machine(self): - conf_file_path = "./testTrainConfig.py" - trainer_config = swig_paddle.TrainerConfig.createFromTrainerConfigFile( - conf_file_path) - self.assertIsNotNone(trainer_config) - opt_config = trainer_config.getOptimizationConfig() - model_config = trainer_config.getModelConfig() - self.assertIsNotNone(model_config) - machine = swig_paddle.GradientMachine.createByModelConfig( - model_config, swig_paddle.CREATE_MODE_NORMAL, - swig_paddle.ParameterOptimizer.create(opt_config).getParameterTypes( - )) - self.assertIsNotNone(machine) - ipt, _ = util.loadMNISTTrainData() - output = swig_paddle.Arguments.createArguments(0) - - optimizers = {} - - # Initial Machine Parameter all to 0.1 - for param in machine.getParameters(): - assert isinstance(param, swig_paddle.Parameter) - val = param.getBuf(swig_paddle.PARAMETER_VALUE) - assert isinstance(val, swig_paddle.Vector) - arr = numpy.full((len(val), ), 0.1, dtype="float32") - val.copyFromNumpyArray(arr) - self.assertTrue(param.save(param.getName())) - param_config = param.getConfig().toProto() - assert isinstance(param_config, - paddle.proto.ParameterConfig_pb2.ParameterConfig) - opt = swig_paddle.ParameterOptimizer.create(opt_config) - 
optimizers[param.getID()] = opt - num_rows = param_config.dims[1] - opt.init(num_rows, param.getConfig()) - - for k in optimizers: - opt = optimizers[k] - opt.startPass() - - batch_size = ipt.getSlotValue(0).getHeight() - for k in optimizers: - opt = optimizers[k] - opt.startBatch(batch_size) - - machine.forward(ipt, output, swig_paddle.PASS_TRAIN) - self.assertEqual(1, output.getSlotNum()) - self.isCalled = False - - def backward_callback(param_): - self.isCalled = isinstance(param_, swig_paddle.Parameter) - assert isinstance(param_, swig_paddle.Parameter) - vec = param_.getBuf(swig_paddle.PARAMETER_VALUE) - assert isinstance(vec, swig_paddle.Vector) - vec = vec.copyToNumpyArray() - for val_ in vec: - self.assertTrue( - util.doubleEqual(val_, 0.1)) # Assert All Value is 0.1 - - vecs = list(param_.getBufs()) - opt_ = optimizers[param_.getID()] - opt_.update(vecs, param_.getConfig()) - - machine.backward(backward_callback) - - for k in optimizers: - opt = optimizers[k] - opt.finishBatch() - - for k in optimizers: - opt = optimizers[k] - opt.finishPass() - - self.assertTrue(self.isCalled) - - for param in machine.getParameters(): - self.assertTrue(param.load(param.getName())) - - def test_train_one_pass(self): - conf_file_path = './testTrainConfig.py' - trainer_config = swig_paddle.TrainerConfig.createFromTrainerConfigFile( - conf_file_path) - model_config = trainer_config.getModelConfig() - machine = swig_paddle.GradientMachine.createByModelConfig(model_config) - - at_end = False - - output = swig_paddle.Arguments.createArguments(0) - if not at_end: - input_, at_end = util.loadMNISTTrainData(1000) - machine.forwardBackward(input_, output, swig_paddle.PASS_TRAIN) - - -if __name__ == '__main__': - swig_paddle.initPaddle('--use_gpu=0') - unittest.main() diff --git a/paddle/legacy/api/test/testMatrix.py b/paddle/legacy/api/test/testMatrix.py deleted file mode 100644 index f08fbf3ccdf5d7c0a5c739868b1bcb516146c23d..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/api/test/testMatrix.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from py_paddle import swig_paddle -import numpy as np -import unittest - - -class TestMatrix(unittest.TestCase): - def test_createZero_get_set(self): - m = swig_paddle.Matrix.createZero(32, 24) - self.assertEqual(m.getWidth(), 24) - self.assertEqual(m.getHeight(), 32) - for x in xrange(24): - for y in xrange(32): - self.assertEqual(0.0, m.get(x, y)) - with self.assertRaises(swig_paddle.RangeError): - m.get(51, 47) - m.set(3, 3, 3.0) - self.assertEqual(m.get(3, 3), 3.0) - - def test_sparse(self): - m = swig_paddle.Matrix.createSparse(3, 3, 6, True, False, False) - self.assertIsNotNone(m) - self.assertTrue(m.isSparse()) - self.assertEqual(m.getSparseValueType(), swig_paddle.SPARSE_NON_VALUE) - self.assertEqual(m.getSparseFormat(), swig_paddle.SPARSE_CSR) - m.sparseCopyFrom([0, 2, 3, 3], [0, 1, 2], []) - self.assertEqual(m.getSparseRowCols(0), [0, 1]) - self.assertEqual(m.getSparseRowCols(1), [2]) - self.assertEqual(m.getSparseRowCols(2), []) - - def test_sparse_value(self): - m = swig_paddle.Matrix.createSparse(3, 3, 6, False, False, False) - self.assertIsNotNone(m) - m.sparseCopyFrom([0, 2, 3, 3], [0, 1, 2], [7.3, 4.2, 3.2]) - - def assertKVArraySame(actual, expect): - self.assertEqual(len(actual), len(expect)) - for i in xrange(len(actual)): - a = actual[i] - e = expect[i] - 
self.assertIsInstance(a, tuple) - self.assertIsInstance(e, tuple) - self.assertEqual(len(a), 2) - self.assertEqual(len(e), 2) - self.assertEqual(a[0], e[0]) - self.assertTrue(abs(a[1] - e[1]) < 1e-5) - - first_row = m.getSparseRowColsVal(0) - assertKVArraySame(first_row, [(0, 7.3), (1, 4.2)]) - - def test_createDenseMat(self): - m = swig_paddle.Matrix.createDense([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], 2, 3) - self.assertIsNotNone(m) - self.assertTrue(abs(m.get(1, 1) - 0.5) < 1e-5) - - def test_numpyCpu(self): - numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32") - m = swig_paddle.Matrix.createCpuDenseFromNumpy(numpy_mat, False) - self.assertEqual((int(m.getHeight()), int(m.getWidth())), - numpy_mat.shape) - - # the numpy matrix and paddle matrix shared the same memory. - numpy_mat[0, 1] = 342.23 - - for h in xrange(m.getHeight()): - for w in xrange(m.getWidth()): - self.assertEqual(m.get(h, w), numpy_mat[h, w]) - - mat2 = m.toNumpyMatInplace() - mat2[1, 1] = 32.2 - self.assertTrue(np.array_equal(mat2, numpy_mat)) - - def test_numpyGpu(self): - if swig_paddle.isGpuVersion(): - numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype='float32') - gpu_m = swig_paddle.Matrix.createGpuDenseFromNumpy(numpy_mat) - assert isinstance(gpu_m, swig_paddle.Matrix) - self.assertEqual((int(gpu_m.getHeight()), int(gpu_m.getWidth())), - numpy_mat.shape) - self.assertTrue(gpu_m.isGpu()) - numpy_mat = gpu_m.copyToNumpyMat() - numpy_mat[0, 1] = 3.23 - for a, e in zip(gpu_m.getData(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]): - self.assertAlmostEqual(a, e) - - gpu_m.copyFromNumpyMat(numpy_mat) - - for a, e in zip(gpu_m.getData(), [1.0, 3.23, 3.0, 4.0, 5.0, 6.0]): - self.assertAlmostEqual(a, e) - - def test_numpy(self): - numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32") - m = swig_paddle.Matrix.createDenseFromNumpy(numpy_mat) - self.assertEqual((int(m.getHeight()), int(m.getWidth())), - numpy_mat.shape) - self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu()) - for a, e in 
zip(m.getData(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]): - self.assertAlmostEqual(a, e) - - -if __name__ == "__main__": - swig_paddle.initPaddle("--use_gpu=0") - suite = unittest.TestLoader().loadTestsFromTestCase(TestMatrix) - unittest.TextTestRunner().run(suite) - if swig_paddle.isGpuVersion(): - swig_paddle.setUseGpu(True) - unittest.main() diff --git a/paddle/legacy/api/test/testTrain.py b/paddle/legacy/api/test/testTrain.py deleted file mode 100644 index 7061a4c43bf01158b5f084d0c310dedd81773a04..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/testTrain.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from py_paddle import swig_paddle -import paddle.trainer.config_parser -import numpy -import util - - -def init_params(params): - def init_param(p): - assert isinstance(p, swig_paddle.Parameter) - val = p.getBuf(swig_paddle.PARAMETER_VALUE) - assert isinstance(val, swig_paddle.Vector) - arr = val.toNumpyArrayInplace() - for i in xrange(len(arr)): - arr[i] = numpy.random.uniform(-1.0, 1.0) - - for p in params: - init_param(p) - - -def init_optimizers(opt_conf, params): - opts = {} - for param in params: - param_conf = param.getConfig().toProto() - opts[param.getID()] = swig_paddle.ParameterOptimizer.create(opt_conf) - opts[param.getID()].init(param_conf.dims[1], param.getConfig()) - retv_opts = [None for _ in xrange(len(opts))] - for k in opts: - assert k < len(retv_opts) - retv_opts[k] = opts[k] - return retv_opts - - -def main(): - trainer_config = paddle.trainer.config_parser.parse_config( - "./testTrainConfig.py", "") - opt_config = trainer_config.opt_config - print "========Optimization Config =======" - print opt_config - print "===================================" - opt_config = swig_paddle.OptimizationConfig.createFromProto(opt_config) - _temp_optimizer_ = swig_paddle.ParameterOptimizer.create(opt_config) - enable_types = _temp_optimizer_.getParameterTypes() - m = swig_paddle.GradientMachine.createFromConfigProto( - trainer_config.model_config, swig_paddle.CREATE_MODE_NORMAL, - enable_types) - assert m is not None - assert isinstance(m, swig_paddle.GradientMachine) - init_params(m.getParameters()) - - optimizers = init_optimizers(opt_config, m.getParameters()) - - # Train One Pass. 
- for optimizer in optimizers: - optimizer.startPass() - batch_id = 0 - while True: # Train one batch - batch_size = 1000 - inArgs, atEnd = util.loadMNISTTrainData(batch_size) - if atEnd: - break - outArgs = swig_paddle.Arguments.createArguments(0) - - for optimizer in optimizers: - optimizer.startBatch(batch_size) - - def update_callback(param): - try: - bufs = list(param.getBufs()) - opt = optimizers[param.getID()] - opt.update(bufs, param.getConfig()) - callback = opt.needSpecialTraversal(param.getConfig()) - if callback is not None: - callback(bufs, param.getConfig(), swig_paddle.NO_SPARSE_ID) - - except Exception as e: - print e - - ev = m.makeEvaluator() - ev.start() - m.forwardBackward(inArgs, outArgs, swig_paddle.PASS_TRAIN, - update_callback) - m.eval(ev) - ev.finish() - for name in ev.getNames(): - print name, ev.getValue(name) - for optimizer in optimizers: - optimizer.finishBatch() - - cost_vec = outArgs.getSlotValue(0) - assert isinstance(cost_vec, swig_paddle.Matrix) - cost_vec = cost_vec.copyToNumpyMat() - print 'Finish Batch', batch_id, 'with cost ', cost_vec.sum( - ) / batch_size - batch_id += 1 - - for optimizer in optimizers: - optimizer.finishPass() - - -if __name__ == '__main__': - swig_paddle.initPaddle("--use_gpu=0", "--trainer_count=1") - main() diff --git a/paddle/legacy/api/test/testTrainConfig.py b/paddle/legacy/api/test/testTrainConfig.py deleted file mode 100644 index c02d61ebad53faad6abd61d77e2c4d48f76e67af..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/testTrainConfig.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=100, learning_method=AdamOptimizer()) - -din = data_layer(name='input', size=784) - -fc1 = fc_layer(name='hidden1', input=din, size=100) -fc2 = fc_layer(name='hidden2', input=fc1, size=100) - -opt = fc_layer(input=fc2, size=10, act=SoftmaxActivation()) -outputs(classification_cost(input=opt, label=data_layer('lbl', 10))) diff --git a/paddle/legacy/api/test/testTrainer.py b/paddle/legacy/api/test/testTrainer.py deleted file mode 100644 index a76cbf02d83ac5ad82a96deee43c4afd104266a2..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/testTrainer.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer.config_parser import parse_config -from paddle.trainer.config_parser import logger -from py_paddle import swig_paddle -import util - - -def main(): - trainer_config = parse_config("./testTrainConfig.py", "") - model = swig_paddle.GradientMachine.createFromConfigProto( - trainer_config.model_config) - trainer = swig_paddle.Trainer.create(trainer_config, model) - trainer.startTrain() - for train_pass in xrange(2): - trainer.startTrainPass() - num = 0 - cost = 0 - while True: # Train one batch - batch_size = 1000 - data, atEnd = util.loadMNISTTrainData(batch_size) - if atEnd: - break - trainer.trainOneDataBatch(batch_size, data) - outs = trainer.getForwardOutput() - cost += sum(outs[0]['value']) - num += batch_size - trainer.finishTrainPass() - logger.info('train cost=%f' % (cost / num)) - - trainer.startTestPeriod() - num = 0 - cost = 0 - while True: # Test one batch - batch_size = 1000 - data, atEnd = util.loadMNISTTrainData(batch_size) - if atEnd: - break - trainer.testOneDataBatch(batch_size, data) - outs = trainer.getForwardOutput() - cost += sum(outs[0]['value']) - num += batch_size - trainer.finishTestPeriod() - logger.info('test cost=%f' % (cost / num)) - - trainer.finishTrain() - - -if __name__ == '__main__': - swig_paddle.initPaddle("--use_gpu=0", "--trainer_count=1") - main() diff --git a/paddle/legacy/api/test/testVector.py b/paddle/legacy/api/test/testVector.py deleted file mode 100644 index 6339cf8542607bdda99eb9ccaa8b06480f144b78..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/testVector.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from py_paddle import swig_paddle -import util -import numpy as np -import unittest - - -class TestIVector(unittest.TestCase): - def test_createZero(self): - m = swig_paddle.IVector.createZero(10, False) - self.assertIsNotNone(m) - for i in xrange(10): - self.assertEqual(m[i], 0) - m[i] = i - self.assertEqual(m[i], i) - - m = swig_paddle.IVector.createZero(10) - self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu()) - self.assertEqual(m.getData(), [0] * 10) - - def test_create(self): - m = swig_paddle.IVector.create(range(10), False) - self.assertIsNotNone(m) - for i in xrange(10): - self.assertEqual(m[i], i) - - m = swig_paddle.IVector.create(range(10)) - self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu()) - self.assertEqual(m.getData(), range(10)) - - def test_cpu_numpy(self): - vec = np.array([1, 3, 4, 65, 78, 1, 4], dtype="int32") - iv = swig_paddle.IVector.createCpuVectorFromNumpy(vec, False) - self.assertEqual(vec.shape[0], int(iv.__len__())) - vec[4] = 832 - for i in xrange(len(iv)): - self.assertEqual(vec[i], iv[i]) - vec2 = iv.toNumpyArrayInplace() - vec2[1] = 384 - for i in xrange(len(iv)): - self.assertEqual(vec[i], iv[i]) - self.assertEqual(vec2[i], iv[i]) - - def test_gpu_numpy(self): - if swig_paddle.isGpuVersion(): - vec = swig_paddle.IVector.create(range(0, 10), True) - assert isinstance(vec, swig_paddle.IVector) - self.assertTrue(vec.isGpu()) - self.assertEqual(vec.getData(), range(0, 10)) - num_arr = vec.copyToNumpyArray() - assert isinstance(num_arr, np.ndarray) # for code hint. 
- num_arr[4] = 7 - self.assertEquals(vec.getData(), range(0, 10)) - - vec.copyFromNumpyArray(num_arr) - expect_vec = range(0, 10) - expect_vec[4] = 7 - self.assertEqual(vec.getData(), expect_vec) - - def test_numpy(self): - vec = np.array([1, 3, 4, 65, 78, 1, 4], dtype="int32") - iv = swig_paddle.IVector.createVectorFromNumpy(vec) - self.assertEqual(iv.isGpu(), swig_paddle.isUsingGpu()) - self.assertEqual(iv.getData(), list(vec)) - - -class TestVector(unittest.TestCase): - def testCreateZero(self): - v = swig_paddle.Vector.createZero(10, False) - self.assertIsNotNone(v) - for i in xrange(len(v)): - self.assertTrue(util.doubleEqual(v[i], 0)) - v[i] = i - self.assertTrue(util.doubleEqual(v[i], i)) - - v = swig_paddle.Vector.createZero(10) - self.assertEqual(v.isGpu(), swig_paddle.isUsingGpu()) - self.assertEqual(v.getData(), [0] * 10) - - def testCreate(self): - v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)], False) - self.assertIsNotNone(v) - for i in xrange(len(v)): - self.assertTrue(util.doubleEqual(v[i], i / 100.0)) - self.assertEqual(100, len(v)) - - v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)]) - self.assertEqual(v.isGpu(), swig_paddle.isUsingGpu()) - self.assertEqual(100, len(v)) - vdata = v.getData() - for i in xrange(len(v)): - self.assertTrue(util.doubleEqual(vdata[i], i / 100.0)) - - def testCpuNumpy(self): - numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32") - vec = swig_paddle.Vector.createCpuVectorFromNumpy(numpy_arr, False) - assert isinstance(vec, swig_paddle.Vector) - numpy_arr[0] = 0.1 - for n, v in zip(numpy_arr, vec): - self.assertTrue(util.doubleEqual(n, v)) - - numpy_2 = vec.toNumpyArrayInplace() - vec[0] = 1.3 - for x, y in zip(numpy_arr, numpy_2): - self.assertTrue(util.doubleEqual(x, y)) - - for x, y in zip(numpy_arr, vec): - self.assertTrue(util.doubleEqual(x, y)) - - numpy_3 = vec.copyToNumpyArray() - numpy_3[0] = 0.4 - self.assertTrue(util.doubleEqual(vec[0], 1.3)) - 
self.assertTrue(util.doubleEqual(numpy_3[0], 0.4)) - - for i in xrange(1, len(numpy_3)): - util.doubleEqual(numpy_3[i], vec[i]) - - def testNumpy(self): - numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32") - vec = swig_paddle.Vector.createVectorFromNumpy(numpy_arr) - self.assertEqual(vec.isGpu(), swig_paddle.isUsingGpu()) - vecData = vec.getData() - for n, v in zip(numpy_arr, vecData): - self.assertTrue(util.doubleEqual(n, v)) - - def testCopyFromNumpy(self): - vec = swig_paddle.Vector.createZero(1, False) - arr = np.array([1.3, 3.2, 2.4], dtype="float32") - vec.copyFromNumpyArray(arr) - for i in xrange(len(vec)): - self.assertTrue(util.doubleEqual(vec[i], arr[i])) - - -if __name__ == '__main__': - swig_paddle.initPaddle("--use_gpu=0") - suite = unittest.TestLoader().loadTestsFromTestCase(TestVector) - unittest.TextTestRunner().run(suite) - if swig_paddle.isGpuVersion(): - swig_paddle.setUseGpu(True) - unittest.main() diff --git a/paddle/legacy/api/test/util.py b/paddle/legacy/api/test/util.py deleted file mode 100644 index 9f4631c53e11d55f9a2638f98c52ba2f5e955b37..0000000000000000000000000000000000000000 --- a/paddle/legacy/api/test/util.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import random - -import numpy as np -from py_paddle import swig_paddle - - -def doubleEqual(a, b): - return abs(a - b) < 1e-5 - - -def __readFromFile(): - for i in xrange(10002): - label = np.random.randint(0, 9) - sample = np.random.rand(784) + 0.1 * label - yield sample, label - - -def loadMNISTTrainData(batch_size=100): - if not hasattr(loadMNISTTrainData, "gen"): - generator = __readFromFile() - loadMNISTTrainData.gen = generator - else: - generator = loadMNISTTrainData.gen - args = swig_paddle.Arguments.createArguments(2) - # batch_size = 100 - - dense_slot = [] - id_slot = [] - atEnd = False - - for _ in xrange(batch_size): - try: - result = generator.next() - dense_slot.extend(result[0]) - id_slot.append(result[1]) - except StopIteration: - atEnd = True - del loadMNISTTrainData.gen - break - - dense_slot = swig_paddle.Matrix.createDense(dense_slot, batch_size, 784) - id_slot = swig_paddle.IVector.create(id_slot) - args.setSlotValue(0, dense_slot) - args.setSlotIds(1, id_slot) - return args, atEnd diff --git a/paddle/legacy/capi/Arguments.cpp b/paddle/legacy/capi/Arguments.cpp deleted file mode 100644 index 0ce1770c76c2e145d0b2bf71332cc4593517f195..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/Arguments.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "arguments.h" -#include "capi_private.h" - -using paddle::capi::cast; - -#define castArg(v) cast(v) -#define castIVec(v) cast(v) - -extern "C" { -paddle_arguments paddle_arguments_create_none() { - return new paddle::capi::CArguments(); -} - -paddle_error paddle_arguments_destroy(paddle_arguments args) { - if (args == nullptr) return kPD_NULLPTR; - delete castArg(args); - return kPD_NO_ERROR; -} - -paddle_error paddle_arguments_get_size(paddle_arguments args, uint64_t* size) { - if (args == nullptr || size == nullptr) return kPD_NULLPTR; - *size = castArg(args)->args.size(); - return kPD_NO_ERROR; -} - -paddle_error paddle_arguments_resize(paddle_arguments args, uint64_t size) { - if (args == nullptr) return kPD_NULLPTR; - castArg(args)->args.resize(size); - return kPD_NO_ERROR; -} - -paddle_error paddle_arguments_set_value(paddle_arguments args, - uint64_t ID, - paddle_matrix mat) { - if (args == nullptr || mat == nullptr) return kPD_NULLPTR; - auto m = paddle::capi::cast(mat); - if (m->mat == nullptr) return kPD_NULLPTR; - auto a = castArg(args); - if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; - a->args[ID].value = m->mat; - return kPD_NO_ERROR; -} - -paddle_error paddle_arguments_get_value(paddle_arguments args, - uint64_t ID, - paddle_matrix mat) { - if (args == nullptr || mat == nullptr) return kPD_NULLPTR; - auto m = paddle::capi::cast(mat); - auto a = castArg(args); - if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; - m->mat = a->args[ID].value; - return kPD_NO_ERROR; -} - -PD_API paddle_error paddle_arguments_get_prob(paddle_arguments args, - uint64_t ID, - paddle_matrix mat) { - if (args == nullptr || mat == nullptr) return kPD_NULLPTR; - auto m = paddle::capi::cast(mat); - auto a = castArg(args); - if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; - m->mat = a->args[ID].in; - return kPD_NO_ERROR; -} - -paddle_error paddle_arguments_get_ids(paddle_arguments args, - uint64_t ID, - paddle_ivector ids) { - if (args == nullptr || ids == 
nullptr) return kPD_NULLPTR; - auto iv = castIVec(ids); - auto a = castArg(args); - if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; - iv->vec = a->args[ID].ids; - return kPD_NO_ERROR; -} - -paddle_error paddle_arguments_set_ids(paddle_arguments args, - uint64_t ID, - paddle_ivector ids) { - //! TODO(lizhao): Complete this method. - if (args == nullptr || ids == nullptr) return kPD_NULLPTR; - auto iv = paddle::capi::cast(ids); - if (iv->vec == nullptr) return kPD_NULLPTR; - auto a = castArg(args); - if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; - a->args[ID].ids = iv->vec; - return kPD_NO_ERROR; -} - -paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, - uint64_t ID, - uint64_t frameHeight, - uint64_t frameWidth) { - if (args == nullptr) return kPD_NULLPTR; - auto a = castArg(args); - if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; - a->args[ID].setFrameHeight(frameHeight); - a->args[ID].setFrameWidth(frameWidth); - return kPD_NO_ERROR; -} - -paddle_error paddle_arguments_set_sequence_start_pos(paddle_arguments args, - uint64_t ID, - uint32_t nestedLevel, - paddle_ivector seqPos) { - if (args == nullptr || seqPos == nullptr) return kPD_NULLPTR; - auto iv = paddle::capi::cast(seqPos); - if (iv->vec == nullptr) return kPD_NULLPTR; - auto a = castArg(args); - return a->accessSeqPos(ID, nestedLevel, [&iv](paddle::ICpuGpuVectorPtr& ptr) { - ptr = std::make_shared(iv->vec); - }); -} - -paddle_error paddle_arguments_get_sequence_start_pos(paddle_arguments args, - uint64_t ID, - uint32_t nestedLevel, - paddle_ivector seqPos) { - if (args == nullptr || seqPos == nullptr) return kPD_NULLPTR; - auto iv = paddle::capi::cast(seqPos); - auto a = castArg(args); - return a->accessSeqPos(ID, nestedLevel, [&iv](paddle::ICpuGpuVectorPtr& ptr) { - iv->vec = ptr->getMutableVector(false); - }); -} -} diff --git a/paddle/legacy/capi/CMakeLists.txt b/paddle/legacy/capi/CMakeLists.txt deleted file mode 100644 index 
957b1a3e6b07b058a76605992da387b43657146a..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/CMakeLists.txt +++ /dev/null @@ -1,118 +0,0 @@ -if (WITH_DOUBLE) - set(PADDLE_FLOAT_TYPE double) -else () - set(PADDLE_FLOAT_TYPE float) -endif() - -execute_process( - COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} - OUTPUT_VARIABLE PADDLE_GIT_COMMIT - RESULT_VARIABLE PADDLE_GIT_COMMIT_RESULT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) -if(NOT PADDLE_GIT_COMMIT) - set(PADDLE_GIT_COMMIT "no commit information") -endif() - -# config.h used for C-API. It will store Paddle building configuration as a -# header. Make user just include PaddleCAPI.h then can get building -# configuration without explicitly set -DPADDLE_WITH_DOUBLE when building their -# libraries. -configure_file(config.h.in config.h @ONLY) - -# PaddleCAPI.h is the only header we exposed. It currently only used for model -# inference. -file(GLOB CAPI_HEADERS *.h) -set(CAPI_PRIVATE_HEADER capi_private.h) -list(REMOVE_ITEM CAPI_HEADERS ${CAPI_PRIVATE_HEADER}) -file(GLOB CAPI_SOURCES *.cpp) - -# building paddle_capi -add_library(paddle_capi STATIC ${CAPI_HEADERS} ${CAPI_PRIVATE_HEADER} - ${CAPI_SOURCES}) - -target_include_directories(paddle_capi PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) - -add_dependencies(paddle_capi paddle_proto paddle_gserver) - -# TODO: paddle_capi_whole will be removed. 
-set(PADDLE_CAPI_LAYERS_LIBS - paddle_function - paddle_gserver) -if(MOBILE_INFERENCE) - set(PADDLE_CAPI_ENGINE_LIBS - paddle_utils - paddle_parameter - paddle_math - paddle_cuda - paddle_proto) -else() - set(PADDLE_CAPI_ENGINE_LIBS - paddle_utils - paddle_parameter - paddle_math - paddle_cuda - paddle_proto - paddle_pserver - paddle_network) -endif() -set(PADDLE_CAPI_INFER_LIBS ${PADDLE_CAPI_LAYERS_LIBS} ${PADDLE_CAPI_ENGINE_LIBS}) -cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS}) - -# Link the static library for inference -cc_library(paddle_capi_engine DEPS paddle_capi ${PADDLE_CAPI_ENGINE_LIBS}) -cc_library(paddle_capi_layers DEPS ${PADDLE_CAPI_LAYERS_LIBS}) - -# Link the shared library for inference -if(NOT IOS) - set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_capi.map") - add_library(paddle_capi_shared SHARED ${CAPI_SOURCES}) - set_target_properties(paddle_capi_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}") - target_include_directories(paddle_capi_shared PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) - link_paddle_exe(paddle_capi_shared) -endif() - -# install library & headers. 
-install(FILES ${CAPI_HEADERS} DESTINATION include/paddle) -install(FILES paddle_capi.map DESTINATION include/paddle) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h DESTINATION include/paddle) -if(ANDROID) - install(TARGETS paddle_capi_whole paddle_capi_engine paddle_capi_layers paddle_capi_shared - ARCHIVE DESTINATION lib/${ANDROID_ABI} - LIBRARY DESTINATION lib/${ANDROID_ABI}) - execute_process( - COMMAND ${GIT_EXECUTABLE} log --pretty=oneline -1 - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMITS_LIST - RESULT_VARIABLE GIT_COMMITS_LIST_RESULT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if(${GIT_COMMITS_LIST_RESULT}) - set(GIT_COMMITS_LIST "No commits.") - endif() - install(CODE "FILE(WRITE ${CMAKE_INSTALL_PREFIX}/lib/${ANDROID_ABI}/BUILD.txt - \"Compiler:\n\" - \"\\t${CMAKE_C_COMPILER}\\n\" - \"\\t${CMAKE_CXX_COMPILER}\\n\" - \"Compiler Flags:\\n\" - \"\\t${CMAKE_F_FLAGS}\\n\" - \"\\t${CMAKE_CXX_FLAGS}\\n\" - \"Android API: ${CMAKE_SYSTEM_VERSION}\\n\" - \"Lastest commit:\\n\" - \"\\t${GIT_COMMITS_LIST}\\n\" - )" - ) -else(ANDROID) - install(TARGETS paddle_capi_whole paddle_capi_engine paddle_capi_layers ARCHIVE DESTINATION lib) - if(NOT IOS) - install(TARGETS paddle_capi_shared DESTINATION lib) - endif() -endif(ANDROID) - -# this variable used for unittest -set(PADDLE_CAPI_INC_PATH - ${CMAKE_CURRENT_BINARY_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}) - -if (WITH_TESTING) - add_subdirectory(tests) -endif() diff --git a/paddle/legacy/capi/Main.cpp b/paddle/legacy/capi/Main.cpp deleted file mode 100644 index 17d8f00a88a9fd0818e6b90f8f6888b7d793a46e..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/Main.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include "capi_private.h" -#include "main.h" -#include "paddle/legacy/trainer/TrainerConfigHelper.h" -#include "paddle/legacy/utils/Excepts.h" -#include "paddle/legacy/utils/PythonUtil.h" - -static void initPaddle(int argc, char** argv) { - paddle::initMain(argc, argv); - paddle::initPython(argc, argv); -} - -extern "C" { -paddle_error paddle_init(int argc, char** argv) { - static bool isInit = false; - if (isInit) return kPD_NO_ERROR; - - std::vector realArgv; - realArgv.reserve(argc + 1); - realArgv.push_back(strdup("")); - for (int i = 0; i < argc; ++i) { - realArgv.push_back(argv[i]); - } - initPaddle(argc + 1, realArgv.data()); - free(realArgv[0]); - isInit = true; - return kPD_NO_ERROR; -} - -paddle_error paddle_init_thread() { - if (FLAGS_use_gpu) { - hl_init(FLAGS_gpu_id); - } - return kPD_NO_ERROR; -} -} diff --git a/paddle/legacy/capi/Matrix.cpp b/paddle/legacy/capi/Matrix.cpp deleted file mode 100644 index 733d49cacfda17ad19b7bd7918be73c1fd14a64f..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/Matrix.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "capi_private.h" -#include "hl_cuda.h" -#include "matrix.h" - -#define cast(v) paddle::capi::cast(v) -extern "C" { -paddle_matrix paddle_matrix_create(uint64_t height, - uint64_t width, - bool useGpu) { - auto ptr = new paddle::capi::CMatrix(); - ptr->mat = paddle::Matrix::create(height, width, false, useGpu); - return ptr; -} - -paddle_matrix paddle_matrix_create_none() { - return new paddle::capi::CMatrix(); -} - -paddle_error paddle_matrix_destroy(paddle_matrix mat) { - if (mat == nullptr) return kPD_NULLPTR; - auto ptr = cast(mat); - delete ptr; - return kPD_NO_ERROR; -} - -paddle_error paddle_matrix_set_row(paddle_matrix mat, - uint64_t rowID, - paddle_real* rowArray) { - if (mat == nullptr || rowArray == nullptr) return kPD_NULLPTR; - auto ptr = cast(mat); - if (ptr->mat == nullptr) return kPD_NULLPTR; - if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE; - paddle::real* buf = ptr->mat->getRowBuf(rowID); - size_t width = ptr->mat->getWidth(); -#ifdef PADDLE_WITH_CUDA - hl_memcpy(buf, rowArray, sizeof(paddle::real) * width); -#else - std::copy(rowArray, rowArray + width, buf); -#endif - return kPD_NO_ERROR; -} - -PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat, - paddle_real* value) { - if (mat == nullptr || value == nullptr) return kPD_NULLPTR; - auto ptr = cast(mat); - if (ptr->mat == nullptr) return kPD_NULLPTR; - paddle::real* buf = ptr->mat->getRowBuf(0); - size_t width = ptr->mat->getWidth(); - size_t height = ptr->mat->getHeight(); - if (ptr->mat->useGpu()) { -#ifdef PADDLE_WITH_CUDA - hl_memcpy(buf, value, 
sizeof(paddle::real) * width * height); -#else - return kPD_NOT_SUPPORTED; -#endif - } else { - std::copy(value, value + width * height, buf); - } - return kPD_NO_ERROR; -} - -PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat, - paddle_real* result) { - if (mat == nullptr || result == nullptr) return kPD_NULLPTR; - auto ptr = cast(mat); - if (ptr->mat == nullptr) return kPD_NULLPTR; - paddle::real* buf = ptr->mat->getRowBuf(0); - size_t width = ptr->mat->getWidth(); - size_t height = ptr->mat->getHeight(); - if (ptr->mat->useGpu()) { -#ifdef PADDLE_WITH_CUDA - hl_memcpy(result, buf, width * height * sizeof(paddle::real)); -#else - return kPD_NOT_SUPPORTED; -#endif - } else { - std::copy(buf, buf + width * height, result); - } - return kPD_NO_ERROR; -} - -paddle_error paddle_matrix_get_row(paddle_matrix mat, - uint64_t rowID, - paddle_real** rawRowBuffer) { - if (mat == nullptr) return kPD_NULLPTR; - auto ptr = cast(mat); - if (ptr->mat == nullptr) return kPD_NULLPTR; - if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE; - *rawRowBuffer = ptr->mat->getRowBuf(rowID); - return kPD_NO_ERROR; -} - -paddle_error paddle_matrix_get_shape(paddle_matrix mat, - uint64_t* height, - uint64_t* width) { - if (mat == nullptr || cast(mat)->mat == nullptr) return kPD_NULLPTR; - if (height != nullptr) { - *height = cast(mat)->mat->getHeight(); - } - if (width != nullptr) { - *width = cast(mat)->mat->getWidth(); - } - return kPD_NO_ERROR; -} -} - -paddle_matrix paddle_matrix_create_sparse( - uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) { -#ifndef PADDLE_MOBILE_INFERENCE - auto ptr = new paddle::capi::CMatrix(); - ptr->mat = paddle::Matrix::createSparseMatrix( - height, - width, - nnz, - isBinary ? 
paddle::NO_VALUE : paddle::FLOAT_VALUE, - paddle::SPARSE_CSR, - false, - useGpu); - return ptr; -#else - return nullptr; -#endif -} - -paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, - int* rowArray, - uint64_t rowSize, - int* colArray, - uint64_t colSize, - float* valueArray, - uint64_t valueSize) { -#ifndef PADDLE_MOBILE_INFERENCE - if (mat == nullptr) return kPD_NULLPTR; - auto ptr = cast(mat); - if (rowArray == nullptr || colArray == nullptr || - (valueSize != 0 && valueArray == nullptr) || ptr->mat == nullptr) { - return kPD_NULLPTR; - } - if (auto sparseMat = dynamic_cast(ptr->mat.get())) { - std::vector row(rowSize); - row.assign(rowArray, rowArray + rowSize); - std::vector col(colSize); - col.assign(colArray, colArray + colSize); - std::vector val(valueSize); - if (valueSize) { - val.assign(valueArray, valueArray + valueSize); - } - sparseMat->copyFrom(row, col, val); - return kPD_NO_ERROR; - } else { - return kPD_NOT_SUPPORTED; - } -#else - return kPD_NOT_SUPPORTED; -#endif -} diff --git a/paddle/legacy/capi/Vector.cpp b/paddle/legacy/capi/Vector.cpp deleted file mode 100644 index afb5a9afefedad7b99d440f2149ddb0c75264d80..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/Vector.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "capi_private.h" -#include "vector.h" - -using paddle::capi::cast; - -extern "C" { - -paddle_ivector paddle_ivector_create_none() { - return new paddle::capi::CIVector(); -} - -paddle_ivector paddle_ivector_create(int* array, - uint64_t size, - bool copy, - bool useGPU) { - auto ptr = new paddle::capi::CIVector(); - if (copy) { - ptr->vec = paddle::IVector::create(size, useGPU); - ptr->vec->copyFrom(array, size); - } else { - ptr->vec = paddle::IVector::create(array, size, useGPU); - } - return ptr; -} - -paddle_error paddle_ivector_destroy(paddle_ivector ivec) { - if (ivec == nullptr) return kPD_NULLPTR; - delete cast(ivec); - return kPD_NO_ERROR; -} - -paddle_error paddle_ivector_get(paddle_ivector ivec, int** buffer) { - if (ivec == nullptr || buffer == nullptr) return kPD_NULLPTR; - auto v = cast(ivec); - if (v->vec == nullptr) return kPD_NULLPTR; - *buffer = v->vec->getData(); - return kPD_NO_ERROR; -} - -paddle_error paddle_ivector_resize(paddle_ivector ivec, uint64_t size) { - if (ivec == nullptr) return kPD_NULLPTR; - auto v = cast(ivec); - if (v->vec == nullptr) return kPD_NULLPTR; - v->vec->resize(size); - return kPD_NO_ERROR; -} - -paddle_error paddle_ivector_get_size(paddle_ivector ivec, uint64_t* size) { - if (ivec == nullptr) return kPD_NULLPTR; - auto v = cast(ivec); - if (v->vec == nullptr) return kPD_NULLPTR; - *size = v->vec->getSize(); - return kPD_NO_ERROR; -} -} diff --git a/paddle/legacy/capi/arguments.h b/paddle/legacy/capi/arguments.h deleted file mode 100644 index ceb64ee6aa74a8ba4b5cb9045b366dcda8f8cc90..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/arguments.h +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef __PADDLE_CAPI_ARGUMENTS_H__ -#define __PADDLE_CAPI_ARGUMENTS_H__ - -#include -#include "config.h" -#include "error.h" -#include "matrix.h" -#include "vector.h" - -/** - * Arguments functions. Each argument means layer output. Arguments means a - * array of arguemnt. - */ -typedef void* paddle_arguments; - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @brief paddle_arguments_create_none Create a array of arguments, which size - * is zero. - * @return Arguemnts - */ -PD_API paddle_arguments paddle_arguments_create_none(); - -/** - * @brief paddle_arguments_destroy Destroy the arguments - * @param args arguments to destroy - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_destroy(paddle_arguments args); - -/** - * @brief paddle_arguments_get_size Get size of arguments array - * @param [in] args arguments array - * @param [out] size array size - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_get_size(paddle_arguments args, - uint64_t* size); - -/** - * @brief PDArgsResize Resize a arguments array. - * @param args arguments array. - * @param size target size of array - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_resize(paddle_arguments args, - uint64_t size); - -/** - * @brief PDArgsSetValue Set value matrix of one argument in array, which index - * is `ID`. 
- * @param args arguments array - * @param ID array index - * @param mat matrix pointer - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_set_value(paddle_arguments args, - uint64_t ID, - paddle_matrix mat); - -/** - * @brief PDArgsGetValue Get value matrix of one argument in array, which index - * is `ID`. - * @param [in] args arguments array - * @param [in] ID array index - * @param [out] mat matrix pointer - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_get_value(paddle_arguments args, - uint64_t ID, - paddle_matrix mat); - -/** - * @brief paddle_arguments_get_prob Get the prob matrix of beam search, which - * slot ID is `ID` - * @param [in] args arguments array - * @param [in] ID array index - * @param [out] mat matrix pointer - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_get_prob(paddle_arguments args, - uint64_t ID, - paddle_matrix mat); - -/** - * @brief PDArgsGetIds Get the integer vector of one argument in array, which - * index is `ID`. - * @param args arguments array - * @param ID array index - * @param ids integer vector pointer - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_get_ids(paddle_arguments args, - uint64_t ID, - paddle_ivector ids); - -/** - * @brief PDArgsSetIds Set the integer vector of one argument in array, which - * index is `ID`. - * @param [in] args arguments array - * @param [in] ID array index - * @param [out] ids integer vector pointer - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args, - uint64_t ID, - paddle_ivector ids); - -/** - * @brief paddle_arguments_set_frame_shape Set the fram size of one argument - * in array, which index is `ID`. 
- * @param [in] args arguments array - * @param [in] ID array index - * @param [in] frameHeight maximum height of input images - * @param [in] frameWidth maximum width of input images - * @return paddle_error - */ -PD_API paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, - uint64_t ID, - uint64_t frameHeight, - uint64_t frameWidth); - -/** - * @brief PDArgsSetSequenceStartPos Set sequence start position vector of one - * argument in array, which index is `ID`. - * @param args arguments array - * @param ID array index - * @param seqPos sequence position array. - * @return paddle_error - */ -PD_API paddle_error -paddle_arguments_set_sequence_start_pos(paddle_arguments args, - uint64_t ID, - uint32_t nestedLevel, - paddle_ivector seqPos); -/** - * @brief PDArgsGetSequenceStartPos Get sequence start position vector of one - * argument in array, which index is `ID`. - * @param [in] args arguments array - * @param [in] ID array index - * @param [out] seqPos sequence position array - * @return paddle_error - */ -PD_API paddle_error -paddle_arguments_get_sequence_start_pos(paddle_arguments args, - uint64_t ID, - uint32_t nestedLevel, - paddle_ivector seqPos); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/paddle/legacy/capi/capi.h b/paddle/legacy/capi/capi.h deleted file mode 100644 index 749fcc4b7994bad31395565d5ae16cd51c73b049..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/capi.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef __PADDLE_CAPI_H__ -#define __PADDLE_CAPI_H__ - -/** - * Paddle C API. It will replace SWIG as Multiple Language API for model - * training & inference. Currently it is only used in model infernece. - * - * NOTE: This is an experimental API, it could be changed. - */ -#include "arguments.h" -#include "config.h" -#include "error.h" -#include "gradient_machine.h" -#include "main.h" -#include "matrix.h" -#include "vector.h" - -#endif // PADDLECAPI_H_ diff --git a/paddle/legacy/capi/capi_private.h b/paddle/legacy/capi/capi_private.h deleted file mode 100644 index e5f8c8c5c8bd506f9c8f49ee7d03f9b20460efdb..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/capi_private.h +++ /dev/null @@ -1,82 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "capi.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/parameter/Argument.h" -#pragma once - -namespace paddle { -namespace capi { - -enum CType { kIVECTOR = 0, kMATRIX, kARGUMENTS, kGRADIENT_MACHINE }; - -#define STRUCT_HEADER CType type; - -struct CHeader { - STRUCT_HEADER -}; - -struct CIVector { - STRUCT_HEADER - IVectorPtr vec; - - CIVector() : type(kIVECTOR) {} -}; - -struct CMatrix { - STRUCT_HEADER - MatrixPtr mat; - - CMatrix() : type(kMATRIX) {} -}; - -struct CArguments { - STRUCT_HEADER - std::vector args; - - CArguments() : type(kARGUMENTS) {} - - template - paddle_error accessSeqPos(uint64_t ID, uint32_t nestedLevel, T callback) { - if (ID >= args.size()) return kPD_OUT_OF_RANGE; - switch (nestedLevel) { - case 0: - callback(args[ID].sequenceStartPositions); - break; - case 1: - callback(args[ID].subSequenceStartPositions); - break; - default: - return kPD_OUT_OF_RANGE; - } - return kPD_NO_ERROR; - } -}; - -struct CGradientMachine { - STRUCT_HEADER - paddle::GradientMachinePtr machine; - - CGradientMachine() : type(kGRADIENT_MACHINE) {} -}; - -template -inline T* cast(void* ptr) { - return reinterpret_cast(ptr); -} -} // namespace capi -} // namespace paddle diff --git a/paddle/legacy/capi/config.h.in b/paddle/legacy/capi/config.h.in deleted file mode 100644 index 0ddbd8c753c55ab95a89e1781c64b9416f7344e7..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/config.h.in +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __PADDLE_PADDLE_CAPI_CONFIG_H_INCLUDED__ -#define __PADDLE_PADDLE_CAPI_CONFIG_H_INCLUDED__ - -typedef @PADDLE_FLOAT_TYPE@ paddle_real; - -#define __PADDLE_VERSION__ "@PADDLE_VERSION@" -#define __PADDLE_COMMIT__ "@PADDLE_GIT_COMMIT@" - -// Since we only support linux and macos in compile, always use clang or -// gcc 4.8+. DLL_IMPORT/DLL_EXPORT is as simple as below. 
-#define PD_API __attribute__((visibility("default"))) - -#endif diff --git a/paddle/legacy/capi/error.cpp b/paddle/legacy/capi/error.cpp deleted file mode 100644 index 0c25de5ba98f938a3717060bf8d26dd310eb9b5e..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/error.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "error.h" - -extern "C" const char* paddle_error_string(paddle_error err) { - switch (err) { - case kPD_NULLPTR: - return "nullptr error"; - case kPD_OUT_OF_RANGE: - return "out of range error"; - case kPD_PROTOBUF_ERROR: - return "protobuf error"; - case kPD_NOT_SUPPORTED: - return "not supported error"; - case kPD_UNDEFINED_ERROR: - return "undefined error"; - default: - return ""; - } -} diff --git a/paddle/legacy/capi/error.h b/paddle/legacy/capi/error.h deleted file mode 100644 index b0940725b507845668694786de00933a5b51be9a..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/error.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef __PADDLE_CAPI_ERROR_H__ -#define __PADDLE_CAPI_ERROR_H__ - -#include "config.h" - -/** - * Error Type for Paddle API. - */ -typedef enum { - kPD_NO_ERROR = 0, - kPD_NULLPTR = 1, - kPD_OUT_OF_RANGE = 2, - kPD_PROTOBUF_ERROR = 3, - kPD_NOT_SUPPORTED = 4, - kPD_UNDEFINED_ERROR = -1, -} paddle_error; - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Error string for Paddle API. - */ -PD_API const char* paddle_error_string(paddle_error err); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/paddle/legacy/capi/examples/.gitignore b/paddle/legacy/capi/examples/.gitignore deleted file mode 100644 index 2caa0a5a298d8cec0d996c3774b6f42060a0d41a..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.bin -build-* diff --git a/paddle/legacy/capi/examples/README.md b/paddle/legacy/capi/examples/README.md deleted file mode 100644 index 14013e281ff50279473dfc4da46aaef4f8b7ea9a..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# C-API Example Usage - -* [Model Inference](./model_inference/README.md) diff --git a/paddle/legacy/capi/examples/model_inference/README.md b/paddle/legacy/capi/examples/model_inference/README.md deleted file mode 100644 index 58e6c83140b5f33ddfd1f027b6624a26f842a2f8..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# Use C-API for Model Inference - -There are several examples in this directory about how to use Paddle C-API for 
model inference. - -## Convert configuration file to protobuf binary. - -Firstly, the user should convert Paddle's model configuration file into a protobuf binary file. In each example directory, there is a file named `convert_protobin.sh`. It will convert `trainer_config.conf` into `trainer_config.bin`. - -The `convert_protobin.sh` is very simple, just invoke `dump_config` Python module to dump the binary file. The command line usages are: - -```bash -python -m paddle.utils.dump_config YOUR_CONFIG_FILE 'CONFIG_EXTRA_ARGS' --binary > YOUR_CONFIG_FILE.bin -``` - -## Initialize paddle - -```c++ -char* argv[] = {"--use_gpu=False"}; -paddle_init(1, (char**)argv); -``` - -We must initialize global context before we invoke other interfaces in Paddle. The initialize commands just like the `paddle_trainer` command line arguments. `paddle train --help`, will show the list of arguments. The most important argument is `use_gpu` or not. - -## Load network and parameters - -```c -paddle_gradient_machine machine; -paddle_gradient_machine_create_for_inference(&machine, config_file_content, content_size)); -paddle_gradient_machine_load_parameter_from_disk(machine, "./some_where_to_params")); -``` - -The gradient machine is a Paddle concept, which represents a neural network can be forwarded and backward. We can create a gradient machine fo model inference, and load the parameter files from disk. - -Moreover, if we want to inference in multi-thread, we could create a thread local gradient machine which shared the same parameter by using `paddle_gradient_machine_create_shared_param` API. Please reference `multi_thread` as an example. - -## Create input - -The input of a neural network is an `arguments`. The examples in this directory will show how to construct different types of inputs for prediction. Please look at `dense`, `sparse_binary`, `sequence` for details. - -## Get inference - -After invoking `paddle_gradient_machine_forward`, we could get the output of the neural network. 
The `value` matrix of output arguments will store the neural network output values. If the output is a `SoftmaxActivation`, the `value` matrix are the probabilities of each input samples. The height of output matrix is number of sample. The width is the number of categories. diff --git a/paddle/legacy/capi/examples/model_inference/common/common.h b/paddle/legacy/capi/examples/model_inference/common/common.h deleted file mode 100644 index 23248b0caf92e2408f451c1cc04a5c179d41aff3..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/common/common.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#ifndef __CAPI_EXAMPLE_COMMON_H__ -#define __CAPI_EXAMPLE_COMMON_H__ -#include -#include - -#define CHECK(stmt) \ - do { \ - paddle_error __err__ = stmt; \ - if (__err__ != kPD_NO_ERROR) { \ - fprintf(stderr, "Invoke paddle error %d in " #stmt "\n", __err__); \ - exit(__err__); \ - } \ - } while (0) - -void* read_config(const char* filename, long* size) { - FILE* file = fopen(filename, "r"); - if (file == NULL) { - fprintf(stderr, "Open %s error\n", filename); - return NULL; - } - fseek(file, 0L, SEEK_END); - *size = ftell(file); - fseek(file, 0L, SEEK_SET); - void* buf = malloc(*size); - fread(buf, 1, *size, file); - fclose(file); - return buf; -} -#endif diff --git a/paddle/legacy/capi/examples/model_inference/dense/CMakeLists.txt b/paddle/legacy/capi/examples/model_inference/dense/CMakeLists.txt deleted file mode 100644 index 008a488fd9e6fdca2c4cb92bf1b8c41fce1835a9..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/dense/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -project(dense) -cmake_minimum_required(VERSION 2.8) -aux_source_directory(. 
SRC_LIST) -add_executable(${PROJECT_NAME} ${SRC_LIST}) -set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99) -target_link_libraries(${PROJECT_NAME} -lpaddle_capi_shared) diff --git a/paddle/legacy/capi/examples/model_inference/dense/convert_protobin.sh b/paddle/legacy/capi/examples/model_inference/dense/convert_protobin.sh deleted file mode 100755 index 30ffc316ecb76cd9c8e2b628f85484a990ac6da8..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/dense/convert_protobin.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -python -m paddle.utils.dump_config trainer_config.py '' --binary > trainer_config.bin diff --git a/paddle/legacy/capi/examples/model_inference/dense/main.c b/paddle/legacy/capi/examples/model_inference/dense/main.c deleted file mode 100644 index 90444889a74e3aff9c5d933d0249619f33b2b0d4..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/dense/main.c +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "../common/common.h" - -// Modify this path as needed. -#define CONFIG_BIN "./trainer_config.bin" -// Modify this path as needed. -// This demo assumes that merged model is not used, then this path is the -// directory storing all the trained parameters. 
-// If the model is trained by PaddlePaddle V2 API, the model is saved as -// a compressed file. You need to uncompress the compressed file first. -#define MODEL_PATH "models/pass_4" - -int main() { - // Initalize the PaddlePaddle runtime environment. - char* argv[] = {"--use_gpu=False"}; - CHECK(paddle_init(1, (char**)argv)); - - // Read the binary configuration file generated by `convert_protobin.sh` - long size; - void* buf = read_config(CONFIG_BIN, &size); - - // Create the gradient machine for inference. - paddle_gradient_machine machine; - CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size)); - - // Load the trained model. Modify the parameter MODEL_PATH to set the correct - // path of the trained model. - CHECK(paddle_gradient_machine_load_parameter_from_disk(machine, MODEL_PATH)); - - // Inputs and outputs of the network are organized as paddle_arguments object - // in C-API. In the comments below, "argument" specifically means one input of - // the neural network in PaddlePaddle C-API. - paddle_arguments in_args = paddle_arguments_create_none(); - - // There is only one data layer in this demo MNIST network, invoke this - // function to create one argument. - CHECK(paddle_arguments_resize(in_args, 1)); - - // Each argument needs one matrix or one ivector (integer vector, for sparse - // index input, usually used in NLP task) to holds the real input data. - // In the comments below, "matrix" specifically means the object needed by - // argument to hold the data. Here we create the matrix for the above created - // agument to store the testing samples. - paddle_matrix mat = - paddle_matrix_create(/* height = batch size */ 1, - /* width = dimensionality of the data layer */ 784, - /* whether to use GPU */ false); - - paddle_real* array; - // Get the pointer pointing to the start address of the first row of the - // created matrix. 
- CHECK(paddle_matrix_get_row(mat, 0, &array)); - - // Fill the matrix with a randomly generated test sample. - srand(time(0)); - for (int i = 0; i < 784; ++i) { - array[i] = rand() / ((float)RAND_MAX); - } - - // Assign the matrix to the argument. - CHECK(paddle_arguments_set_value(in_args, 0, mat)); - - // Create the output argument. - paddle_arguments out_args = paddle_arguments_create_none(); - - // Invoke the forward computation. - CHECK(paddle_gradient_machine_forward(machine, - in_args, - out_args, - /* is train taks or not */ false)); - - // Create the matrix to hold the forward result of the neural network. - paddle_matrix prob = paddle_matrix_create_none(); - // Access the matrix of the output argument, the predicted result is stored in - // which. - CHECK(paddle_arguments_get_value(out_args, 0, prob)); - - uint64_t height; - uint64_t width; - CHECK(paddle_matrix_get_shape(prob, &height, &width)); - CHECK(paddle_matrix_get_row(prob, 0, &array)); - - printf("Prob: \n"); - for (int i = 0; i < height * width; ++i) { - printf("%.4f ", array[i]); - if ((i + 1) % width == 0) { - printf("\n"); - } - } - printf("\n"); - - // The cleaning up. - CHECK(paddle_matrix_destroy(prob)); - CHECK(paddle_arguments_destroy(out_args)); - CHECK(paddle_matrix_destroy(mat)); - CHECK(paddle_arguments_destroy(in_args)); - CHECK(paddle_gradient_machine_destroy(machine)); - - return 0; -} diff --git a/paddle/legacy/capi/examples/model_inference/dense/merge_v2_model.py b/paddle/legacy/capi/examples/model_inference/dense/merge_v2_model.py deleted file mode 100644 index 673aba2036c7ec16d68ebc64e91ba3c9182f63a4..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/dense/merge_v2_model.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.utils.merge_model import merge_v2_model - -from mnist_v2 import network - -net = network(is_infer=True) -param_file = "models/params_pass_4.tar" -output_file = "output.paddle.model" -merge_v2_model(net, param_file, output_file) diff --git a/paddle/legacy/capi/examples/model_inference/dense/mnist_v2.py b/paddle/legacy/capi/examples/model_inference/dense/mnist_v2.py deleted file mode 100644 index 3fd15d658adff412d91fdf374f0e6e38a23edbbe..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/dense/mnist_v2.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import gzip -import logging -import argparse -from PIL import Image -import numpy as np - -import paddle.v2 as paddle -from paddle.utils.dump_v2_config import dump_v2_config - -logger = logging.getLogger("paddle") -logger.setLevel(logging.INFO) - - -def multilayer_perceptron(img, layer_size, lbl_dim): - for idx, size in enumerate(layer_size): - hidden = paddle.layer.fc(input=(img if not idx else hidden), - size=size, - act=paddle.activation.Relu()) - return paddle.layer.fc(input=hidden, - size=lbl_dim, - act=paddle.activation.Softmax()) - - -def network(input_dim=784, lbl_dim=10, is_infer=False): - images = paddle.layer.data( - name='pixel', type=paddle.data_type.dense_vector(input_dim)) - - predict = multilayer_perceptron( - images, layer_size=[128, 64], lbl_dim=lbl_dim) - - if is_infer: - return predict - else: - label = paddle.layer.data( - name='label', type=paddle.data_type.integer_value(lbl_dim)) - return paddle.layer.classification_cost(input=predict, label=label) - - -def main(task="train", use_gpu=False, trainer_count=1, save_dir="models"): - if task == "train": - if not os.path.exists(save_dir): - os.mkdir(save_dir) - - paddle.init(use_gpu=use_gpu, trainer_count=trainer_count) - cost = network() - parameters = paddle.parameters.create(cost) - optimizer = paddle.optimizer.Momentum( - learning_rate=0.1 / 128.0, - momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) - - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer) - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - logger.info("Pass %d, Batch %d, Cost %f, %s" % - (event.pass_id, event.batch_id, event.cost, - event.metrics)) - if isinstance(event, paddle.event.EndPass): - with gzip.open( - os.path.join(save_dir, "params_pass_%d.tar" % - event.pass_id), "w") as f: - trainer.save_parameter_to_tar(f) - - trainer.train( - reader=paddle.batch( - 
paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=8192), - batch_size=128), - event_handler=event_handler, - num_passes=5) - elif task == "dump_config": - predict = network(is_infer=True) - dump_v2_config(predict, "trainer_config.bin", True) - else: - raise RuntimeError(("Error value for parameter task. " - "Available options are: train and dump_config.")) - - -def parse_cmd(): - parser = argparse.ArgumentParser( - description="PaddlePaddle MNIST demo for CAPI.") - parser.add_argument( - "--task", - type=str, - required=False, - help=("A string indicating the taks type. " - "Available options are: \"train\", \"dump_config\"."), - default="train") - parser.add_argument( - "--use_gpu", - type=bool, - help=("A bool flag indicating whether to use GPU device or not."), - default=False) - parser.add_argument( - "--trainer_count", - type=int, - help=("This parameter is only used in training task. It indicates " - "how many computing threads are created in training."), - default=1) - parser.add_argument( - "--save_dir", - type=str, - help=("This parameter is only used in training task. It indicates " - "path of the directory to save the trained models."), - default="models") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_cmd() - main(args.task, args.use_gpu, args.trainer_count, args.save_dir) diff --git a/paddle/legacy/capi/examples/model_inference/dense/trainer_config.py b/paddle/legacy/capi/examples/model_inference/dense/trainer_config.py deleted file mode 100644 index eca2dce114b069bf9b455d77ce670d73b5047fd2..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/dense/trainer_config.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/paddle/legacy/capi/examples/model_inference/multi_thread/.gitignore b/paddle/legacy/capi/examples/model_inference/multi_thread/.gitignore deleted file mode 100644 index fab7372d796ea95c80d02df6caa7eb2b411a7ac1..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/multi_thread/.gitignore +++ /dev/null @@ -1,73 +0,0 @@ -# This file is used to ignore files which are generated -# ---------------------------------------------------------------------------- - -*~ -*.autosave -*.a -*.core -*.moc -*.o -*.obj -*.orig -*.rej -*.so -*.so.* -*_pch.h.cpp -*_resource.rc -*.qm -.#* -*.*# -core -!core/ -tags -.DS_Store -.directory -*.debug -Makefile* -*.prl -*.app -moc_*.cpp -ui_*.h -qrc_*.cpp -Thumbs.db -*.res -*.rc -/.qmake.cache -/.qmake.stash - -# qtcreator generated files -*.pro.user* - -# xemacs temporary files -*.flc - -# Vim temporary files -.*.swp - -# Visual Studio generated files -*.ib_pdb_index -*.idb -*.ilk -*.pdb -*.sln -*.suo -*.vcproj -*vcproj.*.*.user -*.ncb -*.sdf -*.opensdf -*.vcxproj -*vcxproj.* - -# MinGW generated files -*.Debug -*.Release - -# Python byte code -*.pyc - -# Binaries -# -------- -*.dll -*.exe - diff --git a/paddle/legacy/capi/examples/model_inference/multi_thread/CMakeLists.txt b/paddle/legacy/capi/examples/model_inference/multi_thread/CMakeLists.txt deleted file mode 100644 index 2fc8debddedeab6ae982b0df49ec2b73bc0f85f5..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/multi_thread/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ 
-project(multi_thread) -cmake_minimum_required(VERSION 2.8) - -find_package (Threads) - -if(NOT PADDLE_ROOT) - set(PADDLE_ROOT $ENV{PADDLE_ROOT} CACHE PATH "Paddle Path") -endif() -if(PADDLE_ROOT) - include_directories(${PADDLE_ROOT}/include) - link_directories(${PADDLE_ROOT}/lib) -endif() - -set(CPU_SRCS main.c) -add_executable(${PROJECT_NAME} ${CPU_SRCS}) -set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99) -target_link_libraries(${PROJECT_NAME} - -lpaddle_capi_shared - ${CMAKE_THREAD_LIBS_INIT}) - -find_package(CUDA QUIET) -if(CUDA_FOUND) - set(GPU_SRCS main_gpu.c) - cuda_add_executable(${PROJECT_NAME}_gpu ${GPU_SRCS}) - set_property(TARGET ${PROJECT_NAME}_gpu PROPERTY C_STANDARD 99) - target_link_libraries(${PROJECT_NAME}_gpu - -lpaddle_capi_shared - ${CMAKE_THREAD_LIBS_INIT}) -endif(CUDA_FOUND) diff --git a/paddle/legacy/capi/examples/model_inference/multi_thread/convert_protobin.sh b/paddle/legacy/capi/examples/model_inference/multi_thread/convert_protobin.sh deleted file mode 100644 index b29f2cd21418ecbd2fb2ba626138e5aa11bf77f3..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/multi_thread/convert_protobin.sh +++ /dev/null @@ -1 +0,0 @@ -../dense/convert_protobin.sh diff --git a/paddle/legacy/capi/examples/model_inference/multi_thread/main.c b/paddle/legacy/capi/examples/model_inference/multi_thread/main.c deleted file mode 100644 index 0a99e6b9c8d8447aaf8b8862676eb8a93f9013ed..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/multi_thread/main.c +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include "../common/common.h" - -#define CONFIG_BIN "./trainer_config.bin" -#define NUM_THREAD 4 -#define NUM_ITER 1000 - -pthread_mutex_t mutex; - -void* thread_main(void* gm_ptr) { - paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr); - paddle_arguments in_args = paddle_arguments_create_none(); - // Create input matrix. - paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1, - /* size */ 784, - /* useGPU */ false); - paddle_arguments out_args = paddle_arguments_create_none(); - paddle_matrix prob = paddle_matrix_create_none(); - for (int iter = 0; iter < NUM_ITER; ++iter) { - // There is only one input of this network. - CHECK(paddle_arguments_resize(in_args, 1)); - - paddle_real* array; - - // Get First row. 
- CHECK(paddle_matrix_get_row(mat, 0, &array)); - - for (int i = 0; i < 784; ++i) { - array[i] = rand() / ((float)RAND_MAX); - } - - CHECK(paddle_arguments_set_value(in_args, 0, mat)); - - CHECK(paddle_gradient_machine_forward(machine, - in_args, - out_args, - /* isTrain */ false)); - - CHECK(paddle_arguments_get_value(out_args, 0, prob)); - - CHECK(paddle_matrix_get_row(prob, 0, &array)); - - pthread_mutex_lock(&mutex); - printf("Prob: "); - for (int i = 0; i < 10; ++i) { - printf("%.2f ", array[i]); - } - printf("\n"); - pthread_mutex_unlock(&mutex); - } - - CHECK(paddle_matrix_destroy(prob)); - CHECK(paddle_arguments_destroy(out_args)); - CHECK(paddle_matrix_destroy(mat)); - CHECK(paddle_arguments_destroy(in_args)); - CHECK(paddle_gradient_machine_destroy(machine)); - return NULL; -} - -int main() { - // Initalize Paddle - char* argv[] = {"--use_gpu=False"}; - CHECK(paddle_init(1, (char**)argv)); - - // Reading config binary file. It is generated by `convert_protobin.sh` - long size; - void* buf = read_config(CONFIG_BIN, &size); - - // Create a gradient machine for inference. - paddle_gradient_machine machine; - CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size)); - CHECK(paddle_gradient_machine_randomize_param(machine)); - - // Loading parameter. Uncomment the following line and change the directory. 
- // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine, - // "./some_where_to_params")); - srand(time(0)); - pthread_mutex_init(&mutex, NULL); - - pthread_t threads[NUM_THREAD]; - - for (int i = 0; i < NUM_THREAD; ++i) { - paddle_gradient_machine thread_local_machine; - CHECK(paddle_gradient_machine_create_shared_param( - machine, buf, size, &thread_local_machine)); - pthread_create(&threads[i], NULL, thread_main, thread_local_machine); - } - - for (int i = 0; i < NUM_THREAD; ++i) { - pthread_join(threads[i], NULL); - } - - pthread_mutex_destroy(&mutex); - - return 0; -} diff --git a/paddle/legacy/capi/examples/model_inference/multi_thread/main_gpu.c b/paddle/legacy/capi/examples/model_inference/multi_thread/main_gpu.c deleted file mode 100644 index 60f0c59e7710de595fe297f2167bda3ce7936f6a..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/multi_thread/main_gpu.c +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include "../common/common.h" - -#define CONFIG_BIN "./trainer_config.bin" -#define NUM_THREAD 4 -#define NUM_ITER 1000 - -pthread_mutex_t mutex; - -/* - * @brief It is an simple inference example that runs multi-threads on a GPU. - * Each thread holds it own local gradient_machine but shares the same - * parameters. 
- * If you want to run on different GPUs, you need to launch - * multi-processes or set trainer_count > 1. - */ -void* thread_main(void* gm_ptr) { - // Initialize the thread environment of Paddle. - CHECK(paddle_init_thread()); - - paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr); - // Create input arguments. - paddle_arguments in_args = paddle_arguments_create_none(); - // Create input matrix. - paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1, - /* size */ 784, - /* useGPU */ true); - // Create output arguments. - paddle_arguments out_args = paddle_arguments_create_none(); - // Create output matrix. - paddle_matrix prob = paddle_matrix_create_none(); - - // CPU buffer to cache the input and output. - paddle_real* cpu_input = (paddle_real*)malloc(784 * sizeof(paddle_real)); - paddle_real* cpu_output = (paddle_real*)malloc(10 * sizeof(paddle_real)); - for (int iter = 0; iter < NUM_ITER; ++iter) { - // There is only one input layer of this network. - CHECK(paddle_arguments_resize(in_args, 1)); - CHECK(paddle_arguments_set_value(in_args, 0, mat)); - - for (int i = 0; i < 784; ++i) { - cpu_input[i] = rand() / ((float)RAND_MAX); - } - CHECK(paddle_matrix_set_value(mat, cpu_input)); - - CHECK(paddle_gradient_machine_forward(machine, - in_args, - out_args, - /* isTrain */ false)); - - CHECK(paddle_arguments_get_value(out_args, 0, prob)); - CHECK(paddle_matrix_get_value(prob, cpu_output)); - - pthread_mutex_lock(&mutex); - printf("Prob: "); - for (int i = 0; i < 10; ++i) { - printf("%.2f ", cpu_output[i]); - } - printf("\n"); - pthread_mutex_unlock(&mutex); - } - - CHECK(paddle_matrix_destroy(prob)); - CHECK(paddle_arguments_destroy(out_args)); - CHECK(paddle_matrix_destroy(mat)); - CHECK(paddle_arguments_destroy(in_args)); - CHECK(paddle_gradient_machine_destroy(machine)); - - free(cpu_input); - free(cpu_output); - - return NULL; -} - -int main() { - // Initalize Paddle - char* argv[] = {"--use_gpu=True"}; - CHECK(paddle_init(1, 
(char**)argv)); - - // Reading config binary file. It is generated by `convert_protobin.sh` - long size; - void* buf = read_config(CONFIG_BIN, &size); - - // Create a gradient machine for inference. - paddle_gradient_machine machine; - CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size)); - CHECK(paddle_gradient_machine_randomize_param(machine)); - - // Loading parameter. Uncomment the following line and change the directory. - // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine, - // "./some_where_to_params")); - srand(time(0)); - pthread_mutex_init(&mutex, NULL); - - pthread_t threads[NUM_THREAD]; - - for (int i = 0; i < NUM_THREAD; ++i) { - paddle_gradient_machine thread_local_machine; - CHECK(paddle_gradient_machine_create_shared_param( - machine, buf, size, &thread_local_machine)); - pthread_create(&threads[i], NULL, thread_main, thread_local_machine); - } - - for (int i = 0; i < NUM_THREAD; ++i) { - pthread_join(threads[i], NULL); - } - - pthread_mutex_destroy(&mutex); - - return 0; -} diff --git a/paddle/legacy/capi/examples/model_inference/multi_thread/trainer_config.py b/paddle/legacy/capi/examples/model_inference/multi_thread/trainer_config.py deleted file mode 100755 index fa6a12319a99504b5aeee83fc8af6132c62f9aa5..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/multi_thread/trainer_config.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reservedd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/paddle/legacy/capi/examples/model_inference/sequence/.gitignore b/paddle/legacy/capi/examples/model_inference/sequence/.gitignore deleted file mode 100644 index fab7372d796ea95c80d02df6caa7eb2b411a7ac1..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sequence/.gitignore +++ /dev/null @@ -1,73 +0,0 @@ -# This file is used to ignore files which are generated -# ---------------------------------------------------------------------------- - -*~ -*.autosave -*.a -*.core -*.moc -*.o -*.obj -*.orig -*.rej -*.so -*.so.* -*_pch.h.cpp -*_resource.rc -*.qm -.#* -*.*# -core -!core/ -tags -.DS_Store -.directory -*.debug -Makefile* -*.prl -*.app -moc_*.cpp -ui_*.h -qrc_*.cpp -Thumbs.db -*.res -*.rc -/.qmake.cache -/.qmake.stash - -# qtcreator generated files -*.pro.user* - -# xemacs temporary files -*.flc - -# Vim temporary files -.*.swp - -# Visual Studio generated files -*.ib_pdb_index -*.idb -*.ilk -*.pdb -*.sln -*.suo -*.vcproj -*vcproj.*.*.user -*.ncb -*.sdf -*.opensdf -*.vcxproj -*vcxproj.* - -# MinGW generated files -*.Debug -*.Release - -# Python byte code -*.pyc - -# Binaries -# -------- -*.dll -*.exe - diff --git a/paddle/legacy/capi/examples/model_inference/sequence/CMakeLists.txt b/paddle/legacy/capi/examples/model_inference/sequence/CMakeLists.txt deleted file mode 100644 index 71b73acba7cdea1c869ec6061df379c3f7cb45db..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sequence/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -project(sequence) -cmake_minimum_required(VERSION 2.8) -aux_source_directory(. 
SRC_LIST) -add_executable(${PROJECT_NAME} ${SRC_LIST}) -set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99) -target_link_libraries(${PROJECT_NAME} -lpaddle_capi_shared) diff --git a/paddle/legacy/capi/examples/model_inference/sequence/convert_protobin.sh b/paddle/legacy/capi/examples/model_inference/sequence/convert_protobin.sh deleted file mode 100644 index b29f2cd21418ecbd2fb2ba626138e5aa11bf77f3..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sequence/convert_protobin.sh +++ /dev/null @@ -1 +0,0 @@ -../dense/convert_protobin.sh diff --git a/paddle/legacy/capi/examples/model_inference/sequence/main.c b/paddle/legacy/capi/examples/model_inference/sequence/main.c deleted file mode 100644 index 25a38d32f0b6970f78ed8d31182ffdff7fa1eddc..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sequence/main.c +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include "../common/common.h" - -#define CONFIG_BIN "./trainer_config.bin" - -int main() { - // Initalize Paddle - char* argv[] = {"--use_gpu=False"}; - CHECK(paddle_init(1, (char**)argv)); - - // Reading config binary file. It is generated by `convert_protobin.sh` - long size; - void* buf = read_config(CONFIG_BIN, &size); - - // Create a gradient machine for inference. 
- paddle_gradient_machine machine; - CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size)); - CHECK(paddle_gradient_machine_randomize_param(machine)); - - // Loading parameter. Uncomment the following line and change the directory. - // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine, - // "./some_where_to_params")); - paddle_arguments in_args = paddle_arguments_create_none(); - - // There is only one input of this network. - CHECK(paddle_arguments_resize(in_args, 1)); - - // Create input ids. - int sentence_ids[] = {83, 48, 20, 84, 394, 853, 64, 53, 64}; - - paddle_ivector sentence = paddle_ivector_create( - sentence_ids, sizeof(sentence_ids) / sizeof(int), false, false); - CHECK(paddle_arguments_set_ids(in_args, 0, sentence)); - - int seq_pos_array[] = {0, sizeof(sentence_ids) / sizeof(int)}; - - paddle_ivector seq_pos = paddle_ivector_create( - seq_pos_array, sizeof(seq_pos_array) / sizeof(int), false, false); - - CHECK(paddle_arguments_set_sequence_start_pos(in_args, 0, 0, seq_pos)); - - paddle_arguments out_args = paddle_arguments_create_none(); - CHECK(paddle_gradient_machine_forward(machine, - in_args, - out_args, - /* isTrain */ false)); - paddle_matrix prob = paddle_matrix_create_none(); - - CHECK(paddle_arguments_get_value(out_args, 0, prob)); - - paddle_real* array; - - CHECK(paddle_matrix_get_row(prob, 0, &array)); - - printf("Prob: "); - for (int i = 0; i < 2; ++i) { - printf("%.2f ", array[i]); - } - printf("\n"); - - CHECK(paddle_matrix_destroy(prob)); - CHECK(paddle_arguments_destroy(out_args)); - CHECK(paddle_ivector_destroy(seq_pos)); - CHECK(paddle_ivector_destroy(sentence)); - CHECK(paddle_arguments_destroy(in_args)); - CHECK(paddle_gradient_machine_destroy(machine)); - - return 0; -} diff --git a/paddle/legacy/capi/examples/model_inference/sequence/trainer_config.py b/paddle/legacy/capi/examples/model_inference/sequence/trainer_config.py deleted file mode 100644 index 
62ae97e2627058c66c7262a8dbf6622eef74f5af..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sequence/trainer_config.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -WORD_DIM = 3000 - -sentence = data_layer(name='sentence', size=WORD_DIM) -sentence_embedding = embedding_layer( - input=sentence, - size=64, - param_attr=ParameterAttribute( - initial_max=1.0, initial_min=0.5)) -lstm = simple_lstm(input=sentence_embedding, size=64) -lstm_last = last_seq(input=lstm) -outputs(fc_layer(input=lstm_last, size=2, act=SoftmaxActivation())) diff --git a/paddle/legacy/capi/examples/model_inference/sparse_binary/.gitignore b/paddle/legacy/capi/examples/model_inference/sparse_binary/.gitignore deleted file mode 100644 index fab7372d796ea95c80d02df6caa7eb2b411a7ac1..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sparse_binary/.gitignore +++ /dev/null @@ -1,73 +0,0 @@ -# This file is used to ignore files which are generated -# ---------------------------------------------------------------------------- - -*~ -*.autosave -*.a -*.core -*.moc -*.o -*.obj -*.orig -*.rej -*.so -*.so.* -*_pch.h.cpp -*_resource.rc -*.qm -.#* -*.*# -core -!core/ -tags -.DS_Store -.directory -*.debug -Makefile* -*.prl -*.app -moc_*.cpp -ui_*.h -qrc_*.cpp -Thumbs.db -*.res -*.rc 
-/.qmake.cache -/.qmake.stash - -# qtcreator generated files -*.pro.user* - -# xemacs temporary files -*.flc - -# Vim temporary files -.*.swp - -# Visual Studio generated files -*.ib_pdb_index -*.idb -*.ilk -*.pdb -*.sln -*.suo -*.vcproj -*vcproj.*.*.user -*.ncb -*.sdf -*.opensdf -*.vcxproj -*vcxproj.* - -# MinGW generated files -*.Debug -*.Release - -# Python byte code -*.pyc - -# Binaries -# -------- -*.dll -*.exe - diff --git a/paddle/legacy/capi/examples/model_inference/sparse_binary/CMakeLists.txt b/paddle/legacy/capi/examples/model_inference/sparse_binary/CMakeLists.txt deleted file mode 100644 index c82195688902ac70346fd5204fb14e28886fb51f..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sparse_binary/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -project(sparse_binary) -cmake_minimum_required(VERSION 2.8) -aux_source_directory(. SRC_LIST) -add_executable(${PROJECT_NAME} ${SRC_LIST}) -find_package (Threads) -set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99) -target_link_libraries(${PROJECT_NAME} -lpaddle_capi_shared) diff --git a/paddle/legacy/capi/examples/model_inference/sparse_binary/convert_protobin.sh b/paddle/legacy/capi/examples/model_inference/sparse_binary/convert_protobin.sh deleted file mode 100644 index b29f2cd21418ecbd2fb2ba626138e5aa11bf77f3..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sparse_binary/convert_protobin.sh +++ /dev/null @@ -1 +0,0 @@ -../dense/convert_protobin.sh diff --git a/paddle/legacy/capi/examples/model_inference/sparse_binary/main.c b/paddle/legacy/capi/examples/model_inference/sparse_binary/main.c deleted file mode 100644 index 8df1b6008856278b411a73ed88985fcef53e9a69..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sparse_binary/main.c +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "../common/common.h" - -#define CONFIG_BIN "./trainer_config.bin" - -int main() { - // Initalize Paddle - char* argv[] = {"--use_gpu=False"}; - CHECK(paddle_init(1, (char**)argv)); - - // Read the binary configuration file which is generated by - // `convert_protobin.sh` - long size; - void* buf = read_config(CONFIG_BIN, &size); - - // Create the gradient machine for inference. - paddle_gradient_machine machine; - CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size)); - CHECK(paddle_gradient_machine_randomize_param(machine)); - - // Load the trained parameters. Uncomment the following line and change the - // directory as needed. - // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine, - // "./some_where_to_params")); - paddle_arguments in_args = paddle_arguments_create_none(); - - // There is only one input of this network. - CHECK(paddle_arguments_resize(in_args, 1)); - - // Create the input matrix. 
- paddle_matrix mat = paddle_matrix_create_sparse(1, 784, 3, true, false); - srand(time(0)); - paddle_real* array; - int colBuf[] = {9, 93, 109}; - int rowBuf[] = {0, sizeof(colBuf) / sizeof(int)}; - - CHECK(paddle_matrix_sparse_copy_from(mat, - rowBuf, - sizeof(rowBuf) / sizeof(int), - colBuf, - sizeof(colBuf) / sizeof(int), - NULL, - 0)); - - CHECK(paddle_arguments_set_value(in_args, 0, mat)); - - paddle_arguments out_args = paddle_arguments_create_none(); - CHECK(paddle_gradient_machine_forward(machine, - in_args, - out_args, - /* isTrain */ false)); - paddle_matrix prob = paddle_matrix_create_none(); - - CHECK(paddle_arguments_get_value(out_args, 0, prob)); - - CHECK(paddle_matrix_get_row(prob, 0, &array)); - - printf("Prob: "); - for (int i = 0; i < 10; ++i) { - printf("%.2f ", array[i]); - } - printf("\n"); - - CHECK(paddle_matrix_destroy(prob)); - CHECK(paddle_arguments_destroy(out_args)); - CHECK(paddle_matrix_destroy(mat)); - CHECK(paddle_arguments_destroy(in_args)); - CHECK(paddle_gradient_machine_destroy(machine)); - - return 0; -} diff --git a/paddle/legacy/capi/examples/model_inference/sparse_binary/trainer_config.py b/paddle/legacy/capi/examples/model_inference/sparse_binary/trainer_config.py deleted file mode 100755 index fa6a12319a99504b5aeee83fc8af6132c62f9aa5..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/examples/model_inference/sparse_binary/trainer_config.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reservedd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/paddle/legacy/capi/gradient_machine.cpp b/paddle/legacy/capi/gradient_machine.cpp deleted file mode 100644 index 0c5ddd856b5d374ae90d6c8ef898be52aa2e4e89..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/gradient_machine.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "gradient_machine.h" -#include "capi_private.h" -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" - -#define cast(v) paddle::capi::cast(v) - -enum GradientMatchineCreateMode { - CREATE_MODE_NORMAL = 0, - CREATE_MODE_TESTING = 4 -}; - -namespace paddle { - -class MyNeuralNetwork : public NeuralNetwork { - public: - MyNeuralNetwork(const std::string& name, NeuralNetwork* network) - : NeuralNetwork(name, network) {} -}; - -NeuralNetwork* newCustomNerualNetwork(const std::string& name, - NeuralNetwork* network) { - return new MyNeuralNetwork(name, network); -} -} // namespace paddle - -extern "C" { -paddle_error paddle_gradient_machine_create_for_inference( - paddle_gradient_machine* machine, void* modelConfigProtobuf, int size) { - if (modelConfigProtobuf == nullptr) return kPD_NULLPTR; - paddle::ModelConfig config; - if (!config.ParseFromArray(modelConfigProtobuf, size) || - !config.IsInitialized()) { - return kPD_PROTOBUF_ERROR; - } - - auto ptr = new 
paddle::capi::CGradientMachine(); - ptr->machine.reset(paddle::GradientMachine::create( - config, CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE})); - *machine = ptr; - return kPD_NO_ERROR; -} - -paddle_error paddle_gradient_machine_create_for_inference_with_parameters( - paddle_gradient_machine* machine, void* mergedModel, uint64_t size) { - if (mergedModel == nullptr) return kPD_NULLPTR; - std::istringstream is(std::string(static_cast(mergedModel), size)); - int64_t modelConfigSize = 0; - is.read((char*)(&modelConfigSize), sizeof(modelConfigSize)); - std::string modelConfigProtobuf; - modelConfigProtobuf.resize(modelConfigSize); - is.read(&modelConfigProtobuf[0], modelConfigSize); - paddle::TrainerConfig config; - paddle::ModelConfig modelConfig; - if (!config.ParseFromString(modelConfigProtobuf) || !config.IsInitialized()) { - if (!modelConfig.ParseFromString(modelConfigProtobuf) || - !modelConfig.IsInitialized()) { - return kPD_PROTOBUF_ERROR; - } - } else { - modelConfig = config.model_config(); - } - auto ptr = new paddle::capi::CGradientMachine(); - ptr->machine.reset(paddle::GradientMachine::create( - modelConfig, CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE})); - std::vector& parameters = ptr->machine->getParameters(); - for (auto& para : parameters) { - para->load(is); - } - - *machine = ptr; - return kPD_NO_ERROR; -} - -paddle_error paddle_gradient_machine_destroy(paddle_gradient_machine machine) { - delete cast(machine); - return kPD_NO_ERROR; -} - -paddle_error paddle_gradient_machine_load_parameter_from_disk( - paddle_gradient_machine machine, const char* path) { - auto m = cast(machine); - if (m == nullptr || path == nullptr || m->machine == nullptr) - return kPD_NULLPTR; - m->machine->loadParameters(path); - return kPD_NO_ERROR; -} - -paddle_error paddle_gradient_machine_forward(paddle_gradient_machine machine, - paddle_arguments inArgs, - paddle_arguments outArgs, - bool isTrain) { - auto m = cast(machine); - auto in = paddle::capi::cast(inArgs); - 
auto out = paddle::capi::cast(outArgs); - if (m == nullptr || in == nullptr || out == nullptr || m->machine == nullptr) - return kPD_NULLPTR; - m->machine->forward( - in->args, &out->args, isTrain ? paddle::PASS_TRAIN : paddle::PASS_TEST); - return kPD_NO_ERROR; -} - -paddle_error paddle_gradient_machine_create_shared_param( - paddle_gradient_machine origin, - void* modelConfigProtobuf, - int size, - paddle_gradient_machine* slave) { - auto o = cast(origin); - if (origin == nullptr || slave == nullptr || o->machine == nullptr) { - return kPD_NULLPTR; - } - paddle::ModelConfig config; - if (!config.ParseFromArray(modelConfigProtobuf, size) || - !config.IsInitialized()) { - return kPD_PROTOBUF_ERROR; - } - - std::unique_ptr ptr( - new paddle::capi::CGradientMachine()); - auto nn = paddle::NeuralNetwork::create(config); - nn->init(config, - [&o](int paramId, paddle::Parameter* param) { - auto p = o->machine->getParameters()[paramId]; - param->enableSharedType(paddle::PARAMETER_VALUE, - p->getBuf(paddle::PARAMETER_VALUE)); - }, - {paddle::PARAMETER_VALUE}, - false); - ptr->machine.reset(nn); - *slave = ptr.release(); - return kPD_NO_ERROR; -} -} - -paddle_error paddle_gradient_machine_randomize_param( - paddle_gradient_machine machine) { - auto m = cast(machine); - if (m == nullptr || m->machine == nullptr) return kPD_NULLPTR; - m->machine->randParameters(); - return kPD_NO_ERROR; -} - -paddle_error paddle_gradient_machine_get_layer_output( - paddle_gradient_machine machine, - const char* layerName, - paddle_arguments args) { - auto m = cast(machine); - auto out = paddle::capi::cast(args); - if (m == nullptr || layerName == nullptr || out == nullptr || - m->machine == nullptr) { - return kPD_NULLPTR; - } - - auto layerOutput = m->machine->getLayerOutput(layerName); - out->args.push_back(layerOutput); - return kPD_NO_ERROR; -} - -paddle_error paddle_gradient_machine_release_layer_output( - paddle_gradient_machine machine) { - auto m = cast(machine); - if (m == nullptr 
|| m->machine == nullptr) { - return kPD_NULLPTR; - } - m->machine->releaseOutput(); - return kPD_NO_ERROR; -} diff --git a/paddle/legacy/capi/gradient_machine.h b/paddle/legacy/capi/gradient_machine.h deleted file mode 100644 index f46498b3753fe85350e9ffa60bab5415623fe465..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/gradient_machine.h +++ /dev/null @@ -1,127 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef __PADDLE_CAPI_GRADIENT_MACHINE_H__ -#define __PADDLE_CAPI_GRADIENT_MACHINE_H__ -#include "arguments.h" -#include "config.h" -#include "error.h" - -#ifdef __cplusplus -extern "C" { -#endif -/** - * @brief GradientMachine means a neural network. - */ -typedef void* paddle_gradient_machine; - -/** - * @brief Create a gradient machine used for model inference. - * @param [out] machine that used for model inference. - * @param [in] modelConfigProtobuf - * @param [in] size - * @return paddle_error - */ -PD_API paddle_error paddle_gradient_machine_create_for_inference( - paddle_gradient_machine* machine, void* modelConfigProtobuf, int size); - -/** - * @brief Create a gradient machine used for model inference, using config with - * parameters which is generated by `paddle merge_model`. 
- * Example: - * paddle merge_model \ - * --model_dir="pass-00000" \ - * --model_file="merged_model.paddle" - * @param [out] machine that used for model inference - * @param [in] mergedModel - * @param [in] size - * @return paddle_error - */ -PD_API paddle_error -paddle_gradient_machine_create_for_inference_with_parameters( - paddle_gradient_machine* machine, void* mergedModel, uint64_t size); - -/** - * @brief Load parameter from disk. - * @param machine Gradient Machine. - * @param path local directory path. - * @return paddle_error - */ -PD_API paddle_error paddle_gradient_machine_load_parameter_from_disk( - paddle_gradient_machine machine, const char* path); - -/** - * @brief Forward a gradient machine - * @param machine Gradient machine - * @param inArgs input arguments - * @param outArgs output arguments - * @param isTrain is train or not - * @return paddle_error - */ -PD_API paddle_error -paddle_gradient_machine_forward(paddle_gradient_machine machine, - paddle_arguments inArgs, - paddle_arguments outArgs, - bool isTrain); - -/** - * @brief Create a gradient machine, which parameters are shared from another - * gradient machine. - * @param [in] origin gradient machine - * @param [in] modelConfigProtobuf model config protobuf - * @param [in] size of model config buffer. - * @param [out] slave gradient machine, the output value. - * @return paddle_error - */ -PD_API paddle_error -paddle_gradient_machine_create_shared_param(paddle_gradient_machine origin, - void* modelConfigProtobuf, - int size, - paddle_gradient_machine* slave); - -PD_API paddle_error -paddle_gradient_machine_randomize_param(paddle_gradient_machine machine); - -/** - * @brief Destroy a gradient machine - * @param machine that need to destroy - * @return paddle_error - */ -PD_API paddle_error -paddle_gradient_machine_destroy(paddle_gradient_machine machine); - -/** - * @brief Get the output of the layer named `layerName`. 
- * @param [in] gradient machine that have run a inference - * @param [in] layerName name of specified layer - * @param [out] args output of the specified layer - * @return paddle_error - */ -PD_API paddle_error -paddle_gradient_machine_get_layer_output(paddle_gradient_machine machine, - const char* layerName, - paddle_arguments args); - -/** - * @brief Release the middle layer's output memory of the gradient machine. - * @param [in] gradient machine that have run a inference - * @return paddle_error - */ -PD_API paddle_error -paddle_gradient_machine_release_layer_output(paddle_gradient_machine machine); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/paddle/legacy/capi/main.h b/paddle/legacy/capi/main.h deleted file mode 100644 index a0cb7bc296762cd86c931a07c908b352fb8ce582..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/main.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef __PADDLE_CAPI_MAIN_H__ -#define __PADDLE_CAPI_MAIN_H__ -#include "config.h" -#include "error.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Initialize Paddle. - */ -PD_API paddle_error paddle_init(int argc, char** argv); - -/** - * Initialize the thread environment of Paddle. - * @note it is requisite for GPU runs but optional for CPU runs. - * For GPU runs, all threads will run on the same GPU devices. 
- */ -PD_API paddle_error paddle_init_thread(); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/paddle/legacy/capi/matrix.h b/paddle/legacy/capi/matrix.h deleted file mode 100644 index f6747f7b1a196764dd60a3d991a91ef19dc850c1..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/matrix.h +++ /dev/null @@ -1,146 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef __PADDLE_CAPI_MATRIX_H__ -#define __PADDLE_CAPI_MATRIX_H__ - -#include -#include -#include "config.h" -#include "error.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Matrix functions. Return will be a paddle_error type. - */ -typedef void* paddle_matrix; - -/** - * @brief paddle_matrix_create Create a dense matrix - * @param height matrix height. - * @param width matrix width - * @param useGpu use GPU of not - * @return Matrix handler - */ -PD_API paddle_matrix paddle_matrix_create(uint64_t height, - uint64_t width, - bool useGpu); - -/** - * @brief paddle_matrix_create_sparse Create a sparse matrix. - * @param height the matrix height. - * @param width the matrix width. - * @param nnz the number of non-zero elements. - * @param isBinary is binary (either 1 or 0 in matrix) or not. - * @param useGpu is using GPU or not. - * @return paddle_matrix. - * @note Mobile inference does not support this interface. 
- */ -PD_API paddle_matrix paddle_matrix_create_sparse( - uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu); - -/** - * @brief paddle_matrix_destroy Destroy a matrix. - * @param mat - * @return paddle_error - */ -PD_API paddle_error paddle_matrix_destroy(paddle_matrix mat); - -/** - * @brief paddle_matrix_set_row Set a row to matrix. - * @param mat Target Matrix - * @param rowID Index of row - * @param rowArray Row data. - * @return paddle_error - */ -PD_API paddle_error paddle_matrix_set_row(paddle_matrix mat, - uint64_t rowID, - paddle_real* rowArray); - -/** - * @brief paddle_matrix_set_value Set value to matrix. - * @param mat Target Matrix - * @param value Row data. - * @return paddle_error - * @note value should contain enough element of data to init the mat - */ -PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat, - paddle_real* value); - -/** - * @brief PDMatGetRow Get raw row buffer from matrix - * @param [in] mat Target matrix - * @param [in] rowID Index of row. 
- * @param [out] rawRowBuffer Row Buffer - * @return paddle_error - */ -PD_API paddle_error paddle_matrix_get_row(paddle_matrix mat, - uint64_t rowID, - paddle_real** rawRowBuffer); - -/** - * @brief copy data from the matrix - * @param [in] mat Target matrix - * @param [out] result pointer to store the matrix data - * @return paddle_error - * @note the space of the result should allocated before invoke this API - */ -PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat, - paddle_real* result); -/** - * @brief PDMatCreateNone Create None Matrix - * @return - */ -PD_API paddle_matrix paddle_matrix_create_none(); - -/** - * @brief PDMatGetShape get the shape of matrix - * @param mat target matrix - * @param height The height of matrix - * @param width The width of matrix - * @return paddle_error - */ -PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat, - uint64_t* height, - uint64_t* width); - -/** - * @brief paddle_matrix_sparse_copy_from Copy from a CSR format matrix - * @param [out] mat output matrix - * @param [in] rowArray row array. The array slices in column array. - * @param [in] rowSize length of row array. - * @param [in] colArray the column array. It means the non-zero element indices - * in each row. - * @param [in] colSize length of column array. - * @param [in] valueArray the value array. It means the non-zero elemnt values. - * NULL if the matrix is binary. - * @param [in] valueSize length of value array. Zero if the matrix is binary. - * @return paddle_error - * @note Mobile inference does not support this interface. 
- */ -PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, - int* rowArray, - uint64_t rowSize, - int* colArray, - uint64_t colSize, - float* valueArray, - uint64_t valueSize); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/paddle/legacy/capi/paddle_capi.map b/paddle/legacy/capi/paddle_capi.map deleted file mode 100644 index 8d673f675dd5511f554bff9519a8c078e11868bd..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/paddle_capi.map +++ /dev/null @@ -1,6 +0,0 @@ -{ - global: - paddle_*; - local: - *; -}; diff --git a/paddle/legacy/capi/tests/.gitignore b/paddle/legacy/capi/tests/.gitignore deleted file mode 100644 index 7ab6be95e397fa8f0339294a00c2f057bc116792..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/tests/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -w -b diff --git a/paddle/legacy/capi/tests/CMakeLists.txt b/paddle/legacy/capi/tests/CMakeLists.txt deleted file mode 100644 index bb38ace62808db5ce95a1a57ff465e8edc059213..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/tests/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -add_unittest(capi_test_mats test_Vector.cpp - test_Matrix.cpp test_Arguments.cpp) - -target_include_directories(capi_test_mats PUBLIC ${PADDLE_CAPI_INC_PATH}) -target_link_libraries(capi_test_mats paddle_capi) - -if(NOT MOBILE_INFERENCE) - add_unittest_without_exec(capi_test_gradientMachine test_GradientMachine.cpp) - target_include_directories(capi_test_gradientMachine PUBLIC - ${PADDLE_CAPI_INC_PATH}) - target_link_libraries(capi_test_gradientMachine paddle_capi) - add_test(NAME capi_test_gradientMachine - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests) -endif() diff --git a/paddle/legacy/capi/tests/test_Arguments.cpp b/paddle/legacy/capi/tests/test_Arguments.cpp deleted file mode 100644 index 
6fb379719dc0f3230c0801752720703ad185216f..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/tests/test_Arguments.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "capi.h" -#include "gtest/gtest.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -static std::vector randomBuffer(size_t bufSize) { - auto& eng = paddle::ThreadLocalRandomEngine::get(); - std::uniform_real_distribution dist(-1.0, 1.0); - std::vector retv; - retv.reserve(bufSize); - for (size_t i = 0; i < bufSize; ++i) { - retv.push_back(dist(eng)); - } - return retv; -} - -TEST(CAPIArguments, create) { - //! TODO(yuyang18): Test GPU Code. 
- paddle_arguments args = paddle_arguments_create_none(); - uint64_t size; - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_size(args, &size)); - ASSERT_EQ(0UL, size); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(args)); -} - -TEST(CAPIArguments, value) { - paddle_arguments args = paddle_arguments_create_none(); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_resize(args, 1)); - - paddle_matrix mat = paddle_matrix_create(128, 64, false); - for (size_t i = 0; i < 128; ++i) { - std::vector sampleBuf = randomBuffer(64); - paddle_matrix_set_row(mat, i, sampleBuf.data()); - } - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_set_value(args, 0, mat)); - - paddle_matrix val = paddle_matrix_create_none(); - - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_value(args, 0, val)); - - for (size_t i = 0; i < 128; ++i) { - paddle_real* row1; - paddle_real* row2; - - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(mat, i, &row1)); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(val, i, &row2)); - ASSERT_EQ(row1, row2); - } - - paddle_ivector ivec = paddle_ivector_create_none(); - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(ivec)); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(val)); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(mat)); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(args)); -} - -TEST(CAPIArguments, ids) { - paddle_arguments args = paddle_arguments_create_none(); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_resize(args, 1)); - - paddle_ivector ivec; - int array[3] = {1, 2, 3}; - ivec = paddle_ivector_create(array, 3, true, false); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_set_ids(args, 0, ivec)); - - paddle_ivector val = paddle_ivector_create_none(); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_ids(args, 0, val)); - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(ivec)); - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(val)); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(args)); -} - -template -void testSequenceHelper(T1 setter, T2 getter) { - paddle_arguments 
args = paddle_arguments_create_none(); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_resize(args, 1)); - - paddle_ivector ivec; - int array[3] = {1, 2, 3}; - ivec = paddle_ivector_create(array, 3, true, false); - ASSERT_EQ(kPD_NO_ERROR, setter(args, 0, ivec)); - - paddle_ivector val = paddle_ivector_create_none(); - ASSERT_EQ(kPD_NO_ERROR, getter(args, 0, val)); - uint64_t size; - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_get_size(val, &size)); - - int* rawBuf; - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_get(val, &rawBuf)); - for (size_t i = 0; i < size; ++i) { - ASSERT_EQ(array[i], rawBuf[i]); - } - - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(ivec)); - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(val)); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(args)); -} - -TEST(CAPIArguments, Sequence) { - auto testSequence = [](uint32_t nestedLevel) { - testSequenceHelper(std::bind(paddle_arguments_set_sequence_start_pos, - std::placeholders::_1, - std::placeholders::_2, - nestedLevel, - std::placeholders::_3), - std::bind(paddle_arguments_get_sequence_start_pos, - std::placeholders::_1, - std::placeholders::_2, - nestedLevel, - std::placeholders::_3)); - }; - for (uint32_t i = 0; i < 2; ++i) { // test seq and sub-seq. - testSequence(i); - } -} diff --git a/paddle/legacy/capi/tests/test_GradientMachine.cpp b/paddle/legacy/capi/tests/test_GradientMachine.cpp deleted file mode 100644 index 5d1b7cb6ca4073c0a489366e415f8f74d3c19bec..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/tests/test_GradientMachine.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include -#include -#include "capi.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -static std::vector randomBuffer(size_t bufSize) { - auto& eng = paddle::ThreadLocalRandomEngine::get(); - std::uniform_real_distribution dist(-1.0, 1.0); - std::vector retv; - retv.reserve(bufSize); - for (size_t i = 0; i < bufSize; ++i) { - retv.push_back(dist(eng)); - } - return retv; -} - -TEST(GradientMachine, testPredict) { - //! TODO(yuyang18): Test GPU Code. - paddle::TrainerConfigHelper config("./test_predict_network.py"); - std::string buffer; - ASSERT_TRUE(config.getModelConfig().SerializeToString(&buffer)); - paddle_gradient_machine machine; - - ASSERT_EQ(kPD_NO_ERROR, - paddle_gradient_machine_create_for_inference( - &machine, &buffer[0], (int)buffer.size())); - std::unique_ptr gm( - paddle::GradientMachine::create(config.getModelConfig())); - ASSERT_NE(nullptr, gm); - gm->randParameters(); - gm->saveParameters("./"); - - ASSERT_EQ(kPD_NO_ERROR, - paddle_gradient_machine_load_parameter_from_disk(machine, "./")); - - paddle_gradient_machine machineSlave; - ASSERT_EQ(kPD_NO_ERROR, - paddle_gradient_machine_create_shared_param( - machine, &buffer[0], (int)buffer.size(), &machineSlave)); - std::swap(machineSlave, machine); - paddle_arguments outArgs = paddle_arguments_create_none(); - - paddle_arguments inArgs = paddle_arguments_create_none(); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_resize(inArgs, 1)); - paddle_matrix mat = paddle_matrix_create(1, 100, false); - static_assert(std::is_same::value, ""); - - auto data = 
randomBuffer(100); - paddle_real* rowPtr; - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(mat, 0, &rowPtr)); - memcpy(rowPtr, data.data(), data.size() * sizeof(paddle_real)); - - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_set_value(inArgs, 0, mat)); - ASSERT_EQ(kPD_NO_ERROR, - paddle_gradient_machine_forward(machine, inArgs, outArgs, false)); - - uint64_t sz; - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_size(outArgs, &sz)); - ASSERT_EQ(1UL, sz); - - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_value(outArgs, 0, mat)); - std::vector paddleInArgs; - std::vector paddleOutArgs; - paddleInArgs.resize(1); - paddleInArgs[0].value = - paddle::Matrix::create(data.data(), 1, 100, false, false); - - gm->forward(paddleInArgs, &paddleOutArgs, paddle::PASS_TEST); - - auto matPaddle = paddleOutArgs[0].value; - - uint64_t height, width; - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_shape(mat, &height, &width)); - ASSERT_EQ(matPaddle->getHeight(), height); - ASSERT_EQ(matPaddle->getWidth(), width); - - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(mat, 0, &rowPtr)); - for (size_t i = 0; i < width; ++i) { - ASSERT_NEAR(matPaddle->getData()[i], rowPtr[i], 1e-5); - } - - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(mat)); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(inArgs)); - ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(outArgs)); - std::swap(machineSlave, machine); - ASSERT_EQ(kPD_NO_ERROR, paddle_gradient_machine_destroy(machineSlave)); - ASSERT_EQ(kPD_NO_ERROR, paddle_gradient_machine_destroy(machine)); -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - std::vector argvs; - argvs.push_back(strdup("--use_gpu=false")); - paddle_init((int)argvs.size(), argvs.data()); - for (auto each : argvs) { - free(each); - } - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/capi/tests/test_Matrix.cpp b/paddle/legacy/capi/tests/test_Matrix.cpp deleted file mode 100644 index 
5ba051ae179569bde82c4219e55060503c8fc4f5..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/tests/test_Matrix.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "capi.h" -#include "gtest/gtest.h" - -TEST(CAPIMatrix, create) { - //! TODO(yuyang18): Test GPU Code. - paddle_matrix mat = paddle_matrix_create(128, 32, false); - std::vector sampleRow; - sampleRow.resize(32); - for (size_t i = 0; i < sampleRow.size(); ++i) { - sampleRow[i] = 1.0 / (i + 1.0); - } - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_set_row(mat, 0, sampleRow.data())); - ASSERT_EQ(kPD_OUT_OF_RANGE, - paddle_matrix_set_row(mat, 128, sampleRow.data())); - - paddle_real* arrayPtr; - - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(mat, 0, &arrayPtr)); - for (size_t i = 0; i < sampleRow.size(); ++i) { - ASSERT_NEAR(sampleRow[i], arrayPtr[i], 1e-5); - } - - uint64_t height, width; - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_shape(mat, &height, &width)); - ASSERT_EQ(128UL, height); - ASSERT_EQ(32UL, width); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(mat)); -} - -TEST(CAPIMatrix, createNone) { - paddle_matrix mat = paddle_matrix_create_none(); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(mat)); -} - -TEST(CAPIMatrix, cpu_get_set_value) { - paddle_matrix mat = paddle_matrix_create(128, 32, false); - std::vector sample; - std::vector result; - sample.resize(128 * 32); - result.resize(128 * 32); - for 
(size_t i = 0; i < sample.size(); ++i) { - sample[i] = 1.0 / (i + 1.0); - } - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_set_value(mat, sample.data())); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_value(mat, result.data())); - for (size_t i = 0; i < sample.size(); ++i) { - ASSERT_NEAR(sample[i], result[i], 1e-5); - } - - uint64_t height, width; - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_shape(mat, &height, &width)); - ASSERT_EQ(128UL, height); - ASSERT_EQ(32UL, width); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(mat)); -} - -#ifdef PADDLE_WITH_CUDA -TEST(CAPIMatrix, gpu_get_set_value) { - paddle_matrix mat = paddle_matrix_create(128, 32, true); - std::vector sample; - std::vector result; - sample.resize(128 * 32); - result.resize(128 * 32); - for (size_t i = 0; i < sample.size(); ++i) { - sample[i] = 1.0 / (i + 1.0); - } - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_set_value(mat, sample.data())); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_value(mat, result.data())); - for (size_t i = 0; i < sample.size(); ++i) { - ASSERT_NEAR(sample[i], result[i], 1e-5); - } - - uint64_t height, width; - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_shape(mat, &height, &width)); - ASSERT_EQ(128UL, height); - ASSERT_EQ(32UL, width); - ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(mat)); -} -#endif diff --git a/paddle/legacy/capi/tests/test_Vector.cpp b/paddle/legacy/capi/tests/test_Vector.cpp deleted file mode 100644 index fa7407e484c4e9b87e9f77f8a5f3d1580e020f3e..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/tests/test_Vector.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "capi.h" -#include "gtest/gtest.h" - -TEST(CAPIVector, create) { - //! TODO(yuyang18): Test GPU Code. - paddle_ivector vec; - int array[3] = {1, 2, 3}; - vec = paddle_ivector_create(array, 3, true, false); - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_resize(vec, 1000)); - uint64_t size; - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_get_size(vec, &size)); - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(vec)); -} - -TEST(CAPIVector, createNone) { - paddle_ivector vec = paddle_ivector_create_none(); - ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(vec)); -} diff --git a/paddle/legacy/capi/tests/test_predict_network.py b/paddle/legacy/capi/tests/test_predict_network.py deleted file mode 100644 index b8efb25704d93ebe6348bc2c6edbc272b8823b28..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/tests/test_predict_network.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=100) - -x = data_layer(name='x', size=100) - -y = fc_layer( - input=x, - size=100, - bias_attr=ParamAttr(name='b'), - param_attr=ParamAttr(name='w')) - -outputs(y) diff --git a/paddle/legacy/capi/vector.h b/paddle/legacy/capi/vector.h deleted file mode 100644 index a79f7fdf789d00ee7d4b612728a5946d407876f3..0000000000000000000000000000000000000000 --- a/paddle/legacy/capi/vector.h +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef __PADDLE_CAPI_VECTOR_H__ -#define __PADDLE_CAPI_VECTOR_H__ - -#include -#include -#include "config.h" -#include "error.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Int Vector Functions. Return will be a paddle_error type. - */ -typedef void* paddle_ivector; - -/** - * @brief Create an none int vector. It just a handler and store nothing. Used - * to get output from other api. - * @return None int vector. - */ -PD_API paddle_ivector paddle_ivector_create_none(); - -/** - * @brief paddle_ivector_create create a paddle int vector - * @param array: input array. - * @param size: input array size. - * @param copy: memory copy or just use same memory. True if copy. 
- * @param useGPU: True if use GPU - * @return paddle_error - */ -PD_API paddle_ivector paddle_ivector_create(int* array, - uint64_t size, - bool copy, - bool useGPU); - -/** - * @brief paddle_ivector_destroy destory an int vector. - * @param ivec vector to be destoried. - * @return paddle_error - */ -PD_API paddle_error paddle_ivector_destroy(paddle_ivector ivec); - -/** - * @brief paddle_ivector_get get raw buffer stored inside this int vector. It - * could be GPU memory if this int vector is stored in GPU. - * @param [in] ivec int vector - * @param [out] buffer the return buffer pointer. - * @return paddle_error - */ -PD_API paddle_error paddle_ivector_get(paddle_ivector ivec, int** buffer); - -/** - * @brief paddle_ivector_resize resize the int vector. - * @param [in] ivec: int vector - * @param [in] size: size to change - * @return paddle_error - */ -PD_API paddle_error paddle_ivector_resize(paddle_ivector ivec, uint64_t size); - -/** - * @brief paddle_ivector_get_size get the size of int vector. - * @param [in] ivec: int vector - * @param [out] size: return size of this int vector. 
- * @return paddle_error - */ -PD_API paddle_error paddle_ivector_get_size(paddle_ivector ivec, - uint64_t* size); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/paddle/legacy/cuda/CMakeLists.txt b/paddle/legacy/cuda/CMakeLists.txt deleted file mode 100755 index 9bbb8de78e09829d24faf42c360811084981578f..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/CMakeLists.txt +++ /dev/null @@ -1,89 +0,0 @@ -set(AVX_SOURCES - src/hl_math.cc - src/hl_avx_functions.cc -) - -if(WITH_AVX) - set(CUDA_SOURCES - src/hl_time.cc - src/hl_cpu_functions.cc - ${AVX_SOURCES}) -else() - set(CUDA_SOURCES - src/hl_time.cc - src/hl_cpu_functions.cc) -endif() - -set(CUDA_CXX_WITH_GPU_SOURCES - src/hl_cuda_cublas.cc - src/hl_cuda_cudnn.cc - src/hl_cuda_device.cc) - -if(WITH_GPU) - set(CUDA_CXX_SOURCES - src/hl_warpctc_wrap.cc - ${CUDA_CXX_WITH_GPU_SOURCES}) - - set_source_files_properties(${CUDA_CXX_SOURCES} - PROPERTIES COMPILE_FLAGS "-D__NVCC__") -else() - if (NOT MOBILE_INFERENCE) - set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc) - endif() -endif() - -set(CUDA_CU_SOURCES - src/hl_perturbation_util.cu - src/hl_cuda_aggregate.cu - src/hl_cuda_matrix.cu - src/hl_cuda_sparse.cu - src/hl_cuda_cnn.cu - src/hl_cuda_lstm.cu - src/hl_top_k.cu - src/hl_batch_transpose.cu - src/hl_batch_norm.cu - src/hl_cuda_sequence.cu - src/hl_table_apply.cu) - -set(CUDA_HEADERS - include/hl_time.h - include/hl_warpctc_wrap.h - include/hl_sequence.h - include/hl_cuda_cublas.h - include/hl_batch_transpose.h - include/hl_avx_functions.h - include/hl_sparse.h - include/hl_functions.h - include/hl_cuda_cudnn.h - include/hl_activation_functions.h - include/hl_base.h - include/stub/hl_cuda_cudnn_stub.h - include/stub/hl_cuda_stub.h - include/stub/hl_cuda_cublas_stub.h - include/stub/hl_cnn_stub.h - include/stub/hl_lstm_stub.h - include/stub/hl_sequence_stub.h - include/stub/hl_aggregate_stub.h - include/stub/hl_sparse_stub.h - include/stub/hl_matrix_stub.h - include/hl_aggregate.h - include/hl_cuda.h 
- include/hl_lstm.h - include/hl_table_apply.h - include/hl_gpu.h - include/hl_top_k.h - include/hl_matrix.h - include/hl_cnn.h) - -if(WITH_GPU) - cuda_add_library(paddle_cuda - ${CUDA_SOURCES} - ${CUDA_CU_SOURCES} - ${CUDA_CXX_SOURCES}) -else() - add_library(paddle_cuda - ${CUDA_SOURCES} - ${CUDA_CXX_SOURCES}) -endif() - -add_dependencies(paddle_cuda paddle_proto ${external_project_dependencies}) diff --git a/paddle/legacy/cuda/include/hl_activation_functions.h b/paddle/legacy/cuda/include/hl_activation_functions.h deleted file mode 100644 index 66a69db545b541409f895820ad621a2a9a684e20..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_activation_functions.h +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_ACTIVATION_FUNCTIONS_H_ -#define HL_ACTIVATION_FUNCTIONS_H_ - -#include "hl_functions.h" - -/** - * Active functions: sigmoid, relu, tanh and linear. - */ -#define HPPL_ACTIVE_FUNCTION \ - { hppl::sigmoid, hppl::relu, hppl::tanh, hppl::linear } - -namespace hppl { - -/** - * Hppl supports sigmoid, relu, tanh, linear active functions - * for neural networks' forward and backward activation. 
- */ -template -class Active { - public: - typedef T (*forward)(T); - typedef T (*backward)(T, T); -}; - -#ifdef __NVCC__ -namespace gpu { -static __device__ Active::forward forward[] = HPPL_ACTIVE_FUNCTION; -static __device__ Active::backward backward[] = HPPL_ACTIVE_FUNCTION; -} // namespace gpu -#else -namespace cpu { -static Active::forward forward[] = HPPL_ACTIVE_FUNCTION; -static Active::backward backward[] = HPPL_ACTIVE_FUNCTION; -} // namespace cpu - -#ifdef __AVX__ -namespace avx { -static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION; -static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION; -} // namespace avx -#endif -#endif - -} // namespace hppl - -#endif // HL_ACTIVATION_FUNCTIONS_H_ diff --git a/paddle/legacy/cuda/include/hl_aggregate.h b/paddle/legacy/cuda/include/hl_aggregate.h deleted file mode 100644 index 1ca26aa3bbb72b13440defb46cf8d05760512e19..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_aggregate.h +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_AGGREGATE_H_ -#define HL_AGGREGATE_H_ - -#include "hl_base.h" - -/** - * @brief Calculate the sum of each row of the matrix A_d. - * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output matrix (M x 1). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. 
- * - */ -extern void hl_matrix_row_sum(real *A_d, real *C_d, int dimM, int dimN); - -/** - * @brief Calculate the maximum value of each row of the matrix A_d. - * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output matrix (M x 1). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_row_max(real *A_d, real *C_d, int dimM, int dimN); - -/** - * @brief Calculate the minimum value of each row of the matrix A_d. - * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output matrix (M x 1). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_row_min(real *A_d, real *C_d, int dimM, int dimN); - -/** - * @brief Calculate the sum of each column of the matrix A_d. - * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output Matrix (1 x N). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_column_sum(real *A_d, real *C_d, int dimM, int dimN); - -/** - * @brief Calculate the maximum value of each column of the matrix A_d. - * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output matrix (1 x N). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_column_max(real *A_d, real *C_d, int dimM, int dimN); - -/** - * @brief Calculate the minimum value of each column of the matrix A_d. - * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output matrix (1 x N). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_column_min(real *A_d, real *C_d, int dimM, int dimN); - -/** - * @brief C_h = sum(A_d[i]). - * - * @param[in] A_d input(m). - * @param[out] C_h output(host memory). - * @param[in] dimM size of vector. - * - */ -extern void hl_vector_sum(real *A_d, real *C_h, int dimM); - -/** - * @brief C_h = sum(abs(A_d[i])). - * - * @param[in] A_d input(m). 
- * @param[out] C_h output(host memory). - * @param[in] dimM size of vector. - * - */ -extern void hl_vector_abs_sum(real *A_d, real *C_h, int dimM); - -#endif /* HL_AGGREGATE_H_ */ diff --git a/paddle/legacy/cuda/include/hl_avx_functions.h b/paddle/legacy/cuda/include/hl_avx_functions.h deleted file mode 100644 index 9fb99a36ea6bca2bc9bd762ca724a934b98831a7..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_avx_functions.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_AVX_FUNCTIONS_H_ -#define HL_AVX_FUNCTIONS_H_ - -#include - -namespace hppl { -__m256 relu(const __m256 a); -__m256 sigmoid(const __m256 a); -__m256 tanh(const __m256 a); -__m256 linear(const __m256 a); - -__m256 relu(const __m256 a, const __m256 b); -__m256 sigmoid(const __m256 a, const __m256 b); -__m256 tanh(const __m256 a, const __m256 b); -__m256 linear(const __m256 a, const __m256 b); -} // namespace hppl - -#endif // HL_AVX_FUNCTIONS_H_ diff --git a/paddle/legacy/cuda/include/hl_base.h b/paddle/legacy/cuda/include/hl_base.h deleted file mode 100644 index bfe812a4387be72c3e73d6b45852e3a90b1926eb..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_base.h +++ /dev/null @@ -1,250 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#ifdef PADDLE_TYPE_DOUBLE -#define HL_FLOAT_MAX 3.40282347e+38F -#define HL_FLOAT_MIN 1.17549435e-38F -using real = double; -#else -#define HL_FLOAT_MAX 1.7976931348623157e+308 -#define HL_FLOAT_MIN 2.2250738585072014e-308 -using real = float; -#endif - -/** - * The maximum input value for exp, used to avoid overflow problem. - * currently only used for tanh function. - */ -#define EXP_MAX_INPUT 40.0 - -/** - * @brief DIVUP(x, y) is similar to ceil(x / y). - * @note For CUDA, DIVUP will be used to specify - * the size of blockDim. - */ -#ifndef DIVUP -#define DIVUP(x, y) (((x) + (y)-1) / (y)) -#endif - -/** - * HPPL is an internal high performance parallel computing library - * for high-level neural network routines, which can support many - * heterogeneous compute architectures, such as GPU, FPGA, etc. - */ - -/** - * @brief HPPL CUDA Stream. - * - * @note Each thread can use HPPL_STREAM_* after calling hl_init. - * HPPL_STREAM_DEFAULT is HPPL default stream. - */ -typedef enum { - HPPL_STREAM_DEFAULT = 0, /* Thread Default Stream*/ - HPPL_STREAM_1 = 1, - HPPL_STREAM_2 = 2, - HPPL_STREAM_3 = 3, - HPPL_STREAM_4 = 4, - HPPL_THREAD_STREAM_1 = 5, - HPPL_THREAD_STREAM_2 = 6, - HPPL_THREAD_STREAM_3 = 7, - HPPL_THREAD_STREAM_4 = 8, - HPPL_STREAM_END -} hl_stream_t; - -/** - * @brief HPPL activation mode. 
- */ -typedef enum { - HL_ACTIVATION_SIGMOID = 0, - HL_ACTIVATION_RELU = 1, - HL_ACTIVATION_TANH = 2, - HL_ACTIVATION_LINEAR = 3, - HL_ACTIVATION_END -} hl_activation_mode_t; - -/** - * @brief Transpose type. - */ -typedef enum { - HPPL_OP_N = 0, /* transpose */ - HPPL_OP_T = 1, /* non transpose */ - HPPL_OP_END -} hl_trans_op_t; - -/** - * @brief Lstm value. - * - * @param gateValue input value. - * @param prevStateValue previous state value. - * @param stateValue state value. - * @param stateActiveValue state active value. - * @param outputValue output value. - */ -typedef struct { - real *gateValue; - real *prevStateValue; - real *stateValue; - real *stateActiveValue; - real *outputValue; - real *checkIg; - real *checkFg; - real *checkOg; -} hl_lstm_value; - -/** - * @brief Lstm gradient. - * - * @param gateGrad input gradient. - * @param prevStateGrad previous state gradient. - * @param stateGrad state gradient. - * @param stateActiveGrad state active gradient. - * @param outputGrad output gradient. - */ -typedef struct { - real *gateGrad; - real *prevStateGrad; - real *stateGrad; - real *stateActiveGrad; - real *outputGrad; - real *checkIgGrad; - real *checkFgGrad; - real *checkOgGrad; -} hl_lstm_grad; - -/** - * @brief Gru value. - * - * @param gateWeight gate weight (updateGate + resetGate). - * @param stateWeight frame state weight. - * @param gateValue gate value results. - * @param resetOutputValue resetOutput value. - * @param outputValue output value. - * @param prevOutValue previous output value. - * - */ -typedef struct { - real *gateWeight; - real *stateWeight; - real *gateValue; - real *resetOutputValue; - real *outputValue; - real *prevOutValue; -} hl_gru_value; - -/** - * @brief Gru gradient. - * - * @param gateWeightGrad gate weight gradient. - * @param stateWeightGrad frame state weight gradient. - * @param gateGrad gate gradient results. - * @param resetOutputGrad resetOutput gradient. - * @param outputGrad output gradient. 
- * @param prevOutGrad previous output gradient. - */ -typedef struct { - real *gateWeightGrad; - real *stateWeightGrad; - real *gateGrad; - real *resetOutputGrad; - real *outputGrad; - real *prevOutGrad; -} hl_gru_grad; - -/** - * @brief Sparse matrix value type. - */ -typedef enum { - HL_NO_VALUE = 0, /* matrix values only 0 or 1 */ - HL_FLOAT_VALUE = 1, - HL_VALUE_END -} hl_matrix_value_t; - -/** - * @brief HPPL matrix format. - */ -typedef enum { - HL_SPARSE_CSR = 0, - HL_SPARSE_CSC = 1, - HL_SPARSE_END -} hl_matrix_format_t; - -typedef struct _hl_matrix_s *hl_matrix_s; - -/** - * @brief HPPL sparse matrix. - * - * @param matrix sparse matrix. - * @param format matrix format. - * @param type the type of matrix values. - * @param rows matrix rows. - * @param cols matrix columns. - * @param nnz nonzero values of sparse matrix. - */ -typedef struct { - hl_matrix_s matrix; - hl_matrix_format_t format; - hl_matrix_value_t type; - int rows; - int cols; - size_t nnz; -} _hl_sparse_matrix_s, *hl_sparse_matrix_s; - -#ifdef __NVCC__ - -#include -#include "paddle/legacy/cuda/include/hl_cuda.h" -#include "paddle/legacy/utils/Logging.h" - -extern __thread bool g_sync_flag; -extern __thread cudaStream_t default_stream; -#define STREAM_DEFAULT default_stream - -/** - * @brief Check cuda kernel execution. - * @param msg error string - */ -#define CHECK_SYNC(msg) \ - if (true == g_sync_flag) { \ - hl_stream_synchronize(HPPL_STREAM_DEFAULT); \ - cudaError_t err = (cudaError_t)hl_get_device_last_error(); \ - CHECK_EQ(cudaSuccess, err) \ - << "[" << msg << "] " \ - << "CUDA error: " << hl_get_device_error_string((size_t)err); \ - } - -// __shfl has been deprecated as of CUDA 9.0. 
-#if CUDA_VERSION < 9000 -template -__forceinline__ __device__ T __shfl_down_sync(unsigned, T val, int delta) { - return __shfl_down(val, delta); -} - -template -__forceinline__ __device__ T -__shfl_sync(unsigned, T val, int src_line, int width) { - return __shfl(val, src_line, width); -} - -#define CREATE_SHFL_MASK(mask, predicate) mask = 0u; -#else -#define FULL_WARP_MASK 0xFFFFFFFF -#define CREATE_SHFL_MASK(mask, predicate) \ - mask = __ballot_sync(FULL_WARP_MASK, (predicate)) -#endif - -#endif // __NVCC__ diff --git a/paddle/legacy/cuda/include/hl_batch_norm.h b/paddle/legacy/cuda/include/hl_batch_norm.h deleted file mode 100644 index 7814204d1b085694c74695c95a8f9cf517810450..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_batch_norm.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_BATCH_NORM_H_ -#define HL_BATCH_NORM_H_ - -#include "hl_base.h" - -/** - * @brief batch norm inferece. - * - * @param[in] input input data. - * @param[out] output output data. - * @param[in] scale batch normalization scale parameter (in original - * paper scale is referred to as gamma). - * @param[in] bias batch normalization bias parameter (in original - * paper scale is referred to as beta). - * @param[in] estimatedMean - * @param[in] estimatedVar The moving mean and variance - * accumulated during the training phase are passed - * as inputs here. 
- * @param[in] epsilon Epsilon value used in the batch - * normalization formula. - */ -extern void hl_batch_norm_cuda_inference(const real* input, - real* output, - const real* scale, - const real* bias, - const real* estimatedMean, - const real* estimatedVar, - const double epsilon, - size_t batchSize, - size_t channel, - size_t height, - size_t width); - -#endif // HL_BATCH_NORM_H_ diff --git a/paddle/legacy/cuda/include/hl_batch_transpose.h b/paddle/legacy/cuda/include/hl_batch_transpose.h deleted file mode 100644 index a16d3764fc724d2dd282a9416485d604a273378e..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_batch_transpose.h +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_BATCH_TRANSPOSE_H_ -#define HL_BATCH_TRANSPOSE_H_ - -#include "hl_base.h" - -/** - * @brief Perform matrix transpose for each data in the batch. - * - * @param[in] input height * width elements in batch. - * @param[out] output height * width elements in batch. - * @param[in] width width of batch data. - * @param[in] height height of batch data. - * @param[in] batchSize batch size - * - * @note Both the inpt and output are arranged in batch-first - * order. Each batch has height * width data, which are - * arranged in height-first (or row-first) manner. 
- */ -extern void batchTranspose( - const real* input, real* output, int width, int height, int batchSize); - -#endif // HL_BATCH_TRANSPOSE_H_ diff --git a/paddle/legacy/cuda/include/hl_cnn.h b/paddle/legacy/cuda/include/hl_cnn.h deleted file mode 100644 index b790fa39fe863bbb00f6cd36d4c63481b7634fe1..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cnn.h +++ /dev/null @@ -1,417 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CNN_H_ -#define HL_CNN_H_ - -#include "hl_base.h" - -/** - * @brief Maximum pool forward with Mask output. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[in] channels number of channel. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeX width of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] tgtData output data. - * @param[in] tgtStride stride between output data samples. - * @param[out] maskData the location indices of select max data. 
- */ -extern void hl_maxpool_forward(const int frameCnt, - const real* inputData, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride, - real* maskData = NULL); - -/** - * @brief Maximum pool backward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[out] outData output data. - * @param[out] outGrad output grad data. - * @param[in] channels number of channel. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeX width of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] scaleA scale. - * @param[in] scaleB scale. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] targetGrad output grad. - * @param[in] outStride stride between output data samples. - * - */ -extern void hl_maxpool_backward(const int frameCnt, - const real* inputData, - const real* outData, - const real* outGrad, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real scaleA, - real scaleB, - real* targetGrad, - const int outStride); - -/** - * @brief Averge pool forward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] inputData input data. - * @param[in] channels number of channel. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. 
- * @param[in] sizeX width of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[out] tgtData output data. - * @param[in] tgtStride stride between output data samples. - * @param[in] excludeMode whether to consider paddings for size. - * - */ -extern void hl_avgpool_forward(const int frameCnt, - const real* inputData, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride, - bool excludeMode); - -/** - * @brief Maximum pool backward. - * - * @param[in] frameCnt batch size of input image. - * @param[in] outGrad output grad data. - * @param[in] channels number of channel. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] pooledH output image height. - * @param[in] pooledW output image width. - * @param[in] sizeX width of pooling window. - * @param[in] sizeY height of pooling window. - * @param[in] strideH pooling stride height. - * @param[in] strideW pooling stride width. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[in] scaleA scale. - * @param[in] scaleB scale. - * @param[out] backGrad output grad. - * @param[in] outStride stride between output data samples. - * @param[in] excludeMode whether to consider paddings for size. 
- * - */ -extern void hl_avgpool_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - int paddingH, - int paddingW, - real scaleA, - real scaleB, - real* backGrad, - const int outStride, - bool excludeMode); - -extern void hl_maxpool3D_forward(const int frameCnt, - const real* inputData, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, - real* tgtData, - real* maxPoolIdxData, - const int tgtStride); - -extern void hl_maxpool3D_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, - real scaleA, - real scaleB, - real* targetGrad, - real* maxPoolIdxData, - const int outStride); - -extern void hl_avgpool3D_forward(const int frameCnt, - const real* inputData, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride); - -extern void hl_avgpool3D_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int depth, - const int height, - const int width, - const int 
pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - int paddingD, - int paddingH, - int paddingW, - real scaleA, - real scaleB, - real* backGrad, - const int outStride); - -/** - * @brief Bilinear interpolation forward. - * - * @param[in] inData input value. - * @param[in] inImgH input image height. - * @param[in] inImgW input image width. - * @param[in] inputH input batchSize. - * @param[in] inputW input image data dim. - * @param[out] outData output value. - * @param[in] outImgH output image height. - * @param[in] outImgW output image width. - * @param[in] outputH output batchSize. - * @param[in] outputW output image data dim. - * @param[in] numChannels number of channels. - * @param[in] ratioH inImgH / outImgH. - * @param[in] ratioW inImgW / outImgW. - * - */ -extern void hl_bilinear_forward(const real* inData, - const size_t inImgH, - const size_t inImgW, - const size_t inputH, - const size_t inputW, - real* outData, - const size_t outImgH, - const size_t outImgW, - const size_t outputH, - const size_t outputW, - const size_t numChannels, - const real ratioH, - const real ratioW); - -/** - * @brief Bilinear interpolation backward. - * - * @param[out] inGrad input gradient. - * @param[in] inImgH input image height. - * @param[in] inImgW input image width. - * @param[in] inputH input batchSize. - * @param[in] inputW input image data dim. - * @param[in] outGrad output gradient. - * @param[in] outImgH output image height. - * @param[in] outImgW output image width. - * @param[in] outputH output batchSize. - * @param[in] outputW output image data dim. - * @param[in] numChannels number of channels. - * @param[in] ratioH inImgH / outImgH. - * @param[in] ratioW inImgW / outImgW. 
- * - */ -extern void hl_bilinear_backward(real* inGrad, - const size_t inImgH, - const size_t inImgW, - const size_t inputH, - const size_t inputW, - const real* outGrad, - const size_t outImgH, - const size_t outImgW, - const size_t outputH, - const size_t outputW, - const size_t numChannels, - const real ratioH, - const real ratioW); - -/** - * @brief MaxOut forward. - * - * @param[in] inData input data. - * @param[out] outData output data. - * @param[out] idData output maxId. - * @param[in] batchSize batchSize. - * @param[in] size number of channels * image height * image width. - * @param[in] featLen feature length = image height * image width. - * @param[in] groups number of groups. - */ -extern void hl_maxout_forward(const real* inData, - real* outData, - int* idData, - size_t batchSize, - size_t size, - size_t featLen, - size_t groups); - -/** - * @brief MaxOut backward. - * - * @param[out] inGrad input grad data. - * @param[in] outGrad output grad data. - * @param[in] idData output maxId. - * @param[in] batchSize batchSize. - * @param[in] size number of channels * image height * image width. - * @param[in] featLen feature length = image height * image width. - * @param[in] groups number of groups. - */ -extern void hl_maxout_backward(real* inGrad, - const real* outGrad, - const int* idData, - size_t batchSize, - size_t size, - size_t featLen, - size_t groups); - -/** - * @brief Upsample forward. - * @param[in] inputData input data. - * @param[out] maskData the mask data from MaxPoolWithMaskLayer. - * @param[out] batchSize the batch size of the input. - * @param[in] imgSizeH image height. - * @param[in] imgSizeW image width. - * @param[in] channels the input channels. - * @param[in] outputH the output height. - * @param[in] outputW the output widht. - * @param[out] outputData output data. 
- */ -extern void hl_upsample_forward(real* inputData, - real* maskData, - size_t batchSize, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW, - real* outputData); - -/** - * @brief Upsample backward. - * @param[in] outputGradData the output grad data. - * @param[out] maskData the mask data from MaxPoolWithMaskLayer. - * @param[out] batchSize the batch size of the input. - * @param[in] imgSizeH image height. - * @param[in] imgSizeW image width. - * @param[in] channels the input channels. - * @param[in] outputH the output height. - * @param[in] outputW the output widht. - * @param[out] inputGradData the input grad data. - */ -extern void hl_upsample_backward(real* outputGradData, - real* maskData, - size_t batchSize, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW, - real* inputGradData); - -#endif // HL_CNN_H_ diff --git a/paddle/legacy/cuda/include/hl_cpu_gru.cuh b/paddle/legacy/cuda/include/hl_cpu_gru.cuh deleted file mode 100644 index ce1643932dee6c6f7fdfb4e71e371eb70857411a..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cpu_gru.cuh +++ /dev/null @@ -1,477 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_CPU_GRU_CUH_ -#define HL_CPU_GRU_CUH_ - -#ifndef __NVCC__ - -template -void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, - real *gateValue, - real *resetOutputValue, - real *prevOutputValue, - int frameSize, - hl_activation_mode_t active_gate) { - real rValueUpdateGate; - real rValueResetGate; - real rValueResetOutput; - real rPrevOut = 0; - real *updateGate = gateValue; - real *resetGate = gateValue + frameSize; - - for (int i = 0; i < frameSize; i++) { - rValueUpdateGate = updateGate[i]; - rValueResetGate = resetGate[i]; - if (prevOutputValue) { - rPrevOut = prevOutputValue[i]; - } - - opResetOutput(rValueUpdateGate, - rValueResetGate, - rPrevOut, - rValueResetOutput, - hppl::cpu::forward[active_gate]); - - updateGate[i] = rValueUpdateGate; - resetGate[i] = rValueResetGate; - resetOutputValue[i] = rValueResetOutput; - } -} - -template -void hl_naive_gru_forward_final_output(OpFinalOutput opFinalOutput, - real *gateValue, - real *prevOutputValue, - real *outputValue, - int frameSize, - hl_activation_mode_t active_node) { - real rValueUpdateGate; - real rValueFrameState; - real rPrevOut = 0; - real rOutput; - real *updateGate = gateValue; - real *frameState = gateValue + frameSize * 2; - - for (int i = 0; i < frameSize; i++) { - rValueUpdateGate = updateGate[i]; - rValueFrameState = frameState[i]; - if (prevOutputValue) { - rPrevOut = prevOutputValue[i]; - } - - opFinalOutput(rValueUpdateGate, - rValueFrameState, - rPrevOut, - rOutput, - hppl::cpu::forward[active_node]); - - frameState[i] = rValueFrameState; - outputValue[i] = rOutput; - } -} - -template -void hl_avx_gru_forward_reset_output(OpResetOutput opResetOutput, - real *gateValue, - real *resetOutputValue, - real *prevOutputValue, - int frameSize, - hl_activation_mode_t active_gate) { -#ifdef __AVX__ - __m256 rValueUpdateGate; - __m256 rValueResetGate; - __m256 rValueResetOutput; - __m256 rPrevOut = _mm256_set1_ps(0.0f); - __m256 *updateGate = (__m256*)gateValue; - 
__m256 *resetGate = (__m256*)(gateValue + frameSize); - - for (int i = 0; i < frameSize / 8; i++) { - rValueUpdateGate = updateGate[i]; - rValueResetGate = resetGate[i]; - if (prevOutputValue) { - rPrevOut = ((__m256*)prevOutputValue)[i]; - } - - opResetOutput(rValueUpdateGate, - rValueResetGate, - rPrevOut, - rValueResetOutput, - hppl::avx::forward[active_gate]); - - updateGate[i] = rValueUpdateGate; - resetGate[i] = rValueResetGate; - ((__m256*)resetOutputValue)[i] = rValueResetOutput; - } -#endif -} - -template -void hl_avx_gru_forward_final_output(OpFinalOutput opFinalOutput, - real *gateValue, - real *prevOutputValue, - real *outputValue, - int frameSize, - hl_activation_mode_t active_node) { -#ifdef __AVX__ - __m256 rValueUpdateGate; - __m256 rValueFrameState; - __m256 rPrevOut = _mm256_set1_ps(0.0f); - __m256 rOutput; - __m256 *updateGate = (__m256*)gateValue; - __m256 *frameState = (__m256*)(gateValue + frameSize * 2); - - for (int i = 0; i < frameSize / 8; i++) { - rValueUpdateGate = updateGate[i]; - rValueFrameState = frameState[i]; - if (prevOutputValue) { - rPrevOut = ((__m256*)prevOutputValue)[i]; - } - - opFinalOutput(rValueUpdateGate, - rValueFrameState, - rPrevOut, - rOutput, - hppl::avx::forward[active_node]); - - frameState[i] = rValueFrameState; - ((__m256*)outputValue)[i] = rOutput; - } -#endif -} - -template -inline void forward_reset_output(OpResetOutput opResetOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_gate) { - for (int b = 0; b < batchSize; b++) { - if (OpResetOutput::avx && !(frameSize & (8 - 1)) && (sizeof(real) == 4)) { - hl_avx_gru_forward_reset_output(opResetOutput, - value.gateValue, value.resetOutputValue, value.prevOutValue, - frameSize, active_gate); - } else { - hl_naive_gru_forward_reset_output(opResetOutput, - value.gateValue, value.resetOutputValue, value.prevOutValue, - frameSize, active_gate); - } - - value.gateValue += frameSize * 3; - value.resetOutputValue += frameSize; - 
if (value.prevOutValue) { - value.prevOutValue += frameSize; - } - } -} - -template -inline void forward_final_output(OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node) { - for (int b = 0; b < batchSize; b++) { - if (OpFinalOutput::avx && !(frameSize & (8 - 1)) && (sizeof(real) == 4)) { - hl_avx_gru_forward_final_output(opFinalOutput, - value.gateValue, value.prevOutValue, value.outputValue, - frameSize, active_node); - } else { - hl_naive_gru_forward_final_output(opFinalOutput, - value.gateValue, value.prevOutValue, value.outputValue, - frameSize, active_node); - } - - value.gateValue += frameSize * 3; - value.outputValue += frameSize; - if (value.prevOutValue) { - value.prevOutValue += frameSize; - } - } -} - -template -void hl_naive_gru_backward_state_grad(OpStateGrad opStateGrad, - real *gateValue, - real *gateGrad, - real *prevOutValue, - real *prevOutGrad, - real *outputGrad, - int frameSize, - hl_activation_mode_t active_node) { - real rUpdateGateValue; - real rUpdateGateGrad; - real rFrameStateValue; - real rFrameStateGrad; - real rOutGrad; - real rPrevOutValue = 0; - real rPrevOutGrad = 0; - real *updateGateValue = gateValue; - real *updateGateGrad = gateGrad; - real *frameStateValue = gateValue + frameSize * 2; - real *frameStateGrad = gateGrad + frameSize * 2; - - for (int i = 0; i < frameSize; i++) { - rUpdateGateValue = updateGateValue[i]; - rFrameStateValue = frameStateValue[i]; - rOutGrad = outputGrad[i]; - if (prevOutValue) { - rPrevOutValue = prevOutValue[i]; - } - if (prevOutGrad) { - rPrevOutGrad = prevOutGrad[i]; - } - - opStateGrad(rUpdateGateValue, - rUpdateGateGrad, - rFrameStateValue, - rFrameStateGrad, - rPrevOutValue, - rPrevOutGrad, - rOutGrad, - hppl::cpu::backward[active_node]); - - updateGateGrad[i] = rUpdateGateGrad; - frameStateGrad[i] = rFrameStateGrad; - if (prevOutGrad) { - prevOutGrad[i] = rPrevOutGrad; - } - } -} - -template -void 
hl_naive_gru_backward_reset_grad(OpResetGrad opResetGrad, - real *gateValue, - real *gateGrad, - real *prevOutValue, - real *prevOutGrad, - real *resetOutputGrad, - int frameSize, - hl_activation_mode_t active_gate) { - real rUpdateGateValue; - real rUpdateGateGrad; - real rResetGateValue; - real rResetGateGrad; - real rResetOutputGrad = 0; - real rPrevOutValue = 0; - real rPrevOutGrad = 0; - real *updateGateValue = gateValue; - real *updateGateGrad = gateGrad; - real *resetGateValue = gateValue + frameSize; - real *resetGateGrad = gateGrad + frameSize; - - for (int i = 0; i < frameSize; i++) { - rUpdateGateValue = updateGateValue[i]; - rUpdateGateGrad = updateGateGrad[i]; - rResetGateValue = resetGateValue[i]; - - if (prevOutValue && prevOutGrad) { - rResetOutputGrad = resetOutputGrad[i]; - } - if (prevOutValue) { - rPrevOutValue = prevOutValue[i]; - } - if (prevOutGrad) { - rPrevOutGrad = prevOutGrad[i]; - } - - opResetGrad(rUpdateGateValue, - rUpdateGateGrad, - rResetGateValue, - rResetGateGrad, - rPrevOutValue, - rPrevOutGrad, - rResetOutputGrad, - hppl::cpu::backward[active_gate]); - - updateGateGrad[i] = rUpdateGateGrad; - resetGateGrad[i] = rResetGateGrad; - if (prevOutGrad) { - prevOutGrad[i] = rPrevOutGrad; - } - } -} - -template -void hl_avx_gru_backward_state_grad(OpStateGrad opStateGrad, - real *gateValue, - real *gateGrad, - real *prevOutValue, - real *prevOutGrad, - real *outputGrad, - int frameSize, - hl_activation_mode_t active_node) { -#ifdef __AVX__ - __m256 rUpdateGateValue; - __m256 rUpdateGateGrad; - __m256 rFrameStateValue; - __m256 rFrameStateGrad; - __m256 rOutGrad; - __m256 rPrevOutValue = _mm256_set1_ps(0.0f); - __m256 rPrevOutGrad = _mm256_set1_ps(0.0f); - __m256 *updateGateValue = (__m256*)gateValue; - __m256 *updateGateGrad = (__m256*)gateGrad; - __m256 *frameStateValue = (__m256*)(gateValue + frameSize * 2); - __m256 *frameStateGrad = (__m256*)(gateGrad + frameSize * 2); - - for (int i = 0; i < frameSize / 8; i++) { - rUpdateGateValue 
= updateGateValue[i]; - rFrameStateValue = frameStateValue[i]; - rOutGrad = ((__m256*)outputGrad)[i]; - if (prevOutValue) { - rPrevOutValue = ((__m256*)prevOutValue)[i]; - } - if (prevOutGrad) { - rPrevOutGrad = ((__m256*)prevOutGrad)[i]; - } - - opStateGrad(rUpdateGateValue, - rUpdateGateGrad, - rFrameStateValue, - rFrameStateGrad, - rPrevOutValue, - rPrevOutGrad, - rOutGrad, - hppl::avx::backward[active_node]); - - updateGateGrad[i] = rUpdateGateGrad; - frameStateGrad[i] = rFrameStateGrad; - if (prevOutGrad) { - ((__m256*)prevOutGrad)[i] = rPrevOutGrad; - } - } -#endif -} - -template -void hl_avx_gru_backward_reset_grad(OpResetGrad opResetGrad, - real *gateValue, - real *gateGrad, - real *prevOutValue, - real *prevOutGrad, - real *resetOutputGrad, - int frameSize, - hl_activation_mode_t active_gate) { -#ifdef __AVX__ - __m256 rUpdateGateValue; - __m256 rUpdateGateGrad; - __m256 rResetGateValue; - __m256 rResetGateGrad; - __m256 rResetOutputGrad = _mm256_set1_ps(0.0f); - __m256 rPrevOutValue = _mm256_set1_ps(0.0f); - __m256 rPrevOutGrad = _mm256_set1_ps(0.0f); - __m256 *updateGateValue = (__m256*)gateValue; - __m256 *updateGateGrad = (__m256*)gateGrad; - __m256 *resetGateValue = (__m256*)(gateValue + frameSize); - __m256 *resetGateGrad = (__m256*)(gateGrad + frameSize); - - for (int i = 0; i < frameSize / 8; i++) { - rUpdateGateValue = updateGateValue[i]; - rUpdateGateGrad = updateGateGrad[i]; - rResetGateValue = resetGateValue[i]; - - if (prevOutValue && prevOutGrad) { - rResetOutputGrad = ((__m256*)resetOutputGrad)[i]; - } - if (prevOutValue) { - rPrevOutValue = ((__m256*)prevOutValue)[i]; - } - if (prevOutGrad) { - rPrevOutGrad = ((__m256*)prevOutGrad)[i]; - } - - opResetGrad(rUpdateGateValue, - rUpdateGateGrad, - rResetGateValue, - rResetGateGrad, - rPrevOutValue, - rPrevOutGrad, - rResetOutputGrad, - hppl::avx::backward[active_gate]); - - updateGateGrad[i] = rUpdateGateGrad; - resetGateGrad[i] = rResetGateGrad; - if (prevOutGrad) { - ((__m256*)prevOutGrad)[i] 
= rPrevOutGrad; - } - } -#endif -} - -template -inline void backward_state_grad(OpStateGrad opStateGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node) { - for (int b = 0; b < batchSize; b++) { - if (OpStateGrad::avx && !(frameSize & (8 - 1)) && (sizeof(real) == 4)) { - hl_avx_gru_backward_state_grad(opStateGrad, - value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad, - grad.outputGrad, frameSize, active_node); - } else { - hl_naive_gru_backward_state_grad(opStateGrad, - value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad, - grad.outputGrad, frameSize, active_node); - } - - value.gateValue += frameSize * 3; - if (value.prevOutValue) { - value.prevOutValue += frameSize; - } - - grad.gateGrad += frameSize * 3; - grad.outputGrad += frameSize; - if (grad.prevOutGrad) { - grad.prevOutGrad += frameSize; - } - } -} - -template -inline void backward_reset_grad(OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_gate) { - for (int b = 0; b < batchSize; b++) { - if (OpResetGrad::avx && !(frameSize & (8 - 1)) && (sizeof(real) == 4)) { - hl_avx_gru_backward_reset_grad(opResetGrad, - value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad, - grad.resetOutputGrad, frameSize, active_gate); - } else { - hl_naive_gru_backward_reset_grad(opResetGrad, - value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad, - grad.resetOutputGrad, frameSize, active_gate); - } - - value.gateValue += frameSize * 3; - if (value.prevOutValue) { - value.prevOutValue += frameSize; - } - - grad.gateGrad += frameSize * 3; - grad.resetOutputGrad += frameSize; - if (grad.prevOutGrad) { - grad.prevOutGrad += frameSize; - } - } -} - -#endif - -#endif // HL_CPU_GRU_CUH_ diff --git a/paddle/legacy/cuda/include/hl_cpu_lstm.cuh b/paddle/legacy/cuda/include/hl_cpu_lstm.cuh deleted file mode 100644 index 
58a97d1230d74545cc205fc46c61c24321db3cd7..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cpu_lstm.cuh +++ /dev/null @@ -1,372 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_CPU_LSTM_CUH_ -#define HL_CPU_LSTM_CUH_ - -#ifndef __NVCC__ - -// using namespace hppl; - -template -void hl_naive_lstm_forward_one_sequence(Op op, - hl_lstm_value value, - int frameSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - real rValueIn; - real rValueIg; - real rValueFg; - real rValueOg; - real rCheckI; - real rCheckF; - real rCheckO; - real rState; - real rPrevState = 0; - real rStateAtv; - real rOut; - - real *valueIn = value.gateValue; - real *valueIg = value.gateValue + frameSize; - real *valueFg = value.gateValue + frameSize * 2; - real *valueOg = value.gateValue + frameSize * 3; - - for (int i = 0; i < frameSize; i++) { - rValueIn = valueIn[i]; - rValueIg = valueIg[i]; - rValueFg = valueFg[i]; - rValueOg = valueOg[i]; - rCheckI = value.checkIg[i]; - rCheckF = value.checkFg[i]; - rCheckO = value.checkOg[i]; - - if (value.prevStateValue) { - rPrevState = value.prevStateValue[i]; - } - - op(rValueIn, - rValueIg, - rValueFg, - rValueOg, - rPrevState, - rState, - rStateAtv, - rOut, - rCheckI, - rCheckF, - rCheckO, - hppl::cpu::forward[active_node], - hppl::cpu::forward[active_gate], - hppl::cpu::forward[active_state]); 
- - valueIn[i] = rValueIn; - valueIg[i] = rValueIg; - valueFg[i] = rValueFg; - valueOg[i] = rValueOg; - value.stateValue[i] = rState; - value.stateActiveValue[i] = rStateAtv; - value.outputValue[i] = rOut; - } -} - -template -void hl_naive_lstm_backward_one_sequence(Op op, - hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - real rValueIn; - real rValueIg; - real rValueFg; - real rValueOg; - real rGradIn; - real rGradIg; - real rGradFg; - real rGradOg; - real rPrevState = 0; - real rPrevStateGrad; - real rState; - real rStateGrad; - real rStateAtv; - real rOutputGrad; - real rCheckI; - real rCheckF; - real rCheckO; - real rCheckIGrad; - real rCheckFGrad; - real rCheckOGrad; - - real *valueIn = value.gateValue; - real *valueIg = value.gateValue + frameSize; - real *valueFg = value.gateValue + frameSize * 2; - real *valueOg = value.gateValue + frameSize * 3; - real *gradIn = grad.gateGrad; - real *gradIg = grad.gateGrad + frameSize; - real *gradFg = grad.gateGrad + frameSize * 2; - real *gradOg = grad.gateGrad + frameSize * 3; - - for (int i = 0; i < frameSize; i++) { - rValueIn = valueIn[i]; - rValueIg = valueIg[i]; - rValueFg = valueFg[i]; - rValueOg = valueOg[i]; - rCheckI = value.checkIg[i]; - rCheckF = value.checkFg[i]; - rCheckO = value.checkOg[i]; - rState = value.stateValue[i]; - rStateAtv = value.stateActiveValue[i]; - rOutputGrad = grad.outputGrad[i]; - rStateGrad = grad.stateGrad[i]; - if (value.prevStateValue) { - rPrevState = value.prevStateValue[i]; - } - - op(rValueIn, - rValueIg, - rValueFg, - rValueOg, - rGradIn, - rGradIg, - rGradFg, - rGradOg, - rPrevState, - rPrevStateGrad, - rState, - rStateGrad, - rStateAtv, - rOutputGrad, - rCheckI, - rCheckF, - rCheckO, - rCheckIGrad, - rCheckFGrad, - rCheckOGrad, - hppl::cpu::backward[active_node], - hppl::cpu::backward[active_gate], - hppl::cpu::backward[active_state]); - - gradIn[i] = 
rGradIn; - gradIg[i] = rGradIg; - gradFg[i] = rGradFg; - gradOg[i] = rGradOg; - grad.stateGrad[i] = rStateGrad; - - if (grad.prevStateGrad) grad.prevStateGrad[i] = rPrevStateGrad; - if (value.prevStateValue) { - if (grad.checkIgGrad) grad.checkIgGrad[i] += rCheckIGrad; - if (grad.checkFgGrad) grad.checkFgGrad[i] += rCheckFGrad; - } - if (grad.checkOgGrad) grad.checkOgGrad[i] += rCheckOGrad; - } -} - -template -void hl_avx_lstm_forward_one_sequence(Op op, - hl_lstm_value value, - int frameSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { -#ifdef __AVX__ - __m256 rValueIn; - __m256 rValueIg; - __m256 rValueFg; - __m256 rValueOg; - __m256 rCheckI; - __m256 rCheckF; - __m256 rCheckO; - __m256 rState; - __m256 rPrevState = _mm256_set1_ps(0.0f); - __m256 rStateAtv; - __m256 rOut; - - __m256 *valueIn = (__m256*)value.gateValue; - __m256 *valueIg = (__m256*)(value.gateValue + frameSize); - __m256 *valueFg = (__m256*)(value.gateValue + frameSize * 2); - __m256 *valueOg = (__m256*)(value.gateValue + frameSize * 3); - - for (int i = 0; i < frameSize / 8; i++) { - rValueIn = valueIn[i]; - rValueIg = valueIg[i]; - rValueFg = valueFg[i]; - rValueOg = valueOg[i]; - rCheckI = ((__m256*)value.checkIg)[i]; - rCheckF = ((__m256*)value.checkFg)[i]; - rCheckO = ((__m256*)value.checkOg)[i]; - - if (value.prevStateValue) { - rPrevState = ((__m256*)value.prevStateValue)[i]; - } - - op(rValueIn, - rValueIg, - rValueFg, - rValueOg, - rPrevState, - rState, - rStateAtv, - rOut, - rCheckI, - rCheckF, - rCheckO, - hppl::avx::forward[active_node], - hppl::avx::forward[active_gate], - hppl::avx::forward[active_state]); - - valueIn[i] = rValueIn; - valueIg[i] = rValueIg; - valueFg[i] = rValueFg; - valueOg[i] = rValueOg; - ((__m256*)value.stateValue)[i] = rState; - ((__m256*)value.stateActiveValue)[i] = rStateAtv; - ((__m256*)value.outputValue)[i] = rOut; - } -#endif -} - -template -void hl_avx_lstm_backward_one_sequence(Op op, - 
hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { -#ifdef __AVX__ - __m256 rValueIn; - __m256 rValueIg; - __m256 rValueFg; - __m256 rValueOg; - __m256 rGradIn; - __m256 rGradIg; - __m256 rGradFg; - __m256 rGradOg; - __m256 rPrevState = _mm256_set1_ps(0.0f); - __m256 rPrevStateGrad; - __m256 rStateGrad; - __m256 rState; - __m256 rStateAtv; - __m256 rOutputGrad; - __m256 rCheckI; - __m256 rCheckF; - __m256 rCheckO; - __m256 rCheckIGrad; - __m256 rCheckFGrad; - __m256 rCheckOGrad; - - __m256 *valueIn = (__m256*)value.gateValue; - __m256 *valueIg = (__m256*)(value.gateValue + frameSize); - __m256 *valueFg = (__m256*)(value.gateValue + frameSize * 2); - __m256 *valueOg = (__m256*)(value.gateValue + frameSize * 3); - __m256 *gradIn = (__m256*)grad.gateGrad; - __m256 *gradIg = (__m256*)(grad.gateGrad + frameSize); - __m256 *gradFg = (__m256*)(grad.gateGrad + frameSize * 2); - __m256 *gradOg = (__m256*)(grad.gateGrad + frameSize * 3); - - for (int i = 0; i < frameSize / 8; i++) { - rValueIn = valueIn[i]; - rValueIg = valueIg[i]; - rValueFg = valueFg[i]; - rValueOg = valueOg[i]; - rCheckI = ((__m256*)value.checkIg)[i]; - rCheckF = ((__m256*)value.checkFg)[i]; - rCheckO = ((__m256*)value.checkOg)[i]; - rState = ((__m256*)value.stateValue)[i]; - rStateAtv = ((__m256*)value.stateActiveValue)[i]; - rOutputGrad = ((__m256*)grad.outputGrad)[i]; - rStateGrad = ((__m256*)grad.stateGrad)[i]; - if (value.prevStateValue) { - rPrevState = ((__m256*)value.prevStateValue)[i]; - } - - op(rValueIn, - rValueIg, - rValueFg, - rValueOg, - rGradIn, - rGradIg, - rGradFg, - rGradOg, - rPrevState, - rPrevStateGrad, - rState, - rStateGrad, - rStateAtv, - rOutputGrad, - rCheckI, - rCheckF, - rCheckO, - rCheckIGrad, - rCheckFGrad, - rCheckOGrad, - hppl::avx::backward[active_node], - hppl::avx::backward[active_gate], - hppl::avx::backward[active_state]); - - gradIn[i] = rGradIn; - 
gradIg[i] = rGradIg; - gradFg[i] = rGradFg; - gradOg[i] = rGradOg; - ((__m256*)grad.stateGrad)[i] = rStateGrad; - - if (grad.prevStateGrad) ((__m256*)grad.prevStateGrad)[i] = rPrevStateGrad; - if (value.prevStateValue) { - if (grad.checkIgGrad) ((__m256*)grad.checkIgGrad)[i] += rCheckIGrad; - if (grad.checkFgGrad) ((__m256*)grad.checkFgGrad)[i] += rCheckFGrad; - } - if (grad.checkOgGrad) ((__m256*)grad.checkOgGrad)[i] += rCheckOGrad; - } -#endif -} - -template -void hl_cpu_lstm_forward(Op op, - hl_lstm_value value, - int frameSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - if (Op::avx && !(frameSize & (8 - 1)) && (sizeof(real) == 4)) { - hl_avx_lstm_forward_one_sequence(op, value, frameSize, - active_node, active_gate, active_state); - } else { - hl_naive_lstm_forward_one_sequence(op, value, frameSize, - active_node, active_gate, active_state); - } -} - -template -void hl_cpu_lstm_backward(Op op, - hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - if (Op::avx && !(frameSize & (8 - 1)) && (sizeof(real) == 4)) { - hl_avx_lstm_backward_one_sequence(op, value, grad, frameSize, - active_node, active_gate, active_state); - } else { - hl_naive_lstm_backward_one_sequence(op, value, grad, frameSize, - active_node, active_gate, active_state); - } -} - -#endif - -#endif /* HL_CPU_LSTM_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_cpu_matrix_kernel.cuh b/paddle/legacy/cuda/include/hl_cpu_matrix_kernel.cuh deleted file mode 100644 index 4db9bb74e0ae2cd59b425a65ad16f47d0d9bca78..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cpu_matrix_kernel.cuh +++ /dev/null @@ -1,196 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CPU_MATRIX_KERNEL_CUH_ -#define HL_CPU_MATRIX_KERNEL_CUH_ - -#include -#include "hl_base.h" - -#ifndef __CUDA_ARCH__ -#include "hl_cpu_matrix_kernel_detail.cuh" -#endif - -/** - * @brief cpu element wise unary operator. - */ -template -void hl_cpu_apply_unary_op(Op op, T* A_h, int dimM, int dimN, int lda) { - for (int i = 0; i < dimM; i ++) { - for (int j = 0; j < dimN; j++) { - op.cpuOperator(A_h[i*lda + j]); - } - } -} - -/** - * @brief cpu element wise binary operator. - */ -template -void hl_cpu_apply_binary_op(Op op, - T* A_h, - T* B_h, - int dimM, - int dimN, - int lda, - int ldb) { - for (int i = 0; i < dimM; i ++) { - for (int j = 0; j < dimN; j++) { - if (BAsRowVector == 0 && BAsColVector == 0) { - op.cpuOperator(A_h[i * lda + j], B_h[i * ldb + j]); - } else if (BAsRowVector == 1 && BAsColVector == 0) { - op.cpuOperator(A_h[i * lda + j], B_h[j]); - } else if (BAsRowVector == 0 && BAsColVector == 1) { - op.cpuOperator(A_h[i * lda + j], B_h[i * ldb]); - } else { - op.cpuOperator(A_h[i * lda + j], B_h[0]); - } - } - } -} - -/** - * @brief cpu element wise ternary operator. 
- */ -template -void hl_cpu_apply_ternary_op(Op op, - T* A_h, - T* B_h, - T* C_h, - int dimM, - int dimN, - int lda, - int ldb, - int ldc) { - for (int i = 0; i < dimM; i ++) { - for (int j = 0; j < dimN; j++) { - if (CAsRowVector == 0 && CAsColVector == 0) { - op.cpuOperator(A_h[i*lda + j], B_h[i*ldb + j], C_h[i*ldc + j]); - } else if (CAsRowVector == 1 && CAsColVector == 0) { - op.cpuOperator(A_h[i*lda + j], B_h[i*ldb + j], C_h[j]); - } else if (CAsRowVector == 0 && CAsColVector == 1) { - op.cpuOperator(A_h[i*lda + j], B_h[i*ldb + j], C_h[i*ldc]); - } else { - op.cpuOperator(A_h[i*lda + j], B_h[i*ldb + j], C_h[0]); - } - } - } -} - -/** - * @brief cpu element wise quaternary operator. - */ -template -void hl_cpu_apply_quaternary_op(Op op, - T* A_h, - T* B_h, - T* C_h, - T* D_h, - int dimM, - int dimN, - int lda, - int ldb, - int ldc, - int ldd) { - for (int i = 0; i < dimM; i ++) { - for (int j = 0; j < dimN; j++) { - op.cpuOperator(A_h[i*lda + j], - B_h[i*ldb + j], - C_h[i*ldc + j], - D_h[i*ldd + j]); - } - } -} - -template -void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda) { -#ifndef __CUDA_ARCH__ - if (!Agg::sse || !Op::sse || !Saver::sse) { - hl_matrix_row_op(agg, op, sv, dimM, dimN, dst, ld, A, lda); - } else { - if (hl_check_align(A) && hl_check_align(lda*sizeof(real))) { - hl_sse_matrix_row_op(agg, op, sv, dimM, dimN, dst, ld, A, lda); - } else { - hl_matrix_row_op(agg, op, sv, dimM, dimN, dst, ld, A, lda); - } - } -#endif -} - -template -void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb) { -#ifndef __CUDA_ARCH__ - if (!Agg::sse || !Op::sse || !Saver::sse) { - hl_matrix_row_op(agg, op, sv, dimM, dimN, dst, ld, A, lda, B, ldb); - } else { - if (hl_check_align(A) && hl_check_align(lda*sizeof(real)) - && hl_check_align(B) && hl_check_align(ldb*sizeof(real))) { - hl_sse_matrix_row_op( - agg, op, sv, dimM, dimN, 
dst, ld, A, lda, B, ldb); - } else { - hl_matrix_row_op(agg, op, sv, dimM, dimN, dst, ld, A, lda, B, ldb); - } - } -#endif -} - -template -void hl_cpu_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { -#ifndef __CUDA_ARCH__ - if (!Agg::sse || !Op::sse || !Saver::sse) { - hl_matrix_column_op(agg, op, sv, dimM, dimN, dst, A, lda); - } else { - if (hl_check_align(A) && hl_check_align(lda*sizeof(real)) - && hl_check_align(dst)) { - hl_sse_matrix_column_op(agg, op, sv, dimM, dimN, dst, A, lda); - } else { - hl_matrix_column_op(agg, op, sv, dimM, dimN, dst, A, lda); - } - } -#endif -} - -template -void hl_cpu_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { -#ifndef __CUDA_ARCH__ - if (!Agg::sse || !Op::sse || !Saver::sse) { - hl_matrix_column_op(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else { - if (hl_check_align(A) && hl_check_align(lda*sizeof(real)) - && hl_check_align(B) && hl_check_align(ldb*sizeof(real)) - && hl_check_align(dst)) { - hl_sse_matrix_column_op( - agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else { - hl_matrix_column_op(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } - } -#endif -} - -#endif /* HL_CPU_MATRIX_KERNEL_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_cpu_matrix_kernel_detail.cuh b/paddle/legacy/cuda/include/hl_cpu_matrix_kernel_detail.cuh deleted file mode 100644 index 54a749b99073692b61750f25d36d96bcb9f48b7c..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cpu_matrix_kernel_detail.cuh +++ /dev/null @@ -1,310 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_MATRIX_KERNEL_DETAIL_CUH_ -#define HL_MATRIX_KERNEL_DETAIL_CUH_ - -#include "hl_matrix_type.cuh" - -inline bool hl_check_align(size_t size) { - return !(size & (VECTOR_SIZE - 1)); -} - -inline bool hl_check_align(void *ptr) { - return hl_check_align(reinterpret_cast(ptr)); -} - -template -void hl_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda) { - for (int i = 0; i < dimM; i++) { - real tmp = agg.init(); - for (int j = 0; j < dimN; j++) { - tmp = agg(tmp, op(A[i * lda + j])); - } - dst[i*ld] = sv(dst[i*ld], tmp); - } -} - -template -void hl_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb) { - for (int i = 0; i < dimM; i++) { - real tmp = agg.init(); - for (int j = 0; j < dimN; j++) { - tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j])); - } - dst[i*ld] = sv(dst[i*ld], tmp); - } -} - -template -void hl_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - for (int j = 0; j < dimN; j++) { - real tmp = agg.init(); - for (int i = 0; i < dimM; i++) { - tmp = agg(tmp, op(A[i * lda + j])); - } - dst[j] = sv(dst[j], tmp); - } -} - -template -void hl_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - for (int j = 0; j < dimN; j++) { - real tmp = agg.init(); - for (int i = 0; i < dimM; i++) { - tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j])); - } - dst[j] = sv(dst[j], tmp); - } -} - -template -void 
hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda) { - for (int i = 0; i < dimM; i++, A += lda) { - vecType mm = VECTOR_SET(agg.init()); - vecType *a = (vecType*)(A); - for (int j = 0; j < dimN / VECTOR_LEN; j++, a++) { - mm = agg.vecOp(mm, op.vecOp(*a)); - } - - int rem = dimN % VECTOR_LEN; - if (rem) { - real tmp = hl_agg_op(agg, mm); - real *a = A + (dimN / VECTOR_LEN) * VECTOR_LEN; - for (int j = 0; j < rem; j++) { - tmp = agg(tmp, op(a[j])); - } - dst[i*ld] = sv(dst[i*ld], tmp); - } else { - dst[i*ld] = sv(dst[i*ld], hl_agg_op(agg, mm)); - } - } -} - -template -void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb) { - for (int i = 0; i < dimM; i++, A += lda, B += ldb) { - vecType mm = VECTOR_SET(agg.init()); - vecType *a = (vecType*)(A); - vecType *b = (vecType*)(B); - for (int j = 0; j < dimN / VECTOR_LEN; j++, a++, b++) { - mm = agg.vecOp(mm, op.vecOp(*a, *b)); - } - - int rem = dimN % VECTOR_LEN; - if (rem) { - real tmp = hl_agg_op(agg, mm); - real *a = A + (dimN / VECTOR_LEN) * VECTOR_LEN; - real *b = B + (dimN / VECTOR_LEN) * VECTOR_LEN; - for (int j = 0; j < rem; j++) { - tmp = agg(tmp, op(a[j], b[j])); - } - dst[i*ld] = sv(dst[i*ld], tmp); - } else { - dst[i*ld] = sv(dst[i*ld], hl_agg_op(agg, mm)); - } - } -} - -/* - * MaxRow greater than or equal dimN - * dimN is multiples of VECTOR_LEN - * so rem <= MaxRow / VECTOR_LEN - */ -template -void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - vecType mm[MaxRow / VECTOR_LEN]; - for (int n = 0; n < MaxRow / VECTOR_LEN; n++) { - mm[n] = VECTOR_SET(agg.init()); - } - - for (int i = 0; i < dimM; i++) { - vecType *a = (vecType*)(A + i * lda); - for (int n = 0; n < dimN / VECTOR_LEN; n++) { - mm[n] = agg.vecOp(mm[n], op.vecOp(a[n])); - } - } - - vecType *result = (vecType*)(dst); - for (int n = 0; n < dimN / 
VECTOR_LEN; n++) { - result[n] = sv.vecOp(result[n], mm[n]); - } - - int rem = dimN % VECTOR_LEN; - if (rem) { - A += (dimN / VECTOR_LEN) * VECTOR_LEN; - dst += (dimN / VECTOR_LEN) * VECTOR_LEN; - hl_matrix_column_op(agg, op, sv, dimM, rem, dst, A, lda); - } -} - -/* - * dimN is multiples of VECTOR_LEN - * dimN greater than Step - */ -template -void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - for (int j = 0; j < dimN / Step; j++, dst += Step, A += Step) { - vecType mm[Step / VECTOR_LEN]; - for (int n = 0; n < Step / VECTOR_LEN; n++) { - mm[n] = VECTOR_SET(agg.init()); - } - - for (int i = 0; i < dimM; i++) { - vecType *a = (vecType*)(A + i * lda); - for (int n = 0; n < Step / VECTOR_LEN; n++) { - mm[n] = agg.vecOp(mm[n], op.vecOp(a[n])); - } - } - - vecType *result = (vecType*)(dst); - for (int n = 0; n < Step / VECTOR_LEN; n++) { - result[n] = sv.vecOp(result[n], mm[n]); - } - } - - int remRow = dimN % Step; - if (remRow) { - hl_sse_column_op_with_rem(agg, op, sv, dimM, remRow, dst, A, lda); - } -} - -template -void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - if (dimN <= 16) { - hl_sse_matrix_column_op<16>(agg, op, sv, dimM, dimN, dst, A, lda); - } else if (dimN <= 32) { - hl_sse_matrix_column_op<32>(agg, op, sv, dimM, dimN, dst, A, lda); - } else if (dimN <= 1024 || dimM <= 512) { - hl_sse_matrix_column_op<64>(agg, op, sv, dimM, dimN, dst, A, lda); - } else { - hl_sse_matrix_column_op<1024>(agg, op, sv, dimM, dimN, dst, A, lda); - } -} - -template -void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - vecType mm[MaxRow / VECTOR_LEN]; - for (int n = 0; n < MaxRow / VECTOR_LEN; n++) { - mm[n] = VECTOR_SET(agg.init()); - } - - for (int i = 0; i < dimM; i++) { - vecType *a = (vecType*)(A + i * lda); - vecType *b = (vecType*)(B + i * ldb); - for (int n = 0; n 
< dimN / VECTOR_LEN; n++) { - mm[n] = agg.vecOp(mm[n], op.vecOp(a[n], b[n])); - } - } - - vecType *result = (vecType*)(dst); - for (int n = 0; n < dimN / VECTOR_LEN; n++) { - result[n] = sv.vecOp(result[n], mm[n]); - } - - int rem = dimN % VECTOR_LEN; - if (rem) { - A += (dimN / VECTOR_LEN) * VECTOR_LEN; - B += (dimN / VECTOR_LEN) * VECTOR_LEN; - dst += (dimN / VECTOR_LEN) * VECTOR_LEN; - hl_matrix_column_op(agg, op, sv, dimM, rem, dst, A, lda, B, ldb); - } -} - -template -void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - for (int j = 0; j < dimN / Step; j++, dst += Step, A += Step, B += Step) { - vecType mm[Step / VECTOR_LEN]; - for (int n = 0; n < Step / VECTOR_LEN; n++) { - mm[n] = VECTOR_SET(agg.init()); - } - - for (int i = 0; i < dimM; i++) { - vecType *a = (vecType*)(A + i * lda); - vecType *b = (vecType*)(B + i * ldb); - for (int n = 0; n < Step / VECTOR_LEN; n++) { - mm[n] = agg.vecOp(mm[n], op.vecOp(a[n], b[n])); - } - } - - vecType *result = (vecType*)(dst); - for (int n = 0; n < Step / VECTOR_LEN; n++) { - result[n] = sv.vecOp(result[n], mm[n]); - } - } - - int remRow = dimN % Step; - if (remRow) { - hl_sse_column_op_with_rem( - agg, op, sv, dimM, remRow, dst, A, lda, B, ldb); - } -} - -template -void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - if (dimN <= 16) { - hl_sse_matrix_column_op<16>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else if (dimN <= 32) { - hl_sse_matrix_column_op<32>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else if (dimN <= 1024 || dimM <= 512) { - hl_sse_matrix_column_op<64>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else { - hl_sse_matrix_column_op<1024>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } -} - -#endif /* HL_MATRIX_KERNEL_DETAIL_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_cpu_scalar.cuh 
b/paddle/legacy/cuda/include/hl_cpu_scalar.cuh deleted file mode 100644 index 939302e97158018299ad281d73483c54ae92d242..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cpu_scalar.cuh +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CPU_SCALAR_CUH_ -#define HL_CPU_SCALAR_CUH_ - -#define VECTOR_SIMD false -#define VECTOR_SET hl_vec_set - -#ifndef PADDLE_TYPE_DOUBLE -/* size of float */ -#define VECTOR_SIZE 4 -#else -/* size of double */ -#define VECTOR_SIZE 8 -#endif - -typedef real vecType; - -/* Consider a real as a vector */ -#define VECTOR_LEN 1 - -template -inline real hl_agg_op(Agg agg, vecType mm) { - return mm; -} - -INLINE real hl_vec_set(const real r) { - return r; -} - -INLINE real hl_vec_classification_error(const real a, - const real b, - const real p, - const real r) { - return ((a > p) == (b > p)) ? 0.0f : 1.0f; -} - -#endif // HL_CPU_SCALAR_CUH_ diff --git a/paddle/legacy/cuda/include/hl_cpu_simd_neon.cuh b/paddle/legacy/cuda/include/hl_cpu_simd_neon.cuh deleted file mode 100644 index e54e0f4646bbe5bfa0de7d4f1d7e2b0bab406d4e..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cpu_simd_neon.cuh +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CPU_SIMD_NEON_CUH_ -#define HL_CPU_SIMD_NEON_CUH_ - -#include - -#define VECTOR_SIMD true -#define VECTOR_SIZE 16 -#define VECTOR_SET hl_vec_set - -#ifndef PADDLE_TYPE_DOUBLE - -typedef float32x4_t vecType; - -/* number of float in vector */ -#define VECTOR_LEN 4 - -template -inline real hl_agg_op(Agg agg, vecType mm) { - float32x4_t rev = vrev64q_f32(mm); - float32x4_t tmp1 = agg.vecOp(rev, rev); - float32x2_t lo = vget_high_f32(rev); - float32x2_t hi = vget_low_f32(rev); - float32x4_t tmp2 = vcombine_f32(hi, lo); - float32x4_t ret = agg.vecOp(tmp1, tmp2); - - return vgetq_lane_f32(ret, 0); -} - -inline float32x4_t hl_vec_set(const real f) { - return vdupq_n_f32(f); -} - -inline float32x4_t hl_vec_classification_error(const float32x4_t a, - const float32x4_t b, - const float32x4_t p, - const float32x4_t r) { - uint32x4_t tmp1 = vcgtq_f32(a, p); - uint32x4_t tmp2 = vcgtq_f32(b, p); - uint32x4_t tmp3 = veorq_u32(tmp1, tmp2); - return vcvtq_f32_u32(vandq_u32(tmp3, vcvtq_u32_f32(r))); -} - -#else - -#ifdef __aarch64__ -typedef float64x2_t vecType; - -/* number of float in vector */ -#define VECTOR_LEN 2 -#define VECTOR_SET vdupq_n_f64 - -#error To be implemented -#else -#error NEON instructions does not support double precision -#endif // __aarch64__ - -#endif - -#endif // HL_CPU_SIMD_NEON_CUH_ diff --git a/paddle/legacy/cuda/include/hl_cpu_simd_sse.cuh b/paddle/legacy/cuda/include/hl_cpu_simd_sse.cuh deleted file mode 100644 
index 20c37d4dd31cd415490dbebc783d830236a0b784..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cpu_simd_sse.cuh +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CPU_SIMD_SSE_CUH_ -#define HL_CPU_SIMD_SSE_CUH_ - -#include -#include -#include - -#define VECTOR_SIMD true -#define VECTOR_SIZE 16 -#define VECTOR_SET hl_vec_set - -#ifndef PADDLE_TYPE_DOUBLE - -typedef __m128 vecType; - -/* number of float in vector */ -#define VECTOR_LEN 4 - -template -inline real hl_agg_op(Agg agg, vecType mm) { - __m128 lo = _mm_unpacklo_ps(mm, mm); - __m128 hi = _mm_unpackhi_ps(mm, mm); - __m128 tmp1 = agg.vecOp(lo, hi); - __m128 tmp2 = _mm_movehl_ps(tmp1, tmp1); - __m128 ret = agg.vecOp(tmp1, tmp2); - - return _mm_cvtss_f32(ret); -} - -inline __m128 hl_vec_set(const real f) { - return _mm_set_ps1(f); -} - -inline __m128 hl_vec_classification_error(const __m128 a, - const __m128 b, - const __m128 p, - const __m128 r) { - __m128 tmp1 = _mm_cmpgt_ps(a, p); - __m128 tmp2 = _mm_cmpgt_ps(b, p); - __m128 tmp3 = _mm_xor_ps(tmp1, tmp2); - return _mm_and_ps(tmp3, r); -} - -#else - -typedef __m128d vecType; - -/* number of double in vector */ -#define VECTOR_LEN 2 - -template -inline real hl_agg_op(Agg agg, vecType mm) { - __m128d lo = _mm_unpacklo_pd(mm, mm); - __m128d hi = _mm_unpackhi_pd(mm, mm); - __m128d ret = agg.vecOp(lo, hi); - - return _mm_cvtsd_f64(ret); -} - -inline 
__m128d hl_vec_set(const real d) { -#if defined(__APPLE__) || defined(__OSX__) - return _mm_set1_pd(d); -#else - return _mm_set_pd1(d); -#endif -} - -inline __m128d hl_vec_classification_error(const __m128d a, - const __m128d b, - const __m128d p, - const __m128d r) { - __m128d tmp1 = _mm_cmpgt_pd(a, p); - __m128d tmp2 = _mm_cmpgt_pd(b, p); - __m128d tmp3 = _mm_xor_pd(tmp1, tmp2); - return _mm_and_pd(tmp3, r); -} - -#endif - -#endif // HL_CPU_SIMD_SSE_CUH_ diff --git a/paddle/legacy/cuda/include/hl_cuda.h b/paddle/legacy/cuda/include/hl_cuda.h deleted file mode 100644 index 70efcccb81847c11738108f222a6b0c1cff644e0..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cuda.h +++ /dev/null @@ -1,345 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CUDA_H_ -#define HL_CUDA_H_ - -#include -#include "hl_base.h" - -/** - * @brief HPPL event. - */ -typedef struct _hl_event_st *hl_event_t; - -/** - * @brief return cuda runtime api version. - */ -extern int hl_get_cuda_lib_version(); - -/** - * @brief HPPL strat(Initialize all GPU). - */ -extern void hl_start(); - -/** - * @brief HPPL start(Initialize the specific GPU). - * - * @param[in] device device id(0, 1......). - * if device is NULL, will start all GPU. - * @param[in] number number of devices. 
- */ -extern void hl_specify_devices_start(int *device, int number); - -/** - * @brief Queries if a device may directly access a peer device's memory. - * - * @param[in] device Device from which allocations on peerDevice are - * to be directly accessed. - * @param[in] peerDevice Device on which the allocations to be directly - * accessed by device reside. - * - * @return Returns true if device is capable of directly accessing memory - * from peerDevice and false otherwise. - */ -bool hl_device_can_access_peer(int device, int peerDevice); - -/** - * @brief Enables direct access to memory allocations on a peer device. - * - * @param[in] peerDevice Peer device to enable direct access to from the - * current device - */ -void hl_device_enable_peer_access(int peerDevice); - -/** - * @brief Init a work thread. - * - * @param[in] device device id. - */ -extern void hl_init(int device); - -/** - * @brief Finish a work thread. - */ -extern void hl_fini(); - -/** - * @brief Set synchronous/asynchronous flag. - * - * @param[in] flag true(default), set synchronous flag. - * false, set asynchronous flag. - * - * - * @note This setting is only valid for the current worker thread. - */ -extern void hl_set_sync_flag(bool flag); - -/** - * @brief Get synchronous/asynchronous flag. - * - * @return Synchronous call true. - * Asynchronous call false. - * - */ -extern bool hl_get_sync_flag(); - -/** - * @brief Returns the number of compute-capable devices. - * - */ -extern int hl_get_device_count(); - -/** - * @brief Set device to be used. - * - * @param[in] device device id. - * - */ -extern void hl_set_device(int device); - -/** - * @brief Returns which device is currently being used. - * - * @return device device id. - * - */ -extern int hl_get_device(); - -/** - * @brief Allocate device memory. - * - * @param[in] size size in bytes to copy. - * - * @return dest_d pointer to device memory. - */ -extern void *hl_malloc_device(size_t size); - -/** - * @brief Free device memory. 
- * - * @param[in] dest_d pointer to device memory. - * - */ -extern void hl_free_mem_device(void *dest_d); - -/** - * @brief Allocate host page-lock memory. - * - * @param[in] size size in bytes to copy. - * - * @return dest_h pointer to host memory. - */ -extern void *hl_malloc_host(size_t size); - -/** - * @brief Free host page-lock memory. - * - * @param[in] dest_h pointer to host memory. - * - */ -extern void hl_free_mem_host(void *dest_h); - -/** - * @brief Copy data. - * - * @param[in] dst dst memory address(host or device). - * @param[in] src src memory address(host or device). - * @param[in] size size in bytes to copy. - * - */ -extern void hl_memcpy(void *dst, void *src, size_t size); - -/** - * @brief Set device memory to a value. - * - * @param[in] dest_d pointer to device memory. - * @param[in] value value to set for each byte of specified memory. - * @param[in] size size in bytes to set. - * - */ -extern void hl_memset_device(void *dest_d, int value, size_t size); - -/** - * @brief Copy host memory to device memory. - * - * @param[in] dest_d dst memory address. - * @param[in] src_h src memory address. - * @param[in] size size in bytes to copy. - * - */ -extern void hl_memcpy_host2device(void *dest_d, void *src_h, size_t size); - -/** - * @brief Copy device memory to host memory. - * - * @param[in] dest_h dst memory address. - * @param[in] src_d src memory address. - * @param[in] size size in bytes to copy. - * - */ -extern void hl_memcpy_device2host(void *dest_h, void *src_d, size_t size); - -/** - * @brief Copy device memory to device memory. - * - * @param[in] dest_d dst memory address. - * @param[in] src_d src memory address. - * @param[in] size size in bytes to copy. - * - */ -extern void hl_memcpy_device2device(void *dest_d, void *src_d, size_t size); - -/** - * @brief Generate uniformly distributed floats (0, 1.0]. - * - * @param[in] dest_d pointer to device memory to store results. - * @param[in] num number of floats to generate. 
- * - */ -extern void hl_rand(real *dest_d, size_t num); - -/** - * @brief Set the seed value of the random number generator. - * - * @param[in] seed seed value. - */ -extern void hl_srand(unsigned int seed); - -/** - * @brief Copy data. - * - * @param[in] dst dst memory address(host or device). - * @param[in] src src memory address(host or device). - * @param[in] size size in bytes to copy. - * @param[in] stream stream id. - */ -extern void hl_memcpy_async(void *dst, - void *src, - size_t size, - hl_stream_t stream); - -/** - * @brief Waits for stream tasks to complete. - * - * @param[in] stream stream id. - */ -extern void hl_stream_synchronize(hl_stream_t stream); - -/** - * @brief Creates an event object. - * - * @param[out] event New event. - */ -extern void hl_create_event(hl_event_t *event); - -/** - * @brief Destroys an event object. - * - * @param[in] event Event to destroy. - */ -extern void hl_destroy_event(hl_event_t event); - -/** - * @brief Computes the elapsed time between events. - * - * @param[in] start Starting event. - * @param[in] end Ending event. - * - * @return time Time between start and end in ms. - */ -extern float hl_event_elapsed_time(hl_event_t start, hl_event_t end); - -/** - * @brief Records an event. - * - * @param[in] stream Stream in which to insert event. - * @param[in] event Event waiting to be recorded as completed. - * - */ -extern void hl_stream_record_event(hl_stream_t stream, hl_event_t event); - -/** - * @brief Make a compute stream wait on an event. - * - * @param[in] stream Stream in which to insert event. - * @param[in] event Event to wait on. - * - */ -extern void hl_stream_wait_event(hl_stream_t stream, hl_event_t event); - -/** - * @brief Wait for an event to complete. - * - * @param[in] event event to wait for. - * - */ -extern void hl_event_synchronize(hl_event_t event); - -/** - * @brief Sets block flags to be used for device executions. - * - * @note This interface needs to be called before hl_start. 
- */ -extern void hl_set_device_flags_block(); - -/** - * @brief Returns the last error string from a cuda runtime call. - */ -extern const char *hl_get_device_error_string(); - -/** - * @brief Returns the last error string from a cuda runtime call. - * - * @param[in] err error number. - * - * @see hl_get_device_last_error() - */ -extern const char *hl_get_device_error_string(size_t err); - -/** - * @brief Returns the last error number. - * - * @return error number. - * - * @see hl_get_device_error_string() - */ -extern int hl_get_device_last_error(); - -/** - * @brief check cuda event is ready - * - * @param[in] event cuda event to query. - * - * @return true cuda event is ready. - * false cuda event is not ready. - */ -extern bool hl_cuda_event_is_ready(hl_event_t event); - -/** - * @brief hppl device synchronization. - */ -extern void hl_device_synchronize(); - -/** - * @brief gpu profiler start - */ -extern void hl_profiler_start(); - -/** - * @brief gpu profiler stop - */ -extern void hl_profiler_end(); - -#endif // HL_CUDA_H_ diff --git a/paddle/legacy/cuda/include/hl_cuda.ph b/paddle/legacy/cuda/include/hl_cuda.ph deleted file mode 100644 index 7c4465e51ff7944e77f8048e584ad1dcfa274d56..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cuda.ph +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_CUDA_PH_ -#define HL_CUDA_PH_ - -#include -#include -#include -#include -#include -#include -#include -#include "hl_base.h" - -/** - * @brief hppl event. - * @param cuda event. - */ -struct _hl_event_st { - cudaEvent_t cu_event; /* cuda event */ -}; - -/** - * @brief global device resources. - * - * @param *stream device global stream. - * @param handle devcie cublas handle. - * @param gen device curand generator. - * @param cudnn_handle cudnn handle. - * @param *gen_mutex gen lock. - */ -typedef struct { - cudaStream_t *stream; - cublasHandle_t handle; - curandGenerator_t gen; - cudnnHandle_t cudnn_handle; - pthread_mutex_t *gen_mutex; -}_global_device_resources, *global_device_resources; - -/* - * @brief thread device resources. - * - * @param *stream device thread stream. - * @param *gpu_mem device memory. - * @param *cpu_mem cpu memory. - * @param mem_event device memory lock. - */ -typedef struct { - cudaStream_t *stream; - real *gpu_mem; - real *cpu_mem; - cudaEvent_t mem_event; -}_thread_device_resources, *thread_device_resources; - -/* - * @brief hppl device properties. - * - * @param device device id. - * @param device_type 0.Nvidia, 1.AMD, 2.Intel. - * @param device_name[256] device name. - * @param device_mem total global memory. - * @param major device compute capability. - * @param minor device compute capability. - * @param is_local local device or not. - * @param device_resources device resources. - */ -typedef struct { - int device; - int device_type; - char device_name[256]; - size_t device_mem; - int major; - int minor; - bool is_local; - global_device_resources device_resources; -} _hl_device_prop, *hl_device_prop; - -/** - * @brief thread device resource allocation. - * - * create cuda stream and cuda event, allocate gpu - * memory and host page-lock memory for threads. - * - * @param[in] device device number. - * @param[out] device_res device properties. 
- */ -extern void hl_create_thread_resources(int device, - thread_device_resources device_res); - -/** - * @brief global device resource allocation. - * - * create cuda stream, initialize cublas, curand and cudnn. - * - * @param[out] device_prop device properties. - */ -extern void hl_create_global_resources(hl_device_prop device_prop); - -#endif /* HL_CUDA_PH_ */ diff --git a/paddle/legacy/cuda/include/hl_cuda_cublas.h b/paddle/legacy/cuda/include/hl_cuda_cublas.h deleted file mode 100644 index 3959f81677b0f73768fd45498e6da0236daca5bd..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cuda_cublas.h +++ /dev/null @@ -1,172 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CUDA_CUBLAS_H_ -#define HL_CUDA_CUBLAS_H_ - -#include "hl_base.h" - -/** - * @brief Matrix transpose: C_d = T(A_d) - * - * @param[in] A_d input matrix (dimM x dimN). - * @param[out] C_d output matrix (dimN x dimM). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] lda the first dimension of A_d. - * @param[in] ldc the first dimension of C_d. - * - */ -extern void hl_matrix_transpose( - real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc); - -/* - * @brief Matrix transpose, while lda = dimN, ldc = dimM. - * - * @param[in] A_d input matrix (dimM x dimN). - * @param[out] C_d output matrix (dimN x dimM). - * @param[in] dimM matrix height. 
- * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN); - -/* - * @brief Matrix inverse - * - * @param[in] A_d input matrix (dimN x dimN). - * @param[out] C_d output matrix (dimN x dimN). - * @param[in] dimN matrix height = matrix width - * @param[in] lda the first dimension of A_d - * @param[in] ldc the first dimension of C_d - * - */ -extern void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc); - -/** - * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d - * - * @param[in] A_d input. - * @param[in] transa operation op(A) that is non-or transpose. - * @param[in] B_d input. - * @param[in] transb operation op(B) that is non-or transpose. - * @param[out] C_d output. - * @param[in] dimM matrix height of op(A) & C - * @param[in] dimN matrix width of op(B) & C - * @param[in] dimK width of op(A) & height of op(B) - * @param[in] alpha scalar used for multiplication. - * @param[in] beta scalar used for multiplication. - * @param[in] lda the first dimension of A_d. - * @param[in] ldb the first dimension of B_d. - * @param[in] ldc the first dimension of C_d. - * - */ -extern void hl_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta, - int lda, - int ldb, - int ldc); - -/** - * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d - * - * @param[in] A_d input. - * @param[in] transa operation op(A) that is non-or transpose. - * @param[in] B_d input. - * @param[in] transb operation op(B) that is non-or transpose. - * @param[out] C_d output. - * @param[in] dimM matrix height of op(A) & C - * @param[in] dimN matrix width of op(B) & C - * @param[in] dimK width of op(A) & height of op(B) - * @param[in] alpha scalar used for multiplication. - * @param[in] beta scalar used for multiplication. 
- * - */ -extern void hl_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta); - -/** - * @brief This function performs the matrix-vector multiplication. - * C_d = alpha*op(A_d)*B_d + beta*C_d - * - * @param[in] A_d matrix. - * @param[in] trans operation op(A) that is non-or transpose. - * @param[in] B_d vector with dimN(dimM) elements - * if trans==HPPL_OP_N(HPPL_OP_T). - * @param[in,out] C_d vector with dimM(dimN) elements - * if trans==HPPL_OP_N(HPPL_OP_T). - * @param[in] dimM number of rows of matrix A_d. - * @param[in] dimN number of columns of matrix A_d. - * @param[in] alpha scalar used for multiplication. - * @param[in] beta scalar used for multiplication. - * @param[in] lda the first dimension of A_d. - * @param[in] incb increase B_d size for compaction. - * @param[in] incc increase C_d size for compaction. - * - */ - -extern void hl_matrix_mul_vector(real *A_d, - hl_trans_op_t trans, - real *B_d, - real *C_d, - int dimM, - int dimN, - real alpha, - real beta, - int lda, - int incb, - int incc); - -/** - * @brief This function performs the matrix-vector multiplication. - * C_d = alpha*op(A_d)*B_d + beta*C_d - * - * @param[in] A_d matrix. - * @param[in] trans operation op(A) that is non-or transpose. - * @param[in] B_d vector with dimN(dimM) elements - * if trans==HPPL_OP_N(HPPL_OP_T). - * @param[in,out] C_d vector with dimM(dimN) elements - * if trans==HPPL_OP_N(HPPL_OP_T). - * @param[in] dimM number of rows of matrix A_d. - * @param[in] dimN number of columns of matrix A_d. - * @param[in] alpha scalar used for multiplication. - * @param[in] beta scalar used for multiplication. 
- * - */ -extern void hl_matrix_mul_vector(real *A_d, - hl_trans_op_t trans, - real *B_d, - real *C_d, - int dimM, - int dimN, - real alpha, - real beta); - -#endif /* HL_CUDA_CUBLAS_H_ */ diff --git a/paddle/legacy/cuda/include/hl_cuda_cudnn.h b/paddle/legacy/cuda/include/hl_cuda_cudnn.h deleted file mode 100644 index 4664e4144a8535e57520668425725fa352e44edc..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cuda_cudnn.h +++ /dev/null @@ -1,516 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CUDA_CUDNN_H_ -#define HL_CUDA_CUDNN_H_ - -#include "hl_base.h" - -/* - * hppl pooling mode - */ -typedef enum { - HL_POOLING_MAX = 0, - // average does not include padded values - HL_POOLING_AVERAGE = 1, - // average includes padded values - HL_POOLING_AVERAGE_INCLUDE_PADDING = 2, - HL_POOLING_END -} hl_pooling_mode_t; - -/** - * @brief return cudnn lib version - */ - -extern int hl_get_cudnn_lib_version(); - -/** - * @brief hppl image descriptor. - */ -typedef struct _hl_tensor_descriptor* hl_tensor_descriptor; - -/** - * @brief hppl pooling descriptor. - */ -typedef struct _hl_pooling_descriptor* hl_pooling_descriptor; - -/** - * @brief hppl filter descriptor. - */ -typedef struct _hl_filter_descriptor* hl_filter_descriptor; - -/** - * @brief hppl filter descriptor. 
- */ -typedef struct _hl_convolution_descriptor* hl_convolution_descriptor; - -/** - * @brief create image descriptor. - * - * @param[out] image_desc image descriptor. - * - */ -extern void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc); - -/** - * @brief reshape image descriptor. - * - * @param[in,out] image_desc image descriptor. - * @param[in] batch_size input batch size. - * @param[in] feature_maps image feature maps. - * @param[in] height image height. - * @param[in] width image width. - */ -extern void hl_tensor_reshape(hl_tensor_descriptor image_desc, - int batch_size, - int feature_maps, - int height, - int width); - -/** - * @brief reshape image descriptor. - * - * @param[in,out] image_desc image descriptor. - * @param[in] batch_size input batch size. - * @param[in] feature_maps image feature maps. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] nStride stride between two consecutive images. - * @param[in] cStride stride between two consecutive feature maps. - * @param[in] hStride stride between two consecutive rows. - * @param[in] wStride stride between two consecutive columns. - * - */ -extern void hl_tensor_reshape(hl_tensor_descriptor image_desc, - int batch_size, - int feature_maps, - int height, - int width, - int nStride, - int cStride, - int hStride, - int wStride); - -/** - * @brief destroy image descriptor. - * - * @param[in] image_desc hppl image descriptor. - */ -extern void hl_destroy_tensor_descriptor(hl_tensor_descriptor image_desc); - -/** - * @brief create pooling descriptor. - * - * @param[out] pooling_desc pooling descriptor. - * @param[in] mode pooling mode. - * @param[in] height height of the pooling window. - * @param[in] width width of the pooling window. - * @param[in] height_padding padding height. - * @param[in] width_padding padding width. - * @param[in] stride_height pooling vertical stride. - * @param[in] stride_width pooling horizontal stride. 
- */ -extern void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc, - hl_pooling_mode_t mode, - int height, - int width, - int height_padding, - int width_padding, - int stride_height, - int stride_width); - -/** - * @brief destroy pooling descriptor. - * - * @param[in] pooling_desc hppl pooling descriptor. - * - */ -extern void hl_destroy_pooling_descriptor(hl_pooling_descriptor pooling_desc); - -/** - * @brief pooling forward(calculate output image). - * - * @param[in] input input image descriptor. - * @param[in] input_image input image data. - * @param[in] output output image descriptor. - * @param[out] output_image output image data. - * @param[in] pooling pooling descriptor. - * - */ -extern void hl_pooling_forward(hl_tensor_descriptor input, - real* input_image, - hl_tensor_descriptor output, - real* output_image, - hl_pooling_descriptor pooling); - -/** - * @brief pooling backward(calculate input image gradient). - * - * @param[in] input input image descriptor. - * @param[in] input_image input image data. - * @param[in] input_image_grad input image gradient data. - * @param[in] output output image descriptor. - * @param[in] output_image output image data. - * @param[out] output_image_grad output image gradient data. - * @param[in] pooling pooling descriptor. - * - */ -extern void hl_pooling_backward(hl_tensor_descriptor input, - real* input_image, - real* input_image_grad, - hl_tensor_descriptor output, - real* output_image, - real* output_image_grad, - hl_pooling_descriptor pooling); - -/** - * @brief create filter descriptor. - * - * @param[out] filter filter descriptor. - * @param[in] input_feature_maps input image feature maps. - * @param[in] output_feature_maps output image feature maps. - * @param[in] height filter height. - * @param[in] width filter width. 
- * - */ -extern void hl_create_filter_descriptor(hl_filter_descriptor* filter, - int input_feature_maps, - int output_feature_maps, - int height, - int width); - -/** - * @brief convolution workspace configuration - * - * @param[in] input image descriptor - * @param[in] output image descriptor - * @param[in] filter filter descriptor - * @param[in] conv convolution descriptor - * @param[out] convFwdAlgo forward algorithm - * @param[out] fwdLimitBytes forward workspace size - * @param[out] convBwdDataAlgo backward data algorithm - * @param[out] bwdDataLimitBytes backward data workspace size - * @param[out] convBwdFilterAlgo backward filter algorithm - * @param[out] bwdFilterLimitBytes backward filter workspace size - * - */ -extern void hl_conv_workspace(hl_tensor_descriptor input, - hl_tensor_descriptor output, - hl_filter_descriptor filter, - hl_convolution_descriptor conv, - int* convFwdAlgo, - size_t* fwdLimitBytes, - int* convBwdDataAlgo, - size_t* bwdDataLimitBytes, - int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes, - bool useDilation); - -/** - * @brief destroy filter descriptor. - * - * @param[in] filter hppl filter descriptor. - * - */ -extern void hl_destroy_filter_descriptor(hl_filter_descriptor filter); - -/** - * @brief create convolution descriptor. - * - * @param[out] conv conv descriptor. - * @param[in] image input image descriptor. - * @param[in] filter filter descriptor. - * @param[in] padding_height padding height. - * @param[in] padding_width padding width. - * @param[in] stride_height stride height. - * @param[in] stride_width stride width. - * - */ -extern void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, - hl_tensor_descriptor image, - hl_filter_descriptor filter, - int padding_height, - int padding_width, - int stride_height, - int stride_width, - int dilation_h = 1, - int dilation_w = 1); - -/** - * @brief reset convolution descriptor. - * - * @param[in,out] conv conv descriptor. 
- * @param[in] image input image descriptor. - * @param[in] filter filter descriptor. - * @param[in] padding_height padding height. - * @param[in] padding_width padding width. - * @param[in] stride_height stride height. - * @param[in] stride_width stride width. - * - */ -extern void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, - hl_tensor_descriptor image, - hl_filter_descriptor filter, - int padding_height, - int padding_width, - int stride_height, - int stride_width, - int dilation_h = 1, - int dilation_w = 1); - -/** - * @brief destroy convolution descriptor. - * - * @param[in] conv hppl convolution descriptor. - */ -extern void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv); - -/** - * @brief convolution forward(calculate output image). - * - * @param[in] input input image descriptor. - * @param[in] input_data input image data. - * @param[in] output output image descriptor. - * @param[out] output_data output image data. - * @param[in] filter filter descriptor. - * @param[in] filter_data filter data. - * @param[in] conv convolution descriptor. - * @param[in] gpuWorkSpace limited gpu workspace. - * @param[in] sizeInBytes gpu workspace size (bytes). - * @param[in] convFwdAlgo forward algorithm. - */ -extern void hl_convolution_forward(hl_tensor_descriptor input, - real* input_data, - hl_tensor_descriptor output, - real* output_data, - hl_filter_descriptor filter, - real* filter_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convFwdAlgo); - -/** - * @brief convolution forward add bias(calculate output add bias). - * - * @param[in] bias bias descriptor. - * @param[in] bias_data bias data. - * @param[in] output output image descriptor. - * @param[out] output_data output image data. 
- */ -extern void hl_convolution_forward_add_bias(hl_tensor_descriptor bias, - real* bias_data, - hl_tensor_descriptor output, - real* output_data); - -/** - * @brief convolution backward filter(calculate filter grad data). - * - * @param[in] input input image descriptor. - * @param[in] input_data input image data. - * @param[in] output output image descriptor. - * @param[in] output_grad_data output image grad data. - * @param[in] filter filter descriptor. - * @param[out] filter_grad_data filter grad data. - * @param[in] conv convolution descriptor. - * @param[in] gpuWorkSpace limited gpu workspace. - * @param[in] sizeInBytes gpu workspace size (bytes). - * @param[in] convBwdFilterAlgo backward filter algorithm. - */ -extern void hl_convolution_backward_filter(hl_tensor_descriptor input, - real* input_data, - hl_tensor_descriptor output, - real* output_grad_data, - hl_filter_descriptor filter, - real* filter_grad_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convBwdFilterAlgo); - -/** - * @brief convolution backward data(calculate input image grad data). - * - * @param[in] input input image descriptor. - * @param[out] input_data_grad input image grad data. - * @param[in] output output image descriptor. - * @param[in] output_grad_data output image grad data. - * @param[in] filter filter descriptor. - * @param[in] filter_data filter data. - * @param[in] conv convolution descriptor. - * @param[in] gpuWorkSpace limited gpu workspace. - * @param[in] sizeInBytes gpu workspace size (bytes). - * @param[in] convBwdDataAlgo backward data algorithm. - */ -extern void hl_convolution_backward_data(hl_tensor_descriptor input, - real* input_data_grad, - hl_tensor_descriptor output, - real* output_grad_data, - hl_filter_descriptor filter, - real* filter_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convBwdDataAlgo); - -/** - * @brief convolution backward bias(calculate bias grad data). 
- * - * @param[in] bias bias descriptor. - * @param[out] bias_grad_data bias grad data. - * @param[in] output output image descriptor. - * @param[in] output_grad_data output image grad data. - */ -extern void hl_convolution_backward_bias(hl_tensor_descriptor bias, - real* bias_grad_data, - hl_tensor_descriptor output, - real* output_grad_data); - -/** - * @brief softmax forward. - * - * @param[in] input input value. - * @param[out] output output value. - * @param[in] height matrix height. - * @param[in] width matrix width. - */ -extern void hl_softmax_forward(real* input, - real* output, - int height, - int width); - -/** - * @brief softmax backward. - * - * @param[in] output_value output value data. - * @param[out] output_grad output grad data. - * @param[in] height matrix height. - * @param[in] width matrix width. - */ -extern void hl_softmax_backward(real* output_value, - real* output_grad, - int height, - int width); - -/** - * @brief cudnn batch norm forward. - * - * @param[in] inputDesc input tensor descriptor desc. - * @param[in] input input data. - * @param[in] outputDesc output tensor descriptor desc. - * @param[out] output output data. - * @param[in] bnParamDesc tensor descriptor desc. - * bnScale, bnBias, running mean/var, save_mean/var. - * @param[in] scale batch normalization scale parameter (in original - * paper scale is referred to as gamma). - * @param[in] bias batch normalization bias parameter (in original - * paper scale is referred to as beta). - * @param[in] factor Factor used in the moving average computation. - * runningMean = newMean * factor - * + runningMean * (1 - factor) - * @param[in] runningMean running mean. - * @param[in] runningInvVar running variance. - * @param[in] epsilon Epsilon value used in the batch normalization - * formula. - * @param[out] savedMean optional cache to save intermediate results. - * @param[out] savedVar optional cache to save intermediate results. 
- * - */ -extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outputDesc, - real* output, - hl_tensor_descriptor bnParamDesc, - real* scale, - real* bias, - double factor, - real* runningMean, - real* runningInvVar, - double epsilon, - real* savedMean, - real* savedVar); - -/** - * @brief cudnn batch norm forward. - * - * @param[in] inputDesc input tensor descriptor desc. - * @param[in] input input data. - * @param[in] outputDesc output tensor descriptor desc. - * @param[out] output output data. - * @param[in] bnParamDesc tensor descriptor desc. - * bnScale, bnBias, running mean/var, save_mean/var. - * @param[in] scale batch normalization scale parameter (in original - * paper scale is referred to as gamma). - * @param[in] bias batch normalization bias parameter (in original - * paper scale is referred to as beta). - * @param[in] estimatedMean - * @param[in] estimatedVar It is suggested that resultRunningMean, - * resultRunningVariance from the - * cudnnBatchNormalizationForwardTraining call - * accumulated during the training phase are passed - * as inputs here. - * @param[in] epsilon Epsilon value used in the batch - * normalization formula. - * - */ -extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outputDesc, - real* output, - hl_tensor_descriptor bnParamDesc, - real* scale, - real* bias, - real* estimatedMean, - real* estimatedVar, - double epsilon); - -/** - * @brief cudnn batch norm forward. - * - * @param[in] inputDesc input tensor descriptor desc. - * @param[in] input input data. - * @param[in] outGradDesc output tensor descriptor desc. - * @param[out] outGrad output data. - * @param[in] inGradDesc input tensor descriptor desc. - * @param[in] inGrad input data. - * @param[in] dBnParamDesc tensor descriptor desc. - * bnScale, bnBias, running mean/var, - * save_mean/var. 
- * @param[in] scale batch normalization scale parameter (in original - * paper scale is referred to as gamma). - * @param[in] scaleGrad batch normalization scale parameter (in original - * paper scale is referred to as gamma) gradient. - * @param[in] biasGrad batch normalization bias parameter (in original - * paper scale is referred to as beta) gradient. - * @param[in] epsilon Epsilon value used in the batch - * normalization formula. - * @param[out] savedMean optional cache to save intermediate results. - * @param[out] savedInvVar optional cache to save intermediate results. - * - */ -extern void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outGradDesc, - real* outGrad, - hl_tensor_descriptor inGradDesc, - real* inGrad, - hl_tensor_descriptor dBnParamDesc, - real* scale, - real* scaleGrad, - real* biasGrad, - double epsilon, - real* savedMean, - real* savedInvVar); - -#endif // HL_CUDA_CUDNN_H_ diff --git a/paddle/legacy/cuda/include/hl_cuda_cudnn.ph b/paddle/legacy/cuda/include/hl_cuda_cudnn.ph deleted file mode 100644 index bb3b89f6faa9a9011470400fd6fcf4756cccf02a..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_cuda_cudnn.ph +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_CUDA_CUDNN_PH_ -#define HL_CUDA_CUDNN_PH_ - -#include "hl_base.h" - -/* - * @brief hppl for cudnn tensor4d descriptor. 
- */ -typedef struct { - cudnnTensorDescriptor_t desc; - cudnnTensorFormat_t format; - cudnnDataType_t data_type; // image data type - int batch_size; // number of input batch size - int feature_maps; // number of input feature maps - int height; // height of input image - int width; // width of input image -} _cudnn_tensor_descriptor, *cudnn_tensor_descriptor; - -#define GET_TENSOR_DESCRIPTOR(image) (((cudnn_tensor_descriptor)image)->desc) - -/* - * @brief hppl for cudnn pooling descriptor. - */ -typedef struct { - cudnnPoolingDescriptor_t desc; - cudnnPoolingMode_t mode; - int window_height; - int window_width; - int stride_height; - int stride_width; -} _cudnn_pooling_descriptor, *cudnn_pooling_descriptor; - -/* - * @brief hppl for cudnn filter descriptor. - */ -typedef struct { - cudnnFilterDescriptor_t desc; - cudnnDataType_t data_type; /* data type */ - int output_feature_maps; /* number of output feature maps */ - int input_feature_maps; /* number of input feature maps */ - int filter_height; /* height of each input filter */ - int filter_width; /* width of each input fitler */ -} _cudnn_filter_descriptor, *cudnn_filter_descriptor; - -#define GET_FILTER_DESCRIPTOR(filter) (((cudnn_filter_descriptor)filter)->desc) - -/* - * @brief hppl for cudnn convolution descriptor. 
- */ -typedef struct { - cudnnConvolutionDescriptor_t desc; - hl_tensor_descriptor input_image; - hl_filter_descriptor filter; - int padding_height; // zero-padding height - int padding_width; // zero-padding width - int stride_height; // vertical filter stride - int stride_width; // horizontal filter stride - int upscalex; // upscale the input in x-direction - int upscaley; // upscale the input in y-direction - cudnnConvolutionMode_t mode; -} _cudnn_convolution_descriptor, *cudnn_convolution_descriptor; - -#define GET_CONVOLUTION_DESCRIPTOR(conv) \ - (((cudnn_convolution_descriptor)conv)->desc) - -#endif /* HL_CUDA_CUDNN_PH_ */ diff --git a/paddle/legacy/cuda/include/hl_device_functions.cuh b/paddle/legacy/cuda/include/hl_device_functions.cuh deleted file mode 100755 index ef068e10622c914e41423b5d0f117d5716744646..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_device_functions.cuh +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_DEVICE_FUNCTIONS_CUH_ -#define HL_DEVICE_FUNCTIONS_CUH_ - -namespace paddle { - -template -inline __device__ T paddleAtomicAdd(T* address, T val); - -template <> -inline __device__ float paddleAtomicAdd(float* address, float val) { - return atomicAdd(address, val); -} - -template <> -inline __device__ double paddleAtomicAdd(double* address, double val) { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 - return atomicAdd(address, val); -#else - // NOLINTNEXTLINE - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; // NOLINT - - do { - assumed = old; - old = atomicCAS(address_as_ull, - assumed, - __double_as_longlong(val + - __longlong_as_double(assumed))); - } while (assumed != old); - - return __longlong_as_double(old); -#endif -} -} // namespace paddle - -/** - * @brief sum reduction - * - * @param[in,out] smem input data, better to use __shared__ memory. - * @param[in] tid thread index. - * @param[in] threads the total thread number used to reduce, - * such as, blockDim.x. - * - * @return smem[0]: the sum of each elements in smem. - */ -__device__ __forceinline__ -void simpleReduce(real* smem, int tid, int threads) { - for (unsigned int s = threads / 2; s > 0; s >>= 1) { - if (tid < s) { - smem[tid] += smem[tid + s]; - } - __syncthreads(); - } -} - -#endif /* HL_DEVICE_FUNCTIONS_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_functions.h b/paddle/legacy/cuda/include/hl_functions.h deleted file mode 100644 index 9912b4c17997b0d765ab9c67161bacb849ce3259..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_functions.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_FUNCTIONS_H_ -#define HL_FUNCTIONS_H_ - -#include "hl_base.h" - -/** - * sigmoid threshold maximum - */ -#define SIGMOID_THRESHOLD_MIN -40.0 - -/** - * sigmoid threshold minimum - */ -#define SIGMOID_THRESHOLD_MAX 13.0 - -#ifndef __NVCC__ -namespace hppl { -/* - * forward activation - */ -real relu(const real a); -real sigmoid(const real a); -real tanh(const real a); -real linear(const real a); - -/* - * backward activation - */ -real relu(const real a, const real b); -real sigmoid(const real a, const real b); -real tanh(const real a, const real b); -real linear(const real a, const real b); -} // namespace hppl - -#ifdef __AVX__ -#include "hl_avx_functions.h" -#endif - -#else -#include "hl_gpu_functions.cuh" -#endif - -#endif // HL_FUNCTIONS_H_ diff --git a/paddle/legacy/cuda/include/hl_gpu.h b/paddle/legacy/cuda/include/hl_gpu.h deleted file mode 100644 index 50a2e9cdd29bc3e37c2d67f772db52dd4d95cb9e..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_gpu.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_GPU_H_ -#define HL_GPU_H_ - -#include "hl_aggregate.h" -#include "hl_base.h" -#include "hl_cnn.h" -#include "hl_cuda.h" -#include "hl_cuda_cublas.h" -#include "hl_cuda_cudnn.h" -#include "hl_lstm.h" -#include "hl_matrix.h" -#include "hl_sequence.h" -#include "hl_sparse.h" -#ifndef PADDLE_MOBILE_INFERENCE -#include "hl_warpctc_wrap.h" -#endif - -#ifdef HPPL_STUB_FUNC -#include "stub/hl_aggregate_stub.h" -#include "stub/hl_cnn_stub.h" -#include "stub/hl_cuda_cublas_stub.h" -#include "stub/hl_cuda_cudnn_stub.h" -#include "stub/hl_cuda_stub.h" -#include "stub/hl_lstm_stub.h" -#include "stub/hl_matrix_stub.h" -#include "stub/hl_sequence_stub.h" -#include "stub/hl_sparse_stub.h" -#endif - -#endif /* HL_GPU_H_ */ diff --git a/paddle/legacy/cuda/include/hl_gpu_functions.cuh b/paddle/legacy/cuda/include/hl_gpu_functions.cuh deleted file mode 100644 index 705aa71f4bae94339012f17851bc8eb1a8f26c2f..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_gpu_functions.cuh +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_GPU_FUNCTIONS_CUH_ -#define HL_GPU_FUNCTIONS_CUH_ - -#include "hl_base.h" - -namespace hppl { - - __device__ static real relu(const real a) { - return a > 0.0f ? 
a : 0.0f; - } - - __device__ static real sigmoid(const real a) { - const real min = SIGMOID_THRESHOLD_MIN; - const real max = SIGMOID_THRESHOLD_MAX; - real tmp = (a < min) ? min : ((a > max) ? max : a); -#ifndef PADDLE_TYPE_DOUBLE - return __fdividef(1.0f, 1.0f + __expf(-tmp)); -#else - return 1.0 / (1.0 + exp(-tmp)); -#endif - } - - __device__ static real tanh(const real a) { -#ifndef PADDLE_TYPE_DOUBLE - return __fdividef(2.0f, (1.0f + __expf(-2.0f*a))) - 1.0f; -#else - return (2.0 / (1.0 + exp(-2.0*a))) - 1.0; -#endif - } - - __device__ static real linear(const real a) { - return a; - } - - __device__ static real relu(const real a, const real b) { - return a * (b > 0.0f ? 1.0f : 0.0f); - } - - __device__ static real sigmoid(const real a, const real b) { - return a * b * (1 - b); - } - - __device__ static real tanh(const real a, const real b) { - return a * (1.0f - b * b); - } - - __device__ static real linear(const real a, const real b) { - return a; - } - -} // namespace hppl - -#endif // HL_GPU_FUNCTIONS_CUH_ diff --git a/paddle/legacy/cuda/include/hl_gpu_gru.cuh b/paddle/legacy/cuda/include/hl_gpu_gru.cuh deleted file mode 100644 index 8d299572c73e879a3a1e9fb60608c4f3abd1f685..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_gpu_gru.cuh +++ /dev/null @@ -1,393 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_GPU_GRU_CUH_ -#define HL_GPU_GRU_CUH_ - -#ifdef __NVCC__ - -#include "paddle/legacy/utils/Logging.h" - -/* - * threads(framePerBlock, batchPerBlock) - * grid(frameBlocks, batchBlocks) - */ -template -__global__ void KeGruForwardResetOutput(OpResetOutput opResetOutput, - real *gateValue, - real *resetOutputValue, - real *prevOutputValue, - int frameSize, - int batchSize, - hl_activation_mode_t active_gate) { - const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; - if (frameIdx >= frameSize) return; - - int batchIdx = 0; - if (isBatch) { - batchIdx = blockIdx.y * blockDim.y + threadIdx.y; - if (batchIdx >= batchSize) return; - gateValue += batchIdx * 3 * frameSize; - resetOutputValue += batchIdx * frameSize; - } - - real rPrevOut = 0; - real rValueResetOutput; - real rValueUpdateGate = gateValue[frameIdx + frameSize * 0]; - real rValueResetGate = gateValue[frameIdx + frameSize * 1]; - - if (prevOutputValue) { - if (isBatch) prevOutputValue += batchIdx * frameSize; - rPrevOut = prevOutputValue[frameIdx]; - } - - opResetOutput(rValueUpdateGate, - rValueResetGate, - rPrevOut, - rValueResetOutput, - hppl::gpu::forward[active_gate]); - - gateValue[frameIdx + frameSize * 0] = rValueUpdateGate; - gateValue[frameIdx + frameSize * 1] = rValueResetGate; - resetOutputValue[frameIdx] = rValueResetOutput; -} - -/* - * threads(framePerBlock, batchPerBlock) - * grid(frameBlocks, batchBlocks) - */ -template -__global__ void KeGruForwardFinalOutput(OpFinalOutput opFinalOutput, - real *gateValue, - real *prevOutputValue, - real *outputValue, - int frameSize, - int batchSize, - hl_activation_mode_t active_node) { - const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; - if (frameIdx >= frameSize) return; - int batchIdx = 0; - if (isBatch) { - batchIdx = blockIdx.y * blockDim.y + threadIdx.y; - if (batchIdx >= batchSize) return; - gateValue += batchIdx * 3 * frameSize; - outputValue += batchIdx * frameSize; - } - - real rOutput; - real rPrevOut = 0; - 
real rValueUpdateGate = gateValue[frameIdx + frameSize * 0]; - real rValueFrameState = gateValue[frameIdx + frameSize * 2]; - - if (prevOutputValue) { - if (isBatch) prevOutputValue += batchIdx * frameSize; - rPrevOut = prevOutputValue[frameIdx]; - } - - opFinalOutput(rValueUpdateGate, - rValueFrameState, - rPrevOut, - rOutput, - hppl::gpu::forward[active_node]); - - gateValue[frameIdx + frameSize * 2] = rValueFrameState; - outputValue[frameIdx] = rOutput; -} - -template -void hl_gpu_gru_forward(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - dim3 threads; - dim3 grid; - if (batchSize == 1) { - int framePerBlock = frameSize <= 1024 ? frameSize : 1024; - int frameBlocks = (frameSize + 1024 - 1) / 1024; - threads = dim3(framePerBlock, 1); - grid = dim3(frameBlocks, 1); - } else { - threads = dim3(32, 32); - grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32); - } - - if (value.prevOutValue) { - hl_matrix_mul(value.prevOutValue, HPPL_OP_N, - value.gateWeight, HPPL_OP_N, - value.gateValue, - batchSize, 2*frameSize, frameSize, - /*alpha = */ 1, /*beta = */ 1, - frameSize, 2* frameSize, 3*frameSize); - } - - if (batchSize == 1) { - KeGruForwardResetOutput - <<>>(opResetOutput, - value.gateValue, value.resetOutputValue, value.prevOutValue, - frameSize, batchSize, active_gate); - } else { - KeGruForwardResetOutput - <<>>(opResetOutput, - value.gateValue, value.resetOutputValue, value.prevOutValue, - frameSize, batchSize, active_gate); - } - - if (value.prevOutValue) { - hl_matrix_mul(value.resetOutputValue, HPPL_OP_N, - value.stateWeight, HPPL_OP_N, - value.gateValue + 2*frameSize, - batchSize, frameSize, frameSize, - /*alpha = */ 1, /*beta = */ 1, - frameSize, frameSize, 3*frameSize); - } - - if (batchSize == 1) { - KeGruForwardFinalOutput - <<>>(opFinalOutput, - value.gateValue, value.prevOutValue, 
value.outputValue, - frameSize, batchSize, active_node); - } else { - KeGruForwardFinalOutput - <<>>(opFinalOutput, - value.gateValue, value.prevOutValue, value.outputValue, - frameSize, batchSize, active_node); - } - - CHECK_SYNC("hl_gpu_gru_forward failed"); -} - -/* - * threads(framePerBlock, batchPerBlock) - * grid(frameBlocks, batchBlocks) - */ -template -__global__ void KeGruBackwardStateGrad(OpStateGrad opStateGrad, - real *gateValue, - real *gateGrad, - real *prevOutValue, - real *prevOutGrad, - real *outputGrad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node) { - const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; - if (frameIdx >= frameSize) return; - int batchIdx = 0; - if (isBatch) { - batchIdx = blockIdx.y * blockDim.y + threadIdx.y; - if (batchIdx >= batchSize) return; - gateValue += batchIdx * 3 * frameSize; - gateGrad += batchIdx * 3 * frameSize; - outputGrad += batchIdx * frameSize; - } - - real rUpdateGateGrad; - real rFrameStateGrad; - real rPrevOutValue = 0; - real rPrevOutGrad = 0; - real rUpdateGateValue = gateValue[frameIdx + frameSize * 0]; - real rFrameStateValue = gateValue[frameIdx + frameSize * 2]; - real rOutGrad = outputGrad[frameIdx]; - - if (prevOutValue && prevOutGrad) { - if (isBatch) prevOutValue += batchIdx * frameSize; - rPrevOutValue = prevOutValue[frameIdx]; - - if (isBatch) prevOutGrad += batchIdx * frameSize; - rPrevOutGrad = prevOutGrad[frameIdx]; - } - - opStateGrad(rUpdateGateValue, - rUpdateGateGrad, - rFrameStateValue, - rFrameStateGrad, - rPrevOutValue, - rPrevOutGrad, - rOutGrad, - hppl::gpu::backward[active_node]); - - gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad; - gateGrad[frameIdx + frameSize * 2] = rFrameStateGrad; - if (prevOutGrad) { - prevOutGrad[frameIdx] = rPrevOutGrad; - } -} - -/* - * threads(framePerBlock, batchPerBlock) - * grid(frameBlocks, batchBlocks) - */ -template -__global__ void KeGruBackwardResetGrad(OpResetGrad opResetGrad, - real *gateValue, - real 
*gateGrad, - real *prevOutValue, - real *prevOutGrad, - real *resetOutputGrad, - int frameSize, - int batchSize, - hl_activation_mode_t active_gate) { - const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; - if (frameIdx >= frameSize) return; - int batchIdx = 0; - if (isBatch) { - batchIdx = blockIdx.y * blockDim.y + threadIdx.y; - if (batchIdx >= batchSize) return; - gateValue += batchIdx * 3 * frameSize; - gateGrad += batchIdx * 3 * frameSize; - resetOutputGrad += batchIdx * frameSize; - } - - real rResetGateGrad; - real rPrevOutValue = 0; - real rPrevOutGrad = 0; - real rResetOutputGrad = 0; - real rUpdateGateValue = gateValue[frameIdx + frameSize * 0]; - real rUpdateGateGrad = gateGrad[frameIdx + frameSize * 0]; - real rResetGateValue = gateValue[frameIdx + frameSize * 1]; - - if (prevOutValue && prevOutGrad) { - if (isBatch) prevOutValue += batchIdx * frameSize; - if (isBatch) prevOutGrad += batchIdx * frameSize; - rPrevOutValue = prevOutValue[frameIdx]; - rPrevOutGrad = prevOutGrad[frameIdx]; - rResetOutputGrad = resetOutputGrad[frameIdx]; - } - - opResetGrad(rUpdateGateValue, - rUpdateGateGrad, - rResetGateValue, - rResetGateGrad, - rPrevOutValue, - rPrevOutGrad, - rResetOutputGrad, - hppl::gpu::backward[active_gate]); - - gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad; - gateGrad[frameIdx + frameSize * 1] = rResetGateGrad; - if (prevOutGrad) { - prevOutGrad[frameIdx] = rPrevOutGrad; - } -} - -template -void hl_gpu_gru_backward(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - dim3 threads; - dim3 grid; - if (batchSize == 1) { - int framePerBlock = frameSize <= 1024 ? 
frameSize : 1024; - int frameBlocks = (frameSize + 1024 - 1) / 1024; - threads = dim3(framePerBlock, 1); - grid = dim3(frameBlocks, 1); - } else { - threads = dim3(32, 32); - grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32); - } - - if (batchSize == 1) { - KeGruBackwardStateGrad - <<>>(opStateGrad, - value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad, - grad.outputGrad, frameSize, batchSize, active_node); - } else { - KeGruBackwardStateGrad - <<>>(opStateGrad, - value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad, - grad.outputGrad, frameSize, batchSize, active_node); - } - - if (value.prevOutValue && grad.prevOutGrad) { - hl_matrix_mul(grad.gateGrad + 2*frameSize, HPPL_OP_N, - value.stateWeight, HPPL_OP_T, - grad.resetOutputGrad, - batchSize, frameSize, frameSize, - /*alpha = */ 1, /*beta = */ 0, - 3*frameSize, frameSize, frameSize); - if (grad.stateWeightGrad) { - hl_matrix_mul(value.resetOutputValue, HPPL_OP_T, - grad.gateGrad + 2*frameSize, HPPL_OP_N, - grad.stateWeightGrad, - frameSize, frameSize, batchSize, - /*alpha = */ 1, /*beta = */ 1, - frameSize, 3*frameSize, frameSize); - } - } - - if (batchSize == 1) { - KeGruBackwardResetGrad - <<>>(opResetGrad, - value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad, - grad.resetOutputGrad, frameSize, batchSize, active_gate); - } else { - KeGruBackwardResetGrad - <<>>(opResetGrad, - value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad, - grad.resetOutputGrad, frameSize, batchSize, active_gate); - } - - if (grad.prevOutGrad && value.prevOutValue) { - hl_matrix_mul(grad.gateGrad, HPPL_OP_N, - value.gateWeight, HPPL_OP_T, - grad.prevOutGrad, - batchSize, frameSize, 2*frameSize, - /*alpha = */ 1, /*beta = */ 1, - 3*frameSize, 2*frameSize, frameSize); - if (grad.gateWeightGrad) { - hl_matrix_mul(value.prevOutValue, HPPL_OP_T, - grad.gateGrad, HPPL_OP_N, - grad.gateWeightGrad, - frameSize, 2*frameSize, batchSize, - /*alpha = */ 1, /*beta = 
*/ 1, - frameSize, 3*frameSize, 2*frameSize); - } - } - - CHECK_SYNC("hl_gpu_gru_backward failed"); -} - -#else - -template -void hl_gpu_gru_forward(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) {} - -template -void hl_gpu_gru_backward(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) {} - -#endif - -#endif /* HL_GPU_GRU_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_gpu_lstm.cuh b/paddle/legacy/cuda/include/hl_gpu_lstm.cuh deleted file mode 100644 index aae011b838c0eca1197f55d236d759eab8ea993c..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_gpu_lstm.cuh +++ /dev/null @@ -1,300 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_GPU_LSTM_CUH_ -#define HL_GPU_LSTM_CUH_ - -#ifdef __NVCC__ - -#include "paddle/legacy/utils/Logging.h" -#include "hl_device_functions.cuh" - -/* - * threads(framePerBlock, batchPerBlock) - * grid(frameBlocks, batchBlocks) - */ -template -__global__ void KeLstmForward(Op op, - hl_lstm_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; - if (frameIdx >= frameSize) return; - - int batchIdx = 0; - if (isBatch) { - batchIdx = blockIdx.y * blockDim.y + threadIdx.y; - if (batchIdx >= batchSize) return; - value.gateValue += batchIdx * frameSize * 4; - value.outputValue += batchIdx * frameSize; - value.stateValue += batchIdx * frameSize; - value.stateActiveValue += batchIdx * frameSize; - } - - real rState; - real rPrevState = 0; - real rStateAtv; - real rOut; - real rValueIn; - real rValueIg; - real rValueFg; - real rValueOg; - real rCheckI = value.checkIg[frameIdx]; - real rCheckF = value.checkFg[frameIdx]; - real rCheckO = value.checkOg[frameIdx]; - - rValueIn = value.gateValue[frameIdx]; - rValueIg = value.gateValue[frameIdx + frameSize]; - rValueFg = value.gateValue[frameIdx + frameSize * 2]; - rValueOg = value.gateValue[frameIdx + frameSize * 3]; - - if (value.prevStateValue) { - if (isBatch) value.prevStateValue += batchIdx * frameSize; - rPrevState = value.prevStateValue[frameIdx]; - } - - op(rValueIn, - rValueIg, - rValueFg, - rValueOg, - rPrevState, - rState, - rStateAtv, - rOut, - rCheckI, - rCheckF, - rCheckO, - hppl::gpu::forward[active_node], - hppl::gpu::forward[active_gate], - hppl::gpu::forward[active_state]); - - value.gateValue[frameIdx] = rValueIn; - value.gateValue[frameIdx + frameSize] = rValueIg; - value.gateValue[frameIdx + frameSize * 2] = rValueFg; - value.gateValue[frameIdx + frameSize * 3] = rValueOg; - - value.stateValue[frameIdx] = rState; - 
value.stateActiveValue[frameIdx] = rStateAtv; - value.outputValue[frameIdx] = rOut; -} - -/* - * threads(framePerBlock, batchPerBlock) - * grid(frameBlocks, batchBlocks) - */ -template -__global__ void KeLstmBackward(Op op, - hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; - if (frameIdx >= frameSize) return; - - int batchIdx = 0; - if (isBatch) { - batchIdx = blockIdx.y * blockDim.y + threadIdx.y; - if (batchIdx >= batchSize) return; - value.gateValue += batchIdx * frameSize * 4; - value.stateValue += batchIdx * frameSize; - value.stateActiveValue += batchIdx * frameSize; - grad.gateGrad += batchIdx * frameSize * 4; - grad.stateGrad += batchIdx * frameSize; - grad.outputGrad += batchIdx * frameSize; - } - - real rValueIn; - real rValueIg; - real rValueFg; - real rValueOg; - real rGradIn; - real rGradIg; - real rGradFg; - real rGradOg; - real rPrevState = 0; - real rPrevStateGrad; - real rState; - real rStateGrad; - real rStateAtv; - real rOutputGrad; - real rCheckI = value.checkIg[frameIdx]; - real rCheckF = value.checkFg[frameIdx]; - real rCheckO = value.checkOg[frameIdx]; - real rCheckIGrad; - real rCheckFGrad; - real rCheckOGrad; - - rValueIn = value.gateValue[frameIdx]; - rValueIg = value.gateValue[frameIdx + frameSize]; - rValueFg = value.gateValue[frameIdx + frameSize * 2]; - rValueOg = value.gateValue[frameIdx + frameSize * 3]; - rState = value.stateValue[frameIdx]; - rStateAtv = value.stateActiveValue[frameIdx]; - rOutputGrad = grad.outputGrad[frameIdx]; - rStateGrad = grad.stateGrad[frameIdx]; - - if (value.prevStateValue) { - if (isBatch) value.prevStateValue += batchIdx * frameSize; - rPrevState = value.prevStateValue[frameIdx]; - } - - op(rValueIn, - rValueIg, - rValueFg, - rValueOg, - rGradIn, - rGradIg, - rGradFg, - rGradOg, - rPrevState, - 
rPrevStateGrad, - rState, - rStateGrad, - rStateAtv, - rOutputGrad, - rCheckI, - rCheckF, - rCheckO, - rCheckIGrad, - rCheckFGrad, - rCheckOGrad, - hppl::gpu::backward[active_node], - hppl::gpu::backward[active_gate], - hppl::gpu::backward[active_state]); - - grad.gateGrad[frameIdx] = rGradIn; - grad.gateGrad[frameIdx + frameSize ] = rGradIg; - grad.gateGrad[frameIdx + frameSize * 2] = rGradFg; - grad.gateGrad[frameIdx + frameSize * 3] = rGradOg; - grad.stateGrad[frameIdx] = rStateGrad; - if (grad.prevStateGrad) { - if (isBatch) grad.prevStateGrad += batchIdx * frameSize; - grad.prevStateGrad[frameIdx] = rPrevStateGrad; - } - - if (isBatch) { - if (value.prevStateValue) { - if (grad.checkIgGrad) paddle::paddleAtomicAdd(grad.checkIgGrad+frameIdx, rCheckIGrad); - if (grad.checkFgGrad) paddle::paddleAtomicAdd(grad.checkFgGrad+frameIdx, rCheckFGrad); - } - if (grad.checkOgGrad) paddle::paddleAtomicAdd(grad.checkOgGrad+frameIdx, rCheckOGrad); - } else { - if (value.prevStateValue) { - if (grad.checkIgGrad) grad.checkIgGrad[frameIdx] += rCheckIGrad; - if (grad.checkFgGrad) grad.checkFgGrad[frameIdx] += rCheckFGrad; - } - if (grad.checkOgGrad) grad.checkOgGrad[frameIdx] += rCheckOGrad; - } -} - -template -void hl_gpu_lstm_forward(Op op, - hl_lstm_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - dim3 threads; - dim3 grid; - if (batchSize == 1) { - int framePerBlock = frameSize <= 1024 ? 
frameSize : 1024; - int frameBlocks = (frameSize + 1024 - 1) / 1024; - threads = dim3(framePerBlock, 1); - grid = dim3(frameBlocks, 1); - } else { - /* framePerBlock = 32 batchPerBlock = 32 */ - threads = dim3(32, 32); - grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32); - } - - if (batchSize == 1) { - KeLstmForward - <<>>(op, value, - frameSize, batchSize, active_node, active_gate, active_state); - } else { - KeLstmForward - <<>>(op, value, - frameSize, batchSize, active_node, active_gate, active_state); - } - - CHECK_SYNC("hl_gpu_lstm_forward failed"); -} - -template -void hl_gpu_lstm_backward(Op op, - hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - dim3 threads; - dim3 grid; - if (batchSize == 1) { - int framePerBlock = frameSize <= 1024 ? frameSize : 1024; - int frameBlocks = (frameSize + 1024 - 1) / 1024; - threads = dim3(framePerBlock, 1); - grid = dim3(frameBlocks, 1); - } else { - /* framePerBlock = 32 batchPerBlock = 32 */ - threads = dim3(32, 32); - grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32); - } - - if (batchSize == 1) { - KeLstmBackward - <<>>(op, value, grad, - frameSize, batchSize, active_node, active_gate, active_state); - } else { - KeLstmBackward - <<>>(op, value, grad, - frameSize, batchSize, active_node, active_gate, active_state); - } - - CHECK_SYNC("hl_gpu_lstm_backward failed"); -} - -#else - -template -void hl_gpu_lstm_forward(Op op, - hl_lstm_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) {} - -template -void hl_gpu_lstm_backward(Op op, - hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) {} - -#endif - -#endif /* 
HL_GPU_LSTM_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_gpu_matrix_kernel.cuh b/paddle/legacy/cuda/include/hl_gpu_matrix_kernel.cuh deleted file mode 100644 index 6177d23657fba5b2800041a3dd7b5f76bf35aa1a..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_gpu_matrix_kernel.cuh +++ /dev/null @@ -1,629 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - - -#ifndef HL_GPU_MATRIX_KERNEL_CUH_ -#define HL_GPU_MATRIX_KERNEL_CUH_ - -#include -#include "paddle/legacy/utils/Logging.h" -#include "hl_base.h" - -#ifdef __NVCC__ -/* gpu apply interface */ - -template -__global__ void KeEltWiseUnaryOp(T* A_d, const int border, Op op) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < border) { - op.gpuOperator(A_d[idx]); - } -} - -template -__global__ void KeEltWiseUnaryOp(T* A_d, - int dimM, - int dimN, - int lda, - Op op) { - const int colIdx = blockIdx.x * blockDim.x + threadIdx.x; - const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y; - for (int i = rowIdx; i < dimM; i += gridDim.y * blockDim.y) { - for (int j = colIdx; j < dimN; j += gridDim.x * blockDim.x) { - op.gpuOperator(A_d[i * lda + j]); - } - } -} - -template -__global__ void KeEltWiseBinaryOp(T* A_d, T *B_d, const int border, Op op) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < border) { - op.gpuOperator(A_d[idx], B_d[idx]); - } -} - -template -__global__ void KeEltWiseBinaryOp(T 
*A_d, - T *B_d, - int dimM, - int dimN, - int lda, - int ldb, - Op op) { - const int colIdx = blockIdx.x * blockDim.x + threadIdx.x; - const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y; - for (int i = rowIdx; i < dimM; i += gridDim.y * blockDim.y) { - for (int j = colIdx; j < dimN; j += gridDim.x * blockDim.x) { - if (BAsRowVector == 0 && BAsColVector == 0) { - op.gpuOperator(A_d[i * lda + j], B_d[i * ldb + j]); - } else if (BAsRowVector == 1 && BAsColVector == 0) { - op.gpuOperator(A_d[i * lda + j], B_d[j]); - } else if (BAsRowVector == 0 && BAsColVector == 1) { - op.gpuOperator(A_d[i * lda + j], B_d[i * ldb]); - } else { - op.gpuOperator(A_d[i * lda + j], B_d[0]); - } - } - } -} - -template -__global__ void KeEltWiseTernaryOp(T* A_d, - T *B_d, - T *C_d, - const int border, - Op op) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < border) { - op.gpuOperator(A_d[idx], B_d[idx], C_d[idx]); - } -} - -template -__global__ void KeEltWiseTernaryOp(T* A_d, - T* B_d, - T* C_d, - int dimM, - int dimN, - int lda, - int ldb, - int ldc, - Op op) { - const int colIdx = blockIdx.x * blockDim.x + threadIdx.x; - const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y; - for (int i = rowIdx; i < dimM; i += gridDim.y * blockDim.y) { - for (int j = colIdx; j < dimN; j += gridDim.x * blockDim.x) { - if (CAsRowVector == 0 && CAsColVector == 0) { - op.gpuOperator(A_d[i*lda + j], B_d[i*ldb + j], C_d[i*ldc + j]); - } else if (CAsRowVector == 1 && CAsColVector == 0) { - op.gpuOperator(A_d[i*lda + j], B_d[i*ldb + j], C_d[j]); - } else if (CAsRowVector == 0 && CAsColVector == 1) { - op.gpuOperator(A_d[i*lda + j], B_d[i*ldb + j], C_d[i*ldc]); - } else { - op.gpuOperator(A_d[i*lda + j], B_d[i*ldb + j], C_d[0]); - } - } - } -} - -template -__global__ void KeEltWiseQuaternaryOp(T* A_d, - T* B_d, - T* C_d, - T* D_d, - const int border, - Op op) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < border) { - op.gpuOperator(A_d[idx], B_d[idx], 
C_d[idx], D_d[idx]); - } -} - -template -__global__ void KeEltWiseQuaternaryOp(T* A_d, - T* B_d, - T* C_d, - T* D_d, - int dimM, - int dimN, - int lda, - int ldb, - int ldc, - int ldd, - Op op) { - const int colIdx = blockIdx.x * blockDim.x + threadIdx.x; - const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y; - for (int i = rowIdx; i < dimM; i += gridDim.y * blockDim.y) { - for (int j = colIdx; j < dimN; j += gridDim.x * blockDim.x) { - op.gpuOperator(A_d[i*lda + j], - B_d[i*ldb + j], C_d[i*ldc + j], D_d[i*ldd + j]); - } - } -} - -/** - * @brief gpu element wise unary operator. - */ -template -void hl_gpu_apply_unary_op(Op op, T* A_d, int dimM, int dimN, int lda) { - CHECK_NOTNULL(A_d); - - if (dimM == 1 || dimN == lda) { - int size = dimM * dimN; - int blockSize = size <= 1024 ? size : 1024; - int gridSize = (size + 1024 - 1) / 1024; - KeEltWiseUnaryOp<<>> - (A_d, size, op); - } else { - int blockSizeY = std::min(32, dimM); - int blockSizeX = (32 / blockSizeY) * 32; - int gridSizeX = std::min(32, (dimN + blockSizeX - 1) / blockSizeX); - int gridSizeY = std::min(32, (dimM + blockSizeY - 1) / blockSizeY); - dim3 threads(blockSizeX, blockSizeY); - dim3 grid(gridSizeX, gridSizeY); - KeEltWiseUnaryOp<<>> - (A_d, dimM, dimN, lda, op); - } - - CHECK_SYNC("hl_gpu_apply_unary_op failed"); -} - -/** - * @brief gpu element wise binary operator. - */ -template -void hl_gpu_apply_binary_op(Op op, - T* A_d, - T* B_d, - int dimM, - int dimN, - int lda, - int ldb) { - CHECK_NOTNULL(A_d); - - if ((BAsRowVector == 0 && BAsColVector == 0) && - ((dimM == 1) || (dimN == lda && dimN == ldb))) { - int size = dimM * dimN; - int blockSize = size <= 1024 ? 
size : 1024; - int gridSize = (size + 1024 - 1) / 1024; - KeEltWiseBinaryOp<<>> - (A_d, B_d, size, op); - } else { - int blockSizeY = std::min(32, dimM); - int blockSizeX = (32 / blockSizeY) * 32; - int gridSizeX = std::min(32, (dimN + blockSizeX - 1) / blockSizeX); - int gridSizeY = std::min(32, (dimM + blockSizeY - 1) / blockSizeY); - dim3 threads(blockSizeX, blockSizeY); - dim3 grid(gridSizeX, gridSizeY); - KeEltWiseBinaryOp - <<>> - (A_d, B_d, dimM, dimN, lda, ldb, op); - } - - CHECK_SYNC("hl_gpu_apply_binary_op failed"); -} - -/** - * @brief gpu element wise ternary operator. - */ -template -void hl_gpu_apply_ternary_op(Op op, - T* A_d, - T* B_d, - T* C_d, - int dimM, - int dimN, - int lda, - int ldb, - int ldc) { - CHECK_NOTNULL(A_d); - - if ((CAsRowVector == 0 && CAsColVector == 0) && - ((dimM == 1) || (dimN == lda && dimN == ldb && dimN == ldc))) { - int size = dimM * dimN; - int blockSize = size <= 1024 ? size : 1024; - int gridSize = (size + 1024 - 1) / 1024; - KeEltWiseTernaryOp<<>> - (A_d, B_d, C_d, size, op); - } else { - int blockSizeY = std::min(32, dimM); - int blockSizeX = (32 / blockSizeY) * 32; - int gridSizeX = std::min(32, (dimN + blockSizeX - 1) / blockSizeX); - int gridSizeY = std::min(32, (dimM + blockSizeY - 1) / blockSizeY); - dim3 threads(blockSizeX, blockSizeY); - dim3 grid(gridSizeX, gridSizeY); - KeEltWiseTernaryOp - <<>> - (A_d, B_d, C_d, dimM, dimN, lda, ldb, ldc, op); - } - - CHECK_SYNC("hl_gpu_apply_ternary_op failed"); -} - - -/** - * @brief gpu element wise quaternary operator. - */ -template -void hl_gpu_apply_quaternary_op(Op op, - T* A_d, - T* B_d, - T* C_d, - T* D_d, - int dimM, - int dimN, - int lda, - int ldb, - int ldc, - int ldd) { - CHECK_NOTNULL(A_d); - - if ((dimM == 1) || - (dimN == lda && dimN == ldb && dimN == ldc && dimN == ldd)) { - int size = dimM * dimN; - int blockSize = size <= 1024 ? 
size : 1024; - int gridSize = (size + 1024 - 1) / 1024; - KeEltWiseQuaternaryOp<<>> - (A_d, B_d, C_d, D_d, size, op); - } else { - int blockSizeY = std::min(32, dimM); - int blockSizeX = (32 / blockSizeY) * 32; - int gridSizeX = std::min(32, (dimN + blockSizeX - 1) / blockSizeX); - int gridSizeY = std::min(32, (dimM + blockSizeY - 1) / blockSizeY); - dim3 threads(blockSizeX, blockSizeY); - dim3 grid(gridSizeX, gridSizeY); - KeEltWiseQuaternaryOp<<>> - (A_d, B_d, C_d, D_d, dimM, dimN, lda, ldb, ldc, ldd, op); - } - - CHECK_SYNC("hl_gpu_apply_quaternary_op failed"); -} - -#else - -template -void hl_gpu_apply_unary_op(Op op, T* A_d, int dimM, int dimN, int lda) {} - -template -void hl_gpu_apply_binary_op(Op op, - T* A_d, - T* B_d, - int dimM, - int dimN, - int lda, - int ldb) {} - -template -void hl_gpu_apply_ternary_op(Op op, - T* A_d, - T* B_d, - T* C_d, - int dimM, - int dimN, - int lda, - int ldb, - int ldc) {} - -template -void hl_gpu_apply_quaternary_op(Op op, - T* A_d, - T* B_d, - T* C_d, - T* D_d, - int dimM, - int dimN, - int lda, - int ldb, - int ldc, - int ldd) {} -#endif - -#ifdef __NVCC__ -/** - * @brief matrix row operator. 
- */ - -template -__device__ __inline__ real sumRow(Agg agg, Op op, - int idx, int blockSize, - int dimN, real *A) { - real tmp = agg.init(); - int cnt = (dimN + blockSize -1) / blockSize; - for (int i = 0; i < cnt && idx < dimN; i++) { - tmp = agg(tmp, op(A[idx])); - idx += blockSize; - } - return tmp; -} - -template -__device__ __inline__ real sumRow(Agg agg, Op op, - int idx, int blockSize, - int dimN, real *A, real *B) { - real tmp = agg.init(); - int cnt = (dimN + blockSize -1) / blockSize; - for (int i = 0; i < cnt && idx < dimN; i++) { - tmp = agg(tmp, op(A[idx], B[idx])); - idx += blockSize; - } - return tmp; -} - -template -__device__ __inline__ void aggRow(Agg agg, real *row, int size, int tid) { - for (int stride = size/2; stride > 0; stride = stride/2) { - if (tid < stride) { - row[tid] = agg(row[tid], row[tid + stride]); - } - __syncthreads(); - } -} - -template -__global__ void KeMatrixRowOp(Agg agg, Op op, Saver sv, - int dimN, - real *dst, int ld, - real *A, int lda) { - __shared__ real row_s[blockSize]; - int rowId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x; - - A += rowId*lda; - row_s[tid] = sumRow(agg, op, tid, blockSize, dimN, A); - __syncthreads(); - - aggRow(agg, row_s, blockSize, tid); - __syncthreads(); - - if (tid == 0) { - dst[rowId*ld] = sv(dst[rowId*ld], row_s[0]); - } -} - -template -__global__ void KeMatrixRowOp(Agg agg, Op op, Saver sv, - int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb) { - __shared__ real row_s[blockSize]; - int rowId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x; - - A += rowId*lda; - B += rowId*ldb; - row_s[tid] = sumRow(agg, op, tid, blockSize, dimN, A, B); - __syncthreads(); - - aggRow(agg, row_s, blockSize, tid); - __syncthreads(); - - if (tid == 0) { - dst[rowId*ld] = sv(dst[rowId*ld], row_s[0]); - } -} - -/** - * @brief matrix column operator. 
- */ -template -__device__ __inline__ real sumCol(Agg agg, Op op, - int index, int stride, - int dimM, real *A, int lda) { - real tmp = agg.init(); - for (; index < dimM;) { - tmp = agg(tmp, op(A[index*lda])); - index += stride; - } - return tmp; -} - -template -__device__ __inline__ real sumCol(Agg agg, Op op, - int index, int stride, int dimM, - real *A, int lda, real *B, int ldb) { - real tmp = agg.init(); - for (; index < dimM;) { - tmp = agg(tmp, op(A[index*lda], B[index*ldb])); - index += stride; - } - return tmp; -} - -template -__global__ void KeMatrixColumnOp(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - int rowIdx = blockIdx.x * blockDim.x + threadIdx.x; - if (rowIdx < dimN) { - A += rowIdx; - real tmp = sumCol(agg, op, 0, 1, dimM, A, lda); - dst[rowIdx] = sv(dst[rowIdx], tmp); - } -} - -template -__global__ void KeMatrixColumnOp_S(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { - __shared__ real col_s[blockDimX*blockDimY]; - int rowIdx = blockIdx.x * blockDim.x + threadIdx.x; - - if (rowIdx < dimN) { - A += rowIdx; - real tmp = sumCol(agg, op, threadIdx.y, blockDimY, dimM, A, lda); - col_s[threadIdx.x + threadIdx.y*blockDimX] = tmp; - } - __syncthreads(); - - if (rowIdx < dimN) { - if (threadIdx.y ==0) { - real tmp = agg.init(); - for (int i=0; i < blockDimY; i++) { - tmp = agg(tmp, col_s[threadIdx.x + i*blockDimX]); - } - dst[rowIdx] = sv(dst[rowIdx], tmp); - } - } -} - -template -__global__ void KeMatrixColumnOp(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - int rowIdx = blockIdx.x * blockDim.x + threadIdx.x; - if (rowIdx < dimN) { - A += rowIdx; - B += rowIdx; - real tmp = sumCol(agg, op, 0, 1, dimM, A, lda, B, ldb); - dst[rowIdx] = sv(dst[rowIdx], tmp); - } -} - -template -__global__ void KeMatrixColumnOp_S(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { - __shared__ 
real col_s[blockDimX*blockDimY]; - int rowIdx = blockIdx.x * blockDim.x + threadIdx.x; - - if (rowIdx < dimN) { - A += rowIdx; - B += rowIdx; - real tmp = sumCol(agg, op, - threadIdx.y, blockDimY, dimM, A, lda, B, ldb); - col_s[threadIdx.x + threadIdx.y*blockDimX] = tmp; - } - __syncthreads(); - - if (rowIdx < dimN) { - if (threadIdx.y ==0) { - real tmp = agg.init(); - for (int i=0; i < blockDimY; i++) { - tmp = agg(tmp, col_s[threadIdx.x + i*blockDimX]); - } - dst[rowIdx] = sv(dst[rowIdx], tmp); - } - } -} - -#endif - -template -void hl_gpu_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda) { -#ifdef __NVCC__ - CHECK_NOTNULL(dst); - CHECK_NOTNULL(A); - - int blocksX = dimM; - int blocksY = 1; - dim3 threads(128, 1); - dim3 grid(blocksX, blocksY); - KeMatrixRowOp<<< grid, threads, 0, STREAM_DEFAULT >>> - (agg, op, sv, dimN, dst, ld, A, lda); - - CHECK_SYNC("hl_matrix_row_op failed"); -#endif -} - -template -void hl_gpu_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb) { -#ifdef __NVCC__ - CHECK_NOTNULL(dst); - CHECK_NOTNULL(A); - - int blocksX = dimM; - int blocksY = 1; - dim3 threads(128, 1); - dim3 grid(blocksX, blocksY); - KeMatrixRowOp<<< grid, threads, 0, STREAM_DEFAULT >>> - (agg, op, sv, dimN, dst, ld, A, lda, B, ldb); - - CHECK_SYNC("hl_matrix_row_op failed"); -#endif -} - -template -void hl_gpu_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda) { -#ifdef __NVCC__ - if (dimN >= 8192) { - int blocksX = (dimN + 128 -1) / 128; - int blocksY = 1; - dim3 threads(128, 1); - dim3 grid(blocksX, blocksY); - KeMatrixColumnOp - <<< grid, threads, 0, STREAM_DEFAULT >>> - (agg, op, sv, dimM, dimN, dst, A, lda); - } else { - int blocksX = (dimN + 32 -1) / 32; - int blocksY = 1; - dim3 threads(32, 32); - dim3 grid(blocksX, blocksY); - KeMatrixColumnOp_S - <<< grid, threads, 0, STREAM_DEFAULT>>> - (agg, 
op, sv, dimM, dimN, dst, A, lda); - } - - CHECK_SYNC("hl_matrix_column_op failed"); -#endif -} - -template -void hl_gpu_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb) { -#ifdef __NVCC__ - if (dimN >= 8192) { - int blocksX = (dimN + 128 -1) / 128; - int blocksY = 1; - dim3 threads(128, 1); - dim3 grid(blocksX, blocksY); - KeMatrixColumnOp - <<< grid, threads, 0, STREAM_DEFAULT >>> - (agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } else { - int blocksX = (dimN + 32 -1) / 32; - int blocksY = 1; - dim3 threads(32, 32); - dim3 grid(blocksX, blocksY); - KeMatrixColumnOp_S - <<< grid, threads, 0, STREAM_DEFAULT>>> - (agg, op, sv, dimM, dimN, dst, A, lda, B, ldb); - } - - CHECK_SYNC("hl_matrix_column_op failed"); -#endif -} - -#endif /* HL_GPU_MATRIX_KERNEL_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_gru_ops.cuh b/paddle/legacy/cuda/include/hl_gru_ops.cuh deleted file mode 100644 index 6c647c514db6a4f22b5c472835f4c0ab0ec9b869..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_gru_ops.cuh +++ /dev/null @@ -1,205 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_GRU_OPS_CUH_ -#define HL_GRU_OPS_CUH_ - -#ifdef __CUDA_ARCH__ -#define INLINE __device__ inline -#else -#define INLINE inline -#endif - -namespace hppl { - -namespace forward { -class gru_resetOutput { -public: - /** - * @param[in,out] valueUpdateGate update gate - * @param[in,out] valueResetGate reset gate - * @param[in] prevOut previous output - * @param[out] valueResetOutput intermediate value for frame state - * @param[in] actGate forward function of gate - */ - INLINE void operator()(real &valueUpdateGate, - real &valueResetGate, - real &prevOut, - real &valueResetOutput, - Active::forward actGate) { - valueUpdateGate = actGate(valueUpdateGate); - valueResetGate = actGate(valueResetGate); - valueResetOutput = prevOut * valueResetGate; - } -#ifndef __NVCC__ -#ifndef __AVX__ - static const bool avx = false; -#else - static const bool avx = true; - INLINE void operator()(__m256 &valueUpdateGate, - __m256 &valueResetGate, - __m256 &prevOut, - __m256 &valueResetOutput, - Active<__m256>::forward actGate) { - valueUpdateGate = actGate(valueUpdateGate); - valueResetGate = actGate(valueResetGate); - valueResetOutput = _mm256_mul_ps(prevOut, valueResetGate); - } -#endif -#endif -}; - -class gru_finalOutput { -public: - /** - * @param[in] valueUpdateGate update gate - * @param[in,out] valueFrameState frame state ({\tilde{h}_t}) - * @param[in] prevOut previous output - * @param[out] valueOutput output - * @param[in] actInput forward function of node - */ - INLINE void operator()(real &valueUpdateGate, - real &valueFrameState, - real &prevOut, - real &valueOutput, - Active::forward actInput ) { - valueFrameState = actInput(valueFrameState); - valueOutput = prevOut - (valueUpdateGate * prevOut) + - (valueUpdateGate * valueFrameState); - } -#ifndef __NVCC__ -#ifndef __AVX__ - static const bool avx = false; -#else - static const bool avx = true; - INLINE void operator()(__m256 &valueUpdateGate, - __m256 &valueFrameState, - __m256 &prevOut, - __m256 
&valueOutput, - Active<__m256>::forward actInput) { - valueFrameState = actInput(valueFrameState); - valueOutput = _mm256_add_ps( - _mm256_sub_ps(prevOut, _mm256_mul_ps(valueUpdateGate, prevOut)), - _mm256_mul_ps(valueUpdateGate, valueFrameState)); - } -#endif -#endif -}; -} // namespace forward - -namespace backward { -class gru_stateGrad { -public: - /** - * @param[in] valueUpdateGate update gate value - * @param[out] gradUpdateGate update gate grad - * @param[in] valueFrameState frame state value - * @param[out] gradFrameState frame state grad - * @param[in] valuePrevOut previous output value - * @param[in,out] gradPrevOut previous output grad - * @param[in] gradOutput output grad - * @param[in] actInput backward function of frame state - */ - INLINE void operator()(real &valueUpdateGate, - real &gradUpdateGate, - real &valueFrameState, - real &gradFrameState, - real &valuePrevOut, - real &gradPrevOut, - real &gradOutput, - Active::backward actInput) { - gradUpdateGate = (gradOutput * valueFrameState); - gradUpdateGate -= (gradOutput * valuePrevOut); - gradPrevOut -= (gradOutput * valueUpdateGate); - gradPrevOut += gradOutput; - gradFrameState = actInput(gradOutput * valueUpdateGate, valueFrameState); - } -#ifndef __NVCC__ -#ifndef __AVX__ - static const bool avx = false; -#else - static const bool avx = true; - INLINE void operator()(__m256 &valueUpdateGate, - __m256 &gradUpdateGate, - __m256 &valueFrameState, - __m256 &gradFrameState, - __m256 &valuePrevOut, - __m256 &gradPrevOut, - __m256 &gradOutput, - Active<__m256>::backward actInput) { - gradUpdateGate = _mm256_mul_ps(gradOutput, valueFrameState); - gradUpdateGate = _mm256_sub_ps( - gradUpdateGate, _mm256_mul_ps(gradOutput, valuePrevOut)); - gradPrevOut = _mm256_add_ps( - _mm256_sub_ps(gradPrevOut, _mm256_mul_ps(gradOutput, valueUpdateGate)), - gradOutput); - gradFrameState = actInput( - _mm256_mul_ps(gradOutput, valueUpdateGate), valueFrameState); - } -#endif -#endif -}; - -class gru_resetGrad { -public: 
- /** - * @param[in] valueUpdateGate update gate value - * @param[in,out] gradUpdateGate update gate grad - * @param[in] valueResetGate reset gate value - * @param[out] gradResetGate reset gate grad - * @param[in] valuePrevOut previous output value - * @param[in,out] gradPrevOut previous output grad - * @param[in] gradResetOutput reset output grad (temp val) - * @param[in] actGate backward function of gate - */ - INLINE void operator()(real &valueUpdateGate, - real &gradUpdateGate, - real &valueResetGate, - real &gradResetGate, - real &valuePrevOut, - real &gradPrevOut, - real &gradResetOutput, - Active::backward actGate) { - gradResetGate = (gradResetOutput * valuePrevOut); - gradPrevOut += (gradResetOutput * valueResetGate); - gradUpdateGate = actGate(gradUpdateGate, valueUpdateGate); - gradResetGate = actGate(gradResetGate , valueResetGate); - } -#ifndef __NVCC__ -#ifndef __AVX__ - static const bool avx = false; -#else - static const bool avx = true; - INLINE void operator()(__m256 &valueUpdateGate, - __m256 &gradUpdateGate, - __m256 &valueResetGate, - __m256 &gradResetGate, - __m256 &valuePrevOut, - __m256 &gradPrevOut, - __m256 &gradResetOutput, - Active<__m256>::backward actGate) { - gradResetGate = _mm256_mul_ps(gradResetOutput, valuePrevOut); - gradPrevOut = _mm256_add_ps( - gradPrevOut, _mm256_mul_ps(gradResetOutput, valueResetGate)); - gradUpdateGate = actGate(gradUpdateGate, valueUpdateGate); - gradResetGate = actGate(gradResetGate , valueResetGate); - } -#endif -#endif -}; -} // namespace backward -} // namespace hppl - -#endif /* HL_GRU_OPS_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_lstm.h b/paddle/legacy/cuda/include/hl_lstm.h deleted file mode 100644 index 5db4783bf4dd871a2349d527aad315e7426815c5..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_lstm.h +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_LSTM_H_ -#define HL_LSTM_H_ - -#include "hl_base.h" - -/** - * @brief Lstm sequence parallel forward. - * - * @param[in] gateValue input value. - * @param[out] stateValue state value. - * @param[out] preOutputValue prev output value. - * @param[out] outputValue output value. - * @param[in] checkIg bias. - * @param[in] checkFg bias. - * @param[in] checkOg bias. - * @param[in] weight weight. - * @param[in] sequence sequence index. - * @param[in] frameSize frame size. - * @param[in] numSequences number of sequences. - * @param[in] reversed reverse. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - * @param[in] active_state actvie gate type. - * - * - * @note Only support frameSize = 32 or 64. - */ -extern void hl_lstm_parallel_forward(real *gateValue, - real *stateValue, - real *preOutputValue, - real *outputValue, - real *checkIg, - real *checkFg, - real *checkOg, - real *weight, - const int *sequence, - int frameSize, - int numSequences, - bool reversed, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state); - -/** - * @brief Lstm sequence parallel backward data. - * - * @param[in] gateValue input value. - * @param[out] gateGrad input gradient. - * @param[in] stateValue state value. - * @param[out] stateGrad state gradient. - * @param[out] preOutputValue prev output value. - * @param[out] preOutputGrad prev output gradient. 
- * @param[in] outputGrad output gradient. - * @param[in] checkIg bias. - * @param[out] checkIgGrad bias gradient. - * @param[in] checkFg bias. - * @param[out] checkFgGrad bias gradient. - * @param[in] checkOg bias. - * @param[out] checkOgGrad bias gradient. - * @param[in] weight weight. - * @param[in] sequence sequence index. - * @param[in] frameSize frame size. - * @param[in] numSequences number of sequences. - * @param[in] reversed reverse. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - * @param[in] active_state actvie gate type. - * - * - * @note Only support frameSize = 32 or 64. - */ -extern void hl_lstm_parallel_backward_data(real *gateValue, - real *gateGrad, - real *stateValue, - real *stateGrad, - real *preOutputValue, - real *preOutputGrad, - real *outputGrad, - real *checkIg, - real *checkIgGrad, - real *checkFg, - real *checkFgGrad, - real *checkOg, - real *checkOgGrad, - real *weight, - const int *sequence, - int frameSize, - int numSequences, - bool reversed, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state); - -/** - * @brief Lstm sequence parallel backward weight. - * - * @param[out] weightGrad weight gradient. - * @param[in] outputValue output value. - * @param[in] gateGrad gate gradient. - * @param[in] sequence sequence index. - * @param[in] frameSize frame size. - * @param[in] batchSize batch size. - * @param[in] numSequences number of sequences. - * @param[in] reversed reverse. 
- * - */ -extern void hl_lstm_parallel_backward_weight(real *weightGrad, - real *outputValue, - real *gateGrad, - const int *sequence, - int frameSize, - int batchSize, - int numSequences, - bool reversed); - -#endif /* HL_LSTM_H_ */ diff --git a/paddle/legacy/cuda/include/hl_lstm_ops.cuh b/paddle/legacy/cuda/include/hl_lstm_ops.cuh deleted file mode 100644 index 394fdf5ac07d533579307bf478c5e491669f4c59..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_lstm_ops.cuh +++ /dev/null @@ -1,213 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_LSTM_OPS_CUH_ -#define HL_LSTM_OPS_CUH_ - -#ifdef __CUDA_ARCH__ -#define INLINE __device__ inline -#else -#define INLINE inline -#endif - -namespace hppl { - -namespace forward { -class lstm { -public: - /** - * @param valueIn input - * @param valueIg input gate - * @param valueFg forget gate - * @param valueOg output gate - * @param prevState previous state - * @param state current state - * @param stateAtv state active - * @param output output - * @param checkI check input gate - * @param checkF check forget gate - * @param checkO check output gate - * @param actInput forward function of input - * @param actGate forward function of gate - * @param actState forward function of state - */ - INLINE void operator()(real &valueIn, - real &valueIg, - real &valueFg, - real &valueOg, - real &prevState, - real &state, - real &stateAtv, - real &output, - real &checkI, - real &checkF, - real &checkO, - Active::forward actInput, - Active::forward actGate, - Active::forward actState) { - valueIn = actInput(valueIn); - valueIg = actGate(valueIg + prevState * checkI); - valueFg = actGate(valueFg + prevState * checkF); - state = valueIn * valueIg + prevState * valueFg; - valueOg = actGate(valueOg + state * checkO); - stateAtv = actState(state); - output = valueOg * stateAtv; - } -#ifndef __NVCC__ -#ifndef __AVX__ - static const bool avx = false; -#else - static const bool avx = true; - INLINE void operator()(__m256 &valueIn, - __m256 &valueIg, - __m256 &valueFg, - __m256 &valueOg, - __m256 &prevState, - __m256 &state, - __m256 &stateAtv, - __m256 &output, - __m256 &checkI, - __m256 &checkF, - __m256 &checkO, - Active<__m256>::forward actInput, - Active<__m256>::forward actGate, - Active<__m256>::forward actState) { - valueIn = actInput(valueIn); - valueIg = actGate( - _mm256_add_ps(valueIg, _mm256_mul_ps(prevState, checkI))); - valueFg = actGate( - _mm256_add_ps(valueFg, _mm256_mul_ps(prevState, checkF))); - state = _mm256_add_ps(_mm256_mul_ps(valueIn, 
valueIg) - , _mm256_mul_ps(prevState, valueFg)); - valueOg = actGate(_mm256_add_ps(valueOg, _mm256_mul_ps(state, checkO))); - stateAtv = actState(state); - output = _mm256_mul_ps(valueOg, stateAtv); - } -#endif -#endif -}; -} // namespace forward - -namespace backward { -class lstm { -public: - /** - * @param valueIn input - * @param valueIg input gate - * @param valueFg forget gate - * @param valueOg output gate - * @param gradIn input grad - * @param gradIg input gate grad - * @param gradFg forget gate grad - * @param gradOg output gate grad - * @param prevState previous state value - * @param prevStateGrad previous state grad - * @param state current state value - * @param stateGrad current state grad - * @param stateAtv state active - * @param outputGrad output grad - * @param checkI check input gate - * @param checkF check forget gate - * @param checkO check output gate - * @param checkIGrad check input gate grad - * @param checkFGrad check forget gate grad - * @param checkOGrad check output gate grad - * @param actInput backward function of input - * @param actGate backward function of gate - * @param actState backward function of state - */ - INLINE void operator()(real &valueIn, - real &valueIg, - real &valueFg, - real &valueOg, - real &gradIn, - real &gradIg, - real &gradFg, - real &gradOg, - real &prevState, - real &prevStateGrad, - real &state, - real &stateGrad, - real &stateAtv, - real &outputGrad, - real &checkI, - real &checkF, - real &checkO, - real &checkIGrad, - real &checkFGrad, - real &checkOGrad, - Active::backward actInput, - Active::backward actGate, - Active::backward actState) { - gradOg = actGate(outputGrad * stateAtv, valueOg); - stateGrad += actState(outputGrad * valueOg, stateAtv) + gradOg * checkO; - gradIn = actInput(stateGrad * valueIg, valueIn); - gradIg = actGate(stateGrad * valueIn, valueIg); - gradFg = actGate(stateGrad * prevState, valueFg); - prevStateGrad = gradIg * checkI + gradFg * checkF + stateGrad * valueFg; - checkIGrad 
= gradIg * prevState; - checkFGrad = gradFg * prevState; - checkOGrad = gradOg * state; - } -#ifndef __NVCC__ -#ifndef __AVX__ - static const bool avx = false; -#else - static const bool avx = true; - INLINE void operator()(__m256 &valueIn, - __m256 &valueIg, - __m256 &valueFg, - __m256 &valueOg, - __m256 &gradIn, - __m256 &gradIg, - __m256 &gradFg, - __m256 &gradOg, - __m256 &prevState, - __m256 &prevStateGrad, - __m256 &state, - __m256 &stateGrad, - __m256 &stateAtv, - __m256 &outputGrad, - __m256 &checkI, - __m256 &checkF, - __m256 &checkO, - __m256 &checkIGrad, - __m256 &checkFGrad, - __m256 &checkOGrad, - Active<__m256>::backward actInput, - Active<__m256>::backward actGate, - Active<__m256>::backward actState) { - gradOg = actGate(_mm256_mul_ps(outputGrad, stateAtv), valueOg); - stateGrad = _mm256_add_ps( - actState(_mm256_mul_ps(outputGrad, valueOg), stateAtv), stateGrad); - stateGrad = _mm256_add_ps(_mm256_mul_ps(gradOg, checkO), stateGrad); - gradIn = actInput(_mm256_mul_ps(stateGrad, valueIg), valueIn); - gradIg = actGate(_mm256_mul_ps(stateGrad, valueIn), valueIg); - gradFg = actGate(_mm256_mul_ps(stateGrad, prevState), valueFg); - prevStateGrad = _mm256_add_ps( - _mm256_mul_ps(gradIg, checkI), _mm256_mul_ps(gradFg, checkF)); - prevStateGrad = _mm256_add_ps( - _mm256_mul_ps(stateGrad, valueFg), prevStateGrad); - checkIGrad = _mm256_mul_ps(gradIg, prevState); - checkFGrad = _mm256_mul_ps(gradFg, prevState); - checkOGrad = _mm256_mul_ps(gradOg, state); - } -#endif -#endif -}; -} // namespace backward -} // namespace hppl - -#endif /* HL_LSTM_OPS_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_matrix.h b/paddle/legacy/cuda/include/hl_matrix.h deleted file mode 100644 index 88d538343f9164d7bc780cfd458586c8a553590b..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_matrix.h +++ /dev/null @@ -1,311 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_MATRIX_H_ -#define HL_MATRIX_H_ - -#include "hl_base.h" - -/** - * @brief Matrix addition: C_d[i] = alpha * A_d[i] + beta * B_d[i]. - * - * @param[in] A_d input matrix (M x N). - * @param[in] B_d input matrix (M x N). - * @param[out] C_d output matrix (M x N). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] alpha scalar used for addition. - * @param[in] beta scalar used for addition. - * - */ -extern void hl_matrix_add( - real* A_d, real* B_d, real* C_d, int dimM, int dimN, real alpha, real beta); -/** - * @brief Matrix Softmax. - * - * @param[in] A_d input maxtrix (M x N). - * @param[out] C_d output matrix (M x N). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_softmax(real* A_d, real* C_d, int dimM, int dimN); - -/** - * @brief Matrix softmax derivative. - * - * @param[out] grad_d intput matrix (M x N). - * @param[in] output_d output matrix (M x N). - * @param[in] sftmaxSum_d softmax sum (M * 1). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_softmax_derivative( - real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN); - -/** - * @brief Sequence softmax. - * - * @param[in] A_d input vector. - * @param[out] C_d output vector. - * @param[in] index start positions of sequence. - * @param[in] numSequence sequence number. 
- * - */ -extern void hl_sequence_softmax_forward(real* A_d, - real* C_d, - const int* index, - int numSequence); - -/** - * @brief Matrix cross entropy. - * - * @param[in] A_d input matrix (M x N). - * @param[out] C_d output matrix (M X 1). - * @param[in] label_d input matrix (M x 1). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_cross_entropy( - real* A_d, real* C_d, int* label_d, int dimM, int dimN); - -/** - * @brief Matrix cross entropy back propagation. - * - * @param[out] grad_d output matrix (M x N). - * @param[in] output_d input matrix (M x N). - * @param[in] label_d input vector (M x 1). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_cross_entropy_bp( - real* grad_d, real* output_d, int* label_d, int dimM, int dimN); - -/** - * @brief Matrix multi-binary label cross entropy - * - * @param[in] output input matrix (M x N). - * @param[out] entropy output matrix (M x 1). - * @param[in] mat input sparse matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - */ -extern void hl_matrix_multi_binary_cross_entropy( - real* output, real* entropy, hl_sparse_matrix_s mat, int dimM, int dimN); - -/** - * @brief Matrix multi-binary label cross entropy backprop - * - * @param[in] output input matrix (M x N). - * @param[out] grad output matrix (M x N). - * @param[in] mat input sparse matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - */ -extern void hl_matrix_multi_binary_cross_entropy_bp( - real* output, real* grad, hl_sparse_matrix_s mat, int dimM, int dimN); - -/** - * @brief Matrix zero memory. - * - * @param[in,out] data input data. - * @param[in] num length of data. 
- * - */ -extern void hl_matrix_zero_mem(real* data, int num); - -/** - * @brief parameter relu forward - * - * @param[out] output output data - * @param[in] input input data - * @param[in] w parameter data - * @param[in] width matrix width - * @param[in] height matrix height - * @param[in] partial_sum - */ - -extern void hl_param_relu_forward( - real* output, real* input, real* w, int width, int height, int partial_sum); -/** - * @brief parameter relu backward w - * - * @param[out] grad_w w grad - * @param[in] grad_o output grad - * @param[in] input input data - * @param[in] width matrix width - * @param[in] height matrix height - * @param[in] partial_sum - */ -extern void hl_param_relu_backward_w(real* grad_w, - real* grad_o, - real* input, - int width, - int height, - int partial_sum); -/** - * @brief parameter relu backward diff - * - * @param[in] grad_o output grad - * @param[in] input input data - * @param[in] w parameter - * @param[out] diff diff - * @param[in] width matrix width - * @param[in] height matrix height - * @param[in] partial_sum - */ -extern void hl_param_relu_backward_diff(real* grad_o, - real* input, - real* w, - real* diff, - int width, - int height, - int partial_sum); - -/** - * @brief Matrix addition: A_d[i][j] += scale * B_d[j/channel]. - * - * @param[in] A_d input matrix (M x N). - * @param[in] B_d input matrix (1 x channel). - * @param[in] channel width of B. - * @param[in] dimM height of A. - * @param[in] dimN width of A. - * @param[in] scale scalar used for addition. - * - */ -extern void hl_matrix_add_shared_bias(real* A_d, - real* B_d, - const int channel, - const int dimM, - const int dimN, - real scale); - -/** - * @brief Matrix addition: A_d[i][j] += scale * B_d[j/channel]. - * - * @param[in] B_d input matrix (1 x channel). - * @param[in] A_d input matrix (M x N). - * @param[in] channel width of B. - * @param[in] dimM height of A. - * @param[in] dimN width of A. - * @param[in] scale scalar used for addition. 
- * - */ -extern void hl_matrix_collect_shared_bias(real* B_d, - real* A_d, - const int channel, - const int dimM, - const int dimN, - real scale); - -/** - * @brief Matrix rotation in 90 degrees - * - * @param[in] mat input matrix (M x N). - * @param[out] matRot output matrix (N x M). - * @param[in] dimM input matrix height. - * @param[in] dimN input matrix width. - * @param[in] clockWise rotation direction - */ -extern void hl_matrix_rotate( - real* mat, real* matRot, int dimM, int dimN, bool clockWise); - -/** - * @brief Matrix vol2Col: Convert 3D volume into col matrix - * - * @param[in] matSrc input matrix. - * @param[in] channel channel of matSrc. - * @param[in] depth depth of matSrc. - * @param[in] height height of matSrc. - * @param[in] width width of matSrc. - * @param[in] filterD depth of filter. - * @param[in] filterH height of filter. - * @param[in] filterW width of filter. - * @param[in] strideD stride in the depth. - * @param[in] strideH stride in the height. - * @param[in] strideW stride in the width. - * @param[in] paddingD padding in the depth. - * @param[in] paddingH padding in the height. - * @param[in] paddingW padding in the width. - * @param[out] dataDst output matrix. - * - */ -extern void hl_matrix_vol2Col(const real* dataSrc, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real* dataDst); - -/** - * @brief Matrix col2Vol: Convert col matrix into 3D volume - * - * @param[out] matDst output matrix. - * @param[in] channel channel of matDst. - * @param[in] depth depth of matDst. - * @param[in] height height of matDst. - * @param[in] width width of matDst. - * @param[in] filterD depth of filter. - * @param[in] filterH height of filter. - * @param[in] filterW width of filter. - * @param[in] strideD stride in the depth. - * @param[in] strideH stride in the height. 
- * @param[in] strideW stride in the width. - * @param[in] paddingD padding in the depth. - * @param[in] paddingH padding in the height. - * @param[in] paddingW padding in the width. - * @param[in] matSrc input matrix. - * @param[in] beta input - * @param[in] alpha input - * - */ -extern void hl_matrix_col2Vol(real* dataDst, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - const real* dataSrc, - real alpha, - real beta); - -/** - * @brief Matrix col2Vol: Convert col matrix into 3D volume - * @param[out] out output int vector. - * @param[in] vec input float vector. - * @param[in] size size of the vector. - */ -extern void hl_vector_cast2int(int* out, real* vec, int size); - -#endif /* HL_MATRIX_H_ */ diff --git a/paddle/legacy/cuda/include/hl_matrix_apply.cuh b/paddle/legacy/cuda/include/hl_matrix_apply.cuh deleted file mode 100644 index a067c8233b9b1b7f76ab766ebd467c480c0a88b7..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_matrix_apply.cuh +++ /dev/null @@ -1,423 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_MATRIX_APPLY_H_ -#define HL_MATRIX_APPLY_H_ - -#include "hl_base.h" -#include "hl_cpu_matrix_kernel.cuh" -#include "hl_gpu_matrix_kernel.cuh" - -/** - * @brief CPU element wise unary operator. 
- * - * element wise op(a) for 0 <= i < dimM & for 0 <= j < dimN. - * - * @param[in] op unary op. see namespace unary - * @param[in,out] A_h matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] lda leading dimension of A. - * - */ -template -extern void hl_cpu_apply_unary_op(Op op, - T* A_h, - int dimM, - int dimN, - int lda); - -/** - * @brief CPU element wise binary operator. - * - * element wise op(a, b) for 0 <= i < dimM & for 0 <= j < dimN. - * - * if (BAsRowVector == 0 && BAsColVector == 0) - * op(A[i * lda + j], B[i * ldb + j]) - * - * if (BAsRowVector == 1 && BAsColVector == 0) - * op(A[i * lda + j], B[j]) - * - * if (BAsRowVector == 0 && BAsColVector == 1) - * op(A[i * lda + j], B[i * ldb]) - * - * if (BAsRowVector == 1 && BAsColVector == 1) - * op(A[i * lda + j], B[0]) - * - * @param[in] op binary op. see namespace binary. - * @param[in,out] A_h matrix. - * @param[in,out] B_h matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] lda leading dimension of A. - * @param[in] ldb leading dimension of B. - * - */ -template -extern void hl_cpu_apply_binary_op(Op op, - T* A_h, - T* B_h, - int dimM, - int dimN, - int lda, - int ldb); - -/** - * @brief CPU element wise ternary operator. - * - * element wise op(a, b, c) for 0 <= i < dimM & for 0 <= j < dimN. - * - * if (CAsRowVector == 0 && CAsColVector == 0) - * op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j]) - * - * if (CAsRowVector == 1 && CAsColVector == 0) - * op(A[i*lda + j], B[i*ldb + j], C[j]) - * - * if (CAsRowVector == 0 && CAsColVector == 1) - * op(A[i*lda + j], B[i*ldb + j], C[i*ldc]) - * - * if (CAsRowVector == 1 && CAsColVector == 1) - * op(A[i*lda + j], B[i*ldb + j], C[0]) - * - * @param[in] op ternary op. see namespace ternary. - * @param[in,out] A_h matrix. - * @param[in,out] B_h matrix. - * @param[in,out] C_h matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. 
- * @param[in] lda leading dimension of A. - * @param[in] ldb leading dimension of B. - * @param[in] ldc leading dimension of C. - * - */ -template -extern void hl_cpu_apply_ternary_op(Op op, - T* A_h, - T* B_h, - T* C_h, - int dimM, - int dimN, - int lda, - int ldb, - int ldc); - -/** - * @brief CPU element wise quaternary operator. - * element wise op(a, b, c, d) for 0 <= i < dimM & for 0 <= j < dimN. - * - * @param[in] op quaternary op. see namespace ternary. - * @param[in,out] A_h matrix. - * @param[in,out] B_h matrix. - * @param[in,out] C_h matrix. - * @param[in,out] D_h matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] lda leading dimension of A. - * @param[in] ldb leading dimension of B. - * @param[in] ldc leading dimension of C. - * @param[in] ldd leading dimension of D. - * - */ -template -extern void hl_cpu_apply_quaternary_op(Op op, - T* A_h, - T* B_h, - T* C_h, - T* D_h, - int dimM, - int dimN, - int lda, - int ldb, - int ldc, - int ldd); - -/** - * @brief GPU element wise unary operator. - * element wise op(a) for 0 <= i < dimM & for 0 <= j < dimN. - * - * @param[in] op unary op. see namespace unary. - * @param[in,out] A_d matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] lda leading dimension of A. - * - */ -template -extern void hl_gpu_apply_unary_op(Op op, - T* A_d, - int dimM, - int dimN, - int lda); - -/** - * @brief GPU element wise binary operator. - * - * element wise op(a, b) for 0 <= i < dimM & for 0 <= j < dimN - * - * if (BAsRowVector == 0 && BAsColVector == 0) - * op(A[i * lda + j], B[i * ldb + j]) - * - * if (BAsRowVector == 1 && BAsColVector == 0) - * op(A[i * lda + j], B[j]) - * - * if (BAsRowVector == 0 && BAsColVector == 1) - * op(A[i * lda + j], B[i * ldb]) - * - * if (BAsRowVector == 1 && BAsColVector == 1) - * op(A[i * lda + j], B[0]) - * - * @param[in] op binary op. see namespace binary. - * @param[in,out] A_d matrix. - * @param[in,out] B_d matrix. 
- * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] lda leading dimension of A. - * @param[in] ldb leading dimension of B. - * - */ -template -extern void hl_gpu_apply_binary_op(Op op, - T* A_d, - T* B_d, - int dimM, - int dimN, - int lda, - int ldb); -/** - * @brief GPU element wise ternary operator. - * - * element wise op(a, b, c) for 0 <= i < dimM & for 0 <= j < dimN. - * - * if (CAsRowVector == 0 && CAsColVector == 0) - * op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j]) - * - * if (CAsRowVector == 1 && CAsColVector == 0) - * op(A[i*lda + j], B[i*ldb + j], C[j]) - * - * if (CAsRowVector == 0 && CAsColVector == 1) - * op(A[i*lda + j], B[i*ldb + j], C[i*ldc]) - * - * if (CAsRowVector == 1 && CAsColVector == 1) - * op(A[i*lda + j], B[i*ldb + j], C[0]) - * - * @param[in] op ternary op. see namespace ternary. - * @param[in,out] A_d matrix. - * @param[in,out] B_d matrix. - * @param[in,out] C_d matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] lda leading dimension of A. - * @param[in] ldb leading dimension of B. - * @param[in] ldc leading dimension of C. - * - */ -template -extern void hl_gpu_apply_ternary_op(Op op, - T* A_d, - T* B_d, - T* C_d, - int dimM, - int dimN, - int lda, - int ldb, - int ldc); - - -/** - * @brief GPU element wise quaternary operator. - * element wise op(a, b, c, d) for 0 <= i < dimM & for 0 <= j < dimN. - * - * @param[in] op quaternary op. see namespace ternary. - * @param[in,out] A_d matrix. - * @param[in,out] B_d matrix. - * @param[in,out] C_d matrix. - * @param[in,out] D_d matrix. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[in] lda leading dimension of A. - * @param[in] ldb leading dimension of B. - * @param[in] ldc leading dimension of C. - * @param[in] ldd leading dimension of D. 
- * - */ -template -extern void hl_gpu_apply_quaternary_op(Op op, - T* A_d, - T* B_d, - T* C_d, - T* D_d, - int dimM, - int dimN, - int lda, - int ldb, - int ldc, - int ldd); - -/** - * @brief CPU matrix row operator. - */ -template -extern void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda); - -/** - * @brief CPU matrix row operator. - * - * @param[in] agg aggregate operator expression. - * @param[in] op operator expression. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[out] dst destination matrix. - * @param[in] ld leading dimension of dst matrix. - * @param[in] *A matrix A. - * @param[in] lda leading dimension of matrix A. - * @param[in] *B matrix B. - * @param[in] ldb leading dimension of matrix B. - * - */ -template -extern void hl_cpu_matrix_row_op(Agg agg, Op op, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb); - -/** - * @brief CPU matrix column operator. - * - * @param[in] agg aggregate operator expression. - * @param[in] op operator expression. - * @param[in] sv assignment operator expression. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[out] dst destination matrix. - * @param[in] *A matrix A. - * @param[in] lda leading dimension of matrix A. - * - */ -template -extern void hl_cpu_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda); - -/** - * @brief CPU matrix column operator. - * - * @param[in] agg aggregate operator expression. - * @param[in] op operator expression. - * @param[in] sv assignment operator expression. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[out] dst destination matrix. - * @param[in] *A matrix A. - * @param[in] lda leading dimension of matrix A. - * @param[in] *B matrix B. - * @param[in] ldb leading dimension of matrix B. 
- * - */ -template -extern void hl_cpu_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb); - -/** - * @brief GPU matrix row operator. - * - * @param[in] agg aggregate operator expression. - * @param[in] op operator expression. - * @param[in] sv assignment operator expression. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[out] dst destination matrix. - * @param[in] ld leading dimension of dst. - * @param[in] *A matrix A. - * @param[in] lda leading dimension of matrix A. - * - */ -template -extern void hl_gpu_matrix_row_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda); - -/** - * @brief GPU matrix row operator. - * - * @param[in] agg aggregate operator expression. - * @param[in] op operator expression. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[out] dst destination matrix. - * @param[in] ld leading dimension of dst matrix. - * @param[in] *A matrix A. - * @param[in] lda leading dimension of matrix A. - * @param[in] *B matrix B. - * @param[in] ldb leading dimension of matrix B. - * - */ -template -extern void hl_gpu_matrix_row_op(Agg agg, Op op, - int dimM, int dimN, - real *dst, int ld, - real *A, int lda, - real *B, int ldb); - -/** - * @brief GPU matrix column operator. - * - * @param[in] agg aggregate operator expression. - * @param[in] op operator expression. - * @param[in] sv assignment operator expression. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[out] dst destination matrix. - * @param[in] *A matrix A. - * @param[in] lda leading dimension of matrix A. - * - */ -template -extern void hl_gpu_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda); - -/** - * @brief GPU matrix column operator. - * - * @param[in] agg aggregate operator expression. - * @param[in] op operator expression. 
- * @param[in] sv assignment operator expression. - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * @param[out] dst destination matrix. - * @param[in] *A matrix A. - * @param[in] lda leading dimension of matrix A. - * @param[in] *B matrix B. - * @param[in] ldb leading dimension of matrix B. - * - */ -template -extern void hl_gpu_matrix_column_op(Agg agg, Op op, Saver sv, - int dimM, int dimN, - real *dst, - real *A, int lda, - real *B, int ldb); - -#endif /* HL_MATRIX_APPLY_H_ */ diff --git a/paddle/legacy/cuda/include/hl_matrix_base.cuh b/paddle/legacy/cuda/include/hl_matrix_base.cuh deleted file mode 100644 index a309bb0011c00655ff1d9b9c6276898b3de369db..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_matrix_base.cuh +++ /dev/null @@ -1,164 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_MATRIX_BASE_CUH_ -#define HL_MATRIX_BASE_CUH_ - -#include "hl_matrix_type.cuh" - -class BaseOp { -public: - static const bool sse = false; - BaseOp() {} - explicit BaseOp(const real s1) {} - explicit BaseOp(const real s1, const real s2) {} - INLINE vecType vecOp(const vecType a) const { - return a; - } - INLINE vecType vecOp(const vecType a, const vecType b) const { - return a; - } -}; - -#ifdef __CUDA_ARCH__ -typedef BaseOp SSESum; -typedef BaseOp SSEMax; -typedef BaseOp SSEMin; -typedef BaseOp SSEIdentity; -typedef BaseOp SSEAdd; -typedef BaseOp SSEAdd2; -typedef BaseOp SSESub; -typedef BaseOp SSEMul; -typedef BaseOp SSEDiv; -typedef BaseOp SSESquaredDiff; -typedef BaseOp SSEFirst; -typedef BaseOp SSESecond; -typedef BaseOp SSEClassificationError; -#else -#include "hl_matrix_base_detail.cuh" -#endif - -namespace aggregate { -class sum : public SSESum { -public: - INLINE real init() { return 0.0f; } - INLINE real operator()(const real a, const real b) const { - return a + b; - } -}; - -class max : public SSEMax { -public: - INLINE real init() { return -HL_FLOAT_MAX; } - INLINE real operator()(const real a, const real b) const { - return a > b ? a : b; - } -}; - -class min : public SSEMin { -public: - INLINE real init() {return HL_FLOAT_MAX;} - INLINE real operator()(const real a, const real b) const { - return a > b ? 
b : a; - } -}; -} // namespace aggregate - -namespace base { -namespace unary { -class identity : public SSEIdentity { -public: - INLINE real operator()(const real a) const { - return a; - } -}; -} // namespace unary - -namespace binary { -class add : public SSEAdd { -public: - INLINE real operator()(const real a, const real b) const { - return a + b; - } -}; - -class add2 : public SSEAdd2 { -private: - const real p1; - const real p2; -public: - add2(const real s1, const real s2) - : SSEAdd2(s1, s2), p1(s1), p2(s2) {} - INLINE real operator()(const real a, const real b) const { - return p1 * a + p2 * b; - } -}; - -class sub : public SSESub { -public: - INLINE real operator()(const real a, const real b) const { - return a - b; - } -}; - -class mul : public SSEMul { -public: - INLINE real operator()(const real a, const real b) const { - return a * b; - } -}; - -class div : public SSEDiv { -public: - INLINE real operator()(const real a, const real b) const { - return a / b; - } -}; - -class squaredDiff : public SSESquaredDiff { -public: - INLINE real operator()(const real a, const real b) const { - return (a - b) * (a - b); - } -}; - -class first : public SSEFirst { -public: - INLINE real operator()(const real a, const real b) const { - return a; - } -}; - -class second : public SSESecond { -public: - INLINE real operator()(const real a, const real b) const { - return b; - } -}; - -class classificationError : public SSEClassificationError { -private: - const real p; -public: - explicit classificationError(const real s) - : SSEClassificationError(s), p(s) {} - INLINE real operator()(const real a, const real b) const { - return ((a > p) == (b > p)) ? 
0.0f : 1.0f; - } -}; -} // namespace binary -} // namespace base - -#endif /* HL_MATRIX_BASE_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_matrix_base_detail.cuh b/paddle/legacy/cuda/include/hl_matrix_base_detail.cuh deleted file mode 100644 index 74211bcb929839f1ba6a7bf117dd3f31b7bc1bed..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_matrix_base_detail.cuh +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef HL_MATRIX_BASE_DETAIL_CUH_ -#define HL_MATRIX_BASE_DETAIL_CUH_ - -#include "hl_matrix_type.cuh" -#include "hl_tensor_ops.h" - -namespace aggregate { -class SSESum { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hppl::binary::add()(a, b); - } -}; - -class SSEMax { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hppl::binary::max()(a, b); - } -}; - -class SSEMin { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hppl::binary::min()(a, b); - } -}; -} // namespace aggregate - -namespace base { -namespace unary { -class SSEIdentity { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a) const { - return a; - } -}; -} // namespace unary - -namespace binary { -class SSEAdd { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hppl::binary::add()(a, b); - } -}; - -class SSEAdd2 { -public: - static const bool sse = VECTOR_SIMD; - const real p1; - const real p2; - vecType mp1; - vecType mp2; - -public: - SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) { - mp1 = hl_vec_set(p1); - mp2 = hl_vec_set(p2); - } - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hppl::binary::add_scale(mp1, mp2)(a, b); - } -}; - -class SSESub { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hppl::binary::sub()(a, b); - } -}; - -class SSEMul { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hppl::binary::mul()(a, b); - } -}; - -class SSEDiv { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hppl::binary::div()(a, b); - 
} -}; - -class SSESquaredDiff { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - vecType tmp = hppl::binary::sub()(a, b); - return hppl::binary::mul()(tmp, tmp); - } -}; - -class SSEFirst { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return a; - } -}; - -class SSESecond { -public: - static const bool sse = VECTOR_SIMD; - INLINE vecType vecOp(const vecType a, const vecType b) const { - return b; - } -}; - -class SSEClassificationError { -public: - static const bool sse = VECTOR_SIMD; - const real p; - vecType mp; - vecType result; - -public: - explicit SSEClassificationError(const real s) : p(s) { - mp = hl_vec_set(p); - result = hl_vec_set(1.0f); - } - INLINE vecType vecOp(const vecType a, const vecType b) const { - return hl_vec_classification_error(a, b, mp, result); - } -}; -} // namespace binary -} // namespace base - -#endif /* HL_MATRIX_BASE_DETAIL_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_matrix_ops.cuh b/paddle/legacy/cuda/include/hl_matrix_ops.cuh deleted file mode 100644 index 4e8bd91234958e6e94bdde633fb79ef10715c9a6..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_matrix_ops.cuh +++ /dev/null @@ -1,253 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - - -#ifndef HL_MATRIX_OPS_CUH_ -#define HL_MATRIX_OPS_CUH_ - -#include "hl_base.h" - -#ifdef __NVCC__ -#define HL_DEVICE __device__ -#else -#define HL_DEVICE -#endif - -/** - * @brief parameter macro. - */ -#define ONE_PARAMETER(name) \ - private: \ - const T p;\ - public: \ - name(const T s) : p(s) {} - -#define TWO_PARAMETER(name) \ - private: \ - const T p1;\ - const T p2;\ - public: \ - name(const T s1, T s2) : p1(s1), p2(s2) {} - -#define THREE_PARAMETER(name) \ - private: \ - const T p1;\ - const T p2;\ - const T p3;\ - public: \ - name(const T s1, T s2, T s3) : p1(s1), p2(s2), p3(s3) {} - -#define FOUR_PARAMETER(name) \ - private: \ - const T p1;\ - const T p2;\ - const T p3;\ - const T p4;\ - public: \ - name(const T s1, T s2, T s3, T s4) : p1(s1), p2(s2), p3(s3), p4(s4) {} - -/** - * @brief unary operator macro. - * - * @param name operator name. - * @param op operator expression. - * - * @note op format: op supports multiple expressions that are separated - * by a comma. e.g. a, b - * - * @see hl_gpu_apply_unary_op - * @see hl_cpu_apply_unary_op - */ -#define DEFINE_MATRIX_UNARY_OP(name, op) \ - namespace unary {\ - template\ - class name {\ - public:\ - HL_DEVICE inline void gpuOperator(T &a) {op;}\ - inline void cpuOperator(T &a) {op;}\ - };\ - } - - -/** - * @brief unary operator macro. - * - * @param name operator name. - * @param PARA_MACRO parameter macro. - * @param op operator expression. - * - * @note op format: op supports multiple expressions that are separated - * by a comma. e.g. a, b - * - * @see hl_gpu_apply_unary_op - * @see hl_cpu_apply_unary_op - */ -#define DEFINE_MATRIX_UNARY_PARAMETER_OP(name, PARA_MACRO, op) \ - namespace unary {\ - template\ - class name {\ - PARA_MACRO(name)\ - public:\ - HL_DEVICE inline void gpuOperator(T &a) {op;}\ - inline void cpuOperator(T &a) {op;}\ - };\ - } - - -/** - * @brief binary operator macro. - * - * @param name operator name. - * @param op operator expression. 
- * - * @note op format: op supports multiple expressions that are separated - * by a comma. e.g. a, b - * - * @see hl_gpu_apply_unary_op - * @see hl_cpu_apply_unary_op - */ -#define DEFINE_MATRIX_BINARY_OP(name, op) \ - namespace binary {\ - template\ - class name {\ - public:\ - HL_DEVICE inline void gpuOperator(T &a, T &b) {op;}\ - inline void cpuOperator(T &a, T &b) {op;}\ - };\ - } - - -/** - * @brief binary operator macro. - * - * @param name operator name. - * @param PARA_MACRO parameter macro. - * @param op operator expression. - * - * @note op format: op supports multiple expressions that are separated - * by a comma. e.g. a, b - * - * @see hl_gpu_apply_binary_op - * @see hl_cpu_apply_binary_op - */ -#define DEFINE_MATRIX_BINARY_PARAMETER_OP(name, PARA_MACRO, op) \ - namespace binary {\ - template\ - class name {\ - PARA_MACRO(name)\ - public:\ - HL_DEVICE inline void gpuOperator(T &a, T &b) {op;}\ - inline void cpuOperator(T &a, T &b) {op;}\ - };\ - } - - -/** - * @brief ternary operator macro. - * - * @param name operator name. - * @param op operator expression. - * - * @note op format: op supports multiple expressions that are separated - * by a comma. e.g. a, b, c - * - * @see hl_gpu_apply_ternary_op - * @see hl_cpu_apply_ternary_op - */ -#define DEFINE_MATRIX_TERNARY_OP(name, op) \ - namespace ternary {\ - template\ - class name {\ - public:\ - HL_DEVICE inline void gpuOperator(T &a, T &b, T &c) {op;}\ - inline void cpuOperator(T &a, T &b, T &c) {op;}\ - };\ - } - - -/** - * @brief ternary operator macro. - * - * @param name operator name. - * @param PARA_MACRO parameter macro. - * @param op operator expression. - * - * @note op format: op supports multiple expressions that are separated - * by a comma. e.g. 
a, b, c - * - * @see hl_gpu_apply_ternary_op - * @see hl_cpu_apply_ternary_op - */ -#define DEFINE_MATRIX_TERNARY_PARAMETER_OP(name, PARA_MACRO, op) \ - namespace ternary {\ - template\ - class name {\ - private:\ - PARA_MACRO(name)\ - public:\ - HL_DEVICE inline void gpuOperator(T &a, T &b, T &c) {op;}\ - inline void cpuOperator(T &a, T &b, T &c) {op;}\ - };\ - } - - -/** - * @brief quaternary operator macro. - * - * @param name operator name. - * @param op operator expression. - * - * @note op format: op supports multiple expressions that are separated - * by a comma. e.g. a, b, c, d - * - * @see hl_gpu_apply_quaternary_op - * @see hl_cpu_apply_quaternary_op - */ -#define DEFINE_MATRIX_QUATERNARY_OP(name, op) \ - namespace quaternary {\ - template\ - class name {\ - public:\ - HL_DEVICE inline void gpuOperator(T &a, T &b, T &c, T &d) {op;}\ - inline void cpuOperator(T&a, T &b, T &c, T &d) {op;}\ - };\ - } - - -/** - * @brief quaternary operator macro. - * - * @param name operator name. - * @param PARA_MACRO parameter macro. - * @param op operator expression. - * - * @note op format: op supports multiple expressions that are separated - * by a comma. e.g. a, b, c, d - * - * @see hl_gpu_apply_quaternary_op - * @see hl_cpu_apply_quaternary_op - */ -#define DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(name, PARA_MACRO, op) \ - namespace quaternary {\ - template\ - class name {\ - private:\ - PARA_MACRO(name)\ - public:\ - HL_DEVICE inline void gpuOperator(T &a, T &b, T &c, T &d) {op;}\ - inline void cpuOperator(T &a, T &b, T &c, T &d) {op;}\ - };\ - } - -#endif /* HL_MATRIX_OPS_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_matrix_type.cuh b/paddle/legacy/cuda/include/hl_matrix_type.cuh deleted file mode 100644 index e61c0d0a47900c7c1820f6b32690b6cdf7d2f13e..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_matrix_type.cuh +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_MATRIX_TYPE_CUH_ -#define HL_MATRIX_TYPE_CUH_ - -#include "hl_base.h" - -#ifdef __CUDA_ARCH__ -/** - * CUDA kernel inline function - */ -#define INLINE __device__ inline -#else -/** - * CPP inline function - */ -#define INLINE inline -#endif - -#ifdef __CUDA_ARCH__ -#include -#ifndef PADDLE_TYPE_DOUBLE -typedef float4 vecType; -#else -typedef double2 vecType; -#endif -#elif defined(__SSE3__) -#include "hl_cpu_simd_sse.cuh" -#define PADDLE_USE_SSE3 -#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__) -// Currently nvcc does not support neon intrinsic. -// TODO: Extract simd intrinsic implementation from .cu files. -#include "hl_cpu_simd_neon.cuh" -#define PADDLE_USE_NEON -#else -#include "hl_cpu_scalar.cuh" -#endif - -#endif // HL_MATRIX_TYPE_CUH_ diff --git a/paddle/legacy/cuda/include/hl_perturbation_util.cuh b/paddle/legacy/cuda/include/hl_perturbation_util.cuh deleted file mode 100644 index e0a27778caea61fb56737e330d8faad7e58b4926..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_perturbation_util.cuh +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef DISTRUB_UTIL_CUH_ -#define DISTRUB_UTIL_CUH_ - -#include "hl_base.h" - -/* - * Functionality: randomly rotate, scale and sample a minibatch of images - and their label maps - * images: (numImages, imgPixels, 3) - * targets: (numImages, imgPixels, 3) - * - * created by Wei Xu. Converted to paddle by Jiang Wang. - */ -void hl_conv_random_disturb(const real* images, int imgSize, int tgtSize, - int channels, int numImages, real scaleRatio, - real rotateAngle, int samplingRate, - real* gpu_r_angle, real* gpu_s_ratio, - int* gpu_center_r, int* gpu_center_c, - int paddingValue, bool isTrain, real* targets); - -void hl_conv_random_disturb_with_params(const real* images, int imgSize, - int tgtSize, int channels, - int numImages, int samplingRate, - const real* gpuRotationAngle, - const real* gpuScaleRatio, - const int* gpuCenterR, - const int* gpuCenterC, - int paddingValue, real* targets); - -void hl_generate_disturb_params(real*& gpuAngle, real*& gpuScaleRatio, - int*& gpuCenterR, int*& gpuCenterC, - int numImages, int imgSize, - real rotateAngle, real scaleRatio, - int samplingRate, bool isTrain); - -#endif /* DISTURB_UTIL_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_recurrent_apply.cuh b/paddle/legacy/cuda/include/hl_recurrent_apply.cuh deleted file mode 100644 index b2cc231f58d2cc6b39247c31c02208f164cad16e..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_recurrent_apply.cuh +++ /dev/null @@ -1,192 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_RECURRENT_APPLY_CUH_ -#define HL_RECURRENT_APPLY_CUH_ - -#include "hl_base.h" -#include "hl_activation_functions.h" -#include "hl_lstm_ops.cuh" -#include "hl_gpu_lstm.cuh" -#include "hl_cpu_lstm.cuh" -#include "hl_gru_ops.cuh" -#include "hl_gpu_gru.cuh" -#include "hl_cpu_gru.cuh" - -/** - * @brief Cpu lstm forward one sequence. - * - * @param[in] op hl_lstm_ops.cuh - * @param[out] value hl_lstm_value type. - * @param[in] frameSize frame size. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - * @param[in] active_state actvie gate type. - */ -template -extern void hl_cpu_lstm_forward(Op op, - hl_lstm_value value, - int frameSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state); - -/** - * @brief Cpu lstm backward one sequence. - * - * @param[in] op hl_lstm_ops.cuh - * @param[in] value lstm value. - * @param[out] grad output gradient. - * @param[in] frameSize frame size. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - * @param[in] active_state actvie gate type. - */ -template -extern void hl_cpu_lstm_backward(Op op, - hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state); - -/** - * @brief Gpu lstm batch forward. 
- * - * @param[in] op hl_lstm_ops.cuh - * @param[out] value lstm value. - * @param[in] frameSize frame size. - * @param[in] batchSize size of current batch. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - * @param[in] active_state actvie gate type. - */ -template -extern void hl_gpu_lstm_forward(Op op, - hl_lstm_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state); - -/** - * @brief Gpu lstm batch backward. - * - * @param[in] op hl_lstm_ops.cuh - * @param[out] value lstm value. - * @param[out] grad lstm gradient. - * @param[in] frameSize frame size. - * @param[in] batchSize size of current batch. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - * @param[in] active_state actvie gate type. - */ -template -extern void hl_gpu_lstm_backward(Op op, - hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state); - -/** - * @brief Cpu gru forward. - * - * @param[in] opResetOutput hl_gru_ops.cuh - * @param[in] opFinalOutput hl_gru_ops.cuh - * @param[in,out] value gru value. - * @param[in] frameSize frame length/size. - * @param[in] batchSize size of current batch. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - */ -template -extern void hl_cpu_gru_forward(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate); - -/** - * @brief Cpu gru forward. - * - * @param[in] opStateGrad hl_gru_ops.cuh - * @param[in] opResetGrad hl_gru_ops.cuh - * @param[in] value gru value. - * @param[in,out] grad gru gradient. - * @param[in] frameSize frame length/size. 
- * @param[in] batchSize size of current batch. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - */ -template -extern void hl_cpu_gru_backward(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate); - -/** - * @brief Gpu gru forward. - * - * @param[in] opResetOutput hl_gru_ops.cuh - * @param[in] opFinalOutput hl_gru_ops.cuh - * @param[in,out] value gru value. - * @param[in] frameSize frame length/size. - * @param[in] batchSize size of current batch. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - */ -template -extern void hl_gpu_gru_forward(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate); - -/** - * @brief Gpu gru forward. - * - * @param[in] opStateGrad hl_gru_ops.cuh - * @param[in] opResetGrad hl_gru_ops.cuh - * @param[in] value gru value. - * @param[in,out] grad gru gradient. - * @param[in] frameSize frame length/size. - * @param[in] batchSize size of current batch. - * @param[in] active_node active input type. - * @param[in] active_gate active state type. - */ -template -extern void hl_gpu_gru_backward(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate); - -#endif /* HL_RECURRENT_APPLY_CUH_ */ diff --git a/paddle/legacy/cuda/include/hl_sequence.h b/paddle/legacy/cuda/include/hl_sequence.h deleted file mode 100644 index 3923bdd921bae8baa604aa22611e19d7bd3b1e47..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_sequence.h +++ /dev/null @@ -1,168 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_SEQUENCE_H_ -#define HL_SEQUENCE_H_ - -#include "hl_base.h" - -/** - * @brief Maximum sequence forward. - * - * @param[in] input each sequence contains some instances. - * @param[in] sequence sequence index.. - * @param[out] output max instance in this sequence. - * @param[out] index index of max instance. - * @param[in] numSequences size of sequence[in]. - * @param[in] dim input dimension. - * - */ -extern void hl_max_sequence_forward(real* input, - const int* sequence, - real* output, - int* index, - int numSequences, - int dim); - -/** - * @brief Maximum sequence backward. - * - * @param[in] outputGrad output gradient. - * @param[in] index index of max instance. - * @param[out] inputGrad input gradient. - * @param[in] numSequences size of sequence[in]. - * @param[in] dim input dimension. - * - */ -extern void hl_max_sequence_backward( - real* outputGrad, int* index, real* inputGrad, int numSequences, int dim); - -/** - * @brief Memory copy from sequence to batch. - * - * if seq2batch == true - * - * copy from sequence to batch: batch[i] = sequence[batchIndex[i]]. - * - * if seq2batch == false - * - * copy from batch to sequence: sequence[batchIndex[i]] = batch[i]. - * - * @param[in,out] batch batch matrix. - * @param[in,out] sequence equence matrix. - * @param[in] batchIndex index vector. - * @param[in] seqWidth width of sequence. - * @param[in] batchCount number of batchIndex. - * @param[in] seq2batch copy direction. 
- * - */ -extern void hl_sequence2batch_copy(real* batch, - real* sequence, - const int* batchIndex, - int seqWidth, - int batchCount, - bool seq2batch); - -/** - * @brief Add sequence to batch. - * - * if seq2batch == true - * - * add sequence to batch: batch[i] = sequence[batchIndex[i]]. - * - * if seq2batch == false - * - * add batch to sequence: sequence[batchIndex[i]] = batch[i]. - * - * @param[in,out] batch batch matrix. - * @param[in,out] sequence equence matrix. - * @param[in] batchIndex index vector. - * @param[in] seqWidth width of sequence. - * @param[in] batchCount number of batchIndex. - * @param[in] seq2batch copy direction. - * - */ -extern void hl_sequence2batch_add(real* batch, - real* sequence, - int* batchIndex, - int seqWidth, - int batchCount, - bool seq2batch); - -/** - * @brief Memory copy from sequence to batch, - * while padding all sequences to the same length. - * - * if seq2batch == true - * - * copy from sequence to batch: - * batch[i] = sequence[sequenceStartPositions[i]] - * - * if seq2batch == false - * - * copy from batch to sequence: - * sequence[sequenceStartPositions[i]] = batch[i] - * - * @param[in,out] batch batch matrix. - * @param[in,out] sequence sequence matrix. - * @param[in] sequenceStartPositions index vector. - * @param[in] sequenceWidth width of sequence. - * @param[in] maxSequenceLength maximum length of sequences. - * @param[in] numSequences number of sequences. - * @param[in] normByTimes whether dividing sequence's length. - * @param[in] seq2batch copy direction. - * - */ -extern void hl_sequence2batch_copy_padding(real* batch, - real* sequence, - const int* sequenceStartPositions, - const size_t sequenceWidth, - const size_t maxSequenceLength, - const size_t numSequences, - bool normByTimes, - bool seq2batch); - -/** - * @brief dst = Op(src), src is sequence. - * - * mode = 0, Op is average. - * - * mode = 1, Op is sum. - * - * mode = 2, Op is sum(src)/sqrt(N), N is sequence length. 
- * - * @param[in,out] dst destination data. - * @param[in] src source data. - * @param[in] starts sequence start positions. - * @param[in] height height of dst data. - * @param[in] width width of dst data. - * @param[in] mode 0: avreage, - * 1: sum, - * 2: divide by square root - * of sequenceLength - */ -extern void hl_sequence_avg_forward(real* dst, - real* src, - const int* starts, - int height, - int width, - const int mode); - -extern void hl_sequence_avg_backward(real* dst, - real* src, - const int* starts, - int height, - int width, - const int mode); -#endif /* HL_SEQUENCE_H_ */ diff --git a/paddle/legacy/cuda/include/hl_sparse.h b/paddle/legacy/cuda/include/hl_sparse.h deleted file mode 100644 index 9aab52e045cc13cb7dde7116a8b2b400277c0eab..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_sparse.h +++ /dev/null @@ -1,523 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_SPARSE_H_ -#define HL_SPARSE_H_ - -#include "hl_base.h" - -/** - * @brief Malloc a sparse matrix. - * - * @param[out] A_d sparse matrix. - * @param[in] format format. - * @param[in] value_type valueType. - * @param[in] dimM height. - * @param[in] dimN width. - * @param[in] nnz number of none zero element. 
- * - */ -extern void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz); - -/** - * @brief Free a sparse matrix. - * - * @param[in] A_d GPU sparse matrix. - * - */ -extern void hl_free_sparse_matrix(hl_sparse_matrix_s A_d); - -/** - * @brief Construct a sparse matrix use input gpu memory. - * - * @param[out] A_d sparse matrix. - * @param[in] dest_d gpu memory. - * @param[in] size size of dest_d. - * @param[in] format format. - * @param[in] value_type valueType. - * @param[in] dimM height. - * @param[in] dimN width. - * @param[in] nnz number of none zero element. - * - * @note Destruct api is hl_destruct_sparse_matrix. - * - */ -extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, - void *dest_d, - size_t size, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz); - -/** - * @brief Use three arrays to construct sparse matrix. - * - * if format is HL_SPARSE_CSR, size of rows_d is dimM + 1, - * and size of cols_d is nnz; - * - * if format is HL_SPARSE_CSC, size of rows_d is nnz, and size of - * cols_d is dimN + 1. - * - * if valueType is HL_NO_VALUE, size of value_d is zero, - * else size of value_d is nnz. - * - * @param[out] A_d sparse matrix. - * @param[in] value_d value. - * @param[in] rows_d row. - * @param[in] cols_d col. - * @param[in] format format. - * @param[in] value_type valueType. - * @param[in] dimM height. - * @param[in] dimN width. - * @param[in] nnz number of none zero element. - * - * @note The corresponding destructor interface is hl_destruct_sparse_matrix. - * - */ -extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, - real *value_d, - int *rows_d, - int *cols_d, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz); - -/** - * @brief Destruct sparse matrix. - * - * @param[in] A_d sparse matrix. 
- * - */ -extern void hl_destruct_sparse_matrix(hl_sparse_matrix_s A_d); - -/** - * @brief Copy value & index to sparse matrix. - * - * if csr_matrix is HL_FLOAT_VALUE. - * - * 1. csr_val, csr_row, csr_col three pointers are not null. - * - * 2. csr_val is not null, csr_row adn csr_col are null. - * - * if csr_matrix is HL_NO_VALUE. - * - * 1. csr_val will be ignore, csr_row and csr_col are not null. - * - * - * @param[in,out] csr_matrix sparse matrix. - * @param[in] csr_val point to csr value array(nnz). - * @param[in] csr_row point to csr row indices array(dimM+1). - * @param[in] csr_col point to csr col indices array(nnz). - * @param[in] stream hl_stream_t type. - * - */ -extern void hl_memcpy_csr_matrix(hl_sparse_matrix_s csr_matrix, - real *csr_val, - int *csr_row, - int *csr_col, - hl_stream_t stream); - -/** - * @brief Copy value & index to sparse matrix. - * - * if csr_matrix is HL_FLOAT_VALUE. - * - * 1. csc_val, csc_row, csc_col three pointers are not null. - * - * 2. csc_val is not null, csc_row and csc_col are null. - * - * if csr_matrix is HL_NO_VALUE. - * - * 1. csc_val will be ignore, csc_row and csc_col are not null. - * - * @param[in,out] csc_matrix sparse matrix. - * @param[in] csc_val point to csc value array(nnz). - * @param[in] csc_row point to csc row indices array(nnz). - * @param[in] csc_col point to csc col indices array(dimN+1). - * @param[in] stream hl_stream_t type. - * - * - */ -extern void hl_memcpy_csc_matrix(hl_sparse_matrix_s csc_matrix, - real *csc_val, - int *csc_row, - int *csc_col, - hl_stream_t stream); - -/** - * @brief Copy sparse matrix to sparse matrix. - * - * @param[out] dst sparse matrix. - * @param[in] src sparse matrix. - * @param[in] stream hl_stream_t type. - * - * - * @note 1. Format of the src matrix and dst matrix needs to be consistent. - * 2. 
Source matrix has value, the destination matrix has value or - * no value can be; the source matrix is no value, then the - * destination matrix must also be no value; - */ -extern void hl_memcpy_sparse_matrix(hl_sparse_matrix_s dst, - hl_sparse_matrix_s src, - hl_stream_t stream); - -/** - * @brief csr matrix to dense matrix. - * - * @param[in] A_d csr matrix. - * @param[out] C_d dense matrix. - * @param[in] dimM height. - * @param[in] dimN width. - * - */ -extern void hl_matrix_csr2dense(hl_sparse_matrix_s A_d, - real *C_d, - int dimM, - int dimN); - -/** - * @brief csc matrix to dense matrix. - * - * @param[in] A_d csc matrix. - * @param[out] C_d dense matrix. - * @param[in] dimM height. - * @param[in] dimN width. - * - */ -extern void hl_matrix_csc2dense(hl_sparse_matrix_s A_d, - real *C_d, - int dimM, - int dimN); - -/** - * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d. - * - * @param[in] A_d csr sparse matrix. - * @param[in] transa operation op(A) that is non-or transpose. - * @param[in] B_d dense matrix. - * @param[in] transb operation op(B) that is non-or transpose. - * @param[out] C_d dense matrix. - * @param[in] dimM matrix height of op(A) & C - * @param[in] dimN matrix width of op(B) & C - * @param[in] dimK width of op(A) & height of op(B) - * @param[in] alpha scalar used for multiplication. - * @param[in] beta scalar used for multiplication. - * If beta is zero, C does not have to be a valid input. - * - * @note transb is not support HPPL_OP_T. - * - */ -extern void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta); - -/** - * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d. - * - * @param[in] A_d sparse matrix. - * @param[in] transa operation op(A) that is non-or transpose. - * @param[in] B_d dense matrix. - * @param[in] transb operation op(B) that is non-or transpose. - * @param[out] C_d dense matrix. 
- * @param[in] dimM matrix height of op(A) & C - * @param[in] dimN matrix width of op(B) & C - * @param[in] dimK width of op(A) & height of op(B) - * @param[in] alpha scalar used for multiplication. - * @param[in] beta scalar used for multiplication. - * If beta is zero, C does not have to be a valid input. - * - * @note transb is not support HPPL_OP_T. - * - */ -extern void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta); - -/** - * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d. - * - * @param[in] A_d dense matrix. - * @param[in] transa operation op(A) that is non-or transpose. - * @param[in] B_d csc sparse matrix. - * @param[in] transb operation op(B) that is non-or transpose. - * @param[out] C_d dense matrix. - * @param[in] dimM matrix height of op(A) & C - * @param[in] dimN matrix width of op(B) & C - * @param[in] dimK width of op(A) & height of op(B) - * @param[in] alpha scalar used for multiplication. - * @param[in] beta scalar used for multiplication. - * If beta is zero, C does not have to be a valid input. - * - * @note transa is not support HPPL_OP_T. - * - */ -extern void hl_matrix_dense_mul_csc(real *A_d, - hl_trans_op_t transa, - hl_sparse_matrix_s B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta); - -/** - * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d. - * Calculated based on the non-zero elements of the matrix C. - * - * @param[in] A_d dense matrix. - * @param[in] transa operation op(A) that is non-or transpose. - * @param[in] B_d dense matrix. - * @param[in] transb operation op(B) that is non-or transpose. - * @param[in,out] C_d sparse matrix. - * @param[in] dimM matrix height of op(A) & C - * @param[in] dimN matrix width of op(B) & C - * @param[in] dimK width of op(A) & height of op(B) - * @param[in] alpha scalar used for multiplication. 
- * @param[in] beta scalar used for multiplication. - * - * @note transb is not support HPPL_OP_T. - * - */ -extern void hl_sparse_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - hl_sparse_matrix_s C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta); - -/** - * @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d - * - * @param[in] A_d dense matrix. - * @param[in] transa operation op(A) that is non-or transpose. - * @param[in] B_d sparse matrix. - * @param[in] transb operation op(B) that is non-or transpose. - * @param[out] C_d dense matrix. - * @param[in] dimM matrix height of op(A) & C - * @param[in] dimN matrix width of op(B) & C - * @param[in] dimK width of op(A) & height of op(B) - * @param[in] alpha scalar used for multiplication. - * @param[in] beta scalar used for multiplication. - * If beta is zero, C does not have to be a valid input. - * - * - * @note transa is not support HPPL_OP_T. - * - */ -extern void hl_matrix_dense_mul_csr(real *A_d, - hl_trans_op_t transa, - hl_sparse_matrix_s B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta); - -/** - * @brief Memcpy csc_matrix to host. - * - * a. according to csc_matrix, update three arrays - * - * 1. csc_val, csc_row, csc_col are dest Address. - * - * 2. if type of csc_matrix is HL_NO_VALUE, update csc_row and csc_col - * - * 3. if type of csc_matrix is HL_FLOAT_VALUE, update csc_row, - * csc_col and csc_value. - * - * b. The interface is asynchronous copy. To ensure that the data is copied - * please call the synchronous interface; - * - * - * @param[out] csc_val point to csc value array(nnz). - * @param[in] val_size csc value size. - * @param[out] csc_row point to csc row indices array(nnz). - * @param[in] row_size csc row size. - * @param[out] csc_col point to csc col indices array(dimN + 1). - * @param[in] col_size csc column size. - * @param[in] csc_matrix sparse matrix. 
- * @param[in] stream hl_stream_t type. - * - */ -extern void hl_memcpy_from_csc_matrix(real *csc_val, - size_t val_size, - int *csc_row, - size_t row_size, - int *csc_col, - size_t col_size, - hl_sparse_matrix_s csc_matrix, - hl_stream_t stream); - -/** - * @brief Memcpy sparse matrix to host. - * - * a. according to csr_matrix, update three arrays - * - * 1. csr_val, csr_row, csr_col are dest Address. - * - * 2. if type of csr_matrix is HL_NO_VALUE, update csr_row and csr_col - * - * 3. if type of csr_matrix is HL_FLOAT_VALUE, update csr_row, - * csr_col and csr_value - * - * b. The interface is asynchronous copy. To ensure that the data is copied - * please call the synchronous interface; - * - * @param[out] csr_val point to csr value array(nnz). - * @param[in] val_size csr value size. - * @param[out] csr_row point to csr row indices array(nnz). - * @param[in] row_size csr row size. - * @param[out] csr_col point to csr col indices array(dimN + 1). - * @param[in] col_size csr column size. - * @param[in] csr_matrix sparse matrix. - * @param[in] stream hl_stream_t type. - * - */ -extern void hl_memcpy_from_csr_matrix(real *csr_val, - size_t val_size, - int *csr_row, - size_t row_size, - int *csr_col, - size_t col_size, - hl_sparse_matrix_s csr_matrix, - hl_stream_t stream); - -/** - * @brief A_d[j] += B_d[i,j] for i in range(height) - * - * @param[in,out] A_d vector, size = width. - * @param[in] B_d sparse matrix. - * @param[in] dimM height. - * @param[in] dimN width. - * @param[in] scale scale of B_d - * - */ -extern void hl_sparse_matrix_column_sum( - real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale); -/** - * @brief implementation of csr sparse matrix in hl_sparse_matirx_column_sum - */ -extern void hl_matrix_csr_column_sum( - real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale); - -/** - * @brief A_d[i,j] += B_d[j] - * - * @param[in,out] A_d sprare matrix. - * @param[in] B_d vector, size = A_d.width. 
- * @param[in] scale scale of B_d. - * - */ -extern void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d, - real *B_d, - real scale); -/** - * @brief implementation of csr sparse matrix in hl_sparse_matrix_add_bias - */ -extern void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d, - real *B_d, - real scale); - -/** - * @brief sparseMatrix = alpha * denseMatrix + beta *sparseMatrix - * A_d[i,j] = alpha * B_d[i,j] + beta * A_d[i,j] - * Only add value of same (row, col) index in dense matrix and - * do not use others values whoes postions are not in sparse matirx. - * - * @param[in,out] A_d sprare matrix. - * @param[in] B_d dense matrix. - * @param[in] dimM height of B_d. - * @param[in] dimN width of B_d. - * @param[in] alpha scale of B_d. - * @param[in] beta scale of A_d. - * - */ -extern void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d, - real *B_d, - int dimM, - int dimN, - real alpha, - real beta); -/** - * @brief implementation of csr sparse matrix in hl_sparse_matrix_add_dense - */ -extern void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d, - real *B_d, - int dimM, - int dimN, - real alpha, - real beta); - -/** - * @brief get rows pionter of GpuSparseMatrix - * - * @param[in] sMat sparse matrix - * - * @return return rows pointer, which is gpu address - * - */ -extern int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat); - -/** - * @brief get cols pionter of GpuSparseMatrix - * - * @param[in] sMat sparse matrix - * - * @return return cols pointer, which is gpu address - * - */ -extern int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat); - -/** - * @brief get value pionter of GpuSparseMatrix - * - * @param[in] sMat sparse matrix - * - * @return return value pointer, which is gpu address - * - */ -extern real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat); - -#endif /* HL_SPARSE_H_ */ diff --git a/paddle/legacy/cuda/include/hl_sparse.ph b/paddle/legacy/cuda/include/hl_sparse.ph deleted file mode 100644 index 
c0fdccb942cd3968f405c657cc7cd9c51c6f2409..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_sparse.ph +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#ifndef HL_SPARSE_PH_ -#define HL_SPARSE_PH_ - -#include "hl_base.h" - -/** - * @brief sparse matrix csr format. - * - * @param *csr_val nonzero values of matrix. - * @param *csr_row row indices. - * @param *csr_col column indices. - * @param nnz_s sizeof of csr_val & csr_col. - * @param row_s sizeof of csr_row. - * @param sparsity sparsity pattern. - * - */ -typedef struct { - real *csr_val; - int *csr_row; - int *csr_col; - size_t nnz_s; - int row_s; - float sparsity; -}_hl_csr_matrix, *hl_csr_matrix; - -/** - * @brief sparse matrix csc format. - * - * @param *csc_val nonzero values of matrix. - * @param *csc_row row indices. - * @param *csc_col column indices. - * @param nnz_s sizeof of csc_val & csc_row. - * @param col_s sizeof of csc_col. - * @param sparsity sparsity pattern. 
- * - */ -typedef struct { - real *csc_val; - int *csc_row; - int *csc_col; - size_t nnz_s; - int col_s; - float sparsity; -}_hl_csc_matrix, *hl_csc_matrix; - -#define __sparse_get_type_return__(mat, type, field)\ - do {\ - hl_##type##_matrix type##_d = (hl_##type##_matrix)((mat)->matrix);\ - if (type##_d) {\ - return type##_d -> type##_##field;\ - } else {\ - LOG(WARNING) << "parameter " << #field << "NULL error!";\ - return NULL;\ - }\ - } while(0) - -#define __sparse_get_return__(mat, field)\ - do {\ - if ((mat) == NULL) {\ - LOG(WARNING) << "parameter NULL error!";\ - return NULL;\ - }\ - if ((mat)->format == HL_SPARSE_CSR) {\ - __sparse_get_type_return__(mat, csr, field);\ - } else {\ - __sparse_get_type_return__(mat, csc, field);\ - }\ - } while(0) - -#endif /* HL_SPARSE_PH_ */ diff --git a/paddle/legacy/cuda/include/hl_table_apply.h b/paddle/legacy/cuda/include/hl_table_apply.h deleted file mode 100644 index dff60aa0a2271189c00c067aae749692c21705d8..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_table_apply.h +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_TABLE_APPLY_H_ -#define HL_TABLE_APPLY_H_ - -/** - * @brief Get row from table. - * output[i] += table[ids[i]] - * if ids[i] == -1, it will be ignored - * - * @param[out] output output matrix. - * @param[in] ldo leading dimension of output. - * @param[in] table table matrix. 
- * @param[in] ldt leading dimension of table. - * @param[in] ids ids vector. - * @param[in] numSamples height of output. - * @param[in] tableSize height of table. - * @param[in] dim width of table. - * - */ -extern void hl_matrix_select_rows(real* output, - int ldo, - real* table, - int ldt, - int* ids, - int numSamples, - int tableSize, - int dim); - -/** - * @brief Add row to table. - * table[ids[i]] += output[i] - * if ids[i] == -1, it will be ignored - * - * @param[out] table table matrix. - * @param[in] ldt leading dimension of table. - * @param[in] input input matrix. - * @param[in] ldi leading dimension of input. - * @param[in] ids ids vector. - * @param[in] numSamples height of input. - * @param[in] tableSize height of table. - * @param[in] dim width of table. - * - */ -extern void hl_matrix_add_to_rows(real* table, - int ldt, - real* input, - int ldi, - int* ids, - int numSamples, - int tableSize, - int dim); - -/** - * @brief Select element from vector. - * - * @param[out] dst output vector. - * @param[in] sized size of dst. - * @param[in] src input vector. - * @param[in] sizes size of src. - * @param[in] ids index vector. - * @param[in] sizei size of ids. - * - */ -template -extern void hl_vector_select_from( - T* dst, int sized, const T* src, int sizes, const int* ids, int sizei); - -#endif /* HL_TABLE_APPLY_H_ */ diff --git a/paddle/legacy/cuda/include/hl_tensor_ops.h b/paddle/legacy/cuda/include/hl_tensor_ops.h deleted file mode 100644 index bc5e5da53d5c6ac2bae3b0067f46e39accd1b9d8..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_tensor_ops.h +++ /dev/null @@ -1,536 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_TENSOR_OPS_H_ -#define HL_TENSOR_OPS_H_ - -#include -#include "hl_matrix_type.cuh" - -namespace hppl { -namespace unary { - -template -class add_scale { - private: - const T p; - - public: - INLINE add_scale(const T s) : p(s) {} - INLINE T operator()(const T a) const { return a + p; } -}; - -template -class sub_scale { - private: - const T p; - - public: - INLINE sub_scale(const T s) : p(s) {} - INLINE T operator()(const T a) const { return a - p; } -}; - -template -class mul_scale { - private: - const T p; - - public: - INLINE mul_scale(const T s) : p(s) {} - INLINE T operator()(const T a) const { return a * p; } -}; - -template -class div_scale { - private: - const T p; - - public: - INLINE div_scale(const T s) : p(s) {} - INLINE T operator()(const T a) const { return a / p; } -}; - -template -class neg { - public: - INLINE T operator()(const T a) const { return -a; } -}; - -template -class exp_op { - public: - INLINE T operator()(const T a) const { return std::exp(a); } -}; - -template -class log_op { - public: - INLINE T operator()(const T a) const { return std::log(a); } -}; - -template -class sqrt_op { - public: - INLINE T operator()(const T a) const { return std::sqrt(a); } -}; - -template -class square { - public: - INLINE T operator()(const T a) const { return a * a; } -}; - -template -class reciprocal { - public: - INLINE T operator()(const T a) const { return T(1) / a; } -}; - -template -class abs { - public: - INLINE T operator()(const T a) const { return a > 0 ? 
a : -a; } -}; - -template -class sign { - public: - INLINE T operator()(const T a) const { return (a > 0) - (a < 0); } -}; - -template -class min { - private: - const T p; - - public: - INLINE min(const T s) : p(s) {} - INLINE T operator()(const T a) const { return a > p ? p : a; } -}; - -template -class max { - private: - const T p; - - public: - INLINE max(const T s) : p(s) {} - INLINE T operator()(const T a) const { return a < p ? p : a; } -}; - -template -class pow_op { - private: - const T p; - - public: - INLINE pow_op(const T s) : p(s) {} - INLINE T operator()(const T a) const { return std::pow(a, p); } -}; - -template -class constant { - private: - const T p; - - public: - INLINE constant(const T s) : p(s) {} - INLINE T operator()(int i) const { return p; } - INLINE T operator()(int i, int j) const { return p; } -}; - -template -class cmp_eq { - private: - const T p; - - public: - INLINE cmp_eq(const T s) : p(s) {} - INLINE bool operator()(const T a) const { return a == p; } -}; - -template -class cmp_ne { - private: - const T p; - - public: - INLINE cmp_ne(const T s) : p(s) {} - INLINE bool operator()(const T a) const { return a != p; } -}; - -template -class cmp_le { - private: - const T p; - - public: - INLINE cmp_le(const T s) : p(s) {} - INLINE bool operator()(const T a) const { return a <= p; } -}; - -template -class cmp_lt { - private: - const T p; - - public: - INLINE cmp_lt(const T s) : p(s) {} - INLINE bool operator()(const T a) const { return a < p; } -}; - -template -class cmp_ge { - private: - const T p; - - public: - INLINE cmp_ge(const T s) : p(s) {} - INLINE bool operator()(const T a) const { return a >= p; } -}; - -template -class cmp_gt { - private: - const T p; - - public: - INLINE cmp_gt(const T s) : p(s) {} - INLINE bool operator()(const T a) const { return a > p; } -}; - -template -class and_op { - private: - const T p; - - public: - INLINE and_op(const T s) : p(s) {} - INLINE bool operator()(const T a) const { return a && p; } -}; - 
-template -class or_op { - private: - const T p; - - public: - INLINE or_op(const T s) : p(s) {} - INLINE bool operator()(const T a) const { return a || p; } -}; - -} // namespace unary - -namespace binary { -template -class add { - public: - INLINE T operator()(const T a, const T b) const { return a + b; } -}; - -template -class add_scale { - private: - const T p1; - const T p2; - - public: - INLINE add_scale(const T s1, const T s2) : p1(s1), p2(s2) {} - INLINE T operator()(const T a, const T b) const { return p1 * a + p2 * b; } -}; - -template -class sub { - public: - INLINE T operator()(const T a, const T b) const { return a - b; } -}; - -template -class mul { - public: - INLINE T operator()(const T a, const T b) const { return a * b; } -}; - -template -class div { - public: - INLINE T operator()(const T a, const T b) const { return a / b; } -}; - -template -class cmp_eq { - public: - INLINE bool operator()(const T a, const T b) const { return a == b; } -}; - -template -class cmp_ne { - public: - INLINE bool operator()(const T a, const T b) const { return a != b; } -}; - -template -class cmp_le { - public: - INLINE bool operator()(const T a, const T b) const { return a <= b; } -}; - -template -class cmp_lt { - public: - INLINE bool operator()(const T a, const T b) const { return a < b; } -}; - -template -class cmp_ge { - public: - INLINE bool operator()(const T a, const T b) const { return a >= b; } -}; - -template -class cmp_gt { - public: - INLINE bool operator()(const T a, const T b) const { return a > b; } -}; - -template -class and_op { - public: - INLINE bool operator()(const T a, const T b) const { return a && b; } -}; - -template -class or_op { - public: - INLINE bool operator()(const T a, const T b) const { return a || b; } -}; - -template -class min { - public: - INLINE T operator()(const T a, const T b) const { return a > b ? b : a; } -}; - -template -class max { - public: - INLINE T operator()(const T a, const T b) const { return a < b ? 
b : a; } -}; - -#ifdef PADDLE_USE_SSE3 -#ifndef PADDLE_TYPE_DOUBLE -template <> -class add<__m128> { - public: - INLINE __m128 operator()(const __m128 a, const __m128 b) const { - return _mm_add_ps(a, b); - } -}; - -template <> -class add_scale<__m128> { - private: - const __m128 p1; - const __m128 p2; - - public: - INLINE add_scale(const __m128 s1, const __m128 s2) : p1(s1), p2(s2) {} - INLINE __m128 operator()(const __m128 a, const __m128 b) const { - return _mm_add_ps(_mm_mul_ps(p1, a), _mm_mul_ps(p2, b)); - } -}; - -template <> -class sub<__m128> { - public: - INLINE __m128 operator()(const __m128 a, const __m128 b) const { - return _mm_sub_ps(a, b); - } -}; - -template <> -class mul<__m128> { - public: - INLINE __m128 operator()(const __m128 a, const __m128 b) const { - return _mm_mul_ps(a, b); - } -}; - -template <> -class div<__m128> { - public: - INLINE __m128 operator()(const __m128 a, const __m128 b) const { - return _mm_div_ps(a, b); - } -}; - -template <> -class min<__m128> { - public: - INLINE __m128 operator()(const __m128 a, const __m128 b) const { - return _mm_min_ps(a, b); - } -}; - -template <> -class max<__m128> { - public: - INLINE __m128 operator()(const __m128 a, const __m128 b) const { - return _mm_max_ps(a, b); - } -}; -#else -template <> -class add<__m128d> { - public: - INLINE __m128d operator()(const __m128d a, const __m128d b) const { - return _mm_add_pd(a, b); - } -}; - -template <> -class add_scale<__m128d> { - private: - const __m128d p1; - const __m128d p2; - - public: - INLINE add_scale(const __m128d s1, const __m128d s2) : p1(s1), p2(s2) {} - INLINE __m128d operator()(const __m128d a, const __m128d b) const { - return _mm_add_pd(_mm_mul_pd(p1, a), _mm_mul_pd(p2, b)); - } -}; - -template <> -class sub<__m128d> { - public: - INLINE __m128d operator()(const __m128d a, const __m128d b) const { - return _mm_sub_pd(a, b); - } -}; - -template <> -class mul<__m128d> { - public: - INLINE __m128d operator()(const __m128d a, const __m128d b) 
const { - return _mm_mul_pd(a, b); - } -}; - -template <> -class div<__m128d> { - public: - INLINE __m128d operator()(const __m128d a, const __m128d b) const { - return _mm_div_pd(a, b); - } -}; - -template <> -class min<__m128d> { - public: - INLINE __m128d operator()(const __m128d a, const __m128d b) const { - return _mm_min_pd(a, b); - } -}; - -template <> -class max<__m128d> { - public: - INLINE __m128d operator()(const __m128d a, const __m128d b) const { - return _mm_max_pd(a, b); - } -}; -#endif // PADDLE_TYPE_DOUBLE -#endif // PADDLE_USE_SSE3 - -#ifdef PADDLE_USE_NEON -#ifndef PADDLE_TYPE_DOUBLE -template <> -class add { - public: - INLINE float32x4_t operator()(const float32x4_t a, - const float32x4_t b) const { - return vaddq_f32(a, b); - } -}; - -template <> -class add_scale { - private: - const float32x4_t p1; - const float32x4_t p2; - - public: - INLINE add_scale(const float32x4_t s1, const float32x4_t s2) - : p1(s1), p2(s2) {} - INLINE float32x4_t operator()(const float32x4_t a, - const float32x4_t b) const { - return vaddq_f32(vmulq_f32(p1, a), vmulq_f32(p2, b)); - } -}; - -template <> -class sub { - public: - INLINE float32x4_t operator()(const float32x4_t a, - const float32x4_t b) const { - return vsubq_f32(a, b); - } -}; - -template <> -class mul { - public: - INLINE float32x4_t operator()(const float32x4_t a, - const float32x4_t b) const { - return vmulq_f32(a, b); - } -}; - -template <> -class div { - public: - INLINE float32x4_t operator()(const float32x4_t a, - const float32x4_t b) const { - float32x4_t tmp = vrecpeq_f32(b); - return vmulq_f32(a, tmp); - } -}; - -template <> -class min { - public: - INLINE float32x4_t operator()(const float32x4_t a, - const float32x4_t b) const { - return vminq_f32(a, b); - } -}; - -template <> -class max { - public: - INLINE float32x4_t operator()(const float32x4_t a, - const float32x4_t b) const { - return vmaxq_f32(a, b); - } -}; -#else -#error To be implemented -#endif // PADDLE_TYPE_DOUBLE -#endif // 
PADDLE_USE_NEON - -} // namespace binary -} // namespace hppl - -#endif // HL_TENSOR_OPS_H_ diff --git a/paddle/legacy/cuda/include/hl_thread.ph b/paddle/legacy/cuda/include/hl_thread.ph deleted file mode 100644 index 4abede1517a2264d45fd390c69ffdeb95569334c..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_thread.ph +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_THREAD_PH_ -#define HL_THREAD_PH_ - -#include -#include -#include -#include -#include -#include -#include -#include "hl_base.h" - -/** - * @brief Thread resource structure. - * - * @param stream[HPPL_STREAM_END] Stream for thread. - * @param handle Cublas Handle. - * @param gen Curand Generator. - * @param cudnn_handle Cudnn handle. - * @param cudnn_desc Cudnn image descriptor. - * @param *gen_mutex Gen lock. - * @param *gpu_mem HPPL GPU Memory. - * @param *cpu_mem HPPL CPU Memory. - * @param event gpu_mem event. - * @param device Thread device context. - * @param major Compute capability. - * @param is_init Thread init or not. 
- */ -typedef struct { - cudaStream_t stream[HPPL_STREAM_END]; - cublasHandle_t handle; - curandGenerator_t gen; - cudnnHandle_t cudnn_handle; - cudnnTensorDescriptor_t cudnn_desc; - pthread_mutex_t *gen_mutex; - real *gpu_mem; - real *cpu_mem; - cudaEvent_t event; - int device; - int major; - bool is_init; -} _hl_thread_resource, *hl_thread_resource; - -extern __thread _hl_thread_resource t_resource; - -/** - * @brief Initialize cudnn. - * - * @param cudnn_handle Cudnn handle. - * @param stream Cudnn stream. - */ -extern void hl_cudnn_init(cudnnHandle_t *cudnn_handle, cudaStream_t stream); - -/** - * @brief Initialize cublas. - * - * @param cublas_handle Cublas handle. - * @param stream Cuda stream. - */ -extern void hl_cublas_init(cublasHandle_t *cublas_handle, cudaStream_t stream); - -/** - * @brief Initialize cudnn tensor descriptor. - * - * @param cudnn_desc Cudnn tensor descriptor. - */ - -extern void hl_cudnn_desc_init(cudnnTensorDescriptor_t* cudnn_desc); - -#endif /* HL_THREAD_PH_ */ diff --git a/paddle/legacy/cuda/include/hl_time.h b/paddle/legacy/cuda/include/hl_time.h deleted file mode 100644 index 61d80c065c805af7b12575fa7002ae6a57f9a57a..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_time.h +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_TIME_H_ -#define HL_TIME_H_ -#include -/** - * @brief High resolution timer. 
- * - * @return int64_t the representation value of the object as a - * count of periods, which are not necessarily - * seconds. - * - * @note It is used to generate random perturbation parameters. - */ -int64_t getCurrentTimeStick(void); - -#endif /* HL_TIME_H_ */ diff --git a/paddle/legacy/cuda/include/hl_top_k.h b/paddle/legacy/cuda/include/hl_top_k.h deleted file mode 100644 index a3c7872f525c0c07eb20a9e4e88d1b02cc2fcadc..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_top_k.h +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_TOP_K_H_ -#define HL_TOP_K_H_ - -#include "hl_base.h" - -/** - * @brief find top k element. - * - * @param[out] topVal top k element. - * @param[in] ldv leading dimension of topVal. - * @param[out] topIds top k index. - * @param[in] src input value. - * @param[in] lds leading dimension of src. - * @param[in] dim width of input value. - * @param[in] beamSize beam size. - * @param[in] numSamples height of input value. - * - */ -extern void hl_matrix_top_k(real* topVal, - int ldv, - int* topIds, - real* src, - int lds, - int dim, - int beamSize, - int numSamples); - -/** - * @brief find top k element for each row in sparse matrix. - * - * @param[out] topVal top k element. - * @param[in] ldv leading dimension of topVal. - * @param[out] topIds top k index. - * @param[in] src sparse matrix. - * @param[in] beamSize beam size. 
- * @param[in] numSamples height of input value. - * - * @note Only support HL_SPARSE_CSR format. - */ -extern void hl_sparse_matrix_top_k(real* topVal, - int ldv, - int* topIds, - hl_sparse_matrix_s src, - int beamSize, - int numSamples); - -/** - * @brief Matrix classification error. - * - * @param[out] topVal top k element. - * @param[in] ldv leading dimension of topVal. - * @param[out] topIds top k index. - * @param[in] src input value. - * @param[in] lds leading dimension of src. - * @param[in] dim width of input value. - * @param[in] topkSize size of top k element. - * @param[in] numSamples height of input value. - * @param[in] label ground truth label. - * @param[out] recResult top-k classification error. - * - */ -extern void hl_matrix_classification_error(real* topVal, - int ldv, - int* topIds, - real* src, - int lds, - int dim, - int topkSize, - int numSamples, - int* label, - real* recResult); - -#endif // HL_TOP_K_H_ diff --git a/paddle/legacy/cuda/include/hl_warpctc_wrap.h b/paddle/legacy/cuda/include/hl_warpctc_wrap.h deleted file mode 100644 index 09cbd6d450f77f510ad28fd0e9e86efa19c5328d..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/hl_warpctc_wrap.h +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef _WIN32 -#ifndef HL_WARPCTC_WRAP_H_ -#define HL_WARPCTC_WRAP_H_ -#include "ctc.h" -#include "hl_base.h" - -typedef ctcStatus_t hl_warpctc_status_t; -typedef ctcOptions hl_warpctc_options_t; - -/** - * @brief Init ctc options. - * - * @param[in] blank blank label used in ctc loss function. - * @param[in] useGpu whether use gpu. - * @param[out] options handle to store cpu or gpu informations. - * - */ -extern void hl_warpctc_init(const size_t blank, - bool useGpu, - hl_warpctc_options_t* options); - -/** - * @brief Compute the connectionist temporal classification loss, - * and optionally compute the gradient with respect to the inputs. - * - * if batchGrad == nullptr - * - * only compute the ctc loss. - * - * if batchGrad != nullptr - * - * compute both ctc loss and gradient. - * - * @param[in] batchInput batch matrix of input probabilities, - * in maxSequenceLength x numSequence x numClasses - * (row-major) format. - * @param[out] batchGrad batch matrix of gradient. - * @param[in] cpuLabels labels always in CPU memory. - * @param[in] cpuLabelLengths length of all labels in CPU memory. - * @param[in] cpuInputLengths length of all sequences in CPU memory. - * @param[in] numClasses number of possible output symbols. - * @param[in] numSequences number of sequence. - * @param[out] cpuCosts cost of each sequence in CPU memory. - * @param[out] workspace workspace to store some temporary results. - * @param[in] options handle to store cpu or gpu informations. - * - */ -extern void hl_warpctc_compute_loss(const real* batchInput, - real* batchGrad, - const int* cpuLabels, - const int* cpuLabelLengths, - const int* cpuInputLengths, - const size_t numClasses, - const size_t numSequences, - real* cpuCosts, - void* workspace, - hl_warpctc_options_t* options); - -/** - * @brief Compute the required workspace size. - * There is no memory allocated operations within warp-ctc. - * - * @param[in] cpuLabelLengths length of all labels in CPU memory. 
- * @param[in] cpuInputLengths length of all sequences in CPU memory. - * @param[in] numClasses number of possible output symbols. - * @param[in] numSequences number of sequence. - * @param[in] options handle to store cpu or gpu informations. - * @param[out] bytes pointer to a scalar where the memory - * requirement in bytes will be placed. - * - */ -extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths, - const int* cpuInputLengths, - const size_t numClasses, - const size_t numSequences, - hl_warpctc_options_t* options, - size_t* bytes); - -#endif // HL_WARPCTC_WRAP_H_ -#endif diff --git a/paddle/legacy/cuda/include/stub/hl_aggregate_stub.h b/paddle/legacy/cuda/include/stub/hl_aggregate_stub.h deleted file mode 100644 index 2ac841facc618a070781414bc02fa67a38db0382..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_aggregate_stub.h +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef HL_AGGREGATE_STUB_H_ -#define HL_AGGREGATE_STUB_H_ - -#include "hl_aggregate.h" - -inline void hl_matrix_row_sum(real *A_d, real *C_d, int dimM, int dimN) {} - -inline void hl_matrix_row_max(real *A_d, real *C_d, int dimM, int dimN) {} - -inline void hl_matrix_row_min(real *A_d, real *C_d, int dimM, int dimN) {} - -inline void hl_matrix_column_sum(real *A_d, real *C_d, int dimM, int dimN) {} - -inline void hl_matrix_column_max(real *A_d, real *C_d, int dimM, int dimN) {} - -inline void hl_matrix_column_min(real *A_d, real *C_d, int dimM, int dimN) {} - -inline void hl_vector_sum(real *A_d, real *C_h, int dimM) {} - -inline void hl_vector_abs_sum(real *A_d, real *C_h, int dimM) {} - -#endif // HL_AGGREGATE_STUB_H_ diff --git a/paddle/legacy/cuda/include/stub/hl_cnn_stub.h b/paddle/legacy/cuda/include/stub/hl_cnn_stub.h deleted file mode 100644 index 997eed62e07827f375c7441554b397fdd0bd6a80..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_cnn_stub.h +++ /dev/null @@ -1,247 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef HL_CNN_STUB_H_ -#define HL_CNN_STUB_H_ - -#include "hl_cnn.h" - -inline void hl_maxpool_forward(const int frameCnt, - const real* inputData, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride, - real* MaskData) {} - -inline void hl_maxpool_backward(const int frameCnt, - const real* inputData, - const real* outData, - const real* outGrad, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real scaleA, - real scaleB, - real* targetGrad, - const int outStride) {} - -inline void hl_avgpool_forward(const int frameCnt, - const real* inputData, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride, - const bool excludeMode) {} - -inline void hl_avgpool_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - int paddingH, - int paddingW, - real scaleA, - real scaleB, - real* backGrad, - const int outStride, - const bool excludeMode) {} - -inline void hl_maxpool3D_forward(const int frameCnt, - const real* inputData, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int 
paddingD, - const int paddingH, - const int paddingW, - real* tgtData, - real* maxPoolIdxData, - const int tgtStride) {} - -inline void hl_maxpool3D_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, - real scaleA, - real scaleB, - real* targetGrad, - real* maxPoolIdxData, - const int outStride) {} - -inline void hl_avgpool3D_forward(const int frameCnt, - const real* inputData, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride) {} - -inline void hl_avgpool3D_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, - real scaleA, - real scaleB, - real* backGrad, - const int outStride) {} - -inline void hl_bilinear_forward(const real* inData, - const size_t inImgH, - const size_t inImgW, - const size_t inputH, - const size_t inputW, - real* outData, - const size_t outImgH, - const size_t outImgW, - const size_t outputH, - const size_t outputW, - const size_t numChannels, - const real ratioH, - const real ratioW) {} - -inline void hl_bilinear_backward(real* inGrad, - const size_t inImgH, - const size_t inImgW, - 
const size_t inputH, - const size_t inputW, - const real* outGrad, - const size_t outImgH, - const size_t outImgW, - const size_t outputH, - const size_t outputW, - const size_t numChannels, - const real ratioH, - const real ratioW) {} - -inline void hl_maxout_forward(const real* inData, - real* outData, - int* idData, - size_t batchSize, - size_t size, - size_t featLen, - size_t group) {} - -inline void hl_maxout_backward(real* inGrad, - const real* outGrad, - const int* idData, - size_t batchSize, - size_t size, - size_t featLen, - size_t group) {} - -inline void hl_upsample_forward(real* inputData, - real* maskData, - size_t batchSize, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW, - real* outputData) {} - -inline void hl_upsample_backward(real* outputGradData, - real* maskData, - size_t batchSize, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW, - real* inputGradData) {} - -#endif // HL_CNN_STUB_H_ diff --git a/paddle/legacy/cuda/include/stub/hl_cuda_cublas_stub.h b/paddle/legacy/cuda/include/stub/hl_cuda_cublas_stub.h deleted file mode 100644 index 0b2300cda95f2ae32a5e669dd8c834d39f27adcd..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_cuda_cublas_stub.h +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef HL_CUDA_CUBLAS_STUB_H_ -#define HL_CUDA_CUBLAS_STUB_H_ - -#include "hl_cuda_cublas.h" - -inline void hl_matrix_transpose( - real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc) {} - -inline void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN) {} - -inline void hl_matrix_inverse( - real *A_d, real *C_d, int dimN, int lda, int ldc) {} - -inline void hl_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta, - int lda, - int ldb, - int ldc) {} - -inline void hl_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) {} - -#endif // HL_CUDA_CUBLAS_STUB_H_ diff --git a/paddle/legacy/cuda/include/stub/hl_cuda_cudnn_stub.h b/paddle/legacy/cuda/include/stub/hl_cuda_cudnn_stub.h deleted file mode 100644 index 4b8bdf7507b26f628c8103a79b364312bc9ebbdf..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_cuda_cudnn_stub.h +++ /dev/null @@ -1,201 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef HL_CUDA_CUDNN_STUB_H_ -#define HL_CUDA_CUDNN_STUB_H_ - -#include "hl_cuda_cudnn.h" - -inline int hl_get_cudnn_lib_version() { return 0; } - -inline void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc) {} - -inline void hl_tensor_reshape(hl_tensor_descriptor image_desc, - int batch_size, - int feature_maps, - int height, - int width) {} - -inline void hl_tensor_reshape(hl_tensor_descriptor image_desc, - int batch_size, - int feature_maps, - int height, - int width, - int nStride, - int cStride, - int hStride, - int wStride) {} - -inline void hl_destroy_tensor_descriptor(hl_tensor_descriptor image_desc) {} - -inline void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc, - hl_pooling_mode_t mode, - int height, - int width, - int height_padding, - int width_padding, - int stride_height, - int stride_width) {} - -inline void hl_destroy_pooling_descriptor(hl_pooling_descriptor pooling_desc) {} - -inline void hl_pooling_forward(hl_tensor_descriptor input, - real* input_image, - hl_tensor_descriptor output, - real* output_image, - hl_pooling_descriptor pooling) {} - -inline void hl_pooling_backward(hl_tensor_descriptor input, - real* input_image, - real* input_image_grad, - hl_tensor_descriptor output, - real* output_image, - real* output_image_grad, - hl_pooling_descriptor pooling) {} - -inline void hl_create_filter_descriptor(hl_filter_descriptor* filter, - int input_feature_maps, - int output_feature_maps, - int height, - int width) {} - -inline void hl_destroy_filter_descriptor(hl_filter_descriptor filter) {} - -inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, - hl_tensor_descriptor image, - hl_filter_descriptor filter, - int padding_height, - int padding_width, - int stride_height, - int stride_width, - int dilation_h, - int dilation_w) {} - -inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, - hl_tensor_descriptor image, - hl_filter_descriptor filter, - int 
padding_height, - int padding_width, - int stride_height, - int stride_width, - int dilation_h, - int dilation_w) {} - -inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {} - -inline void hl_conv_workspace(hl_tensor_descriptor input, - hl_tensor_descriptor output, - hl_filter_descriptor filter, - hl_convolution_descriptor conv, - int* convFwdAlgo, - size_t* fwdLimitBytes, - int* convBwdDataAlgo, - size_t* bwdDataLimitBytes, - int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes, - bool useDilation) {} - -inline void hl_convolution_forward(hl_tensor_descriptor input, - real* input_data, - hl_tensor_descriptor output, - real* output_data, - hl_filter_descriptor filter, - real* filter_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convFwdAlgo) {} - -inline void hl_convolution_forward_add_bias(hl_tensor_descriptor bias, - real* bias_data, - hl_tensor_descriptor output, - real* output_data) {} - -inline void hl_convolution_backward_filter(hl_tensor_descriptor input, - real* input_data, - hl_tensor_descriptor output, - real* output_grad_data, - hl_filter_descriptor filter, - real* filter_grad_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convBwdFilterAlgo) {} - -inline void hl_convolution_backward_data(hl_tensor_descriptor input, - real* input_data_grad, - hl_tensor_descriptor output, - real* output_grad_data, - hl_filter_descriptor filter, - real* filter_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convBwdDataAlgo) {} - -inline void hl_convolution_backward_bias(hl_tensor_descriptor bias, - real* bias_grad_data, - hl_tensor_descriptor output, - real* output_grad_data) {} - -inline void hl_softmax_forward(real* input, - real* output, - int height, - int width) {} - -inline void hl_softmax_backward(real* output_value, - real* output_grad, - int height, - int width) {} - -inline void 
hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outputDesc, - real* output, - hl_tensor_descriptor bnParamDesc, - real* scale, - real* bias, - double factor, - real* runningMean, - real* runningInvVar, - double epsilon, - real* savedMean, - real* savedVar) {} - -inline void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outputDesc, - real* output, - hl_tensor_descriptor bnParamDesc, - real* scale, - real* bias, - real* estimatedMean, - real* estimatedVar, - double epsilon) {} - -inline void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outGradDesc, - real* outGrad, - hl_tensor_descriptor inGradDesc, - real* inGrad, - hl_tensor_descriptor dBnParamDesc, - real* scale, - real* scaleGrad, - real* biasGrad, - double epsilon, - real* savedMean, - real* savedInvVar) {} - -#endif // HL_CUDA_CUDNN_STUB_H_ diff --git a/paddle/legacy/cuda/include/stub/hl_cuda_stub.h b/paddle/legacy/cuda/include/stub/hl_cuda_stub.h deleted file mode 100644 index ac8b22ef31a39c84a849f42926738a84ad2295e9..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_cuda_stub.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef HL_CUDA_STUB_H_ -#define HL_CUDA_STUB_H_ - -#include "hl_cuda.h" - -inline void hl_start() {} - -inline void hl_specify_devices_start(int *device, int number) {} - -inline void hl_init(int device) {} - -inline int hl_get_cuda_lib_version(int device) { return 0; } - -inline void hl_fini() {} - -inline void hl_set_sync_flag(bool flag) {} - -inline bool hl_get_sync_flag() { return false; } - -inline int hl_get_device_count() { return 0; } - -inline void hl_set_device(int device) {} - -inline int hl_get_device() { return 0; } - -inline void *hl_malloc_device(size_t size) { return NULL; } - -inline void hl_free_mem_device(void *dest_d) {} - -inline void *hl_malloc_host(size_t size) { return NULL; } - -inline void hl_free_mem_host(void *dest_h) {} - -inline void hl_memcpy(void *dst, void *src, size_t size) {} - -inline void hl_memset_device(void *dest_d, int value, size_t size) {} - -inline void hl_memcpy_host2device(void *dest_d, void *src_h, size_t size) {} - -inline void hl_memcpy_device2host(void *dest_h, void *src_d, size_t size) {} - -inline void hl_memcpy_device2device(void *dest_d, void *src_d, size_t size) {} - -inline void hl_rand(real *dest_d, size_t num) {} - -inline void hl_srand(unsigned int seed) {} - -inline void hl_memcpy_async(void *dst, - void *src, - size_t size, - hl_stream_t stream) {} - -inline void hl_stream_synchronize(hl_stream_t stream) {} - -inline void hl_create_event(hl_event_t *event) {} - -inline void hl_destroy_event(hl_event_t event) {} - -inline float hl_event_elapsed_time(hl_event_t start, hl_event_t end) { - return 0; -} - -inline void hl_stream_record_event(hl_stream_t stream, hl_event_t event) {} - -inline void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) {} - -inline void hl_event_synchronize(hl_event_t event) {} - -inline int hl_get_device_last_error() { return 0; } - -inline const char *hl_get_device_error_string() { return NULL; } - -inline const char *hl_get_device_error_string(size_t err) { return 
NULL; } - -inline bool hl_cuda_event_is_ready(hl_event_t event) { return true; } - -inline void hl_device_synchronize() {} - -inline void hl_profiler_start() {} - -inline void hl_profiler_end() {} - -#endif // HL_CUDA_STUB_H_ diff --git a/paddle/legacy/cuda/include/stub/hl_lstm_stub.h b/paddle/legacy/cuda/include/stub/hl_lstm_stub.h deleted file mode 100644 index be2b71787e528c4f0c8cf81e397c9c2a31c8dde1..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_lstm_stub.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef HL_LSTM_STUB_H_ -#define HL_LSTM_STUB_H_ - -#include "hl_lstm.h" - -inline void hl_lstm_parallel_forward(real *gateValue, - real *stateValue, - real *preOutputValue, - real *outputValue, - real *checkIg, - real *checkFg, - real *checkOg, - real *weight, - const int *sequence, - int frameSize, - int numSequences, - bool reversed, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) {} - -inline void hl_lstm_parallel_backward_data(real *gateValue, - real *gateGrad, - real *stateValue, - real *stateGrad, - real *preOutputValue, - real *preOutputGrad, - real *outputGrad, - real *checkIg, - real *checkIgGrad, - real *checkFg, - real *checkFgGrad, - real *checkOg, - real *checkOgGrad, - real *weight, - const int *sequence, - int frameSize, - int numSequences, - bool reversed, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) {} - -inline void hl_lstm_parallel_backward_weight(real *weightGrad, - real *outputValue, - real *gateGrad, - const int *sequence, - int frameSize, - int batchSize, - int numSequences, - bool reversed) {} - -#endif // HL_LSTM_STUB_H_ diff --git a/paddle/legacy/cuda/include/stub/hl_matrix_stub.h b/paddle/legacy/cuda/include/stub/hl_matrix_stub.h deleted file mode 100644 index 914a2edaf2122ade3d9c7eed9fef1e980c52f87a..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_matrix_stub.h +++ /dev/null @@ -1,138 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_MATRIX_STUB_H_ -#define HL_MATRIX_STUB_H_ - -#include "hl_matrix.h" - -inline void hl_matrix_add(real* A_d, - real* B_d, - real* C_d, - int dimM, - int dimN, - real alpha, - real beta) {} - -inline void hl_matrix_softmax(real* A_d, real* C_d, int dimM, int dimN) {} - -inline void hl_sequence_softmax_forward(real* A_d, - real* C_d, - const int* index, - int numSequence) {} - -inline void hl_matrix_softmax_derivative( - real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN) {} - -inline void hl_matrix_classification_error(real* topVal, - int ldv, - int* topIds, - real* src, - int lds, - int dim, - int topkSize, - int numSamples, - int* label, - real* recResult) {} - -inline void hl_matrix_cross_entropy( - real* A_d, real* C_d, int* label_d, int dimM, int dimN) {} - -inline void hl_matrix_cross_entropy_bp( - real* grad_d, real* output_d, int* label_d, int dimM, int dimN) {} - -inline void hl_matrix_multi_binary_cross_entropy( - real* output, real* entropy, hl_sparse_matrix_s mat, int dimM, int dimN) {} - -inline void hl_matrix_multi_binary_cross_entropy_bp( - real* output, real* grad, hl_sparse_matrix_s mat, int dimM, int dimN) {} - -inline void hl_matrix_zero_mem(real* data, int num) {} - -inline void hl_param_relu_forward(real* output, - real* input, - real* w, - int width, - int height, - int partial_sum) {} - -inline void hl_param_relu_backward_w(real* grad_w, - real* grad_o, - real* input, - int width, - int height, - int partial_sum) {} - -inline void hl_param_relu_backward_diff(real* grad_o, - real* input, - real* w, - real* diff, - int 
width, - int height, - int partial_sum) {} - -inline void hl_matrix_add_shared_bias(real* A_d, - real* B_d, - const int channel, - const int dimM, - const int dimN, - real scale) {} - -inline void hl_matrix_collect_shared_bias(real* B_d, - real* A_d, - const int channel, - const int dimM, - const int dimN, - real scale) {} - -inline void hl_matrix_rotate( - real* mat, real* matRot, int dimM, int dimN, bool clockWise) {} - -inline void hl_matrix_vol2Col(const real* dataSrc, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real* dataDst) {} - -inline void hl_matrix_col2Vol(real* dataDst, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - const real* dataSrc, - real alpha, - real beta) {} - -inline void hl_vector_cast2int(int* out, real* vec, int size) {} - -#endif // HL_MATRIX_STUB_H_ diff --git a/paddle/legacy/cuda/include/stub/hl_sequence_stub.h b/paddle/legacy/cuda/include/stub/hl_sequence_stub.h deleted file mode 100644 index 44bc3dbaff3c89520f97f9c01d7e9d01c625d52e..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_sequence_stub.h +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_SEQUENCE_STUB_H_ -#define HL_SEQUENCE_STUB_H_ - -#include "hl_sequence.h" - -inline void hl_max_sequence_forward(real* input, - const int* sequence, - real* output, - int* index, - int numSequences, - int dim) {} - -inline void hl_max_sequence_backward( - real* outputGrad, int* index, real* inputGrad, int numSequences, int dim) {} - -inline void hl_sequence2batch_copy(real* batch, - real* sequence, - const int* batchIndex, - int seqWidth, - int batchCount, - bool seq2batch) {} - -inline void hl_sequence2batch_add(real* batch, - real* sequence, - int* batchIndex, - int seqWidth, - int batchCount, - bool seq2batch) {} - -inline void hl_sequence2batch_copy_padding(real* batch, - real* sequence, - const int* sequenceStartPositions, - const size_t sequenceWidth, - const size_t maxSequenceLength, - const size_t numSequences, - bool normByTimes, - bool seq2batch) {} - -inline void hl_sequence_avg_forward(real* dst, - real* src, - const int* starts, - int height, - int width, - const int mode) {} - -inline void hl_sequence_avg_backward(real* dst, - real* src, - const int* starts, - int height, - int width, - const int mode) {} -#endif // HL_SEQUENCE_STUB_H_ diff --git a/paddle/legacy/cuda/include/stub/hl_sparse_stub.h b/paddle/legacy/cuda/include/stub/hl_sparse_stub.h deleted file mode 100644 index 4001d4fb7416fa2457e3c057a0a121611854e6d0..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/include/stub/hl_sparse_stub.h +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef HL_SPARSE_STUB_H_ -#define HL_SPARSE_STUB_H_ - -#include "hl_sparse.h" - -inline void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz) {} - -inline void hl_free_sparse_matrix(hl_sparse_matrix_s A_d) {} - -inline void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, - void *dest_d, - size_t size, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz) {} - -inline void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, - real *value_d, - int *rows_d, - int *cols_d, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz) {} - -inline void hl_destruct_sparse_matrix(hl_sparse_matrix_s A_d) {} - -inline void hl_memcpy_csr_matrix(hl_sparse_matrix_s csr_matrix, - real *csr_val, - int *csr_row, - int *csr_col, - hl_stream_t stream) {} - -inline void hl_memcpy_csc_matrix(hl_sparse_matrix_s csc_matrix, - real *csc_val, - int *csc_row, - int *csc_col, - hl_stream_t stream) {} - -inline void hl_memcpy_sparse_matrix(hl_sparse_matrix_s dst, - hl_sparse_matrix_s src, - hl_stream_t stream) {} - -inline void hl_matrix_csr2dense(hl_sparse_matrix_s A_d, - real *C_d, - int dimM, - int dimN) {} - -inline void hl_matrix_csc2dense(hl_sparse_matrix_s A_d, - real *C_d, - int dimM, - int dimN) {} - -inline void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) {} - -inline 
void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) {} - -inline void hl_matrix_dense_mul_csc(real *A_d, - hl_trans_op_t transa, - hl_sparse_matrix_s B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) {} - -inline void hl_sparse_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - hl_sparse_matrix_s C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) {} - -inline void hl_matrix_dense_mul_csr(real *A_d, - hl_trans_op_t transa, - hl_sparse_matrix_s B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) {} - -inline void hl_memcpy_from_csc_matrix(real *csc_val, - size_t val_size, - int *csc_row, - size_t row_size, - int *csc_col, - size_t col_size, - hl_sparse_matrix_s csc_matrix, - hl_stream_t stream) {} - -inline void hl_memcpy_from_csr_matrix(real *csr_val, - size_t val_size, - int *csr_row, - size_t row_size, - int *csr_col, - size_t col_size, - hl_sparse_matrix_s csr_matrix, - hl_stream_t stream) {} - -inline void hl_sparse_matrix_column_sum( - real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale) {} - -inline void hl_matrix_csr_column_sum( - real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale) {} - -inline void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d, - real *B_d, - real scale) {} - -inline void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d, - real *B_d, - real scale) {} - -inline void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d, - real *B_d, - int dimM, - int dimN, - real alpha, - real beta) {} - -inline void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d, - real *B_d, - int dimM, - int dimN, - real alpha, - real beta) {} - -inline int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat) { return NULL; } - -inline int 
*hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat) { return NULL; } - -inline real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat) { - return NULL; -} - -#endif // HL_SPARSE_STUB_H_ diff --git a/paddle/legacy/cuda/src/avx_mathfun.h b/paddle/legacy/cuda/src/avx_mathfun.h deleted file mode 100644 index 8e698e746a17b845f62e1da25fc8a2a4b6d4737d..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/avx_mathfun.h +++ /dev/null @@ -1,735 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/* - AVX implementation of sin, cos, sincos, exp and log - - Based on "sse_mathfun.h", by Julien Pommier - http://gruntthepeon.free.fr/ssemath/ - - Copyright (C) 2012 Giovanni Garberoglio - Interdisciplinary Laboratory for Computational Science (LISC) - Fondazione Bruno Kessler and University of Trento - via Sommarive, 18 - I-38123 Trento (Italy) - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. 
If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - (this is the zlib license) -*/ - -#include - -/* yes I know, the top of this file is quite ugly */ -#define ALIGN32_BEG -#define ALIGN32_END __attribute__((aligned(32))) - -/* __m128 is ugly to write */ -typedef __m256 v8sf; // vector of 8 float (avx) -typedef __m256i v8si; // vector of 8 int (avx) -typedef __m128i v4si; // vector of 8 int (avx) - -#define _PI32AVX_CONST(Name, Val) \ - static const ALIGN32_BEG int _pi32avx_##Name[4] ALIGN32_END = { \ - Val, Val, Val, Val} - -_PI32AVX_CONST(1, 1); -_PI32AVX_CONST(inv1, ~1); -_PI32AVX_CONST(2, 2); -_PI32AVX_CONST(4, 4); - -/* declare some AVX constants -- why can't I figure a better way to do that? */ -#define _PS256_CONST(Name, Val) \ - static const ALIGN32_BEG float _ps256_##Name[8] ALIGN32_END = { \ - Val, Val, Val, Val, Val, Val, Val, Val} -#define _PI32_CONST256(Name, Val) \ - static const ALIGN32_BEG int _pi32_256_##Name[8] ALIGN32_END = { \ - Val, Val, Val, Val, Val, Val, Val, Val} -#define _PS256_CONST_TYPE(Name, Type, Val) \ - static const ALIGN32_BEG Type _ps256_##Name[8] ALIGN32_END = { \ - Val, Val, Val, Val, Val, Val, Val, Val} - -_PS256_CONST(1, 1.0f); -_PS256_CONST(0p5, 0.5f); -/* the smallest non denormalized float number */ -_PS256_CONST_TYPE(min_norm_pos, int, 0x00800000); -_PS256_CONST_TYPE(mant_mask, int, 0x7f800000); -_PS256_CONST_TYPE(inv_mant_mask, int, ~0x7f800000); - -_PS256_CONST_TYPE(sign_mask, int, (int)0x80000000); -_PS256_CONST_TYPE(inv_sign_mask, int, ~0x80000000); - -_PI32_CONST256(0, 0); -_PI32_CONST256(1, 1); -_PI32_CONST256(inv1, ~1); -_PI32_CONST256(2, 2); -_PI32_CONST256(4, 4); -_PI32_CONST256(0x7f, 0x7f); - -_PS256_CONST(cephes_SQRTHF, 
0.707106781186547524); -_PS256_CONST(cephes_log_p0, 7.0376836292E-2); -_PS256_CONST(cephes_log_p1, -1.1514610310E-1); -_PS256_CONST(cephes_log_p2, 1.1676998740E-1); -_PS256_CONST(cephes_log_p3, -1.2420140846E-1); -_PS256_CONST(cephes_log_p4, +1.4249322787E-1); -_PS256_CONST(cephes_log_p5, -1.6668057665E-1); -_PS256_CONST(cephes_log_p6, +2.0000714765E-1); -_PS256_CONST(cephes_log_p7, -2.4999993993E-1); -_PS256_CONST(cephes_log_p8, +3.3333331174E-1); -_PS256_CONST(cephes_log_q1, -2.12194440e-4); -_PS256_CONST(cephes_log_q2, 0.693359375); - -#ifndef __AVX2__ - -typedef union imm_xmm_union { - v8si imm; - v4si xmm[2]; -} imm_xmm_union; - -#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) \ - { \ - imm_xmm_union u __attribute__((aligned(32))); \ - u.imm = imm_; \ - xmm0_ = u.xmm[0]; \ - xmm1_ = u.xmm[1]; \ - } - -#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) \ - { \ - imm_xmm_union u __attribute__((aligned(32))); \ - u.xmm[0] = xmm0_; \ - u.xmm[1] = xmm1_; \ - imm_ = u.imm; \ - } - -#define AVX2_BITOP_USING_SSE2(fn) \ - static inline v8si avx2_mm256_##fn(v8si x, int a) { \ - /* use SSE2 instruction to perform the bitop AVX2 */ \ - v4si x1, x2; \ - v8si ret; \ - COPY_IMM_TO_XMM(x, x1, x2); \ - x1 = _mm_##fn(x1, a); \ - x2 = _mm_##fn(x2, a); \ - COPY_XMM_TO_IMM(x1, x2, ret); \ - return (ret); \ - } - -//#warning "Using SSE2 to perform AVX2 bitshift ops" -AVX2_BITOP_USING_SSE2(slli_epi32) -AVX2_BITOP_USING_SSE2(srli_epi32) - -#define AVX2_INTOP_USING_SSE2(fn) \ - static inline v8si avx2_mm256_##fn(v8si x, v8si y) { \ - /* use SSE2 instructions to perform the AVX2 integer operation */ \ - v4si x1, x2; \ - v4si y1, y2; \ - v8si ret; \ - COPY_IMM_TO_XMM(x, x1, x2); \ - COPY_IMM_TO_XMM(y, y1, y2); \ - x1 = _mm_##fn(x1, y1); \ - x2 = _mm_##fn(x2, y2); \ - COPY_XMM_TO_IMM(x1, x2, ret); \ - return (ret); \ - } - -//#warning "Using SSE2 to perform AVX2 integer ops" -AVX2_INTOP_USING_SSE2(and_si128) -AVX2_INTOP_USING_SSE2(andnot_si128) -AVX2_INTOP_USING_SSE2(cmpeq_epi32) 
-AVX2_INTOP_USING_SSE2(sub_epi32) -AVX2_INTOP_USING_SSE2(add_epi32) -#define avx2_mm256_and_si256 avx2_mm256_and_si128 -#define avx2_mm256_andnot_si256 avx2_mm256_andnot_si128 -#else -#define avx2_mm256_slli_epi32 _mm256_slli_epi32 -#define avx2_mm256_srli_epi32 _mm256_srli_epi32 -#define avx2_mm256_and_si256 _mm256_and_si256 -#define avx2_mm256_andnot_si256 _mm256_andnot_si256 -#define avx2_mm256_cmpeq_epi32 _mm256_cmpeq_epi32 -#define avx2_mm256_sub_epi32 _mm256_sub_epi32 -#define avx2_mm256_add_epi32 _mm256_add_epi32 -#endif /* __AVX2__ */ - -/* natural logarithm computed for 8 simultaneous float - return NaN for x <= 0 -*/ -v8sf log256_ps(v8sf x) { - v8si imm0; - v8sf one = *(v8sf *)_ps256_1; - - // v8sf invalid_mask = _mm256_cmple_ps(x, _mm256_setzero_ps()); - v8sf invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_LE_OS); - - x = _mm256_max_ps( - x, *(v8sf *)_ps256_min_norm_pos); /* cut off denormalized stuff */ - - // can be done with AVX2 - imm0 = avx2_mm256_srli_epi32(_mm256_castps_si256(x), 23); - - /* keep only the fractional part */ - x = _mm256_and_ps(x, *(v8sf *)_ps256_inv_mant_mask); - x = _mm256_or_ps(x, *(v8sf *)_ps256_0p5); - - // this is again another AVX2 instruction - imm0 = avx2_mm256_sub_epi32(imm0, *(v8si *)_pi32_256_0x7f); - v8sf e = _mm256_cvtepi32_ps(imm0); - - e = _mm256_add_ps(e, one); - - /* part2: - if( x < SQRTHF ) { - e -= 1; - x = x + x - 1.0; - } else { x = x - 1.0; } - */ - // v8sf mask = _mm256_cmplt_ps(x, *(v8sf*)_ps256_cephes_SQRTHF); - v8sf mask = _mm256_cmp_ps(x, *(v8sf *)_ps256_cephes_SQRTHF, _CMP_LT_OS); - v8sf tmp = _mm256_and_ps(x, mask); - x = _mm256_sub_ps(x, one); - e = _mm256_sub_ps(e, _mm256_and_ps(one, mask)); - x = _mm256_add_ps(x, tmp); - - v8sf z = _mm256_mul_ps(x, x); - - v8sf y = *(v8sf *)_ps256_cephes_log_p0; - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p1); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p2); - y = _mm256_mul_ps(y, x); - y = 
_mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p3); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p4); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p5); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p6); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p7); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p8); - y = _mm256_mul_ps(y, x); - - y = _mm256_mul_ps(y, z); - - tmp = _mm256_mul_ps(e, *(v8sf *)_ps256_cephes_log_q1); - y = _mm256_add_ps(y, tmp); - - tmp = _mm256_mul_ps(z, *(v8sf *)_ps256_0p5); - y = _mm256_sub_ps(y, tmp); - - tmp = _mm256_mul_ps(e, *(v8sf *)_ps256_cephes_log_q2); - x = _mm256_add_ps(x, y); - x = _mm256_add_ps(x, tmp); - x = _mm256_or_ps(x, invalid_mask); // negative arg will be NAN - return x; -} - -_PS256_CONST(exp_hi, 88.3762626647949f); -_PS256_CONST(exp_lo, -88.3762626647949f); - -_PS256_CONST(cephes_LOG2EF, 1.44269504088896341); -_PS256_CONST(cephes_exp_C1, 0.693359375); -_PS256_CONST(cephes_exp_C2, -2.12194440e-4); - -_PS256_CONST(cephes_exp_p0, 1.9875691500E-4); -_PS256_CONST(cephes_exp_p1, 1.3981999507E-3); -_PS256_CONST(cephes_exp_p2, 8.3334519073E-3); -_PS256_CONST(cephes_exp_p3, 4.1665795894E-2); -_PS256_CONST(cephes_exp_p4, 1.6666665459E-1); -_PS256_CONST(cephes_exp_p5, 5.0000001201E-1); - -v8sf exp256_ps(v8sf x) { - v8sf tmp = _mm256_setzero_ps(), fx; - v8si imm0; - v8sf one = *(v8sf *)_ps256_1; - - x = _mm256_min_ps(x, *(v8sf *)_ps256_exp_hi); - x = _mm256_max_ps(x, *(v8sf *)_ps256_exp_lo); - - /* express exp(x) as exp(g + n*log(2)) */ - fx = _mm256_mul_ps(x, *(v8sf *)_ps256_cephes_LOG2EF); - fx = _mm256_add_ps(fx, *(v8sf *)_ps256_0p5); - - /* how to perform a floorf with SSE: just below */ - // imm0 = _mm256_cvttps_epi32(fx); - // tmp = _mm256_cvtepi32_ps(imm0); - - tmp = _mm256_floor_ps(fx); - - /* if greater, substract 1 */ - // v8sf mask = _mm256_cmpgt_ps(tmp, fx); - v8sf mask = 
_mm256_cmp_ps(tmp, fx, _CMP_GT_OS); - mask = _mm256_and_ps(mask, one); - fx = _mm256_sub_ps(tmp, mask); - - tmp = _mm256_mul_ps(fx, *(v8sf *)_ps256_cephes_exp_C1); - v8sf z = _mm256_mul_ps(fx, *(v8sf *)_ps256_cephes_exp_C2); - x = _mm256_sub_ps(x, tmp); - x = _mm256_sub_ps(x, z); - - z = _mm256_mul_ps(x, x); - - v8sf y = *(v8sf *)_ps256_cephes_exp_p0; - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p1); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p2); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p3); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p4); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p5); - y = _mm256_mul_ps(y, z); - y = _mm256_add_ps(y, x); - y = _mm256_add_ps(y, one); - - /* build 2^n */ - imm0 = _mm256_cvttps_epi32(fx); - // another two AVX2 instructions - imm0 = avx2_mm256_add_epi32(imm0, *(v8si *)_pi32_256_0x7f); - imm0 = avx2_mm256_slli_epi32(imm0, 23); - v8sf pow2n = _mm256_castsi256_ps(imm0); - y = _mm256_mul_ps(y, pow2n); - return y; -} - -_PS256_CONST(minus_cephes_DP1, -0.78515625); -_PS256_CONST(minus_cephes_DP2, -2.4187564849853515625e-4); -_PS256_CONST(minus_cephes_DP3, -3.77489497744594108e-8); -_PS256_CONST(sincof_p0, -1.9515295891E-4); -_PS256_CONST(sincof_p1, 8.3321608736E-3); -_PS256_CONST(sincof_p2, -1.6666654611E-1); -_PS256_CONST(coscof_p0, 2.443315711809948E-005); -_PS256_CONST(coscof_p1, -1.388731625493765E-003); -_PS256_CONST(coscof_p2, 4.166664568298827E-002); -_PS256_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI - -/* evaluation of 8 sines at onces using AVX intrisics - - The code is the exact rewriting of the cephes sinf function. - Precision is excellent as long as x < 8192 (I did not bother to - take into account the special handling they have for greater values - -- it does not return garbage for arguments over 8192, though, but - the extra precision is missing). 
- - Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the - surprising but correct result. - -*/ -v8sf sin256_ps(v8sf x) { // any x - v8sf xmm1, xmm2 = _mm256_setzero_ps(), xmm3, sign_bit, y; - v8si imm0, imm2; - -#ifndef __AVX2__ - v4si imm0_1, imm0_2; - v4si imm2_1, imm2_2; -#endif - - sign_bit = x; - /* take the absolute value */ - x = _mm256_and_ps(x, *(v8sf *)_ps256_inv_sign_mask); - /* extract the sign bit (upper one) */ - sign_bit = _mm256_and_ps(sign_bit, *(v8sf *)_ps256_sign_mask); - - /* scale by 4/Pi */ - y = _mm256_mul_ps(x, *(v8sf *)_ps256_cephes_FOPI); - -/* - Here we start a series of integer operations, which are in the - realm of AVX2. - If we don't have AVX, let's perform them using SSE2 directives -*/ - -#ifdef __AVX2__ - /* store the integer part of y in mm0 */ - imm2 = _mm256_cvttps_epi32(y); - /* j=(j+1) & (~1) (see the cephes sources) */ - // another two AVX2 instruction - imm2 = avx2_mm256_add_epi32(imm2, *(v8si *)_pi32_256_1); - imm2 = avx2_mm256_and_si256(imm2, *(v8si *)_pi32_256_inv1); - y = _mm256_cvtepi32_ps(imm2); - - /* get the swap sign flag */ - imm0 = avx2_mm256_and_si256(imm2, *(v8si *)_pi32_256_4); - imm0 = avx2_mm256_slli_epi32(imm0, 29); - /* get the polynom selection mask - there is one polynom for 0 <= x <= Pi/4 - and another one for Pi/4 -#include "hl_functions.h" - -namespace hppl { - -extern __m256 exp(__m256 a); - -__m256 relu(const __m256 a) { - __m256 tmp = _mm256_set1_ps(0.0f); - return _mm256_max_ps(a, tmp); -} - -__m256 sigmoid(const __m256 a) { - __m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX); - __m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); - __m256 tmp = _mm256_max_ps(a, min); - tmp = _mm256_min_ps(tmp, max); - tmp = _mm256_sub_ps(_mm256_set1_ps(0.0f), tmp); - tmp = exp(tmp); - tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp); - tmp = _mm256_div_ps(_mm256_set1_ps(1.0f), tmp); - return tmp; -} - -__m256 tanh(const __m256 a) { - __m256 max = _mm256_set1_ps(EXP_MAX_INPUT); - __m256 tmp = 
_mm256_mul_ps(_mm256_set1_ps(-2.0f), a); - tmp = _mm256_min_ps(tmp, max); - tmp = exp(tmp); - return _mm256_sub_ps(_mm256_div_ps(_mm256_set1_ps(2.0f), - _mm256_add_ps(_mm256_set1_ps(1.0f), tmp)), - _mm256_set1_ps(1.0f)); -} - -__m256 linear(const __m256 a) { return a; } - -__m256 relu(const __m256 a, const __m256 b) { - return _mm256_mul_ps( - a, - _mm256_and_ps(_mm256_cmp_ps(b, _mm256_set1_ps(0.0f), _CMP_GT_OS), - _mm256_set1_ps(1.0f))); -} - -__m256 sigmoid(const __m256 a, const __m256 b) { - return _mm256_mul_ps(_mm256_mul_ps(a, b), - _mm256_sub_ps(_mm256_set1_ps(1.0f), b)); -} - -__m256 tanh(const __m256 a, const __m256 b) { - return _mm256_mul_ps( - a, _mm256_sub_ps(_mm256_set1_ps(1.0f), _mm256_mul_ps(b, b))); -} - -__m256 linear(const __m256 a, const __m256 b) { return a; } -} // namespace hppl diff --git a/paddle/legacy/cuda/src/hl_batch_norm.cu b/paddle/legacy/cuda/src/hl_batch_norm.cu deleted file mode 100644 index f9ffde0d53e6cde3ddb661702923254cf6310223..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_batch_norm.cu +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "hl_batch_norm.h" - -__global__ void batchNormInference(real* output, - const real* input, - const real* scale, - const real* bias, - const real* estimatedMean, - const real* estimatedVar, - const double epsilon, - size_t batchSize, - size_t channel, - size_t height, - size_t width) { - const int tid = threadIdx.x; - const int num = channel * height * width; - const int batch = blockIdx.x; - for (int i = tid; i < num; i += blockDim.x) { - const int c = i / (height * width); - const int id = batch * num + i; - real val = input[id] - estimatedMean[c]; - val /= sqrt(estimatedVar[c] + epsilon); - val *= scale[c]; - val += bias[c]; - output[id] = val; - } -} - -void hl_batch_norm_cuda_inference(const real* input, - real* output, - const real* scale, - const real* bias, - const real* estimatedMean, - const real* estimatedVar, - const double epsilon, - size_t batchSize, - size_t channel, - size_t height, - size_t width) { - batchNormInference<<>>(output, - input, - scale, - bias, - estimatedMean, - estimatedVar, - epsilon, - batchSize, - channel, - height, - width); - - CHECK_SYNC("hl_batch_norm_cuda_inference failed!"); -} diff --git a/paddle/legacy/cuda/src/hl_batch_transpose.cu b/paddle/legacy/cuda/src/hl_batch_transpose.cu deleted file mode 100644 index 221839905d753eb4c7a0823d0d0a4a0a77414852..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_batch_transpose.cu +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "hl_base.h" -#include "hl_batch_transpose.h" - -const int TILE_DIM = 64; -const int BLOCK_ROWS = 16; - -// No bank-conflict transpose for a batch of data. -__global__ void batchTransposeNoBankConflicts( - real* odata, const real* idata, int numSamples, int width, int height) { - __shared__ float tile[TILE_DIM][TILE_DIM + 1]; - - const int x = blockIdx.x * TILE_DIM + threadIdx.x; - const int y = blockIdx.y * TILE_DIM + threadIdx.y; - const int sampleId = blockIdx.z; - if (sampleId > numSamples) return; - if (x < width) { - for (int j = threadIdx.y; j < TILE_DIM && j < height - y + threadIdx.y; - j += BLOCK_ROWS) - tile[j][threadIdx.x] = - idata[sampleId * width * height + (y + j - threadIdx.y) * width + x]; - } - - __syncthreads(); - - // The matrix is tranposed. Thus height is new width, and width is new height. - const int newX = blockIdx.y * TILE_DIM + threadIdx.x; - const int newY = blockIdx.x * TILE_DIM + threadIdx.y; - if (newX >= height) { - return; - } - for (int j = threadIdx.y; j < TILE_DIM && j < width - newY + threadIdx.y; - j += BLOCK_ROWS) - odata[sampleId * width * height + (newY + j - threadIdx.y) * height + - newX] = tile[threadIdx.x][j]; -} - -void batchTranspose( - const real* input, real* output, int width, int height, int batchSize) { - dim3 dimBlock(TILE_DIM, BLOCK_ROWS, 1); - dim3 dimGrid(DIVUP(width, TILE_DIM), DIVUP(height, TILE_DIM), batchSize); - batchTransposeNoBankConflicts<<>>( - output, input, batchSize, width, height); - - CHECK_SYNC("batchTranspose failed!"); -} diff --git a/paddle/legacy/cuda/src/hl_cpu_functions.cc b/paddle/legacy/cuda/src/hl_cpu_functions.cc deleted file mode 100644 index 1306576bcb9e47ac04b44914a69955cd7561d3cc..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cpu_functions.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "hl_functions.h" - -namespace hppl { - -real relu(const real a) { return a > 0.0f ? a : 0.0f; } - -real sigmoid(const real a) { - const real min = SIGMOID_THRESHOLD_MIN; - const real max = SIGMOID_THRESHOLD_MAX; - real tmp = (a < min) ? min : ((a > max) ? max : a); - return 1.0 / (1.0 + exp(-tmp)); -} - -real tanh(const real a) { - real tmp = -2.0 * a; - tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp; - return (2.0 / (1.0 + exp(tmp))) - 1.0; -} - -real linear(const real a) { return a; } - -real relu(const real a, const real b) { return a * (b > 0.0f ? 1.0f : 0.0f); } - -real sigmoid(const real a, const real b) { return a * b * (1 - b); } - -real tanh(const real a, const real b) { return a * (1.0f - b * b); } - -real linear(const real a, const real b) { return a; } -} // namespace hppl diff --git a/paddle/legacy/cuda/src/hl_cuda_aggregate.cu b/paddle/legacy/cuda/src/hl_cuda_aggregate.cu deleted file mode 100644 index 9831c5ecc340135c27b49d24715c63f8a8dfa8e9..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_aggregate.cu +++ /dev/null @@ -1,293 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "hl_aggregate.h" -#include "hl_base.h" -#include "hl_cuda.h" -#include "hl_cuda.ph" -#include "hl_matrix_base.cuh" -#include "hl_thread.ph" -#include "paddle/legacy/utils/Logging.h" - -/** - * @brief matrix row operator. - */ -template -__global__ void KeMatrixRowOp(Agg agg, real *E, real *Sum, int dimN) { - __shared__ real sum_s[blockSize]; - int cnt = (dimN + blockSize - 1) / blockSize; - int rowId = blockIdx.x + blockIdx.y * gridDim.x; - int index = rowId * dimN; - int tid = threadIdx.x; - int lmt = tid; - - real tmp = agg.init(); - for (int ii = 0; ii < cnt && lmt < dimN; ii++) { - tmp = agg(tmp, E[index + lmt]); - lmt += blockSize; - } - sum_s[tid] = tmp; - __syncthreads(); - - for (int stride = blockSize / 2; stride > 0; stride = stride / 2) { - if (tid < stride) { - sum_s[tid] = agg(sum_s[tid], sum_s[tid + stride]); - } - __syncthreads(); - } - __syncthreads(); - - if (tid == 0) { - Sum[rowId] = sum_s[0]; - } -} - -template -void hl_matrix_row_op(Agg agg, real *A_d, real *C_d, int dimM, int dimN) { - int blocksX = dimM; - int blocksY = 1; - dim3 threads(128, 1); - dim3 grid(blocksX, blocksY); - - KeMatrixRowOp<<>>( - agg, A_d, C_d, dimN); -} - -void hl_matrix_row_sum(real *A_d, real *C_d, int dimM, int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - hl_matrix_row_op(aggregate::sum(), A_d, C_d, dimM, dimN); - CHECK_SYNC("hl_matrix_row_sum failed"); -} - -void hl_matrix_row_max(real *A_d, real *C_d, int dimM, int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - hl_matrix_row_op(aggregate::max(), A_d, C_d, dimM, dimN); - 
CHECK_SYNC("hl_matrix_row_max failed"); -} - -void hl_matrix_row_min(real *A_d, real *C_d, int dimM, int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - hl_matrix_row_op(aggregate::min(), A_d, C_d, dimM, dimN); - CHECK_SYNC("hl_matrix_row_min failed"); -} - -/** - * @brief matrix column operator. - */ -template -__global__ void KeMatrixColumnOp( - Agg agg, real *E, real *Sum, int dimM, int dimN) { - int rowIdx = blockIdx.x * blockDim.x + threadIdx.x; - real tmp = agg.init(); - if (rowIdx < dimN) { - for (int index = 0; index < dimM; index++) { - tmp = agg(tmp, E[dimN * index + rowIdx]); - } - Sum[rowIdx] = tmp; - } -} - -template -__global__ void KeMatrixColumnOp_S( - Agg agg, real *E, real *Sum, int dimM, int dimN) { - __shared__ real _sum[blockDimX * blockDimY]; - int rowIdx = blockIdx.x * blockDim.x + threadIdx.x; - int index = threadIdx.y; - - real tmp = agg.init(); - if (rowIdx < dimN) { - for (; index < dimM;) { - tmp = agg(tmp, E[dimN * index + rowIdx]); - index += blockDimY; - } - } - _sum[threadIdx.x + threadIdx.y * blockDimX] = tmp; - __syncthreads(); - - if (rowIdx < dimN) { - if (threadIdx.y == 0) { - real tmp = agg.init(); - for (int i = 0; i < blockDimY; i++) { - tmp = agg(tmp, _sum[threadIdx.x + i * blockDimX]); - } - Sum[rowIdx] = tmp; - } - } -} - -template -void hl_matrix_column_op(Agg agg, real *A_d, real *C_d, int dimM, int dimN) { - if (dimN >= 8192) { - int blocksX = (dimN + 128 - 1) / 128; - int blocksY = 1; - dim3 threads(128, 1); - dim3 grid(blocksX, blocksY); - KeMatrixColumnOp<<>>( - agg, A_d, C_d, dimM, dimN); - } else { - int blocksX = (dimN + 32 - 1) / 32; - int blocksY = 1; - dim3 threads(32, 32); - dim3 grid(blocksX, blocksY); - KeMatrixColumnOp_S<<>>( - agg, A_d, C_d, dimM, dimN); - } - - return; -} - -void hl_matrix_column_sum(real *A_d, real *C_d, int dimM, int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - hl_matrix_column_op(aggregate::sum(), A_d, C_d, dimM, dimN); - - CHECK_SYNC("hl_matrix_column_sum failed"); 
-} - -void hl_matrix_column_max(real *A_d, real *C_d, int dimM, int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - hl_matrix_column_op(aggregate::max(), A_d, C_d, dimM, dimN); - - CHECK_SYNC("hl_matrix_column_max failed"); -} - -void hl_matrix_column_min(real *A_d, real *C_d, int dimM, int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - hl_matrix_column_op(aggregate::min(), A_d, C_d, dimM, dimN); - - CHECK_SYNC("hl_matrix_column_min failed"); -} - -template -__global__ void KeVectorSum(real *E, real *Sum, int dimM) { - __shared__ double sum_s[blockSize]; - int tid = threadIdx.x; - int index = blockIdx.y * blockDim.x + threadIdx.x; - - sum_s[tid] = 0.0f; - while (index < dimM) { - sum_s[tid] += E[index]; - index += blockDim.x * gridDim.y; - } - __syncthreads(); - - for (int stride = blockSize / 2; stride > 0; stride = stride / 2) { - if (tid < stride) { - sum_s[tid] += sum_s[tid + stride]; - } - __syncthreads(); - } - __syncthreads(); - - if (tid == 0) { - Sum[blockIdx.y] = sum_s[0]; - } -} - -void hl_vector_sum(real *A_d, real *C_h, int dimM) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_h); - - int blockSize = 128; - int gridSize = 128; - int blocksX = 1; - int blocksY = gridSize; - dim3 threads(blockSize, 1); - dim3 grid(blocksX, blocksY); - - struct _hl_event_st hl_event_st = {.cu_event = t_resource.event}; - hl_event_t hl_event = &hl_event_st; - while (!hl_cuda_event_is_ready(hl_event)) { - } - - KeVectorSum<128><<>>( - A_d, t_resource.gpu_mem, dimM); - KeVectorSum<128><<<1, threads, 0, STREAM_DEFAULT>>>( - t_resource.gpu_mem, t_resource.cpu_mem, 128); - - hl_memcpy_async(C_h, t_resource.cpu_mem, sizeof(real), HPPL_STREAM_DEFAULT); - hl_stream_record_event(HPPL_STREAM_DEFAULT, hl_event); - - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - cudaError_t err = (cudaError_t)hl_get_device_last_error(); - CHECK_EQ(cudaSuccess, err) << "CUDA error: " - << hl_get_device_error_string((size_t)err); -} - -template -__global__ void KeVectorAbsSum(real *E, real 
*Sum, int dimM) { - __shared__ double sum_s[blockSize]; - int tid = threadIdx.x; - int index = blockIdx.y * blockDim.x + threadIdx.x; - - sum_s[tid] = 0.0f; - while (index < dimM) { - sum_s[tid] += abs(E[index]); - index += blockDim.x * gridDim.y; - } - __syncthreads(); - - for (int stride = blockSize / 2; stride > 0; stride = stride / 2) { - if (tid < stride) { - sum_s[tid] += sum_s[tid + stride]; - } - __syncthreads(); - } - __syncthreads(); - - if (tid == 0) { - Sum[blockIdx.y] = sum_s[0]; - } -} - -void hl_vector_abs_sum(real *A_d, real *C_h, int dimM) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_h); - - int blockSize = 128; - int gridSize = 128; - int blocksX = 1; - int blocksY = gridSize; - dim3 threads(blockSize, 1); - dim3 grid(blocksX, blocksY); - - struct _hl_event_st hl_event_st = {.cu_event = t_resource.event}; - hl_event_t hl_event = &hl_event_st; - while (!hl_cuda_event_is_ready(hl_event)) { - } - - KeVectorAbsSum<128><<>>( - A_d, t_resource.gpu_mem, dimM); - KeVectorAbsSum<128><<<1, threads, 0, STREAM_DEFAULT>>>( - t_resource.gpu_mem, t_resource.cpu_mem, 128); - - hl_memcpy_async(C_h, t_resource.cpu_mem, sizeof(real), HPPL_STREAM_DEFAULT); - hl_stream_record_event(HPPL_STREAM_DEFAULT, hl_event); - - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - cudaError_t err = (cudaError_t)hl_get_device_last_error(); - CHECK_EQ(cudaSuccess, err) << "CUDA error: " - << hl_get_device_error_string((size_t)err); -} diff --git a/paddle/legacy/cuda/src/hl_cuda_cnn.cu b/paddle/legacy/cuda/src/hl_cuda_cnn.cu deleted file mode 100644 index bac743a293cc97b114281e510d06367a86536452..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_cnn.cu +++ /dev/null @@ -1,1106 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "hl_base.h" -#include "hl_cnn.h" -#include "hl_device_functions.cuh" - -__global__ void KeMaxPoolForward(const int nthreads, - const real* inputData, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int ksizeW, - const int ksizeH, - const int strideH, - const int strideW, - const int offsetH, - const int offsetW, - real* tgtData, - const int tgtStride, - real* maskData) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < nthreads) { - int pw = index % pooledW; - int ph = (index / pooledW) % pooledH; - int c = (index / pooledW / pooledH) % channels; - int frameNum = index / pooledW / pooledH / channels; - int hstart = ph * strideH - offsetH; - int wstart = pw * strideW - offsetW; - int hend = min(hstart + ksizeH, height); - int wend = min(wstart + ksizeW, width); - hstart = max(hstart, 0); - wstart = max(wstart, 0); - real maxval = -FLT_MAX; - int max_index = -1; - inputData += (frameNum * channels + c) * height * width; - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - if (maxval < inputData[h * width + w]) { - max_index = h * width + w; - maxval = inputData[max_index]; - } - } - } - int tgtIndex = - index % (pooledW * pooledH * channels) + frameNum * tgtStride; - tgtData[tgtIndex] = maxval; - if (maskData != NULL) { - maskData[tgtIndex] = max_index; - } - } -} - -void hl_maxpool_forward(const int frameCnt, - const real* inputData, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - 
const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride, - real* maskData) { - int num_kernels = pooledH * pooledW * channels * frameCnt; - int blocks = (num_kernels + 1024 - 1) / 1024; - dim3 threads(1024, 1); - dim3 grid(blocks, 1); - - KeMaxPoolForward<<>>(num_kernels, - inputData, - channels, - height, - width, - pooledH, - pooledW, - sizeX, - sizeY, - strideH, - strideW, - paddingH, - paddingW, - tgtData, - tgtStride, - maskData); - CHECK_SYNC("hl_maxpool_forward failed"); -} - -__global__ void KeMaxPoolBackward(const int nthreads, - const real* inputData, - const real* outData, - const real* outGrad, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int padH, - const int padW, - real scaleA, - real scaleB, - real* targetGrad, - const int outStride) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < nthreads) { - // find out the local index - // find out the local offset - int offsetW = index % width + padW; - int offsetH = (index / width) % height + padH; - int offsetC = (index / width / height) % channels; - - int frameNum = index / width / height / channels; - int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; - int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; - int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0; - int pwend = offsetW >= 0 ? 
min(offsetW / strideW + 1, pooledW) : 0; - real gradient = 0; - real input = inputData[index]; - outData += (frameNum * outStride + offsetC * pooledH * pooledW); - outGrad += (frameNum * outStride + offsetC * pooledH * pooledW); - for (int ph = phstart; ph < phend; ++ph) { - for (int pw = pwstart; pw < pwend; ++pw) { - if (input == outData[ph * pooledW + pw]) { - gradient += outGrad[ph * pooledW + pw]; - } - } - } - targetGrad[index] = scaleB * targetGrad[index] + scaleA * gradient; - } -} - -void hl_maxpool_backward(const int frameCnt, - const real* inputData, - const real* outData, - const real* outGrad, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real scaleA, - real scaleB, - real* targetGrad, - const int outStride) { - int num_kernels = height * width * channels * frameCnt; - int blocks = (num_kernels + 1024 - 1) / 1024; - - KeMaxPoolBackward<<>>(num_kernels, - inputData, - outData, - outGrad, - channels, - height, - width, - pooledH, - pooledW, - sizeX, - sizeY, - strideH, - strideW, - paddingH, - paddingW, - scaleA, - scaleB, - targetGrad, - outStride); - CHECK_SYNC("hl_maxpool_backward"); -} - -__global__ void KeAvgPoolForward(const int nthreads, - const real* inputData, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int padH, - const int padW, - real* tgtData, - const int tgtStride, - const bool excludeMode) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < nthreads) { - int pw = index % pooledW; - int ph = (index / pooledW) % pooledH; - int c = (index / pooledW / pooledH) % channels; - int frameNum = index / pooledW / pooledH / channels; - - int hstart = ph * strideH - padH; - int wstart = pw * strideW - padW; - 
int hend = min(hstart + sizeY, height); - int wend = min(wstart + sizeX, width); - hstart = max(hstart, 0); - wstart = max(wstart, 0); - int poolSize = - excludeMode ? (hend - hstart) * (wend - wstart) : sizeY * sizeX; - - real aveval = 0; - inputData += (frameNum * channels + c) * height * width; - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - aveval += inputData[h * width + w]; - } - } - int tgtIndex = - index % (pooledW * pooledH * channels) + frameNum * tgtStride; - tgtData[tgtIndex] = aveval / poolSize; - } -} - -void hl_avgpool_forward(const int frameCnt, - const real* inputData, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride, - const bool excludeMode) { - int num_kernels = pooledH * pooledW * channels * frameCnt; - int blocks = (num_kernels + 1024 - 1) / 1024; - KeAvgPoolForward<<>>(num_kernels, - inputData, - channels, - height, - width, - pooledH, - pooledW, - sizeX, - sizeY, - strideH, - strideW, - paddingH, - paddingW, - tgtData, - tgtStride, - excludeMode); - CHECK_SYNC("hl_avgpool_forward failed"); -} - -__global__ void KeAvgPoolBackward(const int nthreads, - const real* outGrad, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int padH, - const int padW, - real scaleA, - real scaleB, - real* tgtGrad, - const int outStride, - const bool excludeMode) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < nthreads) { - int offsetW = index % width + padW; - int offsetH = (index / width) % height + padH; - int offsetC = (index / width / height) % channels; - int frameNum = index / width / height / channels; - - int phstart = (offsetH < sizeY) ? 
0 : (offsetH - sizeY) / strideH + 1; - int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; - int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0; - int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0; - real gradient = 0; - outGrad += (frameNum * outStride + offsetC * pooledH * pooledW); - - for (int ph = phstart; ph < phend; ++ph) { - int hstart = ph * strideH - padH; - int hend = min(hstart + sizeY, height); - hstart = max(hstart, 0); - for (int pw = pwstart; pw < pwend; ++pw) { - // figure out the pooling size - int wstart = pw * strideW - padW; - int wend = min(wstart + sizeX, width); - wstart = max(wstart, 0); - int poolSize = - excludeMode ? (hend - hstart) * (wend - wstart) : sizeY * sizeX; - gradient += outGrad[ph * pooledW + pw] / poolSize; - } - } - tgtGrad[index] = scaleB * tgtGrad[index] + scaleA * gradient; - } -} - -void hl_avgpool_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int height, - const int width, - const int pooledH, - const int pooledW, - const int sizeX, - const int sizeY, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - real scaleA, - real scaleB, - real* backGrad, - const int outStride, - const bool excludeMode) { - int num_kernels = height * width * channels * frameCnt; - int blocks = (num_kernels + 1024 - 1) / 1024; - - KeAvgPoolBackward<<>>(num_kernels, - outGrad, - channels, - height, - width, - pooledH, - pooledW, - sizeX, - sizeY, - strideH, - strideW, - paddingH, - paddingW, - scaleA, - scaleB, - backGrad, - outStride, - excludeMode); - CHECK_SYNC("hl_avgpool_backward failed"); -} - -__global__ void KeMaxPool3DForward(const int nthreads, - const real* inputData, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int ksizeD, - const int ksizeH, - const int ksizeW, - const int strideD, - const int strideH, - 
const int strideW, - const int padD, - const int padH, - const int padW, - real* tgtData, - real* maxPoolIdxData, - const int tgtStride) { - for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); - index += blockDim.x * gridDim.x) { - int pw = index % pooledW; - int ph = (index / pooledW) % pooledH; - int pd = (index / pooledW / pooledH) % pooledD; - int c = (index / pooledW / pooledH / pooledD) % channels; - int frameNum = index / pooledW / pooledH / pooledD / channels; - int dstart = pd * strideD - padD; - int hstart = ph * strideH - padH; - int wstart = pw * strideW - padW; - int dend = min(dstart + ksizeD, depth); - int hend = min(hstart + ksizeH, height); - int wend = min(wstart + ksizeW, width); - dstart = max(dstart, 0); - hstart = max(hstart, 0); - wstart = max(wstart, 0); - real maxval = -FLT_MAX; - int maxIdx = -1; - inputData += (frameNum * channels + c) * depth * height * width; - for (int d = dstart; d < dend; ++d) { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - if (maxval < inputData[(d * height + h) * width + w]) { - maxval = inputData[(d * height + h) * width + w]; - maxIdx = (d * height + h) * width + w; - } - } - } - } - int tgtIndex = - index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; - tgtData[tgtIndex] = maxval; - maxPoolIdxData[tgtIndex] = maxIdx; - } -} - -void hl_maxpool3D_forward(const int frameCnt, - const real* inputData, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int padD, - const int padH, - const int padW, - real* tgtData, - real* maxPoolIdxData, - const int tgtStride) { - int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; - int blocks = (num_kernels + 1024 - 1) / 1024; - dim3 threads(1024, 1); - dim3 grid(blocks, 
1); - - KeMaxPool3DForward<<>>(num_kernels, - inputData, - channels, - depth, - height, - width, - pooledD, - pooledH, - pooledW, - sizeZ, - sizeY, - sizeX, - strideD, - strideH, - strideW, - padD, - padH, - padW, - tgtData, - maxPoolIdxData, - tgtStride); - CHECK_SYNC("hl_maxpool3D_forward failed"); -} - -__global__ void KeMaxPool3DBackward(const int nthreads, - const real* outGrad, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int padD, - const int padH, - const int padW, - real scaleA, - real scaleB, - real* targetGrad, - real* maxPoolIdxData, - const int outStride) { - for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); - index += blockDim.x * gridDim.x) { - int offsetW = index % width; - int offsetH = (index / width) % height; - int offsetD = (index / width / height) % depth; - int offsetC = (index / width / height / depth) % channels; - int frameNum = index / width / height / depth / channels; - - int pdstart = - (offsetD + padD < sizeZ) ? 0 : (offsetD + padD - sizeZ) / strideD + 1; - int phstart = - (offsetH + padH < sizeY) ? 0 : (offsetH + padH - sizeY) / strideH + 1; - int pwstart = - (offsetW + padW < sizeX) ? 
0 : (offsetW + padW - sizeX) / strideW + 1; - int pdend = min((offsetD + padD) / strideD + 1, pooledD); - int phend = min((offsetH + padH) / strideH + 1, pooledH); - int pwend = min((offsetW + padW) / strideW + 1, pooledW); - - real gradient = 0; - outGrad += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); - maxPoolIdxData += - ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); - for (int pd = pdstart; pd < pdend; ++pd) { - for (int ph = phstart; ph < phend; ++ph) { - for (int pw = pwstart; pw < pwend; ++pw) { - if (((offsetD * height + offsetH) * width + offsetW) == - maxPoolIdxData[(pd * pooledH + ph) * pooledW + pw]) - gradient += outGrad[(pd * pooledH + ph) * pooledW + pw]; - } - } - } - targetGrad[index] = scaleA * gradient + scaleB * targetGrad[index]; - } -} - -void hl_maxpool3D_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int depth, - const int height, - const int width, - const int outputD, - const int outputH, - const int outputW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, - real scaleA, - real scaleB, - real* targetGrad, - real* maxPoolIdxData, - const int outStride) { - int num_kernels = depth * height * width * channels * frameCnt; - int blocks = (num_kernels + 1024 - 1) / 1024; - - KeMaxPool3DBackward<<>>(num_kernels, - outGrad, - channels, - depth, - height, - width, - outputD, - outputH, - outputW, - sizeZ, - sizeY, - sizeX, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - scaleA, - scaleB, - targetGrad, - maxPoolIdxData, - outStride); - CHECK_SYNC("hl_maxpool3D_backward"); -} - -__global__ void KeAvgPool3DForward(const int nthreads, - const real* inputData, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - 
const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int padD, - const int padH, - const int padW, - real* tgtData, - const int tgtStride) { - for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); - index += blockDim.x * gridDim.x) { - int pw = index % pooledW; - int ph = (index / pooledW) % pooledH; - int pd = (index / pooledW / pooledH) % pooledD; - int c = (index / pooledW / pooledH / pooledD) % channels; - int frameNum = index / pooledW / pooledH / pooledD / channels; - int dstart = pd * strideD - padD; - int hstart = ph * strideH - padH; - int wstart = pw * strideW - padW; - int dend = min(dstart + sizeZ, depth); - int hend = min(hstart + sizeY, height); - int wend = min(wstart + sizeX, width); - dstart = max(dstart, 0); - hstart = max(hstart, 0); - wstart = max(wstart, 0); - int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart); - - real aveval = 0; - inputData += (frameNum * channels + c) * depth * height * width; - for (int d = dstart; d < dend; ++d) { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - aveval += inputData[(d * height + h) * width + w]; - } - } - } - int tgtIndex = - index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; - tgtData[tgtIndex] = aveval / pool_size; - } -} - -void hl_avgpool3D_forward(const int frameCnt, - const real* inputData, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, - real* tgtData, - const int tgtStride) { - int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; - int blocks = (num_kernels + 1024 - 1) / 1024; - KeAvgPool3DForward<<>>(num_kernels, - inputData, - channels, - depth, - height, - 
width, - pooledD, - pooledH, - pooledW, - sizeZ, - sizeY, - sizeX, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - tgtData, - tgtStride); - CHECK_SYNC("hl_avgpool3D_forward failed"); -} - -__global__ void KeAvgPool3DBackward(const int nthreads, - const real* outGrad, - const int channels, - const int depth, - const int height, - const int width, - const int pooledD, - const int pooledH, - const int pooledW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - const int padD, - const int padH, - const int padW, - real scaleA, - real scaleB, - real* tgtGrad, - const int outStride) { - for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); - index += blockDim.x * gridDim.x) { - int offsetW = index % width + padW; - int offsetH = (index / width) % height + padH; - int offsetD = (index / width / height) % depth + padD; - int offsetC = (index / width / height / depth) % channels; - int frameNum = index / width / height / depth / channels; - - int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; - int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; - int pwstart = (offsetW < sizeX) ? 
0 : (offsetW - sizeX) / strideW + 1; - int pdend = min(offsetD / strideD + 1, pooledD); - int phend = min(offsetH / strideH + 1, pooledH); - int pwend = min(offsetW / strideW + 1, pooledW); - - real gradient = 0; - outGrad += (frameNum * channels + offsetC) * pooledD * pooledH * pooledW; - - for (int pd = pdstart; pd < pdend; ++pd) { - int dstart = pd * strideD - padD; - int dend = min(dstart + sizeZ, depth); - dstart = max(dstart, 0); - for (int ph = phstart; ph < phend; ++ph) { - int hstart = ph * strideH - padH; - int hend = min(hstart + sizeY, height); - hstart = max(hstart, 0); - for (int pw = pwstart; pw < pwend; ++pw) { - // figure out the pooling size - int wstart = pw * strideW - padW; - int wend = min(wstart + sizeX, width); - wstart = max(wstart, 0); - int poolsize = (dend - dstart) * (hend - hstart) * (wend - wstart); - gradient += outGrad[(pd * pooledH + ph) * pooledW + pw] / poolsize; - } - } - } - tgtGrad[index] = scaleA * gradient + scaleB * tgtGrad[index]; - } -} - -void hl_avgpool3D_backward(const int frameCnt, - const real* outGrad, - const int channels, - const int depth, - const int height, - const int width, - const int outputD, - const int outputH, - const int outputW, - const int sizeZ, - const int sizeY, - const int sizeX, - const int strideD, - const int strideH, - const int strideW, - int paddingD, - int paddingH, - int paddingW, - real scaleA, - real scaleB, - real* backGrad, - const int outStride) { - int num_kernels = depth * height * width * channels * frameCnt; - int blocks = (num_kernels + 1024 - 1) / 1024; - - KeAvgPool3DBackward<<>>(num_kernels, - outGrad, - channels, - depth, - height, - width, - outputD, - outputH, - outputW, - sizeZ, - sizeY, - sizeX, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - scaleA, - scaleB, - backGrad, - outStride); - CHECK_SYNC("hl_avgpool3D_backward failed"); -} - -__global__ void KeBilinearInterpFw(const real* in, - const size_t inImgH, - const size_t inImgW, - const size_t 
inputH, - const size_t inputW, - real* out, - const size_t outImgH, - const size_t outImgW, - const size_t outputH, - const size_t outputW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - int nthreads = outputH * outputW; - int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < nthreads) { - int outIdH = tid / outputW; - int outIdW = tid % outputW; - int inImgSize = inputW / numChannels; - int outImgSize = outputW / numChannels; - int channelId = outIdW / outImgSize; - - int outImgIdy = (outIdW % outImgSize) / outImgW; - int inImgIdy = ratioH * outImgIdy; - int hId = (inImgIdy < inImgH - 1) ? 1 : 0; - real h1lambda = ratioH * outImgIdy - inImgIdy; - real h2lambda = 1.f - h1lambda; - - int outImgIdx = tid % outImgW; - int inImgIdx = ratioW * outImgIdx; - int wId = (inImgIdx < inImgW - 1) ? 1 : 0; - real w1lambda = ratioW * outImgIdx - inImgIdx; - real w2lambda = 1.f - w1lambda; - - const real* inPos = &in[outIdH * inputW + channelId * inImgSize + - inImgIdy * inImgW + inImgIdx]; - - // bilinear interpolation - out[outIdH * outputW + outIdW] = - h2lambda * (w2lambda * inPos[0] + w1lambda * inPos[wId]) + - h1lambda * (w2lambda * inPos[hId * inImgW] + - w1lambda * inPos[hId * inImgW + wId]); - } -} - -void hl_bilinear_forward(const real* inData, - const size_t inImgH, - const size_t inImgW, - const size_t inputH, - const size_t inputW, - real* outData, - const size_t outImgH, - const size_t outImgW, - const size_t outputH, - const size_t outputW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - int threadNum = outputH * outputW; - int blocks = (threadNum + 1024 - 1) / 1024; - - KeBilinearInterpFw<<>>(inData, - inImgH, - inImgW, - inputH, - inputW, - outData, - outImgH, - outImgW, - outputH, - outputW, - numChannels, - ratioH, - ratioW); - CHECK_SYNC("hl_bilinear_forward failed"); -} - -__global__ void KeBilinearInterpBw(real* in, - const size_t inImgH, - const size_t inImgW, - const size_t inputH, - const size_t 
inputW, - const real* out, - const size_t outImgH, - const size_t outImgW, - const size_t outputH, - const size_t outputW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - int nthreads = outputH * outputW; - int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < nthreads) { - int outIdH = tid / outputW; - int outIdW = tid % outputW; - int inImgSize = inputW / numChannels; - int outImgSize = outputW / numChannels; - int channelId = outIdW / outImgSize; - - int outImgIdy = (outIdW % outImgSize) / outImgW; - int inImgIdy = ratioH * outImgIdy; - int hId = (inImgIdy < inImgH - 1) ? 1 : 0; - real h1lambda = ratioH * outImgIdy - inImgIdy; - real h2lambda = 1.f - h1lambda; - - int outImgIdx = tid % outImgW; - int inImgIdx = ratioW * outImgIdx; - int wId = (inImgIdx < inImgW - 1) ? 1 : 0; - real w1lambda = ratioW * outImgIdx - inImgIdx; - real w2lambda = 1.f - w1lambda; - - real* inPos = &in[outIdH * inputW + channelId * inImgSize + - inImgIdy * inImgW + inImgIdx]; - const real* outPos = &out[outIdH * outputW + outIdW]; - paddle::paddleAtomicAdd(&inPos[0], h2lambda * w2lambda * outPos[0]); - paddle::paddleAtomicAdd(&inPos[wId], h2lambda * w1lambda * outPos[0]); - paddle::paddleAtomicAdd(&inPos[hId * inImgW], - h1lambda * w2lambda * outPos[0]); - paddle::paddleAtomicAdd(&inPos[hId * inImgW + wId], - h1lambda * w1lambda * outPos[0]); - } -} - -void hl_bilinear_backward(real* inGrad, - const size_t inImgH, - const size_t inImgW, - const size_t inputH, - const size_t inputW, - const real* outGrad, - const size_t outImgH, - const size_t outImgW, - const size_t outputH, - const size_t outputW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - int threadNum = outputH * outputW; - int blocks = (threadNum + 1024 - 1) / 1024; - - KeBilinearInterpBw<<>>(inGrad, - inImgH, - inImgW, - inputH, - inputW, - outGrad, - outImgH, - outImgW, - outputH, - outputW, - numChannels, - ratioH, - ratioW); - CHECK_SYNC("hl_bilinear_backward 
failed"); -} - -__global__ void maxoutFpCompute(size_t nthreads, - const real* inData, - real* outData, - int* idData, - size_t size, - size_t featLen, - size_t groups) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < nthreads) { - size_t batch_idx = index / size; - size_t i = index % size; - size_t channel_idx = i / featLen; - size_t feat_idx = i % featLen; - size_t data_idx = - (batch_idx * size + channel_idx * featLen) * groups + feat_idx; - real max = inData[data_idx]; - int maxId = 0; - for (size_t g = 1; g < groups; ++g) { - real tmp = inData[data_idx + g * featLen]; - if (tmp > max) { - max = tmp; - maxId = g; - } - } - outData[index] = max; - idData[index] = maxId; - } -} - -void hl_maxout_forward(const real* inData, - real* outData, - int* idData, - size_t batchSize, - size_t size, - size_t featLen, - size_t groups) { - int num_kernels = size * batchSize; - int blocks = (num_kernels + 1024 - 1) / 1024; - maxoutFpCompute<<>>( - num_kernels, inData, outData, idData, size, featLen, groups); - CHECK_SYNC("hl_maxout_forward failed"); -} - -__global__ void maxoutBpCompute(size_t nthreads, - real* inGrad, - const real* outGrad, - const int* idData, - size_t size, - size_t featLen, - size_t groups) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < nthreads) { - size_t batch_idx = index / size; - size_t i = index % size; - size_t channel_idx = i / featLen; - size_t feat_idx = i % featLen; - size_t newIndex = batch_idx * size; - size_t gradIdx = - (channel_idx * groups + (idData + newIndex)[i]) * featLen + feat_idx; - (inGrad + newIndex * groups)[gradIdx] += (outGrad + newIndex)[i]; - } -} - -void hl_maxout_backward(real* inGrad, - const real* outGrad, - const int* idData, - size_t batchSize, - size_t size, - size_t featLen, - size_t groups) { - int num_kernels = size * batchSize; - int blocks = (num_kernels + 1024 - 1) / 1024; - maxoutBpCompute<<>>( - num_kernels, inGrad, outGrad, idData, size, featLen, groups); - 
CHECK_SYNC("hl_maxout_backward failed"); -} - -__global__ void upsampleForwardCompute(real* input_data, - real* mask_data, - size_t nthreads, - size_t in_h, - size_t in_w, - size_t out_h, - size_t out_w, - real* output_data) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < nthreads) { - int offset = index / (in_w * in_h) * out_h * out_w; - int upsample_idx = static_cast(mask_data[index]); - output_data[offset + upsample_idx] = input_data[index]; - } -} - -__global__ void upsampleBackwardCompute(real* out_grad, - real* mask_data, - size_t nthreads, - size_t in_h, - size_t in_w, - size_t out_h, - size_t out_w, - real* input_grad) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < nthreads) { - int offset = index / (in_w * in_h) * out_h * out_w; - int upsample_idx = static_cast(mask_data[index]); - input_grad[index] = out_grad[offset + upsample_idx]; - } -} - -void hl_upsample_forward(real* inputData, - real* maskData, - size_t batchSize, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW, - real* outputData) { - int num_kernels = batchSize * imgSizeH * imgSizeW * channels; - int blocks = (num_kernels + 1024 - 1) / 1024; - upsampleForwardCompute<<>>(inputData, - maskData, - num_kernels, - imgSizeH, - imgSizeW, - outputH, - outputW, - outputData); - CHECK_SYNC("hl_upsample_forward failed"); -} - -void hl_upsample_backward(real* outputGradData, - real* maskData, - size_t batchSize, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW, - real* inputGradData) { - int num_kernels = batchSize * imgSizeH * imgSizeW * channels; - int blocks = (num_kernels + 1024 - 1) / 1024; - upsampleBackwardCompute<<>>(outputGradData, - maskData, - num_kernels, - imgSizeH, - imgSizeW, - outputH, - outputW, - inputGradData); - CHECK_SYNC("hl_upsample_backward failed"); -} diff --git a/paddle/legacy/cuda/src/hl_cuda_cublas.cc b/paddle/legacy/cuda/src/hl_cuda_cublas.cc deleted 
file mode 100644 index 283b8b6e9c8e7b843a8d28b940c6ef53b77ef655..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_cublas.cc +++ /dev/null @@ -1,400 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "hl_cuda_cublas.h" -#include -#include "hl_cuda.h" -#include "hl_thread.ph" -#include "paddle/legacy/utils/DynamicLoader.h" -#include "paddle/legacy/utils/Logging.h" - -namespace dynload { - -std::once_flag cublas_dso_flag; -void *cublas_dso_handle = nullptr; - -/** - * The following macro definition can generate structs - * (for each function) to dynamic load cublas routine - * via operator overloading. - * - * note: default dynamic linked libs - */ -#ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - cublasStatus_t operator()(Args... args) { \ - typedef cublasStatus_t (*cublasFunc)(Args...); \ - std::call_once(cublas_dso_flag, GetCublasDsoHandle, &cublas_dso_handle); \ - void *p_##__name = dlsym(cublas_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - } __name; // struct DynLoad__##__name -#else -#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - cublasStatus_t operator()(Args... 
args) { \ - return __name(args...); \ - } \ - } __name; // struct DynLoad__##__name -#endif - -#define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) DYNAMIC_LOAD_CUBLAS_WRAP(__name) - -// include all needed cublas functions in HPPL -// clang-format off -#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ - __macro(cublasSgemv) \ - __macro(cublasDgemv) \ - __macro(cublasSgemm) \ - __macro(cublasDgemm) \ - __macro(cublasSgeam) \ - __macro(cublasDgeam) \ - -DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate) -DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy) -DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetStream) -DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetPointerMode) -DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasGetPointerMode) -DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgemmBatched) -DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgemmBatched) -DYNAMIC_LOAD_CUBLAS_WRAP(cublasCgemmBatched) -DYNAMIC_LOAD_CUBLAS_WRAP(cublasZgemmBatched) -DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetrfBatched) -DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetriBatched) -DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetrfBatched) -DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetriBatched) -CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP) - -#undef DYNAMIC_LOAD_CUBLAS_WRAP -#undef DYNAMIC_LOAD_CUBLAS_V2_WRAP -#undef CUBLAS_BLAS_ROUTINE_EACH - -} /* namespace dynload */ - -// clang-format on -#ifndef PADDLE_TYPE_DOUBLE -#define CUBLAS_GEAM dynload::cublasSgeam -#define CUBLAS_GEMV dynload::cublasSgemv -#define CUBLAS_GEMM dynload::cublasSgemm -#define CUBLAS_GETRF dynload::cublasSgetrfBatched -#define CUBLAS_GETRI dynload::cublasSgetriBatched -#else -#define CUBLAS_GEAM dynload::cublasDgeam -#define CUBLAS_GEMV dynload::cublasDgemv -#define CUBLAS_GEMM dynload::cublasDgemm -#define CUBLAS_GETRF dynload::cublasDgetrfBatched -#define CUBLAS_GETRI dynload::cublasDgetriBatched -#endif - -const char *hl_cublas_get_error_string(cublasStatus_t status) { - switch (status) { - case CUBLAS_STATUS_NOT_INITIALIZED: - return "[cublas status]: not initialized"; - case CUBLAS_STATUS_ALLOC_FAILED: - return "[cublas status]: allocate 
failed"; - case CUBLAS_STATUS_INVALID_VALUE: - return "[cublas status]: invalid value"; - case CUBLAS_STATUS_ARCH_MISMATCH: - return "[cublas status]: arch mismatch"; - case CUBLAS_STATUS_MAPPING_ERROR: - return "[cublas status]: mapping error"; - case CUBLAS_STATUS_EXECUTION_FAILED: - return "[cublas status]: execution failed"; - case CUBLAS_STATUS_INTERNAL_ERROR: - return "[cublas status]: internal error"; - case CUBLAS_STATUS_SUCCESS: - return "[cublas status]: success"; - default: - return "[cublas status]: unknown error"; - } -} - -/** - * Check build-in cublas function using glog and it also - * support << operator for more details error info. - */ -cublasStatus_t g_cublasStat; -#define CHECK_CUBLAS(cublas_func) \ - g_cublasStat = cublas_func; \ - CHECK_EQ(CUBLAS_STATUS_SUCCESS, g_cublasStat) \ - << "Cublas Error: " << hl_cublas_get_error_string(g_cublasStat) << " " - -void hl_cublas_init(cublasHandle_t *cublas_handle, cudaStream_t stream) { - CHECK_CUBLAS(dynload::cublasCreate(cublas_handle)) - << "[cublas init] Cublas create handle faild!"; - - CHECK_CUBLAS(dynload::cublasSetStream(*cublas_handle, stream)) - << "[cublas init] Cublas set stream faild!"; -} - -void hl_matrix_transpose( - real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc) { - real alpha = 1.0; - real beta = 0.0; - - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - CHECK_CUBLAS(CUBLAS_GEAM(t_resource.handle, - CUBLAS_OP_T, - CUBLAS_OP_N, - dimM, - dimN, - &alpha, - A_d, - lda, - &beta, - nullptr, - dimM, - C_d, - ldc)); - CHECK_SYNC("hl_matrix_transpose failed"); -} - -void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN) { - hl_matrix_transpose(A_d, C_d, dimM, dimN, dimN, dimM); -} - -void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) { - /* Solve Ax = I */ - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - /* Step 1: Compute the LU decomposition of matrix A */ - real **inout_h = &A_d; - real **inout_d = (real **)hl_malloc_device(sizeof(real *)); - 
hl_memcpy(inout_d, inout_h, sizeof(real *)); - - int *pivot_d = (int *)hl_malloc_device(dimN * sizeof(int)); - int *info_d = (int *)t_resource.gpu_mem; - - /* Note: cublasSgetrfBatched is used to calculate a number of - small-sized matrices. There may be a better way to reconstruct - the API for better performance. - */ - CHECK_CUBLAS( - CUBLAS_GETRF(t_resource.handle, dimN, inout_d, lda, pivot_d, info_d, 1)); - - int info_h; - hl_memcpy(&info_h, info_d, sizeof(int)); - if (info_h != 0) { - LOG(FATAL) << "Factorization of matrix failed: matrix may be singular.\n"; - } - - /* Step 2: Compute the inverse of the matrix given its LU decomposition */ - real **out_h = &C_d; - real **out_d = (real **)hl_malloc_device(sizeof(real *)); - hl_memcpy(out_d, out_h, sizeof(real *)); - - CHECK_CUBLAS(CUBLAS_GETRI(t_resource.handle, - dimN, - (const real **)inout_d, - lda, - pivot_d, - out_d, - ldc, - info_d, - 1)); - - hl_memcpy(&info_h, info_d, sizeof(int)); - if (info_h != 0) { - LOG(FATAL) << "Inversion of matrix failed: matrix may be singular.\n"; - } - - hl_free_mem_device(inout_d); - hl_free_mem_device(pivot_d); - hl_free_mem_device(out_d); - - CHECK_SYNC("hl_matrix_inverse failed"); -} - -void hl_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta, - int lda, - int ldb, - int ldc) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - - if (dimN == 1 && dimM != 1 && dimK != 1 && transb == HPPL_OP_N) { - int m = (transa == HPPL_OP_N) ? dimM : dimK; - int n = (transa == HPPL_OP_N) ? dimK : dimM; - hl_matrix_mul_vector( - A_d, transa, B_d, C_d, m, n, alpha, beta, lda, ldb, ldc); - return; - } - - if (dimM == 1 && dimN != 1 && dimK != 1 && transa == HPPL_OP_N) { - int m = (transb == HPPL_OP_N) ? dimK : dimN; - int n = (transb == HPPL_OP_N) ? dimN : dimK; - hl_trans_op_t trans = (transb == HPPL_OP_N) ? 
HPPL_OP_T : HPPL_OP_N; - hl_matrix_mul_vector(B_d, trans, A_d, C_d, m, n, alpha, beta, ldb, 1, 1); - return; - } - - cublasStatus_t stat; - if ((HPPL_OP_N == transa) && (HPPL_OP_N == transb)) { - stat = CUBLAS_GEMM(t_resource.handle, - CUBLAS_OP_N, - CUBLAS_OP_N, - dimN, - dimM, - dimK, - &alpha, - B_d, - ldb, - A_d, - lda, - &beta, - C_d, - ldc); - } else if ((HPPL_OP_T == transa) && (HPPL_OP_N == transb)) { - stat = CUBLAS_GEMM(t_resource.handle, - CUBLAS_OP_N, - CUBLAS_OP_T, - dimN, - dimM, - dimK, - &alpha, - B_d, - ldb, - A_d, - lda, - &beta, - C_d, - ldc); - } else if ((HPPL_OP_N == transa) && (HPPL_OP_T == transb)) { - stat = CUBLAS_GEMM(t_resource.handle, - CUBLAS_OP_T, - CUBLAS_OP_N, - dimN, - dimM, - dimK, - &alpha, - B_d, - ldb, - A_d, - lda, - &beta, - C_d, - ldc); - } else { - LOG(FATAL) << "parameter transa error!"; - } - CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat); - CHECK_SYNC("hl_matrix_mul failed"); -} - -void hl_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - int lda = (HPPL_OP_N == transa) ? dimK : dimM; - int ldb = (HPPL_OP_N == transb) ? 
dimN : dimK; - int ldc = dimN; - - hl_matrix_mul(A_d, - transa, - B_d, - transb, - C_d, - dimM, - dimN, - dimK, - alpha, - beta, - lda, - ldb, - ldc); -} - -void hl_matrix_mul_vector(real *A_d, - hl_trans_op_t trans, - real *B_d, - real *C_d, - int dimM, - int dimN, - real alpha, - real beta, - int lda, - int incb, - int incc) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - - cublasStatus_t stat; - if (HPPL_OP_N == trans) { - stat = CUBLAS_GEMV(t_resource.handle, - CUBLAS_OP_T, - dimN, - dimM, - &alpha, - A_d, - lda, - B_d, - incb, - &beta, - C_d, - incc); - } else if (HPPL_OP_T == trans) { - stat = CUBLAS_GEMV(t_resource.handle, - CUBLAS_OP_N, - dimN, - dimM, - &alpha, - A_d, - lda, - B_d, - incb, - &beta, - C_d, - incc); - } else { - LOG(FATAL) << "parameter transa error!"; - } - - CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat); - CHECK_SYNC("hl_matrix_mul_vector"); -} - -void hl_matrix_mul_vector(real *A_d, - hl_trans_op_t trans, - real *B_d, - real *C_d, - int dimM, - int dimN, - real alpha, - real beta) { - hl_matrix_mul_vector( - A_d, trans, B_d, C_d, dimM, dimN, alpha, beta, dimN, 1, 1); -} diff --git a/paddle/legacy/cuda/src/hl_cuda_cudnn.cc b/paddle/legacy/cuda/src/hl_cuda_cudnn.cc deleted file mode 100644 index b0ac5aaac284cd939fc46be6a7320242312674ab..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_cudnn.cc +++ /dev/null @@ -1,1117 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "hl_cuda_cudnn.h" -#include -#include -#include "hl_cuda_cudnn.ph" -#include "hl_thread.ph" -#include "paddle/legacy/utils/DynamicLoader.h" -#include "paddle/legacy/utils/Logging.h" - -DEFINE_int32(cudnn_conv_workspace_limit_in_mb, - 4096, - "Specify cuDNN max workspace limit, in units MB, " - "4096MB=4GB by default."); - -namespace dynload { - -std::once_flag cudnn_dso_flag; -void* cudnn_dso_handle = nullptr; - -/** - * The following macro definition can generate structs - * (for each function) to dynamic load cudbnn routine - * via operator overloading: operator () - * - * note: default dynamic linked libs - **/ - -#ifdef PADDLE_USE_DSO - -#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using cudnn_func = decltype(__name(args...)) (*)(Args...); \ - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, &cudnn_dso_handle); \ - void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - } __name; /* struct DynLoad__##__name */ - -#else - -#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... 
args) -> decltype(__name(args...)) { \ - return __name(args...); \ - } \ - } __name; /* struct DynLoad__##__name */ - -#endif - -/** - * include all needed cudnn functions in HPPL - * different cudnn version has different interfaces - **/ -// clang-format off -#define CUDNN_DNN_ROUTINE_EACH(__macro) \ - __macro(cudnnSetTensor4dDescriptor) \ - __macro(cudnnSetTensor4dDescriptorEx) \ - __macro(cudnnGetConvolutionNdForwardOutputDim) \ - __macro(cudnnGetConvolutionForwardAlgorithm) \ - __macro(cudnnCreateTensorDescriptor) \ - __macro(cudnnDestroyTensorDescriptor) \ - __macro(cudnnCreateFilterDescriptor) \ - __macro(cudnnSetFilter4dDescriptor) \ - __macro(cudnnSetPooling2dDescriptor) \ - __macro(cudnnDestroyFilterDescriptor) \ - __macro(cudnnCreateConvolutionDescriptor) \ - __macro(cudnnCreatePoolingDescriptor) \ - __macro(cudnnDestroyPoolingDescriptor) \ - __macro(cudnnSetConvolution2dDescriptor) \ - __macro(cudnnDestroyConvolutionDescriptor) \ - __macro(cudnnCreate) \ - __macro(cudnnDestroy) \ - __macro(cudnnSetStream) \ - __macro(cudnnActivationForward) \ - __macro(cudnnConvolutionForward) \ - __macro(cudnnConvolutionBackwardBias) \ - __macro(cudnnGetConvolutionForwardWorkspaceSize) \ - __macro(cudnnTransformTensor) \ - __macro(cudnnPoolingForward) \ - __macro(cudnnPoolingBackward) \ - __macro(cudnnSoftmaxBackward) \ - __macro(cudnnSoftmaxForward) \ - __macro(cudnnGetVersion) \ - __macro(cudnnGetErrorString) -CUDNN_DNN_ROUTINE_EACH(DYNAMIC_LOAD_CUDNN_WRAP) - -#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \ - __macro(cudnnAddTensor) \ - __macro(cudnnConvolutionBackwardData) \ - __macro(cudnnConvolutionBackwardFilter) -CUDNN_DNN_ROUTINE_EACH_R2(DYNAMIC_LOAD_CUDNN_WRAP) - -// APIs available after R3: -#if CUDNN_VERSION >= 3000 -#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \ - __macro(cudnnGetConvolutionBackwardFilterWorkspaceSize) \ - __macro(cudnnGetConvolutionBackwardDataAlgorithm) \ - __macro(cudnnGetConvolutionBackwardFilterAlgorithm) \ - 
__macro(cudnnGetConvolutionBackwardDataWorkspaceSize) -CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP) -#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R3 -#endif - - -// APIs available after R4: -#if CUDNN_VERSION >= 4007 -#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \ - __macro(cudnnBatchNormalizationForwardTraining) \ - __macro(cudnnBatchNormalizationForwardInference) \ - __macro(cudnnBatchNormalizationBackward) -CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DYNAMIC_LOAD_CUDNN_WRAP) -#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R4 -#endif - -// APIs in R5 -#if CUDNN_VERSION >= 5000 -#define CUDNN_DNN_ROUTINE_EACH_R5(__macro) \ - __macro(cudnnCreateActivationDescriptor) \ - __macro(cudnnSetActivationDescriptor) \ - __macro(cudnnGetActivationDescriptor) \ - __macro(cudnnDestroyActivationDescriptor) -CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP) -#undef CUDNN_DNN_ROUTINE_EACH_R5 -#endif - -#undef CUDNN_DNN_ROUTINE_EACH -// clang-format on -} /* namespace dynload */ - -/** - * Check build-in cudnn function using glog and it **does not** - * support << operator for more details error info. - */ -#define CHECK_CUDNN(cudnnFunc) \ - do { \ - cudnnStatus_t cudnnStat = cudnnFunc; \ - CHECK_EQ(CUDNN_STATUS_SUCCESS, cudnnStat) \ - << "Cudnn Error: " << dynload::cudnnGetErrorString(cudnnStat); \ - } while (0) - -bool g_is_libcudnn_init = false; -int g_cudnn_lib_version = 0; - -void hl_cudnn_desc_init(cudnnTensorDescriptor_t* cudnn_desc) { - CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(cudnn_desc)); -} - -void hl_cudnn_init(cudnnHandle_t* cudnn_handle, cudaStream_t stream) { - size_t cudnn_dso_ver = dynload::cudnnGetVersion(); - size_t cudnn_dso_major = cudnn_dso_ver / 1000; - size_t cudnn_cuh_major = CUDNN_VERSION / 1000; - - // Compare cudnn header version with that of cudnn.so. 
- CHECK((cudnn_cuh_major < 4 && cudnn_dso_major < 4) || - (cudnn_cuh_major == cudnn_dso_major)) - << "[cudnn init] libcudnn v" << cudnn_dso_major << " with header v" - << cudnn_cuh_major << " unmatched!\n" - << "PaddlePaddle Requirement: " - << "(header v[2-3] with libcudnn v[2-3]) Or " - << "(header v4 with libcudnn v4) Or " - << "(header v5 with libcudnn v5) Or" - << "(header v6 with libcudnn v6)."; - - CHECK(!(CUDNN_VERSION < 6000 && CUDNN_VERSION >= 5000 && CUDA_VERSION < 7050)) - << "cudnn v5 requires cuda version >= 7.5"; - - CHECK(!(CUDNN_VERSION >= 6000 && CUDA_VERSION < 8000)) - << "cudnn v6 requires cuda version >= 8.0"; - - CHECK_CUDNN(dynload::cudnnCreate(cudnn_handle)); - CHECK_CUDNN(dynload::cudnnSetStream(*cudnn_handle, stream)); - - g_is_libcudnn_init = true; - g_cudnn_lib_version = cudnn_dso_ver; -} - -int hl_get_cudnn_lib_version() { return g_cudnn_lib_version; } - -void hl_conv_workspace(hl_tensor_descriptor input, - hl_tensor_descriptor output, - hl_filter_descriptor filter, - hl_convolution_descriptor conv, - int* convFwdAlgo, - size_t* fwdLimitBytes, - int* convBwdDataAlgo, - size_t* bwdDataLimitBytes, - int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes, - bool useDilation) { -#if CUDNN_VERSION >= 4000 - - CHECK_NOTNULL(input); - CHECK_NOTNULL(output); - CHECK_NOTNULL(filter); - CHECK_NOTNULL(conv); - - // Specify workspace limit directly - size_t memoryLimitBytes = - (1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb; - - // For dilation - int algo = 0; - - // cudnn convolution forward configuration - cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter); - cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); - // cudnn convolution backward data configuration - cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter); - 
cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnConvolutionDescriptor_t bwd_data_conv_desc = - GET_CONVOLUTION_DESCRIPTOR(conv); - // cudnn convolution backward filter configuration - cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnConvolutionDescriptor_t bwd_filter_conv_desc = - GET_CONVOLUTION_DESCRIPTOR(conv); - cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter); - - if (useDilation) { - convFwdAlgo = &algo; - convBwdDataAlgo = &algo; - convBwdFilterAlgo = &algo; - } else { - CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm( - t_resource.cudnn_handle, - fwd_src_desc, - fwd_filter_desc, - fwd_conv_desc, - fwd_dest_desc, - CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convFwdAlgo))); - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm( - t_resource.cudnn_handle, - bwd_data_filter_desc, - bwd_data_diff_desc, - bwd_data_conv_desc, - bwd_data_grad_desc, - CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convBwdDataAlgo))); - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm( - t_resource.cudnn_handle, - bwd_filter_src_desc, - bwd_filter_diff_desc, - bwd_filter_conv_desc, - bwd_filter_grad_desc, - CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convBwdFilterAlgo))); - } - - CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize( - t_resource.cudnn_handle, - fwd_src_desc, - fwd_filter_desc, - fwd_conv_desc, - fwd_dest_desc, - static_cast(*convFwdAlgo), - fwdLimitBytes)); - - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( - t_resource.cudnn_handle, - bwd_data_filter_desc, - bwd_data_diff_desc, - bwd_data_conv_desc, - bwd_data_grad_desc, - 
static_cast(*convBwdDataAlgo), - bwdDataLimitBytes)); - - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( - t_resource.cudnn_handle, - bwd_filter_src_desc, - bwd_filter_diff_desc, - bwd_filter_conv_desc, - bwd_filter_grad_desc, - static_cast(*convBwdFilterAlgo), - bwdFilterLimitBytes)); - -#endif -} - -void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc, - int batch_size, - int feature_maps, - int height, - int width) { - CHECK_NOTNULL(image_desc); - - cudnn_tensor_descriptor hl_desc = - (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor)); - CHECK_NOTNULL(hl_desc); - -#ifndef PADDLE_TYPE_DOUBLE - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; -#else - cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; -#endif - CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(&hl_desc->desc)); - - CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(hl_desc->desc, - CUDNN_TENSOR_NCHW, - data_type, - batch_size, - feature_maps, - height, - width)); - - hl_desc->format = CUDNN_TENSOR_NCHW; - hl_desc->data_type = data_type; - hl_desc->batch_size = batch_size; - hl_desc->feature_maps = feature_maps; - hl_desc->height = height; - hl_desc->width = width; - - *image_desc = (hl_tensor_descriptor)hl_desc; -} - -void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc) { - CHECK_NOTNULL(image_desc); - - cudnn_tensor_descriptor hl_desc = - (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor)); - CHECK_NOTNULL(hl_desc); - -#ifndef PADDLE_TYPE_DOUBLE - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; -#else - cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; -#endif - CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(&hl_desc->desc)); - - hl_desc->data_type = data_type; - - *image_desc = (hl_tensor_descriptor)hl_desc; -} - -void hl_tensor_reshape(hl_tensor_descriptor image_desc, - int batch_size, - int feature_maps, - int height, - int width) { - const int stride_w = 1; - const int stride_h = width * stride_w; - const int stride_c = height * 
stride_h; - const int stride_n = feature_maps * stride_c; - return hl_tensor_reshape(image_desc, - batch_size, - feature_maps, - height, - width, - stride_n, - stride_c, - stride_h, - stride_w); -} - -void hl_tensor_reshape(hl_tensor_descriptor image_desc, - int batch_size, - int feature_maps, - int height, - int width, - int nStride, - int cStride, - int hStride, - int wStride) { - CHECK_NOTNULL(image_desc); - - cudnn_tensor_descriptor hl_desc = (cudnn_tensor_descriptor)image_desc; - CHECK_NOTNULL(hl_desc->desc); - - CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptorEx(hl_desc->desc, - hl_desc->data_type, - batch_size, - feature_maps, - height, - width, - nStride, - cStride, - hStride, - wStride)); - - hl_desc->batch_size = batch_size; - hl_desc->feature_maps = feature_maps; - hl_desc->height = height; - hl_desc->width = width; -} - -void hl_destroy_tensor_descriptor(hl_tensor_descriptor image_desc) { - CHECK_NOTNULL(image_desc); - - cudnn_tensor_descriptor hl_desc = (cudnn_tensor_descriptor)image_desc; - CHECK_NOTNULL(hl_desc->desc); - - CHECK_CUDNN(dynload::cudnnDestroyTensorDescriptor(hl_desc->desc)); - - hl_desc->desc = NULL; - - free(image_desc); -} - -void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc, - hl_pooling_mode_t mode, - int height, - int width, - int height_padding, - int width_padding, - int stride_height, - int stride_width) { - cudnnPoolingMode_t cudnn_mode; - switch (mode) { - case HL_POOLING_MAX: - cudnn_mode = CUDNN_POOLING_MAX; - break; - case HL_POOLING_AVERAGE: - cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; - break; - case HL_POOLING_AVERAGE_INCLUDE_PADDING: - cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; - break; - default: - LOG(FATAL) << "parameter mode error"; - } - - CHECK_NOTNULL(pooling_desc); - - cudnn_pooling_descriptor hl_pooling_desc = - (cudnn_pooling_descriptor)malloc(sizeof(_cudnn_pooling_descriptor)); - CHECK_NOTNULL(hl_pooling_desc); - - 
CHECK_CUDNN(dynload::cudnnCreatePoolingDescriptor(&hl_pooling_desc->desc)); - - CHECK_CUDNN(dynload::cudnnSetPooling2dDescriptor(hl_pooling_desc->desc, - cudnn_mode, -#if CUDNN_VERSION >= 5000 - CUDNN_PROPAGATE_NAN, -#endif - height, - width, - height_padding, - width_padding, - stride_height, - stride_width)); - - hl_pooling_desc->mode = cudnn_mode; - hl_pooling_desc->window_height = height; - hl_pooling_desc->window_width = width; - hl_pooling_desc->stride_height = stride_height; - hl_pooling_desc->stride_width = stride_width; - - *pooling_desc = (hl_pooling_descriptor)hl_pooling_desc; -} - -void hl_destroy_pooling_descriptor(hl_pooling_descriptor pooling_desc) { - CHECK_NOTNULL(pooling_desc); - - cudnn_pooling_descriptor hl_pooling = (cudnn_pooling_descriptor)pooling_desc; - - CHECK_NOTNULL(hl_pooling->desc); - CHECK_CUDNN(dynload::cudnnDestroyPoolingDescriptor(hl_pooling->desc)); - - hl_pooling->desc = NULL; - - free(pooling_desc); -} - -void hl_pooling_forward(hl_tensor_descriptor input, - real* input_image, - hl_tensor_descriptor output, - real* output_image, - hl_pooling_descriptor pooling) { - cudnnPoolingDescriptor_t pooling_desc; - cudnnTensorDescriptor_t input_desc; - cudnnTensorDescriptor_t output_desc; - - CHECK_NOTNULL(input); - CHECK_NOTNULL(output); - CHECK_NOTNULL(pooling); - CHECK_NOTNULL(input_image); - CHECK_NOTNULL(output_image); - - real alpha = 1.0f; - real beta = 1.0f; - input_desc = ((cudnn_tensor_descriptor)input)->desc; - output_desc = ((cudnn_tensor_descriptor)output)->desc; - pooling_desc = ((cudnn_pooling_descriptor)pooling)->desc; - CHECK_CUDNN(dynload::cudnnPoolingForward(t_resource.cudnn_handle, - pooling_desc, - &alpha, - input_desc, - input_image, - &beta, - output_desc, - output_image)); - CHECK_SYNC("hl_pooling_forward failed"); -} - -void hl_pooling_backward(hl_tensor_descriptor input, - real* input_image, - real* input_image_grad, - hl_tensor_descriptor output, - real* output_image, - real* output_image_grad, - 
hl_pooling_descriptor pooling) { - cudnnPoolingDescriptor_t pooling_desc; - cudnnTensorDescriptor_t input_desc; - cudnnTensorDescriptor_t output_desc; - - CHECK_NOTNULL(input); - CHECK_NOTNULL(output); - CHECK_NOTNULL(pooling); - CHECK_NOTNULL(input_image); - CHECK_NOTNULL(input_image_grad); - CHECK_NOTNULL(output_image); - CHECK_NOTNULL(output_image_grad); - - real alpha = 1.0f; - real beta = 1.0f; - input_desc = ((cudnn_tensor_descriptor)input)->desc; - output_desc = ((cudnn_tensor_descriptor)output)->desc; - pooling_desc = ((cudnn_pooling_descriptor)pooling)->desc; - CHECK_CUDNN(dynload::cudnnPoolingBackward(t_resource.cudnn_handle, - pooling_desc, - &alpha, - output_desc, - output_image, - output_desc, - output_image_grad, - input_desc, - input_image, - &beta, - input_desc, - input_image_grad)); - CHECK_SYNC("hl_pooling_backward failed"); -} - -void hl_create_filter_descriptor(hl_filter_descriptor* filter, - int input_feature_maps, - int output_feature_maps, - int height, - int width) { - CHECK_NOTNULL(filter); - - cudnn_filter_descriptor hl_filter = - (cudnn_filter_descriptor)malloc(sizeof(_cudnn_filter_descriptor)); - CHECK_NOTNULL(hl_filter); - - CHECK_CUDNN(dynload::cudnnCreateFilterDescriptor(&hl_filter->desc)); - -#ifndef PADDLE_TYPE_DOUBLE - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; -#else - cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; -#endif - CHECK_CUDNN(dynload::cudnnSetFilter4dDescriptor(hl_filter->desc, - data_type, -#if CUDNN_VERSION >= 5000 - CUDNN_TENSOR_NCHW, -#endif - output_feature_maps, - input_feature_maps, - height, - width)); - - hl_filter->data_type = data_type; - hl_filter->output_feature_maps = output_feature_maps; - hl_filter->input_feature_maps = input_feature_maps; - hl_filter->filter_height = height; - hl_filter->filter_width = width; - - *filter = (hl_filter_descriptor)hl_filter; -} - -void hl_destroy_filter_descriptor(hl_filter_descriptor filter) { - CHECK_NOTNULL(filter); - - cudnn_filter_descriptor hl_filter = 
(cudnn_filter_descriptor)filter; - CHECK_NOTNULL(hl_filter->desc); - - CHECK_CUDNN(dynload::cudnnDestroyFilterDescriptor(hl_filter->desc)); - - hl_filter->desc = NULL; - - free(filter); -} - -void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, - hl_tensor_descriptor image, - hl_filter_descriptor filter, - int padding_height, - int padding_width, - int stride_height, - int stride_width, - int dilation_h, - int dilation_w) { - CHECK_NOTNULL(conv); - - cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)malloc( - sizeof(_cudnn_convolution_descriptor)); - - CHECK_NOTNULL(hl_conv); - CHECK_CUDNN(dynload::cudnnCreateConvolutionDescriptor(&hl_conv->desc)); - - cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION; - -#if CUDNN_VERSION >= 6000 -#ifndef PADDLE_TYPE_DOUBLE - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; -#else - cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; -#endif - CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc, - padding_height, - padding_width, - stride_height, - stride_width, - dilation_h, - dilation_w, - mode, - data_type)); -#else - if (dilation_h > 1 || dilation_w > 1) { - LOG(FATAL) - << "Current cuDNN version does't support for dilation convolution. 
" - << "The dilation convolution requires cuDNN >= v6.0."; - } - - CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc, - padding_height, - padding_width, - stride_height, - stride_width, - dilation_h, - dilation_w, - mode)); -#endif - - hl_conv->input_image = image; - hl_conv->filter = filter; - hl_conv->padding_height = padding_height; - hl_conv->padding_width = padding_width; - hl_conv->stride_height = stride_height; - hl_conv->stride_width = stride_width; - hl_conv->upscalex = 1; - hl_conv->upscaley = 1; - hl_conv->mode = mode; - - *conv = (hl_convolution_descriptor)hl_conv; -} - -void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, - hl_tensor_descriptor image, - hl_filter_descriptor filter, - int padding_height, - int padding_width, - int stride_height, - int stride_width, - int dilation_h, - int dilation_w) { - CHECK_NOTNULL(conv); - CHECK_NOTNULL(image); - CHECK_NOTNULL(filter); - - cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); - cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION; - -#if CUDNN_VERSION >= 6000 -#ifndef PADDLE_TYPE_DOUBLE - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; -#else - cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; -#endif - CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(conv_desc, - padding_height, - padding_width, - stride_height, - stride_width, - dilation_h, - dilation_w, - mode, - data_type)); -#else - CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(conv_desc, - padding_height, - padding_width, - stride_height, - stride_width, - dilation_h, - dilation_w, - mode)); -#endif - - cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)conv; - hl_conv->input_image = image; - hl_conv->filter = filter; - hl_conv->padding_height = padding_height; - hl_conv->padding_width = padding_width; - hl_conv->stride_height = stride_height; - hl_conv->stride_width = stride_width; - hl_conv->upscalex = 1; - hl_conv->upscaley = 1; - hl_conv->mode = mode; -} - -void 
hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) { - CHECK_NOTNULL(conv); - - cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)conv; - CHECK_NOTNULL(hl_conv->desc); - - CHECK_CUDNN(dynload::cudnnDestroyConvolutionDescriptor(hl_conv->desc)); - hl_conv->desc = NULL; - - free(conv); -} - -void hl_convolution_forward(hl_tensor_descriptor input, - real* input_data, - hl_tensor_descriptor output, - real* output_data, - hl_filter_descriptor filter, - real* filter_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convFwdAlgo) { - CHECK_NOTNULL(input); - CHECK_NOTNULL(output); - CHECK_NOTNULL(filter); - CHECK_NOTNULL(conv); - CHECK_NOTNULL(input_data); - CHECK_NOTNULL(output_data); - CHECK_NOTNULL(filter_data); - cudnnTensorDescriptor_t src_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnTensorDescriptor_t dest_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnFilterDescriptor_t filter_desc = GET_FILTER_DESCRIPTOR(filter); - cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); - real alpha = 1.0f; - real beta = 1.0f; - CHECK_CUDNN(dynload::cudnnConvolutionForward( - t_resource.cudnn_handle, - &alpha, - src_desc, - input_data, - filter_desc, - filter_data, - conv_desc, - static_cast(convFwdAlgo), - gpuWorkSpace, - sizeInBytes, - &beta, - dest_desc, - output_data)); - CHECK_SYNC("hl_convolution_forward failed"); -} - -void hl_convolution_forward_add_bias(hl_tensor_descriptor bias, - real* bias_data, - hl_tensor_descriptor output, - real* output_data) { - CHECK_NOTNULL(bias); - CHECK_NOTNULL(output); - CHECK_NOTNULL(bias_data); - CHECK_NOTNULL(output_data); - - cudnnTensorDescriptor_t output_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnTensorDescriptor_t bias_desc = GET_TENSOR_DESCRIPTOR(bias); - real alpha = 1.0f; - real beta = 1.0f; - - CHECK_CUDNN(dynload::cudnnAddTensor(t_resource.cudnn_handle, -#if CUDNN_VERSION < 4000 - CUDNN_ADD_SAME_C, -#endif - &alpha, - bias_desc, - bias_data, - 
&beta, - output_desc, - output_data)); - CHECK_SYNC("hl_convolution_forward_add_bias failed"); -} - -void hl_convolution_backward_bias(hl_tensor_descriptor bias, - real* bias_grad_data, - hl_tensor_descriptor output, - real* output_grad_data) { - CHECK_NOTNULL(bias); - CHECK_NOTNULL(output); - CHECK_NOTNULL(bias_grad_data); - CHECK_NOTNULL(output_grad_data); - - real alpha = 1.0f; - real beta = 1.0f; - cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnTensorDescriptor_t bias_desc = GET_TENSOR_DESCRIPTOR(bias); - CHECK_CUDNN(dynload::cudnnConvolutionBackwardBias(t_resource.cudnn_handle, - &alpha, - diff_desc, - output_grad_data, - &beta, - bias_desc, - bias_grad_data)); - CHECK_SYNC("hl_convolution_backward_bias failed"); -} - -void hl_convolution_backward_filter(hl_tensor_descriptor input, - real* input_data, - hl_tensor_descriptor output, - real* output_grad_data, - hl_filter_descriptor filter, - real* filter_grad_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convBwdFilterAlgo) { - CHECK_NOTNULL(input); - CHECK_NOTNULL(output); - CHECK_NOTNULL(filter); - CHECK_NOTNULL(conv); - CHECK_NOTNULL(input_data); - CHECK_NOTNULL(output_grad_data); - CHECK_NOTNULL(filter_grad_data); - - real alpha = 1.0f; - real beta = 1.0f; - cudnnTensorDescriptor_t src_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); - cudnnFilterDescriptor_t grad_desc = GET_FILTER_DESCRIPTOR(filter); - - CHECK_CUDNN(dynload::cudnnConvolutionBackwardFilter( - t_resource.cudnn_handle, - &alpha, - src_desc, - input_data, - diff_desc, - output_grad_data, - conv_desc, -#if CUDNN_VERSION >= 4000 - static_cast(convBwdFilterAlgo), - gpuWorkSpace, - sizeInBytes, -#endif - &beta, - grad_desc, - filter_grad_data)); - CHECK_SYNC("hl_convolution_backward_filter failed"); -} - -void 
hl_convolution_backward_data(hl_tensor_descriptor input, - real* input_data_grad, - hl_tensor_descriptor output, - real* output_grad_data, - hl_filter_descriptor filter, - real* filter_data, - hl_convolution_descriptor conv, - void* gpuWorkSpace, - size_t sizeInBytes, - int convBwdDataAlgo) { - real alpha = 1.0f; - real beta = 1.0f; - cudnnFilterDescriptor_t filter_desc = GET_FILTER_DESCRIPTOR(filter); - cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnTensorDescriptor_t grad_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); - - CHECK_CUDNN(dynload::cudnnConvolutionBackwardData( - t_resource.cudnn_handle, - &alpha, - filter_desc, - filter_data, - diff_desc, - output_grad_data, - conv_desc, -#if CUDNN_VERSION >= 4000 - static_cast(convBwdDataAlgo), - gpuWorkSpace, - sizeInBytes, -#endif - &beta, - grad_desc, - input_data_grad)); - CHECK_SYNC("hl_convolution_backward_data failed"); -} - -void hl_softmax_forward(real* input, real* output, int height, int width) { -#ifndef PADDLE_TYPE_DOUBLE - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; -#else - cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; -#endif - CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(t_resource.cudnn_desc, - CUDNN_TENSOR_NCHW, - data_type, - height, - width, - 1, - 1)); - - real alpha = 1.0f; - real beta = 0.0f; - CHECK_CUDNN(dynload::cudnnSoftmaxForward(t_resource.cudnn_handle, - CUDNN_SOFTMAX_ACCURATE, - CUDNN_SOFTMAX_MODE_CHANNEL, - &alpha, - t_resource.cudnn_desc, - input, - &beta, - t_resource.cudnn_desc, - output)); - CHECK_SYNC("hl_softmax_forward failed"); -} - -void hl_softmax_backward(real* output_value, - real* output_grad, - int height, - int width) { -#ifndef PADDLE_TYPE_DOUBLE - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; -#else - cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; -#endif - CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(t_resource.cudnn_desc, - CUDNN_TENSOR_NCHW, - data_type, - height, 
- width, - 1, - 1)); - - real alpha = 1.0f; - real beta = 0.0f; - CHECK_CUDNN(dynload::cudnnSoftmaxBackward(t_resource.cudnn_handle, - CUDNN_SOFTMAX_ACCURATE, - CUDNN_SOFTMAX_MODE_CHANNEL, - &alpha, - t_resource.cudnn_desc, - output_value, - t_resource.cudnn_desc, - output_grad, - &beta, - t_resource.cudnn_desc, - output_grad)); - CHECK_SYNC("hl_softmax_backward failed"); -} - -void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outputDesc, - real* output, - hl_tensor_descriptor bnParamDesc, - real* scale, - real* bias, - double factor, - real* runningMean, - real* runningInvVar, - double epsilon, - real* savedMean, - real* savedVar) { -#if CUDNN_VERSION >= 4007 - if ((NULL != runningMean && NULL == runningInvVar) || - (NULL == runningMean && NULL != runningInvVar)) { - LOG(FATAL) << "runningMean and runningInvVar can be NULL " - << "but only at the same time."; - } - if ((NULL != savedMean && NULL == savedVar) || - (NULL == savedMean && NULL != savedVar)) { - LOG(FATAL) << "savedMean and savedVar can be NULL " - << "but only at the same time."; - } - - cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc); - cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc); - cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc); - real alpha = 1.0f; - real beta = 1.0f; - cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; - CHECK_CUDNN( - dynload::cudnnBatchNormalizationForwardTraining(t_resource.cudnn_handle, - mode, - &alpha, - &beta, - xDesc, - input, - yDesc, - output, - bnDesc, - scale, - bias, - factor, - runningMean, - runningInvVar, - epsilon, - savedMean, - savedVar)); - - CHECK_SYNC("hl_batch_norm_forward_training failed"); -#else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. 
" - << "But cudnn lib version is " << g_cudnn_lib_version; -#endif -} - -void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outputDesc, - real* output, - hl_tensor_descriptor bnParamDesc, - real* scale, - real* bias, - real* estimatedMean, - real* estimatedInvVar, - double epsilon) { -#if CUDNN_VERSION >= 4007 - cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc); - cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc); - cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc); - real alpha = 1.0f; - real beta = 1.0f; - cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; - - CHECK_CUDNN( - dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle, - mode, - &alpha, - &beta, - xDesc, - input, - yDesc, - output, - bnDesc, - scale, - bias, - estimatedMean, - estimatedInvVar, - epsilon)); - - CHECK_SYNC("hl_batch_norm_forward_inference failed"); -#else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. 
" - << "But cudnn lib version is " << g_cudnn_lib_version; -#endif -} - -void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, - real* input, - hl_tensor_descriptor outGradDesc, - real* outGrad, - hl_tensor_descriptor inGradDesc, - real* inGrad, - hl_tensor_descriptor dBnParamDesc, - real* scale, - real* scaleGrad, - real* biasGrad, - double epsilon, - real* savedMean, - real* savedInvVar) { -#if CUDNN_VERSION >= 4007 - if ((NULL != savedMean && NULL == savedInvVar) || - (NULL == savedMean && NULL != savedInvVar)) { - LOG(FATAL) << "savedMean and savedVar can be NULL " - << "but only at the same time."; - } - - cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc); - cudnnTensorDescriptor_t dyDesc = GET_TENSOR_DESCRIPTOR(outGradDesc); - cudnnTensorDescriptor_t dxDesc = GET_TENSOR_DESCRIPTOR(inGradDesc); - cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(dBnParamDesc); - real alpha = 1.0f; - real beta = 1.0f; - cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; - CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward(t_resource.cudnn_handle, - mode, - &alpha, - &beta, - &alpha, - &beta, - xDesc, - input, - dyDesc, - outGrad, - dxDesc, - inGrad, - bnDesc, - scale, - scaleGrad, - biasGrad, - epsilon, - savedMean, - savedInvVar)); - - CHECK_SYNC("hl_batch_norm_backward failed"); -#else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. " - << "But cudnn lib version is " << g_cudnn_lib_version; -#endif -} diff --git a/paddle/legacy/cuda/src/hl_cuda_device.cc b/paddle/legacy/cuda/src/hl_cuda_device.cc deleted file mode 100644 index 92197afb3d47e89c371fcd8b0c65051a3ce25cf7..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_device.cc +++ /dev/null @@ -1,681 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -// clang-format off -// Because clang-format 4.X and clang-format 3.8+ format -// following lines in different. So disable clang-format. -#include "hl_cuda.h" -#include -#include -#include -#include -#include -#include "hl_cuda.ph" -#include "hl_thread.ph" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/DynamicLoader.h" -// clang-format on - -namespace dynload { - -std::once_flag curand_dso_flag; -void *curand_dso_handle = nullptr; - -/** - * The following macro definition can generate structs - * (for each function) to dynamic load curand routine - * via operator overloading. - * - * note: default dynamic linked libs - */ -#ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - curandStatus_t operator()(Args... args) { \ - typedef curandStatus_t (*curandFunc)(Args...); \ - std::call_once(curand_dso_flag, GetCurandDsoHandle, &curand_dso_handle); \ - void *p_##__name = dlsym(curand_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - } __name; /* struct DynLoad__##__name */ -#else -#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - curandStatus_t operator()(Args... 
args) { \ - return __name(args...); \ - } \ - } __name; /* struct DynLoad__##__name */ -#endif - -/* include all needed curand functions in HPPL */ -// clang-format off -#define CURAND_RAND_ROUTINE_EACH(__macro) \ - __macro(curandCreateGenerator) \ - __macro(curandSetStream) \ - __macro(curandSetPseudoRandomGeneratorSeed)\ - __macro(curandGenerateUniform) \ - __macro(curandGenerateUniformDouble) -// clang-format on - -CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP) - -#undef CURAND_RAND_ROUTINE_EACH -#undef DYNAMIC_LOAD_CURAND_WRAP - -} /* namespace dynload */ - -/** - * @brief global resource. - */ -int g_system_device_num = 0; /* system device number */ -int device_num = 0; /* use device number */ -hl_device_prop *g_device; /* device info table */ -__thread thread_device_resources *t_device; /* device resources table */ -int g_cuda_lib_version = 0; - -/* number of global stream */ -#define NUMBER_OF_GLOBAL_STREAM (HPPL_THREAD_STREAM_1) -/* number of thread stream */ -#define NUMBER_OF_THREAD_STREAM (HPPL_STREAM_END - HPPL_THREAD_STREAM_1) -/* sizeof of device memory */ -#define HPPL_GPU_MEMORY_SIZE (256 * 4) - -/** - * Check build-in cuda function using glog and it **does not** - * support << operator for more details error info. - */ -#define CHECK_CUDA(cudaFunc) \ - do { \ - cudaError_t cudaStat = cudaFunc; \ - CHECK_EQ(cudaSuccess, cudaStat) << "Cuda Error: " \ - << cudaGetErrorString(cudaStat); \ - } while (0) - -/** - * @brief thread resource. 
- */ -__thread _hl_thread_resource t_resource = {{0}, /* stream */ - 0, /* handle */ - 0, /* gen */ - 0, /* cudnn_handle */ - 0, /* cudnn_desc */ - NULL, /* gen_mutex */ - NULL, /* gpu_mem */ - NULL, /* cpu_mem */ - 0, /* event */ - -1, /* device */ - 0, /* major */ - false}; /* is_init */ - -__thread cudaStream_t default_stream = 0; -__thread bool g_sync_flag = true; -bool hl_start_flag = false; - -inline pid_t gettid() { -#if defined(__APPLE__) || defined(__OSX__) - // syscall is deprecated: first deprecated in macOS 10.12. - // syscall is unsupported; - // syscall pid_t tid = syscall(SYS_thread_selfid); - uint64_t tid; - pthread_threadid_np(NULL, &tid); -#else -#ifndef _WIN32 -#ifndef __NR_gettid -#define __NR_gettid 224 -#endif - pid_t tid = syscall(__NR_gettid); -#else // _WIN32 - pid_t tid = _getpid(); -#endif // _WIN32 -#endif - CHECK_NE((int)tid, -1); - return tid; -} - -void hl_init(int device) { - CHECK(hl_start_flag) << "[Init failed] hl_start() did not succeed."; - - /* thread has been initialized */ - if (true == t_resource.is_init) { - hl_set_device(device); - return; - } - - /* create thread devcie resources */ - char *tmp; - thread_device_resources device_res; - tmp = (char *)malloc(g_system_device_num * sizeof(thread_device_resources *) + - device_num * sizeof(_thread_device_resources)); - CHECK_NOTNULL(tmp); - t_device = (thread_device_resources *)tmp; - device_res = (thread_device_resources)( - (char *)tmp + g_system_device_num * sizeof(thread_device_resources *)); - memset(t_device, 0, g_system_device_num * sizeof(thread_device_resources *)); - - char *tmp_stream = (char *)malloc(device_num * NUMBER_OF_THREAD_STREAM * - sizeof(cudaStream_t)); - CHECK_NOTNULL(tmp_stream); - - int num = 0; - for (int dev = 0; dev < g_system_device_num; dev++) { - if (!g_device[dev]) { - continue; - } - - t_device[dev] = &device_res[num]; - t_device[dev]->stream = - (cudaStream_t *)(tmp_stream + - num * NUMBER_OF_THREAD_STREAM * sizeof(cudaStream_t)); - - 
hl_create_thread_resources(dev, t_device[dev]); - num++; - } - - hl_cudnn_desc_init(&t_resource.cudnn_desc); - - /* thread initialization is complete */ - t_resource.is_init = true; - /* set device */ - t_resource.device = -1; - hl_set_device(device); -} - -void hl_fini() { - if (false == t_resource.is_init) { - return; - } - - /* hppl stream fini */ - t_resource.device = -1; - for (int i = NUMBER_OF_GLOBAL_STREAM; i < HPPL_STREAM_END; i++) { - t_resource.stream[i] = 0; - } - - char *tmp = (char *)t_device; - char *tmp_stream = NULL; - for (int dev = 0; dev < g_system_device_num; dev++) { - if (!t_device[dev]) { - continue; - } - if (!tmp_stream) { - tmp_stream = (char *)t_device[dev]->stream; - } - for (int j = 0; j < NUMBER_OF_THREAD_STREAM; j++) { - CHECK_CUDA(cudaStreamDestroy(t_device[dev]->stream[j])); - } - - /* free device memory */ - hl_free_mem_device(t_device[dev]->gpu_mem); - hl_free_mem_host(t_device[dev]->cpu_mem); - CHECK_CUDA(cudaEventDestroy(t_device[dev]->mem_event)); - } - - free(tmp); - free(tmp_stream); - t_resource.is_init = false; -} - -int hl_get_device_count() { return device_num; } - -void hl_set_device(int device) { - if (device == t_resource.device) { - return; - } - - CHECK(device >= 0 && device < g_system_device_num && g_device[device]) - << "Device: " << device << " is not specified in startup."; - - CHECK_CUDA(cudaSetDevice(device)); - - /* switch thread stream */ - for (int i = 0; i < NUMBER_OF_GLOBAL_STREAM; i++) { - t_resource.stream[i] = g_device[device]->device_resources->stream[i]; - } - - if (true == t_resource.is_init) { - for (int i = NUMBER_OF_GLOBAL_STREAM; i < HPPL_STREAM_END; i++) { - t_resource.stream[i] = - t_device[device]->stream[i - NUMBER_OF_GLOBAL_STREAM]; - } - t_resource.gpu_mem = t_device[device]->gpu_mem; - t_resource.cpu_mem = t_device[device]->cpu_mem; - t_resource.event = t_device[device]->mem_event; - } - - t_resource.handle = g_device[device]->device_resources->handle; - t_resource.gen = 
g_device[device]->device_resources->gen; - t_resource.cudnn_handle = g_device[device]->device_resources->cudnn_handle; - t_resource.gen_mutex = g_device[device]->device_resources->gen_mutex; - t_resource.device = device; - t_resource.major = g_device[device]->major; - default_stream = t_resource.stream[0]; -} - -int hl_get_device() { - int device; - CHECK_CUDA(cudaGetDevice(&device)); - return device; -} - -void *hl_malloc_device(size_t size) { - void *dest_d; - - CHECK(size) << __func__ << ": the size for device memory is 0, please check."; - CHECK_CUDA(cudaMalloc((void **)&dest_d, size)); - - return dest_d; -} - -void hl_free_mem_device(void *dest_d) { - CHECK_NOTNULL(dest_d); - - cudaError_t err = cudaFree(dest_d); - CHECK(cudaSuccess == err || cudaErrorCudartUnloading == err) - << hl_get_device_error_string(); -} - -void *hl_malloc_host(size_t size) { - void *dest_h; - - CHECK(size) << __func__ << ": the size for device memory is 0, please check."; - CHECK_CUDA(cudaHostAlloc((void **)&dest_h, size, cudaHostAllocDefault)); - - return dest_h; -} - -void hl_free_mem_host(void *dest_h) { - CHECK_NOTNULL(dest_h); - - cudaError_t err = cudaFreeHost(dest_h); - CHECK(cudaSuccess == err || cudaErrorCudartUnloading == err) - << hl_get_device_error_string(); -} - -void hl_memcpy(void *dst, void *src, size_t size) { - if (0 == size) { - return; - } - CHECK_NOTNULL(dst); - CHECK_NOTNULL(src); - CHECK_CUDA(cudaMemcpy(dst, src, size, cudaMemcpyDefault)); -} - -void hl_memset_device(void *dest_d, int value, size_t size) { - CHECK_CUDA(cudaMemset(dest_d, value, size)); -} - -void hl_memcpy_host2device(void *dest_d, void *src_h, size_t size) { - if (0 == size) { - return; - } - CHECK_NOTNULL(src_h); - CHECK_NOTNULL(dest_d); - CHECK_CUDA(cudaMemcpy(dest_d, src_h, size, cudaMemcpyHostToDevice)); -} - -void hl_memcpy_device2host(void *dest_h, void *src_d, size_t size) { - if (0 == size) { - return; - } - CHECK_NOTNULL(dest_h); - CHECK_NOTNULL(src_d); - CHECK_CUDA(cudaMemcpy(dest_h, 
src_d, size, cudaMemcpyDeviceToHost)); -} - -void hl_memcpy_device2device(void *dest_d, void *src_d, size_t size) { - if (0 == size) { - return; - } - CHECK_NOTNULL(dest_d); - CHECK_NOTNULL(src_d); - CHECK_CUDA(cudaMemcpy(dest_d, src_d, size, cudaMemcpyDeviceToDevice)); -} - -void hl_memcpy_async(void *dst, void *src, size_t size, hl_stream_t stream) { - cudaStream_t cu_stream; - - if (0 == size) { - return; - } - CHECK_NOTNULL(dst); - CHECK_NOTNULL(src); - CHECK_LT(stream, HPPL_STREAM_END); - cu_stream = t_resource.stream[stream]; - - CHECK_CUDA(cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault, cu_stream)); -} - -void hl_start() { - hl_specify_devices_start(NULL, 0); - /* set default device */ - hl_set_device(0); -} - -bool hl_device_can_access_peer(int device, int peerDevice) { - int canAccessPeer; - CHECK_CUDA(cudaDeviceCanAccessPeer(&canAccessPeer, device, peerDevice)); - - if (canAccessPeer == 1) { - return true; - } else { - return false; - } -} - -void hl_device_enable_peer_access(int peerDevice) { - cudaError_t err = cudaDeviceEnablePeerAccess(peerDevice, 0); - if (cudaErrorPeerAccessAlreadyEnabled == err) { - cudaGetLastError(); - } else { - CHECK_CUDA(err); - } -} - -void hl_create_global_resources(hl_device_prop device_prop) { - struct cudaDeviceProp cu_prop; - int device = device_prop->device; - global_device_resources device_res = device_prop->device_resources; - - CHECK_CUDA(cudaSetDevice(device)); - /* device properties */ - CHECK_CUDA(cudaGetDeviceProperties(&cu_prop, device)); - - device_prop->major = cu_prop.major; - device_prop->minor = cu_prop.minor; - strncpy(device_prop->device_name, cu_prop.name, 256); - device_prop->device_mem = cu_prop.totalGlobalMem; - - /* create device stream */ - for (int j = 0; j < NUMBER_OF_GLOBAL_STREAM; j++) { - CHECK_CUDA(cudaStreamCreate(&device_res->stream[j])); - } - - /* cublas init */ - hl_cublas_init(&device_res->handle, device_res->stream[0]); - - /* create curand gen */ - 
CHECK_EQ(dynload::curandCreateGenerator(&device_res->gen, - CURAND_RNG_PSEUDO_DEFAULT), - CURAND_STATUS_SUCCESS) - << "[Start failed] Curand init failed."; - - CHECK_EQ(dynload::curandSetStream(device_res->gen, device_res->stream[0]), - CURAND_STATUS_SUCCESS) - << "[Start failed] Curand set stream failed!"; - - /* create cudnn handle */ - hl_cudnn_init(&device_res->cudnn_handle, device_res->stream[0]); - - int seed = gettid(); - CHECK_EQ(dynload::curandSetPseudoRandomGeneratorSeed(device_res->gen, - seed + device), - CURAND_STATUS_SUCCESS); - - device_res->gen_mutex = (pthread_mutex_t *)(malloc(sizeof(pthread_mutex_t))); - pthread_mutex_init(device_res->gen_mutex, NULL); - - CHECK_CUDA(cudaRuntimeGetVersion(&g_cuda_lib_version)); -} - -int hl_get_cuda_version() { return g_cuda_lib_version; } - -void hl_create_thread_resources(int device, - thread_device_resources device_res) { - CHECK_CUDA(cudaSetDevice(device)); - - /* create thread stream */ - for (int j = 0; j < NUMBER_OF_THREAD_STREAM; j++) { - CHECK_CUDA(cudaStreamCreate(&device_res->stream[j])); - } - - /* allocation device memory */ - device_res->gpu_mem = (real *)hl_malloc_device(HPPL_GPU_MEMORY_SIZE); - - /* allocation host memory */ - device_res->cpu_mem = (real *)hl_malloc_host(HPPL_GPU_MEMORY_SIZE); - - CHECK_CUDA(cudaEventCreate(&device_res->mem_event)); -} - -void hl_specify_devices_start(int *device, int number) { - if (hl_start_flag) return; - - /* 1. get the number of devices */ - CHECK_CUDA(cudaGetDeviceCount(&g_system_device_num)); - CHECK_NE(g_system_device_num, 0) << "[Start failed] there is no GPU device"; - if (device == NULL) { - number = g_system_device_num; - } - - /* 2. check device & create device property table */ - CHECK_LE(number, g_system_device_num) - << "[Start failed] System does not have enough device. 
" - << "Device number: " << g_system_device_num << "Input number: " << number; - - char *tmp; - hl_device_prop device_prop; - tmp = (char *)malloc(g_system_device_num * sizeof(hl_device_prop *) + - number * sizeof(_hl_device_prop)); - CHECK(tmp) << "[Start failed] System memory is not enough."; - - g_device = (hl_device_prop *)tmp; - device_prop = (hl_device_prop)( - (char *)tmp + g_system_device_num * sizeof(hl_device_prop *)); - memset(g_device, 0, g_system_device_num * sizeof(hl_device_prop *)); - int num = 0; - for (int i = 0; i < number; i++) { - int dev; - if (device == NULL) { - dev = i; - } else { - dev = device[i]; - } - - CHECK_LT(dev, g_system_device_num) - << "[Start failed] The specified device number is " - << "out of range. Max device number: " << g_system_device_num - 1 - << " Specified devcie number: " << dev; - - if (g_device[dev]) { - /* Warning */ - LOG(WARNING) << "[Warning] Repeat specify device: " << dev; - continue; - } - - g_device[dev] = &device_prop[num]; - g_device[dev]->device = dev; - num++; - } - device_num = num; - - /* 3. 
create global device resources */ - char *tmp_res = (char *)malloc(device_num * sizeof(_global_device_resources)); - CHECK_NOTNULL(tmp_res); - - char *tmp_stream = (char *)malloc(device_num * NUMBER_OF_GLOBAL_STREAM * - sizeof(cudaStream_t)); - CHECK_NOTNULL(tmp_stream); - - num = 0; - for (int i = 0; i < g_system_device_num; i++) { - if (!g_device[i]) { - continue; - } - - g_device[i]->device_resources = (global_device_resources)( - tmp_res + num * sizeof(_global_device_resources)); - g_device[i]->device_resources->stream = - (cudaStream_t *)(tmp_stream + - num * NUMBER_OF_GLOBAL_STREAM * sizeof(cudaStream_t)); - - hl_create_global_resources(g_device[i]); - num++; - } - - /* hl_start() is ok */ - hl_start_flag = true; - /* set default device */ - if (device == NULL) { - hl_set_device(0); - } else { - hl_set_device(device[0]); - } -} - -void hl_rand(real *dest_d, size_t num) { - pthread_mutex_lock(t_resource.gen_mutex); - CHECK_EQ( -#ifndef PADDLE_TYPE_DOUBLE - dynload::curandGenerateUniform(t_resource.gen, dest_d, num), -#else - dynload::curandGenerateUniformDouble(t_resource.gen, dest_d, num), -#endif - CURAND_STATUS_SUCCESS); - pthread_mutex_unlock(t_resource.gen_mutex); - CHECK_SYNC("hl_rand failed"); -} - -void hl_srand(unsigned int seed) { - pthread_mutex_lock(t_resource.gen_mutex); - CHECK_EQ(dynload::curandSetPseudoRandomGeneratorSeed(t_resource.gen, seed), - CURAND_STATUS_SUCCESS); - pthread_mutex_unlock(t_resource.gen_mutex); -} - -void hl_set_sync_flag(bool flag) { g_sync_flag = flag; } - -bool hl_get_sync_flag() { return g_sync_flag; } - -void hl_stream_synchronize(hl_stream_t stream) { - cudaStream_t cu_stream; - - CHECK_LT(stream, HPPL_STREAM_END) << __func__ - << ": the parameter stream is error."; - - cu_stream = t_resource.stream[stream]; - CHECK_CUDA(cudaStreamSynchronize(cu_stream)); -} - -void hl_create_event(hl_event_t *event) { - CHECK_NOTNULL(event); - - struct _hl_event_st *st_event = - (struct _hl_event_st *)malloc(sizeof(struct 
_hl_event_st)); - - CHECK_CUDA(cudaEventCreate(&st_event->cu_event)); - - *event = st_event; -} - -float hl_event_elapsed_time(hl_event_t start, hl_event_t end) { - float time; - CHECK_NOTNULL(start); - CHECK_NOTNULL(end); - - CHECK_CUDA(cudaEventElapsedTime(&time, start->cu_event, end->cu_event)); - return time; -} - -void hl_stream_record_event(hl_stream_t stream, hl_event_t event) { - cudaStream_t cu_stream; - - CHECK_NOTNULL(event); - CHECK_LT(stream, HPPL_STREAM_END) << __func__ - << ": the parameter stream is error."; - - cu_stream = t_resource.stream[stream]; - CHECK_CUDA(cudaEventRecord(event->cu_event, cu_stream)); -} - -void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) { - cudaStream_t cu_stream; - - CHECK_NOTNULL(event); - CHECK_LT(stream, HPPL_STREAM_END) << __func__ - << ": the parameter stream is error."; - - cu_stream = t_resource.stream[stream]; - CHECK_CUDA(cudaStreamWaitEvent(cu_stream, event->cu_event, 0)); -} - -void hl_destroy_event(hl_event_t event) { - CHECK_NOTNULL(event); - CHECK_CUDA(cudaEventDestroy(event->cu_event)); - - free(event); - event = NULL; -} - -void hl_event_synchronize(hl_event_t event) { - CHECK_NOTNULL(event); - CHECK_CUDA(cudaEventSynchronize(event->cu_event)); -} - -void hl_get_device_name(char *name, int len, int device) { - CHECK_NOTNULL(name); - CHECK(device >= 0 && device < g_system_device_num && g_device[device]) - << "Device(" << device << ") is not specified in startup."; - - strncpy(name, g_device[device]->device_name, len); -} - -void hl_get_device_memory(size_t *mem_size, int device) { - CHECK_NOTNULL(mem_size); - CHECK(device >= 0 && device < g_system_device_num && g_device[device]) - << "Device(" << device << ") is not specified in startup."; - - *mem_size = g_device[device]->device_mem; -} - -void hl_get_device_compute_capability(int *major, int *minor, int device) { - CHECK_NOTNULL(major); - CHECK_NOTNULL(minor); - CHECK(device >= 0 && device < g_system_device_num && g_device[device]) - << 
"Device(" << device << ") is not specified in startup."; - - *major = g_device[device]->major; - *minor = g_device[device]->minor; -} - -int hl_get_device_last_error() { return (int)cudaGetLastError(); } - -const char *hl_get_device_error_string() { - cudaError_t err = cudaGetLastError(); - return cudaGetErrorString(err); -} - -const char *hl_get_device_error_string(size_t err) { - return cudaGetErrorString((cudaError_t)err); -} - -void hl_device_synchronize() { CHECK_CUDA(cudaDeviceSynchronize()); } -void hl_set_device_flags_block() { - CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync)); -} - -bool hl_cuda_event_is_ready(hl_event_t event) { - cudaError_t err = cudaEventQuery(event->cu_event); - CHECK(cudaSuccess == err || cudaErrorNotReady == err); - - if (cudaErrorNotReady == err) { - return false; - } - return true; -} - -void hl_profiler_start() { CHECK_CUDA(cudaProfilerStart()); } - -void hl_profiler_end() { CHECK_CUDA(cudaProfilerStop()); } diff --git a/paddle/legacy/cuda/src/hl_cuda_lstm.cu b/paddle/legacy/cuda/src/hl_cuda_lstm.cu deleted file mode 100644 index 9ac564fd2548cc782bee2380350f4ab888670ca3..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_lstm.cu +++ /dev/null @@ -1,876 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "hl_activation_functions.h" -#include "hl_base.h" -#include "hl_cuda_cublas.h" -#include "hl_device_functions.cuh" -#include "paddle/legacy/utils/Logging.h" - -typedef hppl::Active::forward t_forward; -typedef hppl::Active::backward t_backward; - -bool hl_lstm_sequence_parallel(int frameSize) { - if (frameSize == 32 || frameSize == 64) { - return true; - } else { - return false; - } -} - -class frameValue { - public: - real *value_; - __device__ frameValue(real *value) : value_(value) {} - template - __device__ inline void init(int start, int length, int idx) { - if (reversed == 0) { - value_ += start * frameSize + idx; - } else { - value_ += (start + length - 1) * frameSize + idx; - } - } - __device__ inline real *getPtr() const { return value_; } - __device__ inline real getValue() { return *value_; } - __device__ inline void setValue(real value) { *value_ = value; } - template - __device__ inline void nextFrame() { - if (reversed == 0) { - value_ += frameSize; - } else { - value_ -= frameSize; - } - } -}; - -__device__ __forceinline__ void ptx_sync(const int id, const int barriers) { - asm volatile("bar.sync %0, %1;" : : "r"(id), "r"(barriers) : "memory"); -} - -__device__ __forceinline__ void ptx_arrive(const int id, const int barriers) { - asm volatile("bar.arrive %0, %1;" : : "r"(id), "r"(barriers) : "memory"); -} - -template -__device__ __forceinline__ real forward_sequence(real value, - real *shValue, - real *state, - real *preOutput, - real *output, - real check, - int index, - t_forward activeNode, - t_forward activeGate, - t_forward activeState) { - real out; - real prevOut; - real state_r; - const int idx = index % frameSize; - const int idy = index / frameSize; - // assert(index < valueSize); - - if (idy == 0) { - value = activeNode(value); - shValue[index] = value; - } - if (idy == 1 || idy == 2) { - state_r = state[idx]; - value += state_r * check; - value = activeGate(value); - shValue[index] = value; - } - ptx_sync(1, valueSize); - 
if (idy == 3) { - state_r = state[idx]; - state_r = state_r * shValue[idx + frameSize * 2]; - state_r += shValue[idx] * shValue[idx + frameSize]; - state[idx] = state_r; - ptx_arrive(2, frameSize * 2); - value += state_r * check; - value = activeGate(value); - shValue[index] = value; - ptx_sync(3, frameSize * 2); - prevOut = preOutput[idx]; - out = prevOut * value; - output[idx] = out; - } - if (idy == 0) { - ptx_sync(2, frameSize * 2); - prevOut = state[idx]; - prevOut = activeState(prevOut); - preOutput[idx] = prevOut; - ptx_arrive(3, frameSize * 2); - } - return value; -} - -#define OUTPUT_BARRIER_ID 10 -#define OUTPUT_BARRIER_ID2 11 -template -__global__ void KeLstmForward(real *gateValue, - real *state, - real *output, - real *preOutput, - real *checkIg, - real *checkFg, - real *checkOg, - real *weight, - const int *starts, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - __shared__ real shValue[valueSize]; - __shared__ real shState[frameSize]; - __shared__ real shPrevOutput[frameSize]; - __shared__ real shOutput[frameSize]; - - const int index = threadIdx.x; - int start = starts[blockIdx.x]; - int length = starts[blockIdx.x + 1] - start; - - /* init */ - real check; - real value; - frameValue frameGate(gateValue); - frameValue frameState(state); - frameValue frameOutput(output); - frameValue framePreOutput(preOutput); - if (index < valueSize) { - const int idx = index % frameSize; - const int idy = index / frameSize; - frameGate.init(start, length, index); - value = frameGate.getValue(); - if (idy == 0) { - shState[idx] = 0.0; - } else if (idy == 1) { - check = checkIg[idx]; - } else if (idy == 2) { - check = checkFg[idx]; - } else if (idy == 3) { - check = checkOg[idx]; - } - - if (idy == 3) { - frameState.init(start, length, idx); - frameOutput.init(start, length, idx); - framePreOutput.init(start, length, idx); - } - - ptx_sync(1, valueSize); - } - - for (int i = 0; i < length; ++i) { - if 
(index < valueSize) { - if (valueSize == 128) { - if (i != 0) { - ptx_sync(OUTPUT_BARRIER_ID2, blockSize); - value += shValue[index]; - } - } - value = forward_sequence( - value, - shValue, - shState, - shPrevOutput, - shOutput, - check, - index, - hppl::gpu::forward[active_node], - hppl::gpu::forward[active_gate], - hppl::gpu::forward[active_state]); - const int idx = index % frameSize; - const int idy = index / frameSize; - if (valueSize == 128) { - if (idy == 3) { - ptx_arrive(OUTPUT_BARRIER_ID, frameSize + 128); - } - } - if (valueSize == 256) { - ptx_sync(OUTPUT_BARRIER_ID, valueSize); - } - frameGate.setValue(value); - if (idy == 3) { - frameState.setValue(shState[idx]); - frameOutput.setValue(shOutput[idx]); - framePreOutput.setValue(shPrevOutput[idx]); - frameState.nextFrame(); - frameOutput.nextFrame(); - framePreOutput.nextFrame(); - } - if (i != length - 1) { - frameGate.nextFrame(); - value = frameGate.getValue(); - } - } - if (i != length - 1) { - if (valueSize == 128) { - if (valueSize <= index) { - real B_r[frameSize]; - const int computeIdx = index - valueSize; - if (i == 0) { -#pragma unroll - for (int n = 0; n < frameSize; n++) { - B_r[n] = weight[n * valueSize + computeIdx]; - } - } - ptx_sync(OUTPUT_BARRIER_ID, frameSize + 128); - real A_r[frameSize]; - for (int n = 0; n < frameSize; n++) { - A_r[n] = shOutput[n]; - } - real sum = 0.0f; - for (int n = 0; n < frameSize; n++) { - sum += A_r[n] * B_r[n]; - } - shValue[computeIdx] = sum; - ptx_arrive(OUTPUT_BARRIER_ID2, blockSize); - } - } - if (valueSize == 256) { - real B_r[frameSize]; - if (i == 0) { -#pragma unroll - for (int n = 0; n < frameSize; n++) { - B_r[n] = weight[n * valueSize + index]; - } - } - real sum = 0.0f; - for (int n = 0; n < frameSize; n++) { - sum += shOutput[n] * B_r[n]; - } - value += sum; - } - } - } -} - -void hl_lstm_parallel_forward(real *gateValue, - real *stateValue, - real *preOutputValue, - real *outputValue, - real *checkIg, - real *checkFg, - real *checkOg, - real 
*weight, - const int *sequence, - int frameSize, - int numSequences, - bool reversed, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - CHECK(frameSize == 32 || frameSize == 64); - dim3 grid(numSequences, 1); - if (!reversed) { - if (frameSize == 32) { - KeLstmForward<128, 32, 0, 128, 256><<>>( - gateValue, - stateValue, - outputValue, - preOutputValue, - checkIg, - checkFg, - checkOg, - weight, - sequence, - active_node, - active_gate, - active_state); - } else if (frameSize == 64) { - KeLstmForward<256, 64, 0, 256, 256><<>>( - gateValue, - stateValue, - outputValue, - preOutputValue, - checkIg, - checkFg, - checkOg, - weight, - sequence, - active_node, - active_gate, - active_state); - } - } else { - if (frameSize == 32) { - KeLstmForward<128, 32, 1, 128, 256><<>>( - gateValue, - stateValue, - outputValue, - preOutputValue, - checkIg, - checkFg, - checkOg, - weight, - sequence, - active_node, - active_gate, - active_state); - } else if (frameSize == 64) { - KeLstmForward<256, 64, 1, 256, 256><<>>( - gateValue, - stateValue, - outputValue, - preOutputValue, - checkIg, - checkFg, - checkOg, - weight, - sequence, - active_node, - active_gate, - active_state); - } - } - CHECK_SYNC("hl_lstm_parallel_forward failed"); -} - -__device__ __forceinline__ void transpose_32x32(real a[], const int idx) { - const int warp_size = 32; - int addr = idx % warp_size; - unsigned mask = 0u; - CREATE_SHFL_MASK(mask, addr < warp_size); -#pragma unroll - for (int k = 1; k < 32; k++) { - // rSrc[k] = __shfl_sync(rSrc[k], (threadIdx.x + k) % 32, 32); - addr = __shfl_sync(mask, addr, (idx + 1) % 32, 32); - a[k] = __shfl_sync(mask, a[k], addr, 32); - } - -#pragma unroll - for (int tid = 0; tid < 31; tid++) { - real tmp = (idx > tid) ? a[0] : a[1]; -#pragma unroll - for (int k = 31; k > 0; k--) { - a[(k + 1) % 32] = (idx > tid) ? 
a[k] : a[(k + 1) % 32]; - } - a[1] = tmp; - } - - addr = (32 - idx) % 32; - CREATE_SHFL_MASK(mask, idx % 32 < warp_size); -#pragma unroll - for (int k = 0; k < 32; k++) { - a[k] = __shfl_sync(mask, a[k], addr, 32); - addr = __shfl_sync(mask, addr, (idx + 31) % 32, 32); - } -} - -template -__device__ void backward_sequence(real rGateValue, - real rOutputGrad, - real rPreOutputValue, - real &rGateGrad, - real &rStateGrad, - real *shStateGrad, - real *shStateValue, - real *shGateValue, - real rCheck, - real &rGateValuePrev, - int index, - t_backward activeNode, - t_backward activeGate, - t_backward activeState) { - const int frameIdx = index % frameSize; - const int frameIdy = index / frameSize; - if (frameIdy == 3) { - real rPrevOutputGrad; - rPrevOutputGrad = rOutputGrad * rGateValue; - rStateGrad = activeState(rPrevOutputGrad, rPreOutputValue); - rGateGrad = rOutputGrad * rPreOutputValue; - rGateGrad = activeGate(rGateGrad, rGateValue); - rStateGrad += rGateGrad * rCheck; - shStateGrad[index] = rStateGrad; - ptx_arrive(3, valueSize); - } else if (frameIdy == 1) { - shGateValue[frameIdx + frameSize] = rGateValue; - rStateGrad = rGateGrad * rCheck; - shStateGrad[index] = rStateGrad; - ptx_sync(3, valueSize); - rStateGrad += shStateGrad[frameIdx + frameSize * 2]; - rStateGrad += shStateGrad[frameIdx + frameSize * 3]; - rGateGrad = rStateGrad * shGateValue[frameIdx]; - rGateGrad = activeGate(rGateGrad, rGateValue); - } else if (frameIdy == 2) { - rStateGrad = rStateGrad * rGateValuePrev; - rStateGrad += rGateGrad * rCheck; - shStateGrad[index] = rStateGrad; - ptx_sync(3, valueSize); - rStateGrad += shStateGrad[frameIdx + frameSize]; - rStateGrad += shStateGrad[frameIdx + frameSize * 3]; - rGateValuePrev = rGateValue; - rGateGrad = rStateGrad * shStateValue[frameIdx]; - rGateGrad = activeGate(rGateGrad, rGateValue); - } else if (frameIdy == 0) { - shGateValue[frameIdx] = rGateValue; - ptx_sync(3, valueSize); - rStateGrad = shStateGrad[frameIdx + frameSize]; - rStateGrad 
+= shStateGrad[frameIdx + frameSize * 2]; - rStateGrad += shStateGrad[frameIdx + frameSize * 3]; - rGateGrad = rStateGrad * shGateValue[frameIdx + frameSize]; - rGateGrad = activeNode(rGateGrad, rGateValue); - } -} - -template -__device__ void load_weight(real rWeight[], real *weight, const int index) { - if (valueSize == 128) { - weight += index; -#pragma unroll - for (int n = 0; n < frameSize; n++) { - rWeight[n] = weight[n * valueSize]; - } - transpose_32x32(rWeight, index % 32); - } - if (valueSize == 256) { - int id = (index / 32) % 2; - weight += index - id * 32 + id * 32 * valueSize; -#pragma unroll - for (int n = 0; n < 32; n++) { - rWeight[n] = weight[n * valueSize]; - rWeight[n + 32] = weight[n * valueSize + 32]; - } - transpose_32x32(rWeight, index % 32); - transpose_32x32(&rWeight[32], index % 32); - } -} - -template -__global__ void KeLstmBackward(real *gateValue, - real *gateGrad, - real *stateValue, - real *stateGrad, /* do not need save */ - real *preOutputValue, - real *preOutputGrad, /* do not need save */ - real *checkIg, - real *checkIgGrad, - real *checkFg, - real *checkFgGrad, - real *checkOg, - real *checkOgGrad, - real *outputGrad, - real *weightValue, - const int *starts, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - __shared__ real shGateValue[valueSize]; - __shared__ real shStateGrad[valueSize]; - __shared__ real shStateValue[frameSize]; - __shared__ real shGateGrad[4][frameSize]; - __shared__ real shOutputGrad[4][frameSize]; - const int index = threadIdx.x; - int start = starts[blockIdx.x]; - int length = starts[blockIdx.x + 1] - start; - - const int frameIdx = index % frameSize; - const int frameIdy = index / frameSize; - real rCheck; - real rCheckGrad; - real rGateGrad; - real rStateGrad; - real rGateValuePrev; - real rPreOutputValue; - real rOutputGrad; - real rGateValue; - real rStateValue; - - frameValue frameGateValue(gateValue); - frameValue 
frameGateGrad(gateGrad); - frameValue framePreOutputValue(preOutputValue); - frameValue frameStateValue(stateValue); - frameValue frameOutputGrad(outputGrad); - if (frameIdy == 0) { - } else if (frameIdy == 1) { - rCheck = checkIg[frameIdx]; - } else if (frameIdy == 2) { - rCheck = checkFg[frameIdx]; - rGateValuePrev = 0.0; - rStateGrad = 0.0; - } else if (frameIdy == 3) { - rCheck = checkOg[frameIdx]; - framePreOutputValue.init(start, length, frameIdx); - frameOutputGrad.init(start, length, frameIdx); - rOutputGrad = frameOutputGrad.getValue(); - rPreOutputValue = framePreOutputValue.getValue(); - frameStateValue.init(start, length, frameIdx); - rStateValue = frameStateValue.getValue(); - } - - frameGateValue.init(start, length, index); - frameGateGrad.init(start, length, index); - rGateValue = frameGateValue.getValue(); - rGateGrad = 0.0; - rCheckGrad = 0.0; - - real B_r[frameSize]; - load_weight(B_r, weightValue, index); - - for (int i = 0; i < length; ++i) { - if (frameIdy == 3) { - if (i != length - 1) { - frameStateValue.nextFrame(); - shStateValue[frameIdx] = frameStateValue.getValue(); - } else { - shStateValue[frameIdx] = 0.0; - } - } - backward_sequence(rGateValue, - rOutputGrad, - rPreOutputValue, - rGateGrad, - rStateGrad, - shStateGrad, - shStateValue, - shGateValue, - rCheck, - rGateValuePrev, - index, - hppl::gpu::backward[active_node], - hppl::gpu::backward[active_gate], - hppl::gpu::backward[active_state]); - if (frameIdy == 3) { - rCheckGrad += rGateGrad * rStateValue; - rStateValue = shStateValue[frameIdx]; - } - - frameGateGrad.setValue(rGateGrad); - frameGateGrad.nextFrame(); - - if (i != length - 1) { - if (frameIdy == 3) { - framePreOutputValue.nextFrame(); - rPreOutputValue = framePreOutputValue.getValue(); - frameOutputGrad.nextFrame(); - rOutputGrad = frameOutputGrad.getValue(); - } else if (frameIdy == 2) { - rCheckGrad += rGateGrad * shStateValue[frameIdx]; - } else if (frameIdy == 1) { - rCheckGrad += rGateGrad * shStateValue[frameIdx]; 
- } - - frameGateValue.nextFrame(); - rGateValue = frameGateValue.getValue(); - shGateGrad[frameIdy][frameIdx] = rGateGrad; - if (valueSize == 128) { - real sum = 0.0f; -#pragma unroll - for (int n = 0; n < frameSize; n++) { - sum += shGateGrad[frameIdy][n] * B_r[n]; - } - if (frameIdy == 3) { - rOutputGrad += sum; - } else { - shOutputGrad[frameIdy][frameIdx] = sum; - } - } - if (valueSize == 256) { - ptx_sync(5, valueSize); - real A_r[frameSize]; - for (int n = 0; n < frameSize; n++) { - A_r[n] = shGateGrad[frameIdy][n]; - } - real sum = 0.0f; - for (int n = 0; n < frameSize; n++) { - sum += A_r[n] * B_r[n]; - } - if (frameIdy == 3) { - rOutputGrad += sum; - } else { - shOutputGrad[frameIdy][frameIdx] = sum; - } - } - - if (frameIdy == 3) { - ptx_sync(6, valueSize); -#pragma unroll - for (int i = 0; i < 3; i++) { - rOutputGrad += shOutputGrad[i][frameIdx]; - } - } else { - ptx_arrive(6, valueSize); - } - } - } - - /* TODO: Temporary save & merger in another kernel */ - if (frameIdy == 1) { - if (checkIgGrad) - paddle::paddleAtomicAdd(checkIgGrad + frameIdx, rCheckGrad); - } else if (frameIdy == 2) { - if (checkFgGrad) - paddle::paddleAtomicAdd(checkFgGrad + frameIdx, rCheckGrad); - } else if (frameIdy == 3) { - if (checkOgGrad) - paddle::paddleAtomicAdd(checkOgGrad + frameIdx, rCheckGrad); - } -} - -void hl_lstm_parallel_backward_data(real *gateValue, - real *gateGrad, - real *stateValue, - real *stateGrad, - real *preOutputValue, - real *preOutputGrad, - real *outputGrad, - real *checkIg, - real *checkIgGrad, - real *checkFg, - real *checkFgGrad, - real *checkOg, - real *checkOgGrad, - real *weight, - const int *sequence, - int frameSize, - int numSequences, - bool reversed, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate, - hl_activation_mode_t active_state) { - CHECK(frameSize == 32 || frameSize == 64 || frameSize == 128 || - frameSize == 256); - dim3 grid(numSequences, 1); - if (!reversed) { - if (frameSize == 32) { - 
KeLstmBackward<128, 32, 0><<>>( - gateValue, - gateGrad, - stateValue, - stateGrad, - preOutputValue, - preOutputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - outputGrad, - weight, - sequence, - active_node, - active_gate, - active_state); - } else if (frameSize == 64) { - KeLstmBackward<256, 64, 0><<>>( - gateValue, - gateGrad, - stateValue, - stateGrad, - preOutputValue, - preOutputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - outputGrad, - weight, - sequence, - active_node, - active_gate, - active_state); - } else if (frameSize == 128) { - KeLstmBackward<512, 128, 0><<>>( - gateValue, - gateGrad, - stateValue, - stateGrad, - preOutputValue, - preOutputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - outputGrad, - weight, - sequence, - active_node, - active_gate, - active_state); - } else if (frameSize == 256) { - KeLstmBackward<1024, 256, 0><<>>( - gateValue, - gateGrad, - stateValue, - stateGrad, - preOutputValue, - preOutputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - outputGrad, - weight, - sequence, - active_node, - active_gate, - active_state); - } - } else { - if (frameSize == 32) { - KeLstmBackward<128, 32, 1><<>>( - gateValue, - gateGrad, - stateValue, - stateGrad, - preOutputValue, - preOutputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - outputGrad, - weight, - sequence, - active_node, - active_gate, - active_state); - } else if (frameSize == 64) { - KeLstmBackward<256, 64, 1><<>>( - gateValue, - gateGrad, - stateValue, - stateGrad, - preOutputValue, - preOutputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - outputGrad, - weight, - sequence, - active_node, - active_gate, - active_state); - } else if (frameSize == 128) { - KeLstmBackward<512, 128, 1><<>>( - gateValue, - gateGrad, - stateValue, - stateGrad, - 
preOutputValue, - preOutputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - outputGrad, - weight, - sequence, - active_node, - active_gate, - active_state); - } else if (frameSize == 256) { - KeLstmBackward<1024, 256, 1><<>>( - gateValue, - gateGrad, - stateValue, - stateGrad, - preOutputValue, - preOutputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - outputGrad, - weight, - sequence, - active_node, - active_gate, - active_state); - } - } - CHECK_SYNC("hl_lstm_parallel_backward_data"); -} - -template -__global__ void KeSetGradZero(real *gateGrad, - const int *starts, - int valueSize, - int numSequences, - bool reversed) { - // const int tid = threadIdx.x; - - const int frameIdx = blockIdx.x * B_X + threadIdx.x; - const int numSeqId = blockIdx.y * B_Y + threadIdx.y; - - if (numSeqId >= numSequences || frameIdx >= valueSize) return; - - if (!reversed) { - int seqId = starts[numSeqId]; - gateGrad[seqId * valueSize + frameIdx] = 0.0; - } else { - int seqId = starts[numSeqId + 1] - 1; - gateGrad[seqId * valueSize + frameIdx] = 0.0; - } -} - -void hl_lstm_parallel_backward_weight(real *weightGrad, - real *outputValue, - real *gateGrad, - const int *sequence, - int frameSize, - int batchSize, - int numSequences, - bool reversed) { - int valueSize = 4 * frameSize; - dim3 threads(32, 32); - dim3 grid((valueSize + 32 - 1) / 32, (numSequences + 32 - 1) / 32); - KeSetGradZero<32, 32><<>>( - gateGrad, sequence, valueSize, numSequences, reversed); - - if (!reversed) { - hl_matrix_mul(outputValue, - HPPL_OP_T, - gateGrad + valueSize, - HPPL_OP_N, - weightGrad, - frameSize, - valueSize, - batchSize - 1, - 1.0, - 1.0); - } else { - hl_matrix_mul(outputValue + frameSize, - HPPL_OP_T, - gateGrad, - HPPL_OP_N, - weightGrad, - frameSize, - valueSize, - batchSize - 1, - 1.0, - 1.0); - } - CHECK_SYNC("hl_lstm_parallel_backward_weight"); -} diff --git a/paddle/legacy/cuda/src/hl_cuda_matrix.cu 
b/paddle/legacy/cuda/src/hl_cuda_matrix.cu deleted file mode 100644 index 6fe460026bbd404e15b43bd221551094a7abeda2..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_matrix.cu +++ /dev/null @@ -1,806 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "hl_base.h" -#include "hl_device_functions.cuh" -#include "hl_gpu_matrix_kernel.cuh" -#include "hl_matrix.h" -#include "hl_matrix_apply.cuh" -#include "hl_matrix_ops.cuh" -#include "hl_sequence.h" -#include "hl_sparse.ph" -#include "paddle/legacy/utils/Logging.h" - -DEFINE_MATRIX_UNARY_OP(Zero, a = 0); -DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1 * a + p2 * b); -void hl_matrix_add(real* A_d, - real* B_d, - real* C_d, - int dimM, - int dimN, - real alpha, - real beta) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - - hl_gpu_apply_ternary_op, 0, 0>( - ternary::_add(alpha, beta), - A_d, - B_d, - C_d, - dimM, - dimN, - dimN, - dimN, - dimN); - CHECK_SYNC("hl_matrix_add failed"); -} - -#ifdef PADDLE_TYPE_DOUBLE -#define THRESHOLD 128 -#else -#define THRESHOLD 64 -#endif -__device__ __forceinline__ void findMax(real* I, - real* dfMax_s, - int blockSize, - int base, - int curIdx, - int nextIdx, - int dimN, - real* max) { - dfMax_s[base] = -1.0e20; - while (curIdx < dimN) { - if (dfMax_s[base] < I[nextIdx]) { - dfMax_s[base] = I[nextIdx]; - } - nextIdx += blockSize; - curIdx += blockSize; - } - 
__syncthreads(); - - for (int stride = blockSize >> 1; stride > 0; stride >>= 1) { - __syncthreads(); - if (base < stride) { - nextIdx = base + stride; - if (dfMax_s[base] < dfMax_s[nextIdx]) { - dfMax_s[base] = dfMax_s[nextIdx]; - } - } - } - - if (0 == base) { - max[0] = dfMax_s[0]; - } - __syncthreads(); -} - -__device__ __forceinline__ void subMaxAndExp(real* I, - real* O, - int curIdx, - int nextIdx, - int blockSize, - int dimN, - real max) { - real val; - while (curIdx < dimN) { - val = I[nextIdx] - max; - if (val < -THRESHOLD) { - val = -THRESHOLD; - } - I[nextIdx] = val; -#ifndef PADDLE_TYPE_DOUBLE - O[nextIdx] = __expf(val); -#else - O[nextIdx] = exp(val); -#endif - nextIdx += blockSize; - curIdx += blockSize; - } - __syncthreads(); -} - -__device__ __forceinline__ void valueSum(real* O, - real* dfMax_s, - int blockSize, - int base, - int curIdx, - int nextIdx, - int dimN) { - dfMax_s[base] = 0; - while (curIdx < dimN) { - dfMax_s[base] += O[nextIdx]; - nextIdx += blockSize; - curIdx += blockSize; - } - __syncthreads(); - - for (int stride = blockSize >> 1; stride > 0; stride >>= 1) { - __syncthreads(); - if (base < stride) { - nextIdx = base + stride; - dfMax_s[base] += dfMax_s[nextIdx]; - } - } - __syncthreads(); -} - -__device__ __forceinline__ void divSum( - real* O, real sum, int curIdx, int nextIdx, int blockSize, int dimN) { - while (curIdx < dimN) { - O[nextIdx] /= sum; - nextIdx += blockSize; - curIdx += blockSize; - } -} - -__device__ __forceinline__ void softmax(real* I, - real* O, - real* dfMax_s, - int blockSize, - int base, - int curIdx, - int nextIdx, - int dimN) { - __shared__ real max; - - // find the max number - findMax(I, dfMax_s, blockSize, base, curIdx, nextIdx, dimN, &max); - - // sub max Value and do Exp operation - subMaxAndExp(I, O, base, nextIdx, blockSize, dimN, max); - - // add dimN values into blockDim.x buffer - // sum is in dfMax_s[0] - valueSum(O, dfMax_s, blockSize, base, curIdx, nextIdx, dimN); - - // divided by sum - 
divSum(O, dfMax_s[0], curIdx, nextIdx, blockSize, dimN); -} - -template -__global__ void KeMatrixSoftMax(real* O, real* I, int dimN) { - int base = threadIdx.x; - __shared__ real dfMax_s[blockSize]; - int nextIdx = blockIdx.x * dimN + base; - int curIdx = base; - - softmax(I, O, dfMax_s, blockSize, base, curIdx, nextIdx, dimN); -} - -void hl_matrix_softmax(real* A_d, real* C_d, int dimM, int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - dim3 block(512, 1); - dim3 grid(dimM, 1); - KeMatrixSoftMax<512><<>>(C_d, A_d, dimN); - CHECK_SYNC("hl_matrix_softmax failed"); -} - -template -__global__ void KeSequenceSoftMax(real* O, real* I, const int* index) { - int base = threadIdx.x; - int bid = blockIdx.x; - __shared__ real dfMax_s[blockSize]; - - int start = index[bid]; - int dimN = index[bid + 1] - start; - - int nextIdx = start + base; - int curIdx = base; - - softmax(I, O, dfMax_s, blockSize, base, curIdx, nextIdx, dimN); -} - -void hl_sequence_softmax_forward(real* A_d, - real* C_d, - const int* index, - int numSequence) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - dim3 block(512, 1); - dim3 grid(numSequence, 1); - KeSequenceSoftMax<512><<>>(C_d, A_d, index); - CHECK_SYNC("hl_sequence_softmax_forward failed"); -} - -__global__ void KeMatrixDerivative( - real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN) { - int rowIdx = blockIdx.x * blockDim.x + threadIdx.x; - int colIdx = blockIdx.y * blockDim.y + threadIdx.y; - int index; - - if (rowIdx < dimM && colIdx < dimN) { - index = rowIdx * dimN + colIdx; - grad_d[index] = output_d[index] * (grad_d[index] - sftmaxSum_d[rowIdx]); - } -} - -void hl_matrix_softmax_derivative( - real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN) { - CHECK_NOTNULL(grad_d); - CHECK_NOTNULL(output_d); - CHECK_NOTNULL(sftmaxSum_d); - - int blocksX = (dimM + 0) / 1; - int blocksY = (dimN + 1024 - 1) / 1024; - dim3 threads(1, 1024); - dim3 grid(blocksX, blocksY); - - KeMatrixDerivative<<>>( - grad_d, 
output_d, sftmaxSum_d, dimM, dimN); - CHECK_SYNC("hl_matrix_softmax_derivative failed"); -} - -__global__ void KeMatrixMultiBinaryCrossEntropy( - real* output, real* entropy, int* row, int* col, int dimM, int dimN) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < dimM) { - for (int i = 0; i < dimN; i++) { - entropy[index] -= log(1 - output[index * dimN + i]); - } - int* row_col = col + row[index]; - int col_num = row[index + 1] - row[index]; - for (int i = 0; i < col_num; i++) { - real o = output[index * dimN + row_col[i]]; - entropy[index] -= log(o / (1 - o)); - } - } -} - -void hl_matrix_multi_binary_cross_entropy(real* output, - real* entropy, - hl_sparse_matrix_s csr_mat, - int dimM, - int dimN) { - CHECK_NOTNULL(output); - CHECK_NOTNULL(entropy); - CHECK_NOTNULL(csr_mat); - CHECK_EQ(csr_mat->format, HL_SPARSE_CSR); - int n_threads = 1024; - int blocks = (dimM + n_threads - 1) / n_threads; - dim3 threads(n_threads); - dim3 grid(blocks); - hl_csr_matrix mat = (hl_csr_matrix)(csr_mat->matrix); - KeMatrixMultiBinaryCrossEntropy<<>>( - output, entropy, mat->csr_row, mat->csr_col, dimM, dimN); - CHECK_SYNC("hl_matrix_multi_binary_cross_entropy failed"); -} - -__global__ void KeMatrixMultiBinaryCrossEntropyBp( - real* output, real* grad, int* row, int* col, int dimM, int dimN) { - int row_idx = blockIdx.x * blockDim.x + threadIdx.x; - if (row_idx < dimM) { - for (int i = 0; i < dimN; i++) { - int index = row_idx * dimN + i; - grad[index] += 1.0 / (1 - output[index]); - } - int col_num = row[row_idx + 1] - row[row_idx]; - int* row_col = col + row[row_idx]; - for (int i = 0; i < col_num; i++) { - int index = row_idx * dimN + row_col[i]; - grad[index] -= 1.0 / (output[index] * (1 - output[index])); - } - } -} - -void hl_matrix_multi_binary_cross_entropy_bp( - real* output, real* grad, hl_sparse_matrix_s csr_mat, int dimM, int dimN) { - CHECK_NOTNULL(output); - CHECK_NOTNULL(grad); - CHECK_NOTNULL(csr_mat); - CHECK_EQ(csr_mat->format, HL_SPARSE_CSR); 
- int n_threads = 1024; - int blocks = (dimM + n_threads - 1) / n_threads; - dim3 threads(n_threads); - dim3 grid(blocks); - hl_csr_matrix mat = (hl_csr_matrix)(csr_mat->matrix); - KeMatrixMultiBinaryCrossEntropyBp<<>>( - output, grad, mat->csr_row, mat->csr_col, dimM, dimN); - CHECK_SYNC("hl_matrix_multi_binary_cross_entropy_bp failed"); -} - -__global__ void KeMatrixCrossEntropy( - real* O, real* E, int* label, int dimM, int dimN) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int newBase; - if (index < dimM) { - newBase = label[index]; - newBase = newBase % dimN; - E[index] = -log(O[index * dimN + newBase]); - } -} - -void hl_matrix_cross_entropy( - real* A_d, real* C_d, int* label_d, int dimM, int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - - int blocks = (dimM + 1024 - 1) / 1024; - dim3 threads(1024, 1); - dim3 grid(blocks, 1); - KeMatrixCrossEntropy<<>>( - A_d, C_d, label_d, dimM, dimN); - CHECK_SYNC("hl_matrix_cross_entropy failed"); -} - -__global__ void KeMatrixCrossEntropyBp( - real* grad_d, real* output_d, int* label_d, int dimM, int dimN) { - int rowIdx = blockIdx.x * blockDim.x + threadIdx.x; - int colIdx = blockIdx.y * blockDim.y + threadIdx.y; - int index; - if (rowIdx < dimM && colIdx < dimN) { - index = rowIdx * dimN + colIdx; - if (label_d[rowIdx] == colIdx) { - grad_d[index] -= 1.0f / output_d[index]; - } - } -} - -void hl_matrix_cross_entropy_bp( - real* grad_d, real* output_d, int* label_d, int dimM, int dimN) { - CHECK_NOTNULL(grad_d); - CHECK_NOTNULL(output_d); - CHECK_NOTNULL(label_d); - - int blocksX = (dimM + 0) / 1; - int blocksY = (dimN + 1024 - 1) / 1024; - dim3 threads(1, 1024); - dim3 grid(blocksX, blocksY); - KeMatrixCrossEntropyBp<<>>( - grad_d, output_d, label_d, dimM, dimN); - CHECK_SYNC("hl_matrix_cross_entropy_bp failed"); -} - -void hl_matrix_zero_mem(real* data, int num) { - hl_gpu_apply_unary_op(unary::Zero(), data, 1, num, num); -} - -__global__ void KeParamReluForward(real* output, - real* input, - real* 
w, - int width, - int height, - int partial_sum) { - int tx = blockIdx.x * blockDim.x + threadIdx.x; - int ty = blockIdx.y * blockDim.y + threadIdx.y; - if (tx < width && ty < height) { - int index = ty * width + tx; - output[index] = - input[index] > 0 ? input[index] : input[index] * w[tx / partial_sum]; - } -} - -void hl_param_relu_forward(real* output, - real* input, - real* w, - int width, - int height, - int partial_sum) { - CHECK_NOTNULL(output); - CHECK_NOTNULL(input); - CHECK_NOTNULL(w); - dim3 threads(16, 16); - int blockX = (width + 16 - 1) / 16; - int blockY = (height + 16 - 1) / 16; - dim3 grid(blockX, blockY); - KeParamReluForward<<>>( - output, input, w, width, height, partial_sum); - CHECK_SYNC("hl_param_relu_forward failed"); -} - -template -__global__ void KeParamReluBackWardW(real* grad_w, - real* grad_o, - real* input, - int width, - int height, - int partial_sum) { - const int tid = threadIdx.x; - __shared__ real temp[blockSize]; - grad_o += partial_sum * blockIdx.x; - input += partial_sum * blockIdx.x; - real tmp = 0.0; - for (int index = tid; index < partial_sum * height; index += blockSize) { - int row = index / partial_sum; - int offset = row * width + (index - row * partial_sum); - if (input[offset] < 0) { - tmp += grad_o[offset] * input[offset]; - } - } - temp[tid] = tmp; - __syncthreads(); - for (int s = blockSize / 2; s > 0; s >>= 1) { - if (tid < s) { - temp[tid] += temp[tid + s]; - } - __syncthreads(); - } - if (tid == 0) { - grad_w[blockIdx.x] += temp[0]; - } -} - -void hl_param_relu_backward_w(real* grad_w, - real* grad_o, - real* input, - int width, - int height, - int partial_sum) { - CHECK_NOTNULL(grad_w); - CHECK_NOTNULL(grad_o); - CHECK_NOTNULL(input); - const int blockSize = 1024; - int grid_num = width / partial_sum; - dim3 threads(blockSize, 1); - dim3 grid(grid_num, 1); - KeParamReluBackWardW<<>>( - grad_w, grad_o, input, width, height, partial_sum); - CHECK_SYNC("hl_param_relu_backward_w failed"); -} - -__global__ void 
KeParamReluBackwardDiff(real* grad_o, - real* input, - real* w, - real* diff, - int width, - int height, - int partial_sum) { - int tx = blockIdx.x * blockDim.x + threadIdx.x; - int ty = blockIdx.y * blockDim.y + threadIdx.y; - if (tx < width && ty < height) { - int index = ty * width + tx; - diff[index] += grad_o[index] * (input[index] > 0 ? 1 : w[tx / partial_sum]); - } -} - -void hl_param_relu_backward_diff(real* grad_o, - real* data, - real* w, - real* diff, - int width, - int height, - int partial_sum) { - CHECK_NOTNULL(grad_o); - CHECK_NOTNULL(data); - CHECK_NOTNULL(w); - CHECK_NOTNULL(diff); - dim3 threads(16, 16); - int blockX = (width + 16 - 1) / 16; - int blockY = (height + 16 - 1) / 16; - dim3 grid(blockX, blockY); - KeParamReluBackwardDiff<<>>( - grad_o, data, w, diff, width, height, partial_sum); - CHECK_SYNC("hl_param_relu_backward_diff failed"); -} - -__global__ void KeMatrixAddSharedBias( - real* A, real* B, const int channel, const int M, const int N, real scale) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int dim = N / channel; - if (index < M * N) { - int i = index % N; - i = i / dim; - A[index] += scale * B[i]; - } -} - -void hl_matrix_add_shared_bias(real* A_d, - real* B_d, - const int channel, - const int dimM, - const int dimN, - real scale) { - const int blocks = 512; - const int grids = DIVUP(dimM * dimN, blocks); - KeMatrixAddSharedBias<<>>( - A_d, B_d, channel, dimM, dimN, scale); - CHECK_SYNC("hl_matrix_add_shared_bias failed"); -} - -template -__global__ void KeMatrixCollectSharedBias(real* B, - real* A, - const int channel, - const int M, - const int N, - const int dim, - const int limit, - real scale) { - if (dim < limit) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < channel) { - real sum = 0.0; - for (int i = 0; i < M; ++i) { - for (int j = 0; j < dim; ++j) { - sum += A[i * N + index * dim + j]; - } - } - B[index] += scale * sum; - } - } else { - const int tid = threadIdx.x; - const int bid = 
blockIdx.x; - __shared__ real smem[blockSize]; - real sum = 0.0; - for (int j = 0; j < ((dim * M + blockSize - 1) / blockSize); ++j) { - int n = j * blockSize + tid; - int m = n / dim; - int w = n % dim; - smem[tid] = (m < M && w < dim) ? A[m * N + bid * dim + w] : 0.0; - __syncthreads(); - simpleReduce(smem, tid, blockSize); - sum += smem[0]; - } - if (tid == 0) { - B[bid] += scale * sum; - } - } -} - -void hl_matrix_collect_shared_bias(real* B_d, - real* A_d, - const int channel, - const int dimM, - const int dimN, - real scale) { - const int dim = dimN / channel; - const int blocks = 256; - const int limit = 64; - int grids = (dimM * dim) < limit ? DIVUP(channel, blocks) : channel; - - KeMatrixCollectSharedBias<<>>( - B_d, A_d, channel, dimM, dimN, dim, limit, scale); - CHECK_SYNC("hl_matrix_collect_shared_bias failed"); -} - -__global__ void keMatrixRotate( - real* mat, real* matRot, int dimM, int dimN, bool clockWise) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < dimM * dimN) { - int i = idx / dimN; - int j = idx % dimN; - if (clockWise) { - matRot[j * dimM + i] = mat[(dimM - i - 1) * dimN + j]; - } else { - matRot[j * dimM + i] = mat[i * dimN + (dimN - j - 1)]; - } - } -} - -void hl_matrix_rotate( - real* mat, real* matRot, int dimM, int dimN, bool clockWise) { - CHECK_NOTNULL(mat); - CHECK_NOTNULL(matRot); - const int threads = 512; - const int blocks = DIVUP(dimM * dimN, threads); - keMatrixRotate<<>>( - mat, matRot, dimM, dimN, clockWise); - CHECK_SYNC("hl_matrix_rotate failed"); -} - -__global__ void keMatrixVol2Col(int num_kernels, - const real* dataSrc, - real* dataDst, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - int depth_col, - int height_col, - int width_col) { - for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; - index += blockDim.x * gridDim.x) { - int w_out = index % 
width_col; - int h_out = (index / width_col) % height_col; - int d_out = (index / width_col / height_col) % depth_col; - int channel_in = index / width_col / height_col / depth_col; - int channel_out = channel_in * filterD * filterH * filterW; - int w_in = w_out * strideW - paddingW; - int h_in = h_out * strideH - paddingH; - int d_in = d_out * strideD - paddingD; - - dataDst += - ((channel_out * depth_col + d_out) * height_col + h_out) * width_col + - w_out; - dataSrc += ((channel_in * depth + d_in) * height + h_in) * width + w_in; - for (int k = 0; k < filterD; ++k) { - for (int i = 0; i < filterH; ++i) { - for (int j = 0; j < filterW; ++j) { - int d = d_in + k; - int h = h_in + i; - int w = w_in + j; - *dataDst = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && - w < width) - ? dataSrc[(k * height + i) * width + j] - : 0; - dataDst += depth_col * height_col * width_col; - } - } - } - } -} - -void hl_matrix_vol2Col(const real* dataSrc, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real* dataDst) { - int depth_col = (depth + 2 * paddingD - filterD) / strideD + 1; - int height_col = (height + 2 * paddingH - filterH) / strideH + 1; - int width_col = (width + 2 * paddingW - filterW) / strideW + 1; - int num_kernels = channels * depth_col * height_col * width_col; - - const int threads = 512; - const int blocks = DIVUP(num_kernels, threads); - - keMatrixVol2Col<<>>(num_kernels, - dataSrc, - dataDst, - depth, - height, - width, - filterD, - filterH, - filterW, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - depth_col, - height_col, - width_col); - CHECK_SYNC("hl_matrix_vol2Col failed"); -} - -__global__ void keMatrixCol2Vol(int num_kernels, - real* dataDst, - const real* dataSrc, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int 
strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - int depth_col, - int height_col, - int width_col, - real alpha, - real beta) { - for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; - index += blockDim.x * gridDim.x) { - real srcVal = 0; - real dstVal = dataDst[index]; - int w = index % width + paddingW; - int h = (index / width) % height + paddingH; - int d = (index / width / height) % depth + paddingD; - int c = index / width / height / depth; - // compute the start and end of the output - int w_col_start = (w < filterW) ? 0 : (w - filterW) / strideW + 1; - int w_col_end = min(w / strideW + 1, width_col); - int h_col_start = (h < filterH) ? 0 : (h - filterH) / strideH + 1; - int h_col_end = min(h / strideH + 1, height_col); - int d_col_start = (d < filterD) ? 0 : (d - filterD) / strideD + 1; - int d_col_end = min(d / strideD + 1, depth_col); - - int offset = (c * filterD * filterW * filterH + d * filterW * filterH + - h * filterW + w) * - depth_col * height_col * width_col; - - int coeff_d_col = - (1 - strideD * filterW * filterH * depth_col) * height_col * width_col; - int coeff_h_col = - (1 - strideH * filterW * depth_col * height_col) * width_col; - int coeff_w_col = (1 - strideW * depth_col * height_col * width_col); - - for (int d_col = d_col_start; d_col < d_col_end; ++d_col) { - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - srcVal += dataSrc[offset + d_col * coeff_d_col + h_col * coeff_h_col + - w_col * coeff_w_col]; - } - } - } - dataDst[index] = alpha * srcVal + beta * dstVal; - } -} - -void hl_matrix_col2Vol(real* dataDst, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - const real* dataSrc, - real alpha, - real beta) { - int depth_col = (depth + 2 * paddingD - filterD) / strideD + 
1; - int height_col = (height + 2 * paddingH - filterH) / strideH + 1; - int width_col = (width + 2 * paddingW - filterW) / strideW + 1; - int num_kernels = channels * depth * height * width; - - const int threads = 512; - const int blocks = DIVUP(num_kernels, threads); - - keMatrixCol2Vol<<>>(num_kernels, - dataDst, - dataSrc, - depth, - height, - width, - filterD, - filterH, - filterW, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - depth_col, - height_col, - width_col, - alpha, - beta); - - CHECK_SYNC("hl_matrix_col2Vol failed"); -} - -__global__ void keVectorCast2Int(int* out, real* vec, int size) { - for (int i = threadIdx.x; i < (size); i += blockDim.x) { - out[i] = int(vec[i]); - } -} - -void hl_vector_cast2int(int* out, real* vec, int size) { - keVectorCast2Int<<<1, 512, 0, STREAM_DEFAULT>>>(out, vec, size); - CHECK_SYNC("hl_vector_cast2int failed"); -} diff --git a/paddle/legacy/cuda/src/hl_cuda_sequence.cu b/paddle/legacy/cuda/src/hl_cuda_sequence.cu deleted file mode 100644 index 1d772b5ce27615673d85231ec8fd3ab1d0aed523..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_sequence.cu +++ /dev/null @@ -1,408 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "hl_base.h" -#include "hl_device_functions.cuh" -#include "paddle/legacy/utils/Logging.h" - -__global__ void KeMaxSequenceForward(real* input, - const int* sequence, - real* output, - int* index, - int numSequences, - int dim) { - int dimIdx = threadIdx.x; - int sequenceId = blockIdx.x; - if (sequenceId >= numSequences) return; - int start = sequence[sequenceId]; - int end = sequence[sequenceId + 1]; - - for (int i = dimIdx; i < dim; i += blockDim.x) { - real tmp = -HL_FLOAT_MAX; - int tmpId = -1; - for (int insId = start; insId < end; insId++) { - if (tmp < input[insId * dim + i]) { - tmp = input[insId * dim + i]; - tmpId = insId; - } - } - output[sequenceId * dim + i] = tmp; - index[sequenceId * dim + i] = tmpId; - } -} - -void hl_max_sequence_forward(real* input, - const int* sequence, - real* output, - int* index, - int numSequences, - int dim) { - CHECK_NOTNULL(input); - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(output); - CHECK_NOTNULL(index); - - dim3 threads(256, 1); - dim3 grid(numSequences, 1); - KeMaxSequenceForward<<>>( - input, sequence, output, index, numSequences, dim); - CHECK_SYNC("hl_max_sequence_forward failed"); -} - -__global__ void KeMaxSequenceBackward( - real* outputGrad, int* index, real* inputGrad, int numSequences, int dim) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - int colIdx = idx % dim; - if (idx < numSequences * dim) { - int insId = index[idx]; - inputGrad[insId * dim + colIdx] += outputGrad[idx]; - } -} - -void hl_max_sequence_backward( - real* outputGrad, int* index, real* inputGrad, int numSequences, int dim) { - CHECK_NOTNULL(outputGrad); - CHECK_NOTNULL(index); - CHECK_NOTNULL(inputGrad); - - unsigned int blocks = (numSequences * dim + 128 - 1) / 128; - dim3 threads(128, 1); - dim3 grid(blocks, 1); - KeMaxSequenceBackward<<>>( - outputGrad, index, inputGrad, numSequences, dim); - CHECK_SYNC("hl_max_sequence_backward failed"); -} - -template -__global__ void KeMatrixAddRows(real* output, - real* table, - 
int* ids, - int numSamples, - int tableSize, - int dim) { - int idx = threadIdx.x; - int idy = threadIdx.y; - int sampleId = blockIdx.x + idy * gridDimX; - - while (sampleId < numSamples) { - int tableId = ids[sampleId]; - if ((0 <= tableId) && (tableId < tableSize)) { - real* outputData = output + sampleId * dim; - real* tableData = table + tableId * dim; - for (int i = idx; i < dim; i += blockDimX) { - if (AddRow == 0) { - outputData[i] += tableData[i]; - } else { - paddle::paddleAtomicAdd(&tableData[i], outputData[i]); - } - } - } - sampleId += blockDimY * gridDimX; - } -} - -template -__global__ void KeSequence2Batch(real* batch, - real* sequence, - const int* batchIndex, - int seqWidth, - int batchCount) { - int idx = threadIdx.x; - int idy = threadIdx.y; - int id = blockIdx.x + idy * gridDimX; - while (id < batchCount) { - int seqId = batchIndex[id]; - real* batchData = batch + id * seqWidth; - real* seqData = sequence + seqId * seqWidth; - for (int i = idx; i < seqWidth; i += blockDimX) { - if (seq2batch) { - if (isAdd) { - batchData[i] += seqData[i]; - } else { - batchData[i] = seqData[i]; - } - } else { - if (isAdd) { - seqData[i] += batchData[i]; - } else { - seqData[i] = batchData[i]; - } - } - } - id += blockDimY * gridDimX; - } -} - -void hl_sequence2batch_copy(real* batch, - real* sequence, - const int* batchIndex, - int seqWidth, - int batchCount, - bool seq2batch) { - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(batch); - CHECK_NOTNULL(batchIndex); - - dim3 threads(128, 8); - dim3 grid(8, 1); - if (seq2batch) { - KeSequence2Batch<128, 8, 8, 1, 0><<>>( - batch, sequence, batchIndex, seqWidth, batchCount); - } else { - KeSequence2Batch<128, 8, 8, 0, 0><<>>( - batch, sequence, batchIndex, seqWidth, batchCount); - } - CHECK_SYNC("hl_sequence2batch_copy failed"); -} - -void hl_sequence2batch_add(real* batch, - real* sequence, - int* batchIndex, - int seqWidth, - int batchCount, - bool seq2batch) { - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(batch); - 
CHECK_NOTNULL(batchIndex); - - dim3 threads(128, 8); - dim3 grid(8, 1); - if (seq2batch) { - KeSequence2Batch<128, 8, 8, 1, 1><<>>( - batch, sequence, batchIndex, seqWidth, batchCount); - } else { - KeSequence2Batch<128, 8, 8, 0, 1><<>>( - batch, sequence, batchIndex, seqWidth, batchCount); - } - CHECK_SYNC("hl_sequence2batch_add failed"); -} - -template -__global__ void KeSequence2BatchPadding(real* batch, - real* sequence, - const int* sequenceStartPositions, - const size_t sequenceWidth, - const size_t maxSequenceLength, - const size_t numSequences) { - int batchIdx = blockIdx.y; - int sequenceStart = sequenceStartPositions[batchIdx]; - int sequenceLength = sequenceStartPositions[batchIdx + 1] - sequenceStart; - - int sequenceIdx = blockIdx.x * blockDim.y + threadIdx.y; - int batchBaseIdx = (sequenceIdx * numSequences + batchIdx) * sequenceWidth; - int sequenceBaseIdx = (sequenceStart + sequenceIdx) * sequenceWidth; - - real scale = normByTimes ? (1.0f / (real)sequenceLength) : 1.0f; - - if (sequenceIdx < sequenceLength) { - if (seq2batch) { - /* sequence -> batch */ - for (int i = threadIdx.x; i < sequenceWidth; i += blockDim.x) { - batch[batchBaseIdx + i] = scale * sequence[sequenceBaseIdx + i]; - } - } else { - /* batch -> sequence */ - for (int i = threadIdx.x; i < sequenceWidth; i += blockDim.x) { - sequence[sequenceBaseIdx + i] = scale * batch[batchBaseIdx + i]; - } - } - } else if (sequenceIdx < maxSequenceLength) { - if (seq2batch) { - /* sequence -> batch */ - for (int i = threadIdx.x; i < sequenceWidth; i += blockDim.x) { - batch[batchBaseIdx + i] = 0; - } - } - } -} - -void hl_sequence2batch_copy_padding(real* batch, - real* sequence, - const int* sequenceStartPositions, - const size_t sequenceWidth, - const size_t maxSequenceLength, - const size_t numSequences, - bool normByTimes, - bool seq2batch) { - CHECK_NOTNULL(batch); - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(sequenceStartPositions); - - if (!normByTimes && numSequences == 1) { - size_t 
elementCount = maxSequenceLength * sequenceWidth; - if (seq2batch) { - /* sequence -> batch */ - hl_memcpy_device2device(batch, sequence, sizeof(real) * elementCount); - } else { - /* batch -> sequence */ - hl_memcpy_device2device(sequence, batch, sizeof(real) * elementCount); - } - return; - } - - const int CUDA_BLOCK_SIZE = 512; - - /* At least use 32 threads to copy sequenceWidth elements, - and at least 8 elements for each thread. */ - int blockDimX = ((((sequenceWidth + 7) >> 3) + 31) >> 5) << 5; - blockDimX = (blockDimX < CUDA_BLOCK_SIZE) ? blockDimX : CUDA_BLOCK_SIZE; - - int blockDimY = CUDA_BLOCK_SIZE / blockDimX; - dim3 threads(blockDimX, blockDimY); - - int gridDimX = (maxSequenceLength + blockDimY - 1) / blockDimY; - int gridDimY = numSequences; - dim3 grid(gridDimX, gridDimY); - - if (seq2batch) { - /* sequence -> batch */ - if (normByTimes) { - KeSequence2BatchPadding<1, 1><<>>( - batch, - sequence, - sequenceStartPositions, - sequenceWidth, - maxSequenceLength, - numSequences); - } else { - KeSequence2BatchPadding<0, 1><<>>( - batch, - sequence, - sequenceStartPositions, - sequenceWidth, - maxSequenceLength, - numSequences); - } - } else { - /* batch -> sequence */ - if (normByTimes) { - KeSequence2BatchPadding<1, 0><<>>( - batch, - sequence, - sequenceStartPositions, - sequenceWidth, - maxSequenceLength, - numSequences); - } else { - KeSequence2BatchPadding<0, 0><<>>( - batch, - sequence, - sequenceStartPositions, - sequenceWidth, - maxSequenceLength, - numSequences); - } - } - - CHECK_SYNC("hl_sequence2batch_copy_padding failed"); -} - -__device__ inline float my_rsqrt(float x) { return rsqrtf(x); } - -__device__ inline double my_rsqrt(double x) { return rsqrt(x); } - -__global__ void KeSequenceAvgForward(real* dst, - real* src, - const int* starts, - int height, - int width, - const int mode) { - int gid = blockIdx.x * blockDim.x + threadIdx.x; - int row = gid / width; - int col = gid % width; - - if (gid < height * width) { - int start = 
starts[row]; - int end = starts[row + 1]; - int seqLength = end - start; - if (seqLength == 0) return; - real sum = 0.0; - for (int i = start; i < end; i++) { - sum += src[i * width + col]; - } - sum = mode == 1 ? sum : (mode == 0 ? sum / seqLength - : sum * my_rsqrt((real)seqLength)); - dst[gid] += sum; - } -} - -void hl_sequence_avg_forward(real* dst, - real* src, - const int* starts, - int height, - int width, - const int mode) { - CHECK_NOTNULL(dst); - CHECK_NOTNULL(src); - CHECK_NOTNULL(starts); - - int block = 512; - int grid = DIVUP(width * height, 512); - - CHECK(mode == 0 || mode == 1 || mode == 2) - << "mode error in hl_sequence_avg_forward!"; - - KeSequenceAvgForward<<>>( - dst, src, starts, height, width, mode); - CHECK_SYNC("hl_sequence_avg_forward failed"); -} - -__global__ void KeSequenceAvgBackward(real* dst, - real* src, - const int* starts, - int height, - int width, - const int mode) { - int gid = blockIdx.x * blockDim.x + threadIdx.x; - int row = gid / width; - int col = gid % width; - - if (gid < height * width) { - int start = starts[row]; - int end = starts[row + 1]; - int seqLength = end - start; - if (seqLength == 0) return; - real grad = src[gid]; - grad = mode == 1 ? grad : (mode == 0 ? 
grad / seqLength - : grad * my_rsqrt((real)seqLength)); - for (int i = start; i < end; i++) { - dst[i * width + col] += grad; - } - } -} - -void hl_sequence_avg_backward(real* dst, - real* src, - const int* starts, - int height, - int width, - const int mode) { - CHECK_NOTNULL(dst); - CHECK_NOTNULL(src); - CHECK_NOTNULL(starts); - - int block = 512; - int grid = DIVUP(width * height, 512); - - CHECK(mode == 0 || mode == 1 || mode == 2) - << "mode error in hl_sequence_avg_backward!"; - - KeSequenceAvgBackward<<>>( - dst, src, starts, height, width, mode); - CHECK_SYNC("hl_sequence_avg_backward failed"); -} diff --git a/paddle/legacy/cuda/src/hl_cuda_sparse.cu b/paddle/legacy/cuda/src/hl_cuda_sparse.cu deleted file mode 100644 index 8065a6f9f6f2ac4cacf9a63b7b80dd00391824a0..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_sparse.cu +++ /dev/null @@ -1,1262 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "hl_cuda.h" -#include "hl_cuda_sparse.cuh" -#include "hl_matrix_apply.cuh" -#include "hl_matrix_ops.cuh" -#include "hl_sparse.h" -#include "hl_sparse.ph" -#include "paddle/legacy/utils/Logging.h" - -DEFINE_MATRIX_UNARY_PARAMETER_OP(mul_scalar, ONE_PARAMETER, a = a * p); -DEFINE_MATRIX_UNARY_OP(Zero, a = 0); - -void hl_matrix_csr2dense(hl_sparse_matrix_s A_d, - real *C_d, - int dimM, - int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - CHECK(dimM > 0 && dimN > 0 && A_d->rows == dimM && A_d->cols == dimN); - CHECK(A_d->format == HL_SPARSE_CSR) << "matrix format error!"; - - if (A_d->nnz == 0) { - hl_gpu_apply_unary_op(unary::Zero(), C_d, dimM, dimN, dimN); - return; - } - - /* nnz != 0 */ - hl_csr_matrix A_d2 = (hl_csr_matrix)(A_d->matrix); - CHECK((A_d2->csr_val || A_d->type == HL_NO_VALUE) && A_d2->csr_row && - A_d2->csr_col) - << "parameter transa error!"; - - int blocksX = (dimN + CU_CSR2DENSE_THREAD_X - 1) / CU_CSR2DENSE_THREAD_X; - int blocksY = (dimM + CU_CSR2DENSE_THREAD_X - 1) / CU_CSR2DENSE_THREAD_X; - dim3 threads(CU_CSR2DENSE_THREAD_X, CU_CSR2DENSE_THREAD_X); - dim3 grid(blocksX, blocksY); - - if (A_d->type == HL_NO_VALUE) { - KeSMatrixCsr2Dense<0><<>>( - A_d2->csr_val, A_d2->csr_row, A_d2->csr_col, C_d, dimM, dimN); - } else if (A_d->type == HL_FLOAT_VALUE) { - KeSMatrixCsr2Dense<1><<>>( - A_d2->csr_val, A_d2->csr_row, A_d2->csr_col, C_d, dimM, dimN); - } else { - } - CHECK_SYNC("hl_matrix_csr2dense failed"); -} - -void hl_matrix_csc2dense(hl_sparse_matrix_s A_d, - real *C_d, - int dimM, - int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(C_d); - CHECK(dimM > 0 && dimN > 0 && A_d->rows == dimM && A_d->cols == dimN); - CHECK(A_d->format == HL_SPARSE_CSC) << "matrix format error!"; - - if (A_d->nnz == 0) { - hl_gpu_apply_unary_op(unary::Zero(), C_d, dimM, dimN, dimN); - return; - } - - /* nnz != 0 */ - hl_csc_matrix A_d2 = (hl_csc_matrix)(A_d->matrix); - CHECK((A_d2->csc_val || A_d->type == HL_NO_VALUE) && A_d2->csc_row && - 
A_d2->csc_col) - << "parameter transa error!"; - - int blocksX = (dimN + CU_CSR2DENSE_THREAD_X - 1) / CU_CSR2DENSE_THREAD_X; - int blocksY = (dimM + CU_CSR2DENSE_THREAD_X - 1) / CU_CSR2DENSE_THREAD_X; - dim3 threads(CU_CSR2DENSE_THREAD_X, CU_CSR2DENSE_THREAD_X); - dim3 grid(blocksX, blocksY); - - if (A_d->type == HL_NO_VALUE) { - KeSMatrixCsc2Dense<0><<>>( - A_d2->csc_val, A_d2->csc_row, A_d2->csc_col, C_d, dimM, dimN); - } else if (A_d->type == HL_FLOAT_VALUE) { - KeSMatrixCsc2Dense<1><<>>( - A_d2->csc_val, A_d2->csc_row, A_d2->csc_col, C_d, dimM, dimN); - } else { - } - CHECK_SYNC("hl_matrix_csc2dense failed"); -} - -void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz) { - CHECK_NOTNULL(A_d); - CHECK(format == HL_SPARSE_CSR || format == HL_SPARSE_CSC) - << "sparse matrix format error!"; - CHECK(value_type == HL_FLOAT_VALUE || value_type == HL_NO_VALUE) - << "sparse matrix value type error!"; - /* avoid malloc 0 bytes */ - int nnz_s = (nnz == 0 ? 
1 : nnz); - - if (format == HL_SPARSE_CSR) { - CHECK(dimM > 0 && nnz >= 0) << "sparse matrix size error!"; - - char *tmp = - (char *)malloc(sizeof(_hl_sparse_matrix_s) + sizeof(_hl_csr_matrix)); - CHECK_NOTNULL(tmp); - - hl_csr_matrix csr = (hl_csr_matrix)(tmp + sizeof(_hl_sparse_matrix_s)); - csr->sparsity = -1.0; - - if (value_type == HL_NO_VALUE) { - csr->csr_val = NULL; - csr->nnz_s = nnz_s; - csr->row_s = dimM + 1; - csr->csr_row = (int *)hl_malloc_device((dimM + 1) * sizeof(int)); - csr->csr_col = (int *)hl_malloc_device((nnz_s) * sizeof(int)); - - *A_d = (hl_sparse_matrix_s)tmp; - (*A_d)->matrix = (hl_matrix_s)csr; - } else if (value_type == HL_FLOAT_VALUE) { - csr->nnz_s = nnz_s; - csr->row_s = dimM + 1; - csr->csr_val = (real *)hl_malloc_device((nnz_s) * sizeof(real)); - csr->csr_row = (int *)hl_malloc_device((dimM + 1) * sizeof(int)); - csr->csr_col = (int *)hl_malloc_device((nnz_s) * sizeof(int)); - - *A_d = (hl_sparse_matrix_s)tmp; - (*A_d)->matrix = (hl_matrix_s)csr; - } - } else if (format == HL_SPARSE_CSC) { - CHECK(dimM > 0 && nnz >= 0) << "sparse matrix size error!"; - - char *tmp = - (char *)malloc(sizeof(_hl_sparse_matrix_s) + sizeof(_hl_csc_matrix)); - CHECK_NOTNULL(tmp); - - hl_csc_matrix csc = (hl_csc_matrix)(tmp + sizeof(_hl_sparse_matrix_s)); - csc->sparsity = -1.0f; - - if (value_type == HL_NO_VALUE) { - csc->csc_val = NULL; - csc->nnz_s = nnz_s; - csc->col_s = dimN + 1; - csc->csc_row = (int *)hl_malloc_device((nnz_s) * sizeof(int)); - csc->csc_col = (int *)hl_malloc_device((dimN + 1) * sizeof(int)); - - *A_d = (hl_sparse_matrix_s)tmp; - (*A_d)->matrix = (hl_matrix_s)csc; - } else if (value_type == HL_FLOAT_VALUE) { - csc->nnz_s = nnz_s; - csc->col_s = dimN + 1; - csc->csc_val = (real *)hl_malloc_device((nnz_s) * sizeof(real)); - csc->csc_row = (int *)hl_malloc_device((nnz_s) * sizeof(int)); - csc->csc_col = (int *)hl_malloc_device((dimN + 1) * sizeof(int)); - - *A_d = (hl_sparse_matrix_s)tmp; - (*A_d)->matrix = (hl_matrix_s)csc; - } - } - 
- (*A_d)->format = format; - (*A_d)->type = value_type; - (*A_d)->rows = dimM; - (*A_d)->cols = dimN; - (*A_d)->nnz = nnz; -} - -void hl_free_sparse_matrix(hl_sparse_matrix_s A_d) { - CHECK_NOTNULL(A_d); - CHECK(A_d->format == HL_SPARSE_CSR || A_d->format == HL_SPARSE_CSC) - << "sparse matrix format error!"; - - if (A_d->matrix == NULL) { - free(A_d); - return; - } - - if (A_d->format == HL_SPARSE_CSR) { - hl_csr_matrix csr = (hl_csr_matrix)A_d->matrix; - if (csr->csr_val != NULL) { - hl_free_mem_device(csr->csr_val); - csr->csr_val = NULL; - } - - if (csr->csr_row != NULL) { - hl_free_mem_device(csr->csr_row); - csr->csr_row = NULL; - } - - if (csr->csr_col != NULL) { - hl_free_mem_device(csr->csr_col); - csr->csr_col = NULL; - } - - A_d->matrix = NULL; - free(A_d); - } else if (A_d->format == HL_SPARSE_CSC) { - hl_csc_matrix csc = (hl_csc_matrix)A_d->matrix; - if (csc->csc_val != NULL) { - hl_free_mem_device(csc->csc_val); - csc->csc_val = NULL; - } - - if (csc->csc_row != NULL) { - hl_free_mem_device(csc->csc_row); - csc->csc_row = NULL; - } - - if (csc->csc_col != NULL) { - hl_free_mem_device(csc->csc_col); - csc->csc_col = NULL; - } - - A_d->matrix = NULL; - free(A_d); - } -} - -void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, - void *dest_d, - size_t size, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz) { - CHECK_NOTNULL(A_d); - CHECK(format == HL_SPARSE_CSR || format == HL_SPARSE_CSC) - << "sparse matrix format error!"; - - if (format == HL_SPARSE_CSR) { - CHECK(dimM > 0 && nnz >= 0) << "sparse matrix size error!"; - - size_t size_ = (dimM + 1) * sizeof(int) + nnz * sizeof(int); - if (value_type != HL_NO_VALUE) { - size_ += nnz * sizeof(real); - } - CHECK_LE(size_, size) << "dest_d size(" << size - << ") too small, should bigger than(" << size_ - << ")!"; - - char *tmp = - (char *)malloc(sizeof(_hl_sparse_matrix_s) + sizeof(_hl_csr_matrix)); - CHECK_NOTNULL(tmp); - - hl_csr_matrix csr = 
(hl_csr_matrix)(tmp + sizeof(_hl_sparse_matrix_s)); - - if (value_type == HL_NO_VALUE) { - csr->csr_val = NULL; - csr->csr_row = (int *)dest_d; - csr->csr_col = (int *)((char *)dest_d + (dimM + 1) * sizeof(int)); - } else { - csr->csr_val = (real *)dest_d; - csr->csr_row = (int *)((char *)dest_d + nnz * sizeof(real)); - csr->csr_col = (int *)((char *)dest_d + nnz * sizeof(real) + - (dimM + 1) * sizeof(int)); - } - csr->nnz_s = nnz; - csr->row_s = dimM + 1; - csr->sparsity = -1.0; - *A_d = (hl_sparse_matrix_s)tmp; - (*A_d)->matrix = (hl_matrix_s)csr; - } else if (format == HL_SPARSE_CSC) { - CHECK(dimM > 0 && nnz >= 0) << "sparse matrix size error!"; - - size_t size_ = (dimN + 1) * sizeof(int) + nnz * sizeof(int); - if (value_type != HL_NO_VALUE) { - size_ += nnz * sizeof(real); - } - CHECK_LE(size_, size) << "dest_d size(" << size - << ") too small, should bigger than(" << size_ - << ")!"; - - char *tmp = - (char *)malloc(sizeof(_hl_sparse_matrix_s) + sizeof(_hl_csc_matrix)); - CHECK_NOTNULL(tmp); - - hl_csc_matrix csc = (hl_csc_matrix)(tmp + sizeof(_hl_sparse_matrix_s)); - if (value_type == HL_NO_VALUE) { - csc->csc_val = NULL; - csc->csc_col = (int *)dest_d; - csc->csc_row = (int *)((char *)dest_d + (dimN + 1) * sizeof(int)); - } else { - csc->csc_val = (real *)dest_d; - csc->csc_col = (int *)((char *)dest_d + nnz * sizeof(real)); - csc->csc_row = (int *)((char *)dest_d + nnz * sizeof(real) + - (dimN + 1) * sizeof(int)); - } - csc->nnz_s = nnz; - csc->col_s = dimN + 1; - csc->sparsity = -1.0f; - *A_d = (hl_sparse_matrix_s)tmp; - (*A_d)->matrix = (hl_matrix_s)csc; - } - - (*A_d)->format = format; - (*A_d)->type = value_type; - (*A_d)->rows = dimM; - (*A_d)->cols = dimN; - (*A_d)->nnz = nnz; -} - -void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d, - real *value_d, - int *rows_d, - int *cols_d, - hl_matrix_format_t format, - hl_matrix_value_t value_type, - int dimM, - int dimN, - int nnz) { - CHECK_NOTNULL(A_d); - CHECK(dimM > 0 && nnz >= 0) << "sparse matrix 
size error!"; - - CHECK(format == HL_SPARSE_CSR || format == HL_SPARSE_CSC) - << "sparse matrix format error!"; - - if (format == HL_SPARSE_CSR) { - char *tmp = - (char *)malloc(sizeof(_hl_sparse_matrix_s) + sizeof(_hl_csr_matrix)); - CHECK_NOTNULL(tmp); - - hl_csr_matrix csr = (hl_csr_matrix)(tmp + sizeof(_hl_sparse_matrix_s)); - csr->csr_row = rows_d; - csr->csr_col = cols_d; - csr->csr_val = value_d; - csr->nnz_s = nnz; - csr->row_s = dimM + 1; - csr->sparsity = -1.0; - *A_d = (hl_sparse_matrix_s)tmp; - (*A_d)->matrix = (hl_matrix_s)csr; - } else if (format == HL_SPARSE_CSC) { - char *tmp = - (char *)malloc(sizeof(_hl_sparse_matrix_s) + sizeof(_hl_csc_matrix)); - CHECK_NOTNULL(tmp); - - hl_csc_matrix csc = (hl_csc_matrix)(tmp + sizeof(_hl_sparse_matrix_s)); - csc->csc_row = rows_d; - csc->csc_col = cols_d; - csc->csc_val = value_d; - csc->nnz_s = nnz; - csc->col_s = dimN + 1; - csc->sparsity = -1.0f; - *A_d = (hl_sparse_matrix_s)tmp; - (*A_d)->matrix = (hl_matrix_s)csc; - } - - (*A_d)->format = format; - (*A_d)->type = value_type; - (*A_d)->rows = dimM; - (*A_d)->cols = dimN; - (*A_d)->nnz = nnz; -} - -void hl_destruct_sparse_matrix(hl_sparse_matrix_s A_d) { - CHECK_NOTNULL(A_d); - free(A_d); -} - -void hl_memcpy_csr_matrix(hl_sparse_matrix_s csr_matrix, - real *csr_val, - int *csr_row, - int *csr_col, - hl_stream_t stream) { - CHECK_NOTNULL(csr_matrix); - CHECK_EQ(csr_matrix->format, HL_SPARSE_CSR) - << "csr_matrix is not csr format!"; - CHECK_NOTNULL(csr_matrix->matrix); - - hl_csr_matrix csr = (hl_csr_matrix)(csr_matrix->matrix); - CHECK_LE(csr_matrix->nnz, csr->nnz_s) << "copy size " << csr_matrix->nnz - << " is big than alloc size " - << csr->nnz_s; - - CHECK_LE((csr_matrix->rows + 1), csr->row_s) - << "copy size " << (csr_matrix->rows + 1) << " is big than alloc size " - << csr->row_s; - - CHECK(csr_matrix->type == HL_FLOAT_VALUE || csr_matrix->type == HL_NO_VALUE) - << "sparse matrix value type error!"; - - if (csr_matrix->type == HL_NO_VALUE) { - if 
(csr_row == NULL && csr_col == NULL) { - return; - } else if (csr_row != NULL && csr_col != NULL) { - hl_memcpy_async( - csr->csr_row, csr_row, (csr_matrix->rows + 1) * sizeof(int), stream); - - hl_memcpy_async( - csr->csr_col, csr_col, (csr_matrix->nnz) * sizeof(int), stream); - } else { - LOG(FATAL) << "parameter csr_row or csr_col is null pointer!"; - } - } else if (csr_matrix->type == HL_FLOAT_VALUE) { - if (csr_val == NULL && csr_row == NULL && csr_col == NULL) { - return; - } else if (csr_val != NULL && csr_row == NULL && csr_col == NULL) { - hl_memcpy_async( - csr->csr_val, csr_val, (csr_matrix->nnz) * sizeof(real), stream); - } else if (csr_val != NULL && csr_row != NULL && csr_col != NULL) { - hl_memcpy_async( - csr->csr_val, csr_val, (csr_matrix->nnz) * sizeof(real), stream); - hl_memcpy_async( - csr->csr_row, csr_row, (csr_matrix->rows + 1) * sizeof(int), stream); - hl_memcpy_async( - csr->csr_col, csr_col, (csr_matrix->nnz) * sizeof(int), stream); - } else { - LOG(FATAL) << "parameter csr_row or csr_col is null pointer!"; - } - } - - csr->sparsity = ((float)csr_matrix->nnz) / ((float)csr_matrix->rows) / - ((float)csr_matrix->cols); -} - -void hl_memcpy_csc_matrix(hl_sparse_matrix_s csc_matrix, - real *csc_val, - int *csc_row, - int *csc_col, - hl_stream_t stream) { - CHECK_NOTNULL(csc_matrix); - CHECK_EQ(csc_matrix->format, HL_SPARSE_CSC) - << "csc_matrix is not csc format error!"; - - hl_csc_matrix csc = (hl_csc_matrix)(csc_matrix->matrix); - CHECK_LE(csc_matrix->nnz, csc->nnz_s) << "copy size " << csc_matrix->nnz - << " is big than alloc size " - << csc->nnz_s; - - CHECK_LE((csc_matrix->cols + 1), csc->col_s) - << "copy size " << (csc_matrix->cols + 1) << " is big than alloc size " - << csc->col_s; - - CHECK(csc_matrix->type == HL_FLOAT_VALUE || csc_matrix->type == HL_NO_VALUE) - << "sparse matrix value type error!"; - - if (csc_matrix->type == HL_NO_VALUE) { - if (csc_row == NULL && csc_col == NULL) { - return; - } else if (csc_row != NULL && csc_col 
!= NULL) { - hl_memcpy_async( - csc->csc_row, csc_row, (csc_matrix->nnz) * sizeof(int), stream); - hl_memcpy_async( - csc->csc_col, csc_col, (csc_matrix->cols + 1) * sizeof(int), stream); - } else { - LOG(FATAL) << "parameter csc_row or csc_col is null pointer!"; - } - } else if (csc_matrix->type == HL_FLOAT_VALUE) { - if (csc_val == NULL && csc_row == NULL && csc_col == NULL) { - return; - } else if (csc_val != NULL && csc_row == NULL && csc_col == NULL) { - hl_memcpy_async( - csc->csc_val, csc_val, (csc_matrix->nnz) * sizeof(real), stream); - } else if (csc_val != NULL && csc_row != NULL && csc_col != NULL) { - hl_memcpy_async( - csc->csc_val, csc_val, (csc_matrix->nnz) * sizeof(real), stream); - hl_memcpy_async( - csc->csc_row, csc_row, (csc_matrix->nnz) * sizeof(int), stream); - hl_memcpy_async( - csc->csc_col, csc_col, (csc_matrix->cols + 1) * sizeof(int), stream); - } else { - LOG(FATAL) << "parameter csc_row or csc_col is null pointer!"; - } - } - - csc->sparsity = ((float)csc_matrix->nnz) / ((float)csc_matrix->rows) / - ((float)csc_matrix->cols); -} - -void hl_memcpy_sparse_matrix(hl_sparse_matrix_s dst, - hl_sparse_matrix_s src, - hl_stream_t stream) { - CHECK(dst && src && dst->matrix && src->matrix) - << "parameter dst or src is null pointer!"; - CHECK_EQ(dst->format, src->format) << "sparse matrix format does not match!"; - CHECK(dst->type != HL_FLOAT_VALUE || src->type != HL_NO_VALUE) - << "src sparse matrix is no value, dst sparse matrix has value!"; - - if (dst->format == HL_SPARSE_CSR) { - dst->rows = src->rows; - dst->cols = src->cols; - dst->nnz = src->nnz; - hl_csr_matrix csr = (hl_csr_matrix)src->matrix; - hl_memcpy_csr_matrix(dst, csr->csr_val, csr->csr_row, csr->csr_col, stream); - } else if (dst->format == HL_SPARSE_CSC) { - dst->rows = src->rows; - dst->cols = src->cols; - dst->nnz = src->nnz; - hl_csc_matrix csc = (hl_csc_matrix)src->matrix; - hl_memcpy_csc_matrix(dst, csc->csc_val, csc->csc_row, csc->csc_col, stream); - } else { - 
LOG(FATAL) << "sparse matrix format error!"; - } -} - -/** - * Calculate beta * C, if beta is zero, C does not have to be a valid input. - */ -static void _beta_mul_c(real *c, int dimM, int dimN, real beta) { - if (beta == 0.0) { - hl_gpu_apply_unary_op(unary::Zero(), c, dimM, dimN, dimN); - } else { - if (beta != 1.0) { - hl_gpu_apply_unary_op(unary::mul_scalar(beta), c, dimM, dimN, dimN); - } - } - - return; -} - -void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - CHECK_EQ(transb, HPPL_OP_N); - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - CHECK(dimM > 0 && dimN > 0 && dimK > 0); - CHECK_EQ(A_d->format, HL_SPARSE_CSR) << "matrix format error!"; - - if ((HPPL_OP_N == transa && (A_d->rows != dimM || A_d->cols != dimK)) || - (HPPL_OP_T == transa && (A_d->rows != dimK || A_d->cols != dimM))) { - LOG(FATAL) << "parameter error!"; - } - - if (A_d->nnz == 0) { - _beta_mul_c(C_d, dimM, dimN, beta); - return; - } - - /* nnz != 0 */ - hl_csr_matrix A_d2 = (hl_csr_matrix)(A_d->matrix); - if ((A_d2->csr_val == NULL && A_d->type != HL_NO_VALUE) || - A_d2->csr_row == NULL || A_d2->csr_col == NULL) { - LOG(FATAL) << "parameter error!"; - } - - if (HPPL_OP_N == transa) { - int blocksX = (dimN + CU_CSRMM_BLOCK_N - 1) / CU_CSRMM_BLOCK_N; - int blocksY = (dimM + CU_CSRMM_THREAD_Y - 1) / CU_CSRMM_THREAD_Y; - dim3 threads(CU_CSRMM_THREAD_X, CU_CSRMM_THREAD_Y); - dim3 grid(blocksX, blocksY); - - /* sparsity pattern */ - // A_d->sparsity; - if (A_d->type == HL_NO_VALUE) { - KeSMatrixCsrMulDense<0><<>>( - C_d, - A_d2->csr_val, - A_d2->csr_col, - A_d2->csr_row, - B_d, - dimM, - dimN, - dimK, - alpha, - beta); - } else { - KeSMatrixCsrMulDense<1><<>>( - C_d, - A_d2->csr_val, - A_d2->csr_col, - A_d2->csr_row, - B_d, - dimM, - dimN, - dimK, - alpha, - beta); - } - } else if (HPPL_OP_T == transa) { - _beta_mul_c(C_d, dimM, dimN, 
beta); - - int blocksX = - (dimN + CU_CSC_MUL_DENSE_BLOCK_N - 1) / CU_CSC_MUL_DENSE_BLOCK_N; - int blocksY = - (dimK + CU_CSC_MUL_DENSE_BLOCK_K - 1) / CU_CSC_MUL_DENSE_BLOCK_K; - dim3 threads(CU_CSC_MUL_DENSE_THREAD_X, CU_CSC_MUL_DENSE_THREAD_Y); - dim3 grid(blocksX, blocksY); - if (A_d->type == HL_NO_VALUE) { - KeSMatrixCscMulDense<0><<>>( - C_d, - A_d2->csr_val, - A_d2->csr_col, - A_d2->csr_row, - B_d, - dimM, - dimN, - dimK, - alpha, - beta); - } else { - KeSMatrixCscMulDense<1><<>>( - C_d, - A_d2->csr_val, - A_d2->csr_col, - A_d2->csr_row, - B_d, - dimM, - dimN, - dimK, - alpha, - beta); - } - } else { - LOG(FATAL) << "parameter transa error!"; - } - - CHECK_SYNC("hl_matrix_csr_mul_dense failed"); -} - -void hl_matrix_dense_mul_csc(real *A_d, - hl_trans_op_t transa, - hl_sparse_matrix_s B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - CHECK_EQ(transa, HPPL_OP_N); - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - - if (dimM <= 0 || dimN <= 0 || dimK <= 0 || - ((transb == HPPL_OP_N) && (B_d->rows != dimK || B_d->cols != dimN)) || - ((transb == HPPL_OP_T) && (B_d->rows != dimN || B_d->cols != dimK))) { - LOG(FATAL) << "parameter dims error!"; - } - - CHECK_EQ(B_d->format, HL_SPARSE_CSC) << "matrix format error!"; - - if (B_d->nnz == 0) { - _beta_mul_c(C_d, dimM, dimN, beta); - return; - } - - /* nnz != 0 */ - hl_csc_matrix B_d2 = (hl_csc_matrix)(B_d->matrix); - if ((B_d2->csc_val == NULL && B_d->type != HL_NO_VALUE) || - B_d2->csc_row == NULL || B_d2->csc_col == NULL) { - LOG(FATAL) << "parameter B is null!"; - } - - if (transb == HPPL_OP_N) { - int blocksX = (dimM + CU_CSCMM_BLOCK_M_BEST - 1) / CU_CSCMM_BLOCK_M_BEST; - int blocksY = (dimN + CU_CSCMM_BLOCK_N_BEST - 1) / CU_CSCMM_BLOCK_N_BEST; - dim3 threads(CU_CSCMM_THREAD_X_BEST, CU_CSCMM_THREAD_Y_BEST); - dim3 grid(blocksX, blocksY); - - if (B_d->type == HL_NO_VALUE) { - KeSMatrixDenseMulCsc<0><<>>( - C_d, - A_d, - B_d2->csc_val, - 
B_d2->csc_row, - B_d2->csc_col, - dimM, - dimN, - dimK, - alpha, - beta); - } else { - KeSMatrixDenseMulCsc<1><<>>( - C_d, - A_d, - B_d2->csc_val, - B_d2->csc_row, - B_d2->csc_col, - dimM, - dimN, - dimK, - alpha, - beta); - } - } else if (transb == HPPL_OP_T) { - _beta_mul_c(C_d, dimM, dimN, beta); - int blocksX = 1 + (dimK - 1) / CU_DM_CSR_THREAD_X; - int blocksY = 1 + (dimM - 1) / CU_DM_CSR_BLOCK_M; - dim3 threads(CU_DM_CSR_THREAD_X, CU_DM_CSR_THREAD_Y); - dim3 grid(blocksX, blocksY); - if (B_d->type == HL_NO_VALUE) { - KeSMatrixDenseMulCsr<0><<>>( - C_d, - A_d, - B_d2->csc_val, - B_d2->csc_col, - B_d2->csc_row, - dimM, - dimN, - dimK, - alpha, - beta); - } else { - KeSMatrixDenseMulCsr<1><<>>( - C_d, - A_d, - B_d2->csc_val, - B_d2->csc_col, - B_d2->csc_row, - dimM, - dimN, - dimK, - alpha, - beta); - } - } else { - LOG(FATAL) << "parameter transb error!"; - } - - CHECK_SYNC("hl_matrix_dense_mul_csc failed"); -} - -void hl_matrix_dense_mul_csr(real *A_d, - hl_trans_op_t transa, - hl_sparse_matrix_s B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - CHECK_EQ(transa, HPPL_OP_N); - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - - if (dimM <= 0 || dimN <= 0 || dimK <= 0 || - (transb == HPPL_OP_N && (B_d->rows != dimK || B_d->cols != dimN)) || - (transb == HPPL_OP_T && (B_d->rows != dimN || B_d->cols != dimK))) { - LOG(FATAL) << "parameter dims error!"; - } - - CHECK_EQ(B_d->format, HL_SPARSE_CSR) << "matrix format error!"; - - if (B_d->nnz == 0) { - _beta_mul_c(C_d, dimM, dimN, beta); - return; - } - - /* nnz != 0 */ - hl_csr_matrix B_d2 = (hl_csr_matrix)(B_d->matrix); - if ((B_d2->csr_val == NULL && B_d->type != HL_NO_VALUE) || - B_d2->csr_row == NULL || B_d2->csr_col == NULL) { - LOG(FATAL) << "parameter transa error!"; - } - - if (transb == HPPL_OP_N) { - _beta_mul_c(C_d, dimM, dimN, beta); - int blocksX = 1 + (dimK - 1) / CU_DM_CSR_THREAD_X; - int blocksY = 1 + (dimM - 1) / 
CU_DM_CSR_BLOCK_M; - dim3 threads(CU_DM_CSR_THREAD_X, CU_DM_CSR_THREAD_Y); - dim3 grid(blocksX, blocksY); - if (B_d->type == HL_NO_VALUE) { - KeSMatrixDenseMulCsr<0><<>>( - C_d, - A_d, - B_d2->csr_val, - B_d2->csr_row, - B_d2->csr_col, - dimM, - dimN, - dimK, - alpha, - beta); - } else { - KeSMatrixDenseMulCsr<1><<>>( - C_d, - A_d, - B_d2->csr_val, - B_d2->csr_row, - B_d2->csr_col, - dimM, - dimN, - dimK, - alpha, - beta); - } - } else if (transb == HPPL_OP_T) { - int blocksX = (dimM + CU_CSCMM_BLOCK_M_BEST - 1) / CU_CSCMM_BLOCK_M_BEST; - int blocksY = (dimN + CU_CSCMM_BLOCK_N_BEST - 1) / CU_CSCMM_BLOCK_N_BEST; - dim3 threads(CU_CSCMM_THREAD_X_BEST, CU_CSCMM_THREAD_Y_BEST); - dim3 grid(blocksX, blocksY); - if (B_d->type == HL_NO_VALUE) { - KeSMatrixDenseMulCsc<0><<>>( - C_d, - A_d, - B_d2->csr_val, - B_d2->csr_col, - B_d2->csr_row, - dimM, - dimN, - dimK, - alpha, - beta); - } else { - KeSMatrixDenseMulCsc<1><<>>( - C_d, - A_d, - B_d2->csr_val, - B_d2->csr_col, - B_d2->csr_row, - dimM, - dimN, - dimK, - alpha, - beta); - } - } else { - LOG(FATAL) << "parameter transb error!"; - } - - CHECK_SYNC("hl_matrix_dense_mul_csr failed"); -} - -void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - real *C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - CHECK_EQ(transb, HPPL_OP_N); - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - CHECK(dimM > 0 && dimN > 0 && dimK > 0) << "parameter error!"; - CHECK_EQ(A_d->format, HL_SPARSE_CSC) << "matrix format error!"; - - if ((HPPL_OP_N == transa && (A_d->rows != dimM || A_d->cols != dimK)) || - (HPPL_OP_T == transa && (A_d->rows != dimK || A_d->cols != dimM))) { - LOG(FATAL) << "parameter error!"; - } - - if (A_d->nnz == 0) { - _beta_mul_c(C_d, dimM, dimN, beta); - return; - } - - /* nnz != 0 */ - hl_csc_matrix A_d2 = (hl_csc_matrix)(A_d->matrix); - if ((A_d2->csc_val == NULL && A_d->type != HL_NO_VALUE) || - A_d2->csc_row == NULL || 
A_d2->csc_col == NULL) { - LOG(FATAL) << "parameter error!"; - } - - if (HPPL_OP_N == transa) { - _beta_mul_c(C_d, dimM, dimN, beta); - - int blocksX = - (dimN + CU_CSC_MUL_DENSE_BLOCK_N - 1) / CU_CSC_MUL_DENSE_BLOCK_N; - int blocksY = - (dimK + CU_CSC_MUL_DENSE_BLOCK_K - 1) / CU_CSC_MUL_DENSE_BLOCK_K; - dim3 threads(CU_CSC_MUL_DENSE_THREAD_X, CU_CSC_MUL_DENSE_THREAD_Y); - dim3 grid(blocksX, blocksY); - if (A_d->type == HL_NO_VALUE) { - KeSMatrixCscMulDense<0><<>>( - C_d, - A_d2->csc_val, - A_d2->csc_row, - A_d2->csc_col, - B_d, - dimM, - dimN, - dimK, - alpha, - beta); - } else { - KeSMatrixCscMulDense<1><<>>( - C_d, - A_d2->csc_val, - A_d2->csc_row, - A_d2->csc_col, - B_d, - dimM, - dimN, - dimK, - alpha, - beta); - } - } else if (HPPL_OP_T == transa) { - int blocksX = (dimN + CU_CSRMM_BLOCK_N - 1) / CU_CSRMM_BLOCK_N; - int blocksY = (dimM + CU_CSRMM_THREAD_Y - 1) / CU_CSRMM_THREAD_Y; - dim3 threads(CU_CSRMM_THREAD_X, CU_CSRMM_THREAD_Y); - dim3 grid(blocksX, blocksY); - - /* sparsity pattern */ - // A_d->sparsity; - if (A_d->type == HL_NO_VALUE) { - KeSMatrixCsrMulDense<0><<>>( - C_d, - A_d2->csc_val, - A_d2->csc_row, - A_d2->csc_col, - B_d, - dimM, - dimN, - dimK, - alpha, - beta); - } else { - KeSMatrixCsrMulDense<1><<>>( - C_d, - A_d2->csc_val, - A_d2->csc_row, - A_d2->csc_col, - B_d, - dimM, - dimN, - dimK, - alpha, - beta); - } - } else { - LOG(FATAL) << "parameter transa error!"; - } - - CHECK_SYNC("hl_matrix_csc_mul_dense failed"); -} - -void hl_sparse_matrix_mul(real *A_d, - hl_trans_op_t transa, - real *B_d, - hl_trans_op_t transb, - hl_sparse_matrix_s C_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - CHECK(dimM > 0 && dimN > 0 && dimK > 0) << "parameter error!"; - CHECK_NE(C_d->type, HL_NO_VALUE) << "C value type error!"; - - if (C_d->nnz == 0) return; - - if (C_d->format == HL_SPARSE_CSC) { - hl_csc_matrix C_d2 = (hl_csc_matrix)(C_d->matrix); - if (C_d2->csc_val == 
NULL || C_d2->csc_row == NULL || - C_d2->csc_col == NULL) { - LOG(FATAL) << "parameter error!"; - } - - if (beta != 1.0) { - hl_gpu_apply_unary_op( - unary::mul_scalar(beta), C_d2->csc_val, 1, C_d->nnz, C_d->nnz); - } - - int blocksX = dimN; - int blocksY = 1; - dim3 threads(CU_CSCMM_DMD2CSC_THREAD_X, 1); - dim3 grid(blocksX, blocksY); - bool transA = transa == HPPL_OP_T ? 1 : 0; - bool transB = transb == HPPL_OP_T ? 1 : 0; - KeSMatrixDenseMulDense2CSC<<>>( - C_d2->csc_val, - C_d2->csc_row, - C_d2->csc_col, - A_d, - B_d, - transA, - transB, - dimM, - dimN, - dimK, - alpha, - beta); - CHECK_SYNC("hl_sparse_matrix_mul failed"); - } else { - hl_csr_matrix C_d2 = (hl_csr_matrix)(C_d->matrix); - if ((C_d2->csr_val == NULL && C_d->type != HL_NO_VALUE) || - C_d2->csr_row == NULL || C_d2->csr_col == NULL) { - LOG(FATAL) << "parameter error!"; - } - - if (beta != 1.0) { - hl_gpu_apply_unary_op( - unary::mul_scalar(beta), C_d2->csr_val, 1, C_d->nnz, C_d->nnz); - } - - bool transA = transa == HPPL_OP_T ? 1 : 0; - bool transB = transb == HPPL_OP_T ? 1 : 0; - if (!transB) { - int blocksX = dimM; - int blocksY = 1; - dim3 threads(CU_CSCMM_DMD2CSR_THREAD_X, 1); - dim3 grid(blocksX, blocksY); - - KeSMatrixDenseMulDense2CSR<<>>( - C_d2->csr_val, - C_d2->csr_row, - C_d2->csr_col, - A_d, - B_d, - transA, - transB, - dimM, - dimN, - dimK, - alpha, - beta); - CHECK_SYNC("hl_sparse_matrix_mul failed"); - } else { - CHECK(!transA) << "Not supported A is trans and B is not trans!"; - - dim3 block(CU_BLOCK_SIZE, 1); - int avgNnzPerRow = C_d->nnz / dimM; - avgNnzPerRow = avgNnzPerRow > 0 ? 
avgNnzPerRow : 1; - int gridx = DIVUP(avgNnzPerRow, CU_BLOCK_SIZE); - dim3 grid(gridx, dimM); - KeSMatrixDenseMulDenseTrans2CSR<<>>( - C_d2->csr_val, - C_d2->csr_row, - C_d2->csr_col, - A_d, - B_d, - transA, - transB, - dimM, - dimN, - dimK, - alpha, - beta); - CHECK_SYNC("hl_sparse_matrix_mul failed"); - } - } -} - -void hl_memcpy_from_csc_matrix(real *csc_val, - size_t val_size, - int *csc_row, - size_t row_size, - int *csc_col, - size_t col_size, - hl_sparse_matrix_s csc_matrix, - hl_stream_t stream) { - CHECK_NOTNULL(csc_matrix); - CHECK_NOTNULL(csc_row); - CHECK_NOTNULL(csc_col); - - CHECK_EQ(csc_matrix->format, HL_SPARSE_CSC) - << "csc_matrix is not csc format error!"; - - if (csc_matrix->nnz > row_size || - csc_matrix->cols + 1 > static_cast(col_size)) { - LOG(FATAL) << "size not match!"; - } - - hl_csc_matrix csc = (hl_csc_matrix)(csc_matrix->matrix); - hl_memcpy_async((void *)csc_row, - (void *)csc->csc_row, - (csc_matrix->nnz) * sizeof(int), - stream); - hl_memcpy_async((void *)csc_col, - (void *)csc->csc_col, - (csc_matrix->cols + 1) * sizeof(int), - stream); - if (csc_matrix->type == HL_FLOAT_VALUE) { - if (csc_val != NULL) { - CHECK_LE(csc_matrix->nnz, val_size) << "size not match!"; - hl_memcpy_async((void *)csc_val, - (void *)csc->csc_val, - (csc_matrix->nnz) * sizeof(real), - stream); - } else { - LOG(FATAL) << "parameter csr_val is null pointer!"; - } - } -} - -void hl_memcpy_from_csr_matrix(real *csr_val, - size_t val_size, - int *csr_row, - size_t row_size, - int *csr_col, - size_t col_size, - hl_sparse_matrix_s csr_matrix, - hl_stream_t stream) { - CHECK_NOTNULL(csr_matrix); - CHECK_NOTNULL(csr_row); - CHECK_NOTNULL(csr_col); - CHECK_EQ(csr_matrix->format, HL_SPARSE_CSR) - << "csr_matrix is not csr format error!"; - - if (csr_matrix->nnz > col_size || - csr_matrix->rows + 1 > static_cast(row_size)) { - LOG(FATAL) << "size not match!"; - } - - hl_csr_matrix csr = (hl_csr_matrix)(csr_matrix->matrix); - hl_memcpy_async((void *)csr_row, - (void 
*)csr->csr_row, - (csr_matrix->rows + 1) * sizeof(int), - stream); - hl_memcpy_async((void *)csr_col, - (void *)csr->csr_col, - (csr_matrix->nnz) * sizeof(int), - stream); - if (csr_matrix->type == HL_FLOAT_VALUE) { - if (csr_val != NULL) { - CHECK_LE(csr_matrix->nnz, val_size) << "size not match!"; - hl_memcpy_async((void *)csr_val, - (void *)csr->csr_val, - (csr_matrix->nnz) * sizeof(real), - stream); - } else { - LOG(FATAL) << "parameter csr_val is null pointer!"; - } - } -} - -void hl_sparse_matrix_column_sum( - real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale) { - if (B_d->format == HL_SPARSE_CSR) { - hl_matrix_csr_column_sum(A_d, B_d, dimM, dimN, scale); - } else { - LOG(FATAL) << "Not support CSC format error!"; - } -} - -void hl_matrix_csr_column_sum( - real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - - if (dimM <= 0 || dimN <= 0 || (B_d->rows != dimM || B_d->cols != dimN)) { - LOG(FATAL) << "parameter dims error!"; - } - - hl_csr_matrix B_d2 = (hl_csr_matrix)(B_d->matrix); - if ((B_d2->csr_val == NULL && B_d->type != HL_NO_VALUE) || - B_d2->csr_row == NULL || B_d2->csr_col == NULL) { - LOG(FATAL) << "parameter B is null!"; - } - - if (B_d->nnz == 0) return; - - int nnz = B_d->nnz; - int block = 512; - int grid = DIVUP(nnz, 512); - KeSMatrixCsrColumnSum<<>>( - A_d, B_d2->csr_val, B_d2->csr_col, nnz); - - CHECK_SYNC("hl_matrix_csr_column_sum failed"); -} - -void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d, real *B_d, real scale) { - if (A_d->format == HL_SPARSE_CSR) { - hl_matrix_csr_add_bias(A_d, B_d, scale); - } else { - LOG(FATAL) << "Not support CSC format error!"; - } -} - -void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d, real *B_d, real scale) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - - hl_csr_matrix A_d2 = (hl_csr_matrix)(A_d->matrix); - if ((A_d2->csr_val == NULL && A_d->type != HL_NO_VALUE) || - A_d2->csr_row == NULL || A_d2->csr_col == NULL) { - 
LOG(FATAL) << "parameter A_d is null!"; - } - - if (A_d->nnz == 0) return; - - int nnz = A_d->nnz; - int block = 512; - int grid = DIVUP(nnz, 512); - KeSMatrixCsrAddBias<<>>( - A_d2->csr_val, A_d2->csr_col, B_d, scale, nnz); - - CHECK_SYNC("hl_sparse_matrix_add_bias failed"); -} - -void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d, - real *B_d, - int dimM, - int dimN, - real alpha, - real beta) { - if (A_d->format == HL_SPARSE_CSR) { - hl_matrix_csr_add_dense(A_d, B_d, dimM, dimN, alpha, beta); - } else { - LOG(FATAL) << "Not support CSC format error!"; - } -} - -void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d, - real *B_d, - int dimM, - int dimN, - real alpha, - real beta) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - - if (dimM <= 0 || dimN <= 0 || A_d->rows != dimM || A_d->cols != dimN) { - LOG(FATAL) << "parameter dim error!"; - } - - hl_csr_matrix A_d2 = (hl_csr_matrix)(A_d->matrix); - if ((A_d2->csr_val == NULL && A_d->type != HL_NO_VALUE) || - A_d2->csr_row == NULL || A_d2->csr_col == NULL) { - LOG(FATAL) << "parameter A_d is null!"; - } - - if (A_d->nnz == 0) return; - - int gridX = DIVUP((A_d->nnz / dimM), 512); - gridX = gridX > 0 ? 
gridX : 1; - dim3 block(512, 1); - dim3 grid(gridX, dimM); - KeSMatrixCsrAddDense<<>>(A_d2->csr_val, - A_d2->csr_row, - A_d2->csr_col, - B_d, - alpha, - beta, - dimM, - dimN); - - CHECK_SYNC("hl_sparse_matrix_add_dense failed"); -} - -int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat) { - __sparse_get_return__(sMat, row); -} - -int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat) { - __sparse_get_return__(sMat, col); -} - -real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat) { - __sparse_get_return__(sMat, val); -} diff --git a/paddle/legacy/cuda/src/hl_cuda_sparse.cuh b/paddle/legacy/cuda/src/hl_cuda_sparse.cuh deleted file mode 100644 index adb898c9ac6c108c2e98d0baa9003eca4ad6a133..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_cuda_sparse.cuh +++ /dev/null @@ -1,1015 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - - -#include "hl_device_functions.cuh" - -template -__device__ real findvalue(real* csr_val, - int* csr_col, - int col_start, - int col_end, - int index) { - int start = col_start; - int end = col_end-1; - int mid = -1; - - while (start < end) { - mid = start + ((end - start) / 2); - if (csr_col[mid] < index) - start = mid + 1; - else - end = mid; - } - - if ((start < col_end) && (csr_col[start] == index)) { - real ret = VALUE_TYPE == 0 ? 
1.0 : csr_val[start]; - return ret; - } else { - return 0.0; - } -} - -#define CU_CSR2DENSE_THREAD_X 16 -#define CU_CSR2DENSE_THREAD_Y 16 -template -__global__ void KeSMatrixCsr2Dense(real * csr_val, - int * csr_row, - int * csr_col, - real * C_d, - const int dimM, - const int dimN) { - const int row = blockIdx.y*blockDim.y+threadIdx.y; - const int col = blockIdx.x*blockDim.x+threadIdx.x; - - if (row >= dimM || col >= dimN) { - return; - } - - int start = csr_row[row]; - int end = csr_row[row+1]; - - real sum = findvalue(csr_val, csr_col, start, end, col); - C_d[row*dimN + col] = sum; -} - -template -__global__ void KeSMatrixCsc2Dense(real * csc_val, - int * csc_row, - int * csc_col, - real * C_d, - const int dimM, - const int dimN) { - const int row = blockIdx.y*blockDim.y+threadIdx.y; - const int col = blockIdx.x*blockDim.x+threadIdx.x; - - if (row >= dimM || col >= dimN) { - return; - } - - int start = csc_col[col]; - int end = csc_col[col+1]; - - real sum = findvalue(csc_val, csc_row, start, end, row); - C_d[row*dimN + col] = sum; -} - -__device__ __forceinline__ -void _calculate_c(real &c, real sum) { - c = sum; -} -__device__ __forceinline__ -void _calculate_c(real &c, real sum, real beta) { - c = sum + beta * c; -} - -#define CU_CSRMM_N 4 -#define CU_CSRMM_THREAD_X 32 -#define CU_CSRMM_THREAD_Y 32 -#define CU_CSRMM_BLOCK_N (32*CU_CSRMM_N) -#define CU_CSRMM_SHARED_ELEMENT (2*CU_CSRMM_THREAD_X) -template -__global__ void KeSMatrixCsrMulDense(real *C_d, - real * csr_val, - int * csr_col, - int * csr_row, - real *B_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - const int idx = threadIdx.x; - const int idy = threadIdx.y; - const int index_m = blockIdx.y*CU_CSRMM_THREAD_Y+threadIdx.y; - int index_n = blockIdx.x*CU_CSRMM_BLOCK_N+threadIdx.x; - - __shared__ real csr_val_sh[CU_CSRMM_THREAD_Y][CU_CSRMM_SHARED_ELEMENT]; - __shared__ int csr_col_sh[CU_CSRMM_THREAD_Y][CU_CSRMM_SHARED_ELEMENT]; - - if (index_m >= dimM) { - return; - } - - // 
possible optimization, cache this in shared memory - int csr_start = csr_row[index_m]; - int csr_end = csr_row[index_m+1]; - int csr_index = csr_start + idx; - - int csr_iter = (csr_end-csr_start)/CU_CSRMM_SHARED_ELEMENT; - int csr_rem = (csr_end-csr_start)%CU_CSRMM_SHARED_ELEMENT; - - int index_k = -1; - real sum[CU_CSRMM_N] = {0}; - real b_r[CU_CSRMM_N] = {0}; - - for (int csr_i = 0; csr_i < csr_iter; csr_i++) { - #pragma unroll - for (int i = 0; i < (CU_CSRMM_SHARED_ELEMENT/CU_CSRMM_THREAD_X); i++) { - if (VALUE_TYPE != 0) { - csr_val_sh[idy][idx + i*CU_CSRMM_THREAD_X] = csr_val[csr_index]; - } - csr_col_sh[idy][idx + i*CU_CSRMM_THREAD_X] = csr_col[csr_index]; - csr_index += CU_CSRMM_THREAD_X; - } - - for (int index = 0; index < CU_CSRMM_SHARED_ELEMENT; index++) { - index_k = csr_col_sh[idy][index]; - real a_r = VALUE_TYPE == 0 ? 1.0 : csr_val_sh[idy][index]; - int tmp_index = index_n; - real *B_d_r = B_d + tmp_index; - #pragma unroll - for (int n = 0; n < CU_CSRMM_N; n++) { - if (tmp_index >= dimN) break; - b_r[n] = B_d_r[index_k*dimN]; - B_d_r += CU_CSRMM_THREAD_X; - tmp_index += CU_CSRMM_THREAD_X; - } - - #pragma unroll - for (int n = 0; n < CU_CSRMM_N; n++) { - sum[n] = VALUE_TYPE == 0 ? sum[n] + b_r[n] : sum[n] + a_r*b_r[n]; - } - } - // __syncthreads(); - } - - if (csr_rem != 0) { - #pragma unroll - for (int i = 0; i < (CU_CSRMM_SHARED_ELEMENT/CU_CSRMM_THREAD_X); i++) { - if (csr_index < csr_end) { - if (VALUE_TYPE != 0) { - csr_val_sh[idy][idx + i*CU_CSRMM_THREAD_X] = csr_val[csr_index]; - } - csr_col_sh[idy][idx + i*CU_CSRMM_THREAD_X] = csr_col[csr_index]; - } - csr_index += CU_CSRMM_THREAD_X; - } - // __syncthreads(); - - #pragma unroll - for (int index = 0; index < csr_rem; index++) { - index_k = csr_col_sh[idy][index]; - real a_r = VALUE_TYPE == 0 ? 
1.0 : csr_val_sh[idy][index]; - int tmp_index = index_n; - real *B_d_r = B_d + tmp_index; - #pragma unroll - for (int n = 0; n < CU_CSRMM_N; n++) { - if (tmp_index >= dimN) break; - b_r[n] = B_d_r[index_k*dimN]; - B_d_r += CU_CSRMM_THREAD_X; - tmp_index += CU_CSRMM_THREAD_X; - } - - #pragma unroll - for (int n = 0; n < CU_CSRMM_N; n++) { - sum[n] = VALUE_TYPE == 0 ? sum[n] + b_r[n] : sum[n] + a_r*b_r[n]; - } - } - } - - C_d += __mul24(index_m, dimN); - if (beta == 0.0) { - for (int n = 0; n < CU_CSRMM_N; n++) { - if (index_n < dimN) { - _calculate_c(C_d[index_n], alpha * sum[n]); - index_n += CU_CSRMM_THREAD_X; - } - } - } else { - for (int n = 0; n < CU_CSRMM_N; n++) { - if (index_n < dimN) { - _calculate_c(C_d[index_n], alpha * sum[n], beta); - index_n += CU_CSRMM_THREAD_X; - } - } - } -} - -#define CU_CSC_MUL_DENSE_THREAD_N 1 -#define CU_CSC_MUL_DENSE_THREAD_X 32 -#define CU_CSC_MUL_DENSE_THREAD_Y 4 -#define CU_CSC_MUL_DENSE_BLOCK_K (CU_CSC_MUL_DENSE_THREAD_Y) -#define CU_CSC_MUL_DENSE_BLOCK_N \ - (CU_CSC_MUL_DENSE_THREAD_N * CU_CSC_MUL_DENSE_THREAD_X) -#define CU_CSC_MUL_DENSE_SHARED_ELEMENT (CU_CSC_MUL_DENSE_THREAD_X) -template -__global__ void KeSMatrixCscMulDense(real *C_d, - real * csc_val, - int * csc_row, - int * csc_col, - real *B_d, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - const int idx = threadIdx.x; - const int idy = threadIdx.y; - const int index_k = blockIdx.y*CU_CSC_MUL_DENSE_BLOCK_K+threadIdx.y; - const int index_n = blockIdx.x*CU_CSC_MUL_DENSE_BLOCK_N+threadIdx.x; - - if (index_k >= dimK) { - return; - } - - __shared__ - real csc_val_sh[CU_CSC_MUL_DENSE_THREAD_Y][CU_CSC_MUL_DENSE_SHARED_ELEMENT]; - __shared__ - int csc_row_sh[CU_CSC_MUL_DENSE_THREAD_Y][CU_CSC_MUL_DENSE_SHARED_ELEMENT]; - - // possible optimization, cache this in shared memory - int csc_start = csc_col[index_k]; - int csc_end = csc_col[index_k+1]; - int csc_index = csc_start + idx; - int csc_iter = (csc_end-csc_start)/CU_CSC_MUL_DENSE_SHARED_ELEMENT; - 
int csc_rem = (csc_end-csc_start)%CU_CSC_MUL_DENSE_SHARED_ELEMENT; - int index_m = -1; - - real b_r[CU_CSC_MUL_DENSE_THREAD_N] = {0}; - real *B_d_r; - real *C_d_r; - int index_n_t; - B_d += index_n + __mul24(index_k, dimN); - C_d += index_n; - for (int csr_i = 0; csr_i < csc_iter; csr_i++) { - #pragma unroll - for (int i = 0; - i < (CU_CSC_MUL_DENSE_SHARED_ELEMENT/CU_CSC_MUL_DENSE_THREAD_X); i++) { - if (VALUE_TYPE != 0) { - csc_val_sh[idy][idx + i*CU_CSC_MUL_DENSE_THREAD_X] = csc_val[csc_index]; - } - csc_row_sh[idy][idx + i*CU_CSC_MUL_DENSE_THREAD_X] = csc_row[csc_index]; - csc_index += CU_CSC_MUL_DENSE_THREAD_X; - } - - #pragma unroll - for (int index = 0; index < CU_CSC_MUL_DENSE_SHARED_ELEMENT; index++) { - index_m = csc_row_sh[idy][index]; - real a_r = VALUE_TYPE == 0 ? 1.0 : csc_val_sh[idy][index]; - B_d_r = B_d; - C_d_r = C_d + __mul24(index_m, dimN); - - index_n_t = index_n; - #pragma unroll - for (int n = 0; n < CU_CSC_MUL_DENSE_THREAD_N; n++) { - if (index_n_t < dimN) { - b_r[n] = B_d_r[0]; - B_d_r += CU_CSC_MUL_DENSE_THREAD_X; - index_n_t += CU_CSC_MUL_DENSE_THREAD_X; - } - } - - index_n_t = index_n; - #pragma unroll - for (int n = 0; n < CU_CSC_MUL_DENSE_THREAD_N; n++) { - if (index_n_t < dimN) { - real tmp; - tmp = alpha*a_r*b_r[n]; - paddle::paddleAtomicAdd(C_d_r, tmp); - C_d_r += CU_CSC_MUL_DENSE_THREAD_X; - index_n_t += CU_CSC_MUL_DENSE_THREAD_X; - } - } - } - // __syncthreads(); - } - - if (csc_rem != 0) { - #pragma unroll - for (int i = 0; - i < (CU_CSC_MUL_DENSE_SHARED_ELEMENT/CU_CSC_MUL_DENSE_THREAD_X); i++) { - if (csc_index < csc_end) { - if (VALUE_TYPE != 0) { - csc_val_sh[idy][idx + i * CU_CSC_MUL_DENSE_THREAD_X] = - csc_val[csc_index]; - } - csc_row_sh[idy][idx + i * CU_CSC_MUL_DENSE_THREAD_X] = - csc_row[csc_index]; - } - csc_index += CU_CSC_MUL_DENSE_THREAD_X; - } - // __syncthreads(); - - #pragma unroll - for (int index = 0; index < csc_rem; index++) { - index_m = csc_row_sh[idy][index]; - real a_r = VALUE_TYPE == 0 ? 
1.0 : csc_val_sh[idy][index]; - B_d_r = B_d; - C_d_r = C_d + __mul24(index_m, dimN); - - index_n_t = index_n; - #pragma unroll - for (int n = 0; n < CU_CSC_MUL_DENSE_THREAD_N; n++) { - if (index_n_t < dimN) { - b_r[n] = B_d_r[0]; - B_d_r += CU_CSC_MUL_DENSE_THREAD_X; - index_n_t += CU_CSC_MUL_DENSE_THREAD_X; - } - } - - index_n_t = index_n; - #pragma unroll - for (int n = 0; n < CU_CSC_MUL_DENSE_THREAD_N; n++) { - if (index_n_t < dimN) { - real tmp; - tmp = alpha*a_r*b_r[n]; - paddle::paddleAtomicAdd(C_d_r, tmp); - C_d_r += CU_CSC_MUL_DENSE_THREAD_X; - index_n_t += CU_CSC_MUL_DENSE_THREAD_X; - } - } - } - } -} - -/* best perf */ -#ifndef PADDLE_TYPE_DOUBLE -#define CU_CSCMM_THREAD_M_BEST 9 -#else -#define CU_CSCMM_THREAD_M_BEST 4 -#endif -#define CU_CSCMM_THREAD_X_BEST 32 -#define CU_CSCMM_THREAD_Y_BEST 32 -#define CU_CSCMM_BLOCK_M_BEST (CU_CSCMM_THREAD_M_BEST * CU_CSCMM_THREAD_X_BEST) -#define CU_CSCMM_BLOCK_N_BEST (CU_CSCMM_THREAD_Y_BEST) -template -__global__ void KeSMatrixDenseMulCsc(real *C_d, - const real *A_d, - const real *csc_val, - const int *csc_row, - const int *csc_col, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - __shared__ real csc_val_sh[CU_CSCMM_BLOCK_N_BEST][CU_CSCMM_THREAD_X_BEST]; - __shared__ int csc_row_sh[CU_CSCMM_BLOCK_N_BEST][CU_CSCMM_THREAD_X_BEST]; - __shared__ real A_s[CU_CSCMM_BLOCK_M_BEST][CU_CSCMM_THREAD_Y_BEST+1]; - - int iter_k = dimK/CU_CSCMM_THREAD_Y_BEST; - int rem_k = dimK%CU_CSCMM_THREAD_Y_BEST; - const int idx = threadIdx.x; - const int idy = threadIdx.y; - const int index_n = blockIdx.y*CU_CSCMM_BLOCK_N_BEST+threadIdx.y; - - int csc_start; - int csc_end; - if (index_n < dimN) { - csc_start = csc_col[index_n]; - csc_end = csc_col[index_n+1]; - } else { - csc_start = 0; - csc_end = 0; - } - int csc_index = csc_start + idx; - int csc_iter = (csc_end-csc_start)/CU_CSCMM_THREAD_X_BEST; - int csc_rem = (csc_end-csc_start)%CU_CSCMM_THREAD_X_BEST; - int index_k = -1; - - if (csc_index < csc_end) { - if 
(VALUE_TYPE != 0) { - csc_val_sh[idy][idx] = csc_val[csc_index]; - } - csc_row_sh[idy][idx] = csc_row[csc_index]; - csc_index += CU_CSCMM_THREAD_X_BEST; - } - - const int ibx = blockIdx.x * CU_CSCMM_BLOCK_M_BEST; - int dim = ibx+idy; - A_d += idx + __mul24(dim, dimK); - #pragma unroll - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - A_s[idy + m * 32][idx] = 0.0f; - if (dim + m * 32 < dimM && idx < dimK) { - A_s[idy + m * 32][idx] = A_d[m * 32 * dimK]; - } - } - __syncthreads(); - - real b_r; - real a_r[CU_CSCMM_THREAD_M_BEST] = {0}; - real sum[CU_CSCMM_THREAD_M_BEST] = {0}; - real A_r_s[CU_CSCMM_THREAD_M_BEST] = {0}; - int index = 0; - int block_end_k = 0;; - int index_iter_csc = csc_iter; - - for (int i_k = 0; i_k < iter_k; i_k++) { - A_d += CU_CSCMM_THREAD_Y_BEST; - block_end_k += CU_CSCMM_THREAD_Y_BEST; - #pragma unroll - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - if (dim + m*32 < dimM && (idx + (i_k+1)*CU_CSCMM_THREAD_Y_BEST < dimK)) { - A_r_s[m] = A_d[m*32*dimK]; - } else { - A_r_s[m] = 0.0f; - } - } - - if (index_iter_csc > 0) { - goto WARP_SYNC; - } else { - goto WARP_SYNC_2; - } - - while (index_iter_csc) { - if (VALUE_TYPE != 0) { - csc_val_sh[idy][idx] = csc_val[csc_index]; - } - csc_row_sh[idy][idx] = csc_row[csc_index]; - csc_index += CU_CSCMM_THREAD_X_BEST; - index = 0; - -WARP_SYNC: - for (; index < CU_CSCMM_THREAD_X_BEST; index++) { - index_k = csc_row_sh[idy][index]; - if (index_k >= block_end_k) { - goto BLOCK_SYNC; - } - b_r = VALUE_TYPE == 0 ? 1.0 : csc_val_sh[idy][index]; - #pragma unroll - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - a_r[m] = A_s[idx+m*32][index_k-i_k*CU_CSCMM_THREAD_Y_BEST]; - sum[m] = VALUE_TYPE == 0 ? 
sum[m] + a_r[m] : sum[m] + a_r[m]*b_r; - } - } - index_iter_csc--; - } - - if (csc_rem != 0) { - if (csc_iter != 0) { - if (csc_index < csc_end) { - if (VALUE_TYPE != 0) { - csc_val_sh[idy][idx] = csc_val[csc_index]; - } - csc_row_sh[idy][idx] = csc_row[csc_index]; - csc_index += CU_CSCMM_THREAD_X_BEST; - } - index = 0; - } - __threadfence_block(); - -WARP_SYNC_2: - for (; index < csc_rem; index++) { - index_k = csc_row_sh[idy][index]; - if (index_k >= block_end_k) { - goto BLOCK_SYNC; - } - b_r = VALUE_TYPE == 0 ? 1.0 : csc_val_sh[idy][index]; - #pragma unroll - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - a_r[m] = A_s[idx+m*32][index_k-i_k*CU_CSCMM_THREAD_Y_BEST]; - sum[m] = VALUE_TYPE == 0 ? sum[m] + a_r[m] : sum[m] + a_r[m]*b_r; - } - } - } - -BLOCK_SYNC: - __syncthreads(); - #pragma unroll - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - A_s[idy+m*32][idx] = A_r_s[m]; - } - __syncthreads(); - } - - if (rem_k != 0) { - if (index_iter_csc == 0) { - goto TEMP_TEST; - } - - for (; index < CU_CSCMM_THREAD_X_BEST; index++) { - index_k = csc_row_sh[idy][index]; - if (index_k >= dimK) { - break; - } - - b_r = VALUE_TYPE == 0 ? 1.0 : csc_val_sh[idy][index]; - #pragma unroll - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - a_r[m] = A_s[idx+m*32][index_k-iter_k*CU_CSCMM_THREAD_Y_BEST]; - sum[m] = VALUE_TYPE == 0 ? sum[m] + a_r[m] : sum[m] + a_r[m]*b_r; - } - } - - if (csc_rem != 0) { - if (csc_index < csc_end) { - if (VALUE_TYPE != 0) { - csc_val_sh[idy][idx] = csc_val[csc_index]; - } - csc_row_sh[idy][idx] = csc_row[csc_index]; - csc_index += CU_CSCMM_THREAD_X_BEST; - } - index = 0; - -TEMP_TEST: - for (; index < csc_rem; index++) { - index_k = csc_row_sh[idy][index]; - if (index_k >= dimK) { - break; - } - b_r = VALUE_TYPE == 0 ? 1.0 : csc_val_sh[idy][index]; - #pragma unroll - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - a_r[m] = A_s[idx+m*32][index_k-iter_k*CU_CSCMM_THREAD_Y_BEST]; - sum[m] = VALUE_TYPE == 0 ? 
sum[m] + a_r[m] : sum[m] + a_r[m]*b_r; - } - } - } - } - - __syncthreads(); - #pragma unroll - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - A_s[idx+m*32][idy] = alpha*sum[m]; - } - __syncthreads(); - - int index_m_c = ibx + idy; - int index_n_c = blockIdx.y*CU_CSCMM_BLOCK_N_BEST + idx; - C_d += index_n_c + __mul24(index_m_c, dimN); - if (beta == 0.0) { - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - if (index_m_c < dimM && index_n_c < dimN) { - _calculate_c(C_d[0], A_s[idy + m * 32][idx]); - } - index_m_c += 32; - C_d += dimN*32; - } - } else { - for (int m = 0; m < CU_CSCMM_THREAD_M_BEST; m++) { - if (index_m_c < dimM && index_n_c < dimN) { - _calculate_c(C_d[0], A_s[idy + m * 32][idx], beta); - } - index_m_c += 32; - C_d += dimN*32; - } - } -} - -#define CU_DM_CSR_THREAD_X 32 -#define CU_DM_CSR_THREAD_Y 4 -#define CU_DM_CSR_N 4 -#define CU_DM_CSR_BLOCK_M (CU_DM_CSR_N*CU_DM_CSR_THREAD_Y) -#define CU_DM_CSR_BLOCK_K (CU_DM_CSR_THREAD_X) -#define CU_DM_CSR_SHARED_ELEMENT (1*CU_DM_CSR_THREAD_Y) -template -__global__ void KeSMatrixDenseMulCsr(real *C_d, - real *A_d, - real *csr_val, - const int *csr_row, - const int *csr_col, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - const int idx = threadIdx.x; - const int idy = threadIdx.y; - int index_k = __mul24(blockIdx.x, CU_DM_CSR_THREAD_X) + threadIdx.x; - int index_m = __mul24(blockIdx.y, CU_DM_CSR_BLOCK_M) + - __mul24(threadIdx.y, CU_DM_CSR_N); - - if (index_k >= dimK) { - return; - } - - __shared__ real csr_val_sh[CU_DM_CSR_THREAD_X][CU_DM_CSR_SHARED_ELEMENT]; - __shared__ int csr_col_sh[CU_DM_CSR_THREAD_X][CU_DM_CSR_SHARED_ELEMENT]; - - // possible optimization, cache this in shared memory - int csr_start = csr_row[index_k]; - int csr_end = csr_row[index_k+1]; - int csr_index = csr_start + idy; - int csr_iter = (csr_end-csr_start)/CU_DM_CSR_SHARED_ELEMENT; - int csr_rem = (csr_end-csr_start)%CU_DM_CSR_SHARED_ELEMENT; - - real tmp = 0.0; - int index_n = -1; - int index_m_t = index_m; - 
real a_r[CU_DM_CSR_N] = {0}; - real *A_d_tmp = A_d + __mul24(index_m, dimK) + index_k; - real *A_d_r = A_d_tmp; - - #pragma unroll - for (int n=0; n < CU_DM_CSR_N; n++) { - if ( index_m_t++ < dimM ) { - a_r[n] = A_d_r[0]; - A_d_r += dimK; - } - } - - for (int csr_i = 0; csr_i < csr_iter; csr_i++) { - #pragma unroll - for (int i = 0; i < (CU_DM_CSR_SHARED_ELEMENT/CU_DM_CSR_THREAD_Y); i++) { - if (VALUE_TYPE != 0) { - csr_val_sh[idx][idy + i*CU_DM_CSR_THREAD_Y] = csr_val - [csr_index]; - } - csr_col_sh[idx][idy + i*CU_DM_CSR_THREAD_Y] = csr_col[csr_index]; - csr_index += CU_DM_CSR_THREAD_Y; - } - __syncthreads(); - - #pragma unroll - for (int index = 0; index < CU_DM_CSR_SHARED_ELEMENT; index++) { - index_n = csr_col_sh[idx][index]; - real b_r = VALUE_TYPE == 0 ? 1.0 : csr_val_sh[idx][index]; - real *C_d_r = C_d + __mul24(index_m, dimN) + index_n; - - index_m_t = index_m; - #pragma unroll - for (int n=0; n < CU_DM_CSR_N; n++) { - if (index_m_t++ < dimM) { - tmp = alpha * b_r * a_r[n]; - paddle::paddleAtomicAdd(C_d_r, tmp); - C_d_r += dimN; - } - } - } - __syncthreads(); - } - - if (csr_rem != 0) { - #pragma unroll - for (int i = 0; i < (CU_DM_CSR_SHARED_ELEMENT/CU_DM_CSR_THREAD_Y); i++) { - if (csr_index < csr_end) { - if (VALUE_TYPE !=0) { - csr_val_sh[idx][idy + i*CU_DM_CSR_THREAD_Y] = csr_val[csr_index]; - } - csr_col_sh[idx][idy + i*CU_DM_CSR_THREAD_Y] = csr_col[csr_index]; - } - csr_index += CU_DM_CSR_THREAD_Y; - } - __syncthreads(); - - #pragma unroll - for (int index = 0; index < csr_rem; index++) { - index_n = csr_col_sh[idx][index]; - real b_r = VALUE_TYPE == 0 ? 
1.0 : csr_val_sh[idx][index]; - real *C_d_r = C_d + __mul24(index_m, dimN) + index_n; - index_m_t = index_m; - #pragma unroll - for (int n=0; n < CU_DM_CSR_N; n++) { - if (index_m_t++ < dimM) { - tmp = alpha * b_r * a_r[n]; - paddle::paddleAtomicAdd(C_d_r, tmp); - C_d_r += dimN; - } - } - } - } -} - -#define CU_CSCMM_DMD2CSC_THREAD_X 128 -#define CU_CSCMM_DMD2CSC_SHARE_X 128 -__global__ void KeSMatrixDenseMulDense2CSC(real *csc_val, - const int *csc_row, - const int *csc_col, - real *A_d, - real *B_d, - bool trans_A, - bool trans_B, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - __shared__ real B_s[CU_CSCMM_DMD2CSC_SHARE_X]; - const int idx = threadIdx.x; // one block compute one column - const int ibx = blockIdx.x; // col index - int csc_start; - int csc_end; - if (ibx < dimN) { - csc_start = csc_col[ibx]; - csc_end = csc_col[ibx + 1]; - } else { - csc_start = 0; - csc_end = 0; - } - - int iter_num = dimK / CU_CSCMM_DMD2CSC_SHARE_X; - int iter_rem = dimK % CU_CSCMM_DMD2CSC_SHARE_X; - real * B_tmp = B_d + ibx; // column index - - for (int j = 0; j < iter_num; j++) { - int rowStart = (j * CU_CSCMM_DMD2CSC_SHARE_X + idx) * dimN; - int index = rowStart; - for (int m = idx; - m < CU_CSCMM_DMD2CSC_SHARE_X; m += CU_CSCMM_DMD2CSC_THREAD_X) { - B_s[m] = B_tmp[index]; - index = index + CU_CSCMM_DMD2CSC_THREAD_X * dimN; - } - __syncthreads(); - - for (int i = csc_col[ibx] + idx; - i < csc_col[ibx + 1]; i += CU_CSCMM_DMD2CSC_THREAD_X) { - int row = csc_row[i]; // row Index - /* compute C[row, ibx] */ - float results = 0; - if (!trans_A) { - int index = row * dimK + j * CU_CSCMM_DMD2CSC_SHARE_X; - for (int k = 0; k < CU_CSCMM_DMD2CSC_SHARE_X; k++) { - results += A_d[index + k] * B_s[k]; - } - } else { - int index = j * CU_CSCMM_DMD2CSC_SHARE_X; - for (int k = 0; k < CU_CSCMM_DMD2CSC_SHARE_X; k++) { - results += A_d[(index + k) * dimM + row] * B_s[k]; - } - } - csc_val[i] += results * alpha; - } - } - - if (iter_rem) { - int rowStart = (iter_num * 
CU_CSCMM_DMD2CSC_SHARE_X + idx) * dimN; - int index = rowStart; - // #pragma unroll - for (int m = idx; m < iter_rem; m += CU_CSCMM_DMD2CSC_THREAD_X) { - B_s[m] = B_tmp[index]; - index = index + CU_CSCMM_DMD2CSC_THREAD_X * dimN; - } - __syncthreads(); - for (int i = csc_start + idx; - i < csc_end; i += CU_CSCMM_DMD2CSC_THREAD_X) { - int row = csc_row[i]; // row Index - /* compute C[row, ibx] */ - float results = 0; - if (!trans_A) { - int index = row * dimK + iter_num * CU_CSCMM_DMD2CSC_SHARE_X; - for (int k = 0; k < iter_rem; k++) { - results += A_d[index + k] * B_s[k]; - } - } else { - int index = iter_num * CU_CSCMM_DMD2CSC_SHARE_X; - for (int k = 0; k < iter_rem; k++) { - results += A_d[(index + k) * dimM + row] * B_s[k]; - } - } - csc_val[i] += alpha * results; - } - } -} - -#define CU_CSCMM_DMD2CSR_THREAD_X 128 -#define CU_CSCMM_DMD2CSR_SHARE_X 128 -__global__ void KeSMatrixDenseMulDense2CSR(real *csr_val, - const int *csr_row, - const int *csr_col, - real *A_d, - real *B_d, - bool trans_A, - bool trans_B, - int dimM, - int dimN, - int dimK, - real alpha, - real beta) { - __shared__ real A_s[CU_CSCMM_DMD2CSR_SHARE_X]; - const int idx = threadIdx.x; // one block comput one row - const int ibx = blockIdx.x; // row index - - int csr_start; - int csr_end; - if (ibx < dimM) { - csr_start = csr_row[ibx]; - csr_end = csr_row[ibx+1]; - } else { - csr_start = 0; - csr_end = 0; - } - - int iter_num = dimK / CU_CSCMM_DMD2CSR_SHARE_X; - int csr_rem = dimK % CU_CSCMM_DMD2CSR_SHARE_X; - for (int j = 0; j < iter_num; j++) { - if (!trans_A) { - int colStart = j * CU_CSCMM_DMD2CSR_SHARE_X + ibx * dimK; - int index = colStart + idx; - #pragma unroll - for (int m = idx; - m < CU_CSCMM_DMD2CSR_SHARE_X; m += CU_CSCMM_DMD2CSR_THREAD_X) { - A_s[m] = A_d[index]; - index = index + CU_CSCMM_DMD2CSR_THREAD_X; - } - } else { - int colStart = (j * CU_CSCMM_DMD2CSR_SHARE_X) * dimM + ibx; - int index = colStart + idx * dimM; - for (int m = idx; - m < CU_CSCMM_DMD2CSR_SHARE_X; m += 
CU_CSCMM_DMD2CSR_THREAD_X) { - A_s[m] = A_d[index]; - index = index + CU_CSCMM_DMD2CSR_THREAD_X * dimM; - } - } - __syncthreads(); - for (int i = csr_start + idx; i < csr_end; i += CU_CSCMM_DMD2CSR_THREAD_X) { - int col_idx = csr_col[i]; // col index - /* comput C[ibx, col_idx] */ - real results = 0; - int index = (j * CU_CSCMM_DMD2CSR_SHARE_X) * dimN + col_idx; - for (int k = 0; k < CU_CSCMM_DMD2CSR_SHARE_X; k++) { - results += A_s[k] * B_d[k * dimN + index]; - } - csr_val[i] += alpha * results; - } - } - - if (csr_rem) { - if (!trans_A) { - int colStart = (ibx + 1) * dimK- csr_rem; - int index = colStart + idx; - #pragma unroll - for (int m = idx; m < csr_rem; m += CU_CSCMM_DMD2CSR_THREAD_X) { - A_s[m] = A_d[index]; - index = index + CU_CSCMM_DMD2CSR_THREAD_X; - } - } else { - int colStart = (iter_num * CU_CSCMM_DMD2CSR_SHARE_X) * dimM + ibx; - int index = colStart + idx * dimM; - for (int m = idx; m < csr_rem; m += CU_CSCMM_DMD2CSR_THREAD_X) { - A_s[m] = A_d[index]; - index = index + CU_CSCMM_DMD2CSR_THREAD_X * dimM; - } - } - __syncthreads(); - for (int i = csr_start + idx; - i < csr_end; i += CU_CSCMM_DMD2CSR_THREAD_X) { - int col_idx = csr_col[i]; - float results = 0; - int index = (iter_num *CU_CSCMM_DMD2CSR_SHARE_X) * dimN + col_idx; - for (int k = 0; k < csr_rem; k++) { - results += A_s[k ] * B_d[k * dimN + index]; - } - csr_val[i] += alpha * results; - } - } -} - - -/** - * @brief Use to calculate row/col index for CSR/CSC sparse matrix - * according to csr_row(csc_col) and - * the value position in csr_val/csc_val - * - * @param indice csr_row for hl_csr_matrix - * csc_col for hl_csc_matrix - * @param num length of csr_row/csc_col - * @param index the value position in csr_val/csc_val - * but need to add 1 - * that is, 1,2,3,...,nnz - * @note the following kernels doesn't use findIndex, - * but may be used in the future. 
- */ -__device__ __forceinline__ -int findIndex(int* indice, int num, int index) { - int start = 0; - int end = num - 1; - int mid = -1; - while (start < end) { - mid = start + ((end - start) / 2); - if (indice[mid] < index) - start = mid + 1; - else - end = mid; - } - return (end - 1); -} - - -/** - * @brief sum columns of csr sparse matrix (csr_val), then add to a_val. - * This kernel used atomicAdd and adapted to w >> h, w is the - * width of csr, and h is the height of csr. - */ -__global__ void KeSMatrixCsrColumnSum(real* a_val, real* csr_val, - int* csr_col, const int dimNNZ) { - int gid = blockIdx.x * blockDim.x + threadIdx.x; - for (int idx = gid; idx < dimNNZ; idx += gridDim.x * blockDim.x) { - int colIdx = csr_col[idx]; - real val = csr_val[idx]; - paddle::paddleAtomicAdd(a_val + colIdx, val); - } -} - -__global__ void KeSMatrixCsrAddBias(real* csr_val, int* csr_col, real* b_d, - real scale, const int nnz) { - int gid = blockIdx.x * blockDim.x + threadIdx.x; // global index - for (int idx = gid; idx < nnz; idx += gridDim.x * blockDim.x) { - int colIdx = csr_col[idx]; - // not coalesced access to b_d - csr_val[idx] += scale * b_d[colIdx]; - } -} - -/** - * @brief csr sparse matrix add dense matrix. - * This kernel occurs load imbalances - * if number of each row is different greatly. 
- */ -__global__ void KeSMatrixCsrAddDense(real* csr_val, int* csr_row, - int* csr_col, real* b_d, real alpha, - real beta, int dimM, int dimN) { - int gidx = blockIdx.x * blockDim.x + threadIdx.x; - int gidy = blockIdx.y; - if (gidy < dimM) { - int start = csr_row[gidy]; - int end = csr_row[gidy + 1]; - for (int x = gidx; x < (end - start); x += gridDim.x * blockDim.x) { - int col = csr_col[start + x]; - real val = csr_val[start + x]; - csr_val[start + x] = beta * val + alpha * b_d[gidy * dimN + col]; - } - } -} - -#define CU_BLOCK_K 16 -#define CU_BLOCK_SIZE 128 - -__global__ void KeSMatrixDenseMulDenseTrans2CSR( - real* csr_val, const int* csr_row, const int* csr_col, real* A_d, - real* B_d, bool trans_A, bool trans_B, int dimM, int dimN, int dimK, - real alpha, real beta) { - - __shared__ real B_s[CU_BLOCK_SIZE][CU_BLOCK_K]; - __shared__ real A_s[CU_BLOCK_K]; - - const int idx = threadIdx.x; - - const int gidx_begin = blockIdx.x * CU_BLOCK_SIZE; - const int gidy = blockIdx.y; - const int gx_dim = gridDim.x * blockDim.x; - - int start = csr_row[gidy]; - int end = csr_row[gidy + 1]; - int size = end - start; - - int c_iter_num = (size + gx_dim - 1) / gx_dim; - int iter_num = (dimK + CU_BLOCK_K - 1) / CU_BLOCK_K; - for (int i = 0; i < c_iter_num; ++i) { - if ((gidx_begin + i * gx_dim) >= size) { - return; // No need to calculate in this block. - } - - real res = 0.0; - int c_idx = gidx_begin + i * gx_dim + idx; - - for (int j = 0; j < iter_num; ++j) { - int col = j * CU_BLOCK_K + idx; - if (idx < CU_BLOCK_K) { - A_s[idx] = col < dimK ? A_d[gidy * dimK + col] : 0.0; - } - for (int m = 0; m < CU_BLOCK_K; ++m) { - int row = (idx / CU_BLOCK_K) + m * (CU_BLOCK_SIZE / CU_BLOCK_K); - col = idx % CU_BLOCK_K; - int csr_idx = gidx_begin + i * gx_dim + row; - int ldRow = csr_idx < size ? csr_col[start + csr_idx] : 0; - int ldCol = j * CU_BLOCK_K + col; - B_s[row][col] = (csr_idx < size && ldCol < dimK) ? 
- B_d[ldRow * dimK + ldCol] : 0.0; - } - __syncthreads(); - - for (int k = 0; k < CU_BLOCK_K; k++) { - res += A_s[k] * B_s[idx][k]; - } - __syncthreads(); - } - - if (c_idx < size) { - csr_val[start + c_idx] += alpha * res; - } - } -} diff --git a/paddle/legacy/cuda/src/hl_math.cc b/paddle/legacy/cuda/src/hl_math.cc deleted file mode 100644 index 585b356d0a7b6c1fd95267d24f350b1c2eb99787..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_math.cc +++ /dev/null @@ -1,26 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "avx_mathfun.h" - -namespace hppl { -__m256 exp(__m256 a) { return exp256_ps(a); } - -__m256 log(__m256 a) { return log256_ps(a); } - -__m256 sin(__m256 a) { return sin256_ps(a); } - -__m256 cos(__m256 a) { return cos256_ps(a); } - -} // namespace hppl diff --git a/paddle/legacy/cuda/src/hl_perturbation_util.cu b/paddle/legacy/cuda/src/hl_perturbation_util.cu deleted file mode 100644 index e15cbb143936b1f816eb9410b964db699131c3cc..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_perturbation_util.cu +++ /dev/null @@ -1,289 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include "hl_base.h" -#include "hl_cuda.h" -#include "hl_perturbation_util.cuh" -#include "hl_time.h" - -#define _USE_MATH_DEFINES - -/* - * Get the original coordinate for a pixel in a transformed image. - * x, y: coordiate in the transformed image. - * tgtCenter: the center coordiate of the transformed image. - * imgSCenter: the center coordinate of the source image. - * centerX, centerY: translation. - * sourceX, sourceY: output coordinates in the original image. - */ -__device__ void getTranformCoord(int x, - int y, - real theta, - real scale, - real tgtCenter, - real imgCenter, - real centerR, - real centerC, - int* sourceX, - int* sourceY) { - real H[4] = {cosf(-theta), -sinf(-theta), sinf(-theta), cosf(-theta)}; - - // compute coornidates in the rotated and scaled image - real x_new = x - tgtCenter + centerC; - real y_new = y - tgtCenter + centerR; - - // compute coornidates in the original image - x_new -= imgCenter; - y_new -= imgCenter; - real xx = H[0] * x_new + H[1] * y_new; - real yy = H[2] * x_new + H[3] * y_new; - *sourceX = __float2int_rn(xx / scale + imgCenter); - *sourceY = __float2int_rn(yy / scale + imgCenter); -} - -/* - * imgs: (numImages, imgPixels) - * target: (numImages * samplingRate, tgtPixels) - * the channels of one pixel are stored continuously in memory. 
- * - * created by Wei Xu (genome), converted by Jiang Wang - */ - -__global__ void kSamplingPatches(const real* imgs, - real* targets, - int imgSize, - int tgtSize, - const int channels, - int samplingRate, - const real* thetas, - const real* scales, - const int* centerRs, - const int* centerCs, - const real padValue, - const int numImages) { - const int caseIdx = blockIdx.x * 4 + threadIdx.x; - const int pxIdx = blockIdx.y * 128 + threadIdx.y; - const int imgPixels = imgSize * imgSize; - const int tgtPixels = tgtSize * tgtSize; - const int numPatches = numImages * samplingRate; - - real tgtCenter = (tgtSize - 1) / 2; - real imgCenter = (imgSize - 1) / 2; - - if (pxIdx < tgtPixels && caseIdx < numPatches) { - const int imgIdx = caseIdx / samplingRate; - - // transform coordiates - const int pxX = pxIdx % tgtSize; - const int pxY = pxIdx / tgtSize; - - int srcPxX, srcPxY; - getTranformCoord(pxX, - pxY, - thetas[imgIdx], - scales[imgIdx], - tgtCenter, - imgCenter, - centerCs[caseIdx], - centerRs[caseIdx], - &srcPxX, - &srcPxY); - - imgs += (imgIdx * imgPixels + srcPxY * imgSize + srcPxX) * channels; - targets += (caseIdx * tgtPixels + pxIdx) * channels; - if (srcPxX >= 0 && srcPxX < imgSize && srcPxY >= 0 && srcPxY < imgSize) { - for (int j = 0; j < channels; j++) targets[j] = imgs[j]; - } else { - for (int j = 0; j < channels; j++) targets[j] = padValue; - } - } -} - -/* - * Functionality: generate the disturb (rotation and scaling) and - * sampling location sequence - * - * created by Wei Xu - */ -void hl_generate_disturb_params(real*& gpuAngle, - real*& gpuScaleRatio, - int*& gpuCenterR, - int*& gpuCenterC, - int numImages, - int imgSize, - real rotateAngle, - real scaleRatio, - int samplingRate, - bool isTrain) { - // The number of output samples. - int numPatches = numImages * samplingRate; - - // create CPU perturbation parameters. 
- real* r_angle = new real[numImages]; - real* s_ratio = new real[numImages]; - int* center_r = new int[numPatches]; - int* center_c = new int[numPatches]; - - // generate the random disturbance sequence and the sampling locations - if (isTrain) { // random sampling for training - // generate rotation ans scaling parameters - // TODO(yuyang18): Since it will initialize random seed here, we can use - // rand_r instead of rand to make this method thread safe. - srand(getCurrentTimeStick()); - for (int i = 0; i < numImages; i++) { - r_angle[i] = - (rotateAngle * M_PI / 180.0) * (rand() / (RAND_MAX + 1.0) // NOLINT - - - 0.5); - s_ratio[i] = - 1 + (rand() / (RAND_MAX + 1.0) - 0.5) * scaleRatio; // NOLINT - } - - int imgCenter = (imgSize - 1) / 2; - - // generate sampling location parameters - for (int i = 0; i < numImages; i++) { - int j = 0; - srand((unsigned)time(NULL)); - while (j < samplingRate) { - int pxX = - (int)(real(imgSize - 1) * rand() / (RAND_MAX + 1.0)); // NOLINT - int pxY = - (int)(real(imgSize - 1) * rand() / (RAND_MAX + 1.0)); // NOLINT - - const real H[4] = {cos(-r_angle[i]), - -sin(-r_angle[i]), - sin(-r_angle[i]), - cos(-r_angle[i])}; - real x = pxX - imgCenter; - real y = pxY - imgCenter; - real xx = H[0] * x + H[1] * y; - real yy = H[2] * x + H[3] * y; - - real srcPxX = xx / s_ratio[i] + imgCenter; - real srcPxY = yy / s_ratio[i] + imgCenter; - - if (srcPxX >= 0 && srcPxX <= imgSize - 1 && srcPxY >= 0 && - srcPxY <= imgSize - 1) { - center_r[i * samplingRate + j] = pxY; - center_c[i * samplingRate + j] = pxX; - j++; - } - } - } - } else { // central crop for testing - for (int i = 0; i < numImages; i++) { - r_angle[i] = 0.0; - s_ratio[i] = 1.0; - - for (int j = 0; j < samplingRate; j++) { - center_r[i * samplingRate + j] = (imgSize - 1) / 2; - center_c[i * samplingRate + j] = (imgSize - 1) / 2; - } - } - } - - // copy disturbance sequence to gpu - hl_memcpy_host2device(gpuAngle, r_angle, sizeof(real) * numImages); - 
hl_memcpy_host2device(gpuScaleRatio, s_ratio, sizeof(real) * numImages); - - delete[] r_angle; - delete[] s_ratio; - - // copy sampling location sequence to gpu - hl_memcpy_host2device(gpuCenterR, center_r, sizeof(int) * numPatches); - hl_memcpy_host2device(gpuCenterC, center_c, sizeof(int) * numPatches); - - delete[] center_r; - delete[] center_c; -} - -void hl_conv_random_disturb_with_params(const real* images, - int imgSize, - int tgtSize, - int channels, - int numImages, - int samplingRate, - const real* gpuRotationAngle, - const real* gpuScaleRatio, - const int* gpuCenterR, - const int* gpuCenterC, - int paddingValue, - real* target) { - // The number of output samples. - int numPatches = numImages * samplingRate; - // The memory size of one output patch. - int targetSize = tgtSize * tgtSize; - - dim3 threadsPerBlock(4, 128); - dim3 numBlocks(DIVUP(numPatches, 4), DIVUP(targetSize, 128)); - - kSamplingPatches<<>>(images, - target, - imgSize, - tgtSize, - channels, - samplingRate, - gpuRotationAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - paddingValue, - numImages); - - hl_device_synchronize(); -} - -void hl_conv_random_disturb(const real* images, - int imgSize, - int tgtSize, - int channels, - int numImages, - real scaleRatio, - real rotateAngle, - int samplingRate, - real* gpu_r_angle, - real* gpu_s_ratio, - int* gpu_center_r, - int* gpu_center_c, - int paddingValue, - bool isTrain, - real* targets) { - // generate the random disturbance sequence and the sampling locations - hl_generate_disturb_params(gpu_r_angle, - gpu_s_ratio, - gpu_center_r, - gpu_center_c, - numImages, - imgSize, - rotateAngle, - scaleRatio, - samplingRate, - isTrain); - - hl_conv_random_disturb_with_params(images, - imgSize, - tgtSize, - channels, - numImages, - samplingRate, - gpu_r_angle, - gpu_s_ratio, - gpu_center_r, - gpu_center_r, - paddingValue, - targets); -} diff --git a/paddle/legacy/cuda/src/hl_table_apply.cu b/paddle/legacy/cuda/src/hl_table_apply.cu deleted file mode 
100644 index 7411ae35d382833253e3ceabe36b3a1938138028..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_table_apply.cu +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "hl_base.h" -#include "hl_cuda.h" -#include "hl_device_functions.cuh" -#include "paddle/legacy/utils/Logging.h" - -template -__global__ void KeMatrixAddRows(real* output, - int ldo, - real* table, - int ldt, - int* ids, - int numSamples, - int tableSize, - int dim) { - int idx = threadIdx.x; - int idy = blockIdx.x + threadIdx.y * gridDimX; - - while (idy < numSamples) { - int tableId = ids[idy]; - if ((0 <= tableId) && (tableId < tableSize)) { - real* out = output + idy * ldo; - real* tab = table + tableId * ldt; - for (int i = idx; i < dim; i += blockDimX) { - if (AddRow) { - paddle::paddleAtomicAdd(&tab[i], out[i]); - } else { - out[i] += tab[i]; - } - } - } - idy += blockDimY * gridDimX; - } -} - -void hl_matrix_select_rows(real* output, - int ldo, - real* table, - int ldt, - int* ids, - int numSamples, - int tableSize, - int dim) { - CHECK_NOTNULL(output); - CHECK_NOTNULL(table); - CHECK_NOTNULL(ids); - - dim3 threads(128, 8); - dim3 grid(8, 1); - KeMatrixAddRows<128, 8, 8, 0><<>>( - output, ldo, table, ldt, ids, numSamples, tableSize, dim); - - CHECK_SYNC("hl_matrix_select_rows failed"); -} - -void hl_matrix_add_to_rows(real* table, - int ldt, - real* input, - int ldi, - int* ids, - 
int numSamples, - int tableSize, - int dim) { - CHECK_NOTNULL(input); - CHECK_NOTNULL(table); - CHECK_NOTNULL(ids); - - dim3 threads(128, 8); - dim3 grid(8, 1); - KeMatrixAddRows<128, 8, 8, 1><<>>( - input, ldi, table, ldt, ids, numSamples, tableSize, dim); - - CHECK_SYNC("hl_matrix_add_to_rows failed"); -} - -template -__global__ void KeVectorSelect( - T* dst, int sized, const T* src, int sizes, const int* ids, int sizei) { - int idx = threadIdx.x + blockDimX * blockIdx.x; - while (idx < sizei) { - int index = ids[idx]; - // check(index < sizes); - dst[idx] = src[index]; - idx += blockDimX * gridDimX; - } -} - -template -void hl_vector_select_from( - T* dst, int sized, const T* src, int sizes, const int* ids, int sizei) { - CHECK_NOTNULL(dst); - CHECK_NOTNULL(src); - CHECK_NOTNULL(ids); - CHECK_EQ(sized, sizei); - - dim3 threads(512, 1); - dim3 grid(8, 1); - KeVectorSelect<<>>( - dst, sized, src, sizes, ids, sizei); - - CHECK_SYNC("hl_vector_select_from failed"); -} - -template void hl_vector_select_from(real* dst, - int sized, - const real* src, - int sizes, - const int* ids, - int sizei); -template void hl_vector_select_from( - int* dst, int sized, const int* src, int sizes, const int* ids, int sizei); diff --git a/paddle/legacy/cuda/src/hl_time.cc b/paddle/legacy/cuda/src/hl_time.cc deleted file mode 100644 index 26af9ec806a75bab429e65fbf1fcb7b47cd348cf..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_time.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "hl_time.h" -#include -#include -#include -#include - -using std::chrono::high_resolution_clock; - -int64_t getCurrentTimeStick() { - high_resolution_clock::time_point tp = high_resolution_clock::now(); - high_resolution_clock::duration dtn = tp.time_since_epoch(); - return dtn.count(); -} diff --git a/paddle/legacy/cuda/src/hl_top_k.cu b/paddle/legacy/cuda/src/hl_top_k.cu deleted file mode 100644 index 041ac419f5addfa49148270b8a8b421eb8ada78c..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_top_k.cu +++ /dev/null @@ -1,481 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/cuda/include/hl_base.h" -#include "paddle/legacy/cuda/include/hl_sparse.ph" -#include "paddle/legacy/cuda/include/hl_top_k.h" -#include "paddle/legacy/utils/Logging.h" - -// using namespace hppl; - -struct Pair { - __device__ __forceinline__ Pair() {} - - __device__ __forceinline__ Pair(real value, int id) : v_(value), id_(id) {} - - __device__ __forceinline__ void set(real value, int id) { - v_ = value; - id_ = id; - } - - __device__ __forceinline__ void operator=(const Pair& in) { - v_ = in.v_; - id_ = in.id_; - } - - __device__ __forceinline__ bool operator<(const real value) const { - return (v_ < value); - } - - __device__ __forceinline__ bool operator<(const Pair& in) const { - return (v_ < in.v_) || ((v_ == in.v_) && (id_ > in.id_)); - } - - __device__ __forceinline__ bool operator>(const Pair& in) const { - return (v_ > in.v_) || ((v_ == in.v_) && (id_ < in.id_)); - } - - real v_; - int id_; -}; - -__device__ __forceinline__ void addTo(Pair topK[], - const Pair& p, - int beamSize) { - for (int k = beamSize - 2; k >= 0; k--) { - if (topK[k] < p) { - topK[k + 1] = topK[k]; - } else { - topK[k + 1] = p; - return; - } - } - topK[0] = p; -} - -template -__device__ __forceinline__ void addTo(Pair topK[], const Pair& p) { - for (int k = beamSize - 2; k >= 0; k--) { - if (topK[k] < p) { - topK[k + 1] = topK[k]; - } else { - topK[k + 1] = p; - return; - } - } - topK[0] = p; -} - -template -__device__ __forceinline__ void getTopK( - Pair topK[], real* src, int idx, int dim, int beamSize) { - while (idx < dim) { - if (topK[beamSize - 1] < src[idx]) { - Pair tmp(src[idx], idx); - addTo(topK, tmp, beamSize); - } - idx += blockSize; - } -} - -template -__device__ __forceinline__ void getTopK( - Pair topK[], real* src, int idx, int dim, const Pair& max, int beamSize) { - while (idx < dim) { - if (topK[beamSize - 1] < src[idx]) { - Pair tmp(src[idx], idx); - if (tmp < max) { - addTo(topK, tmp, beamSize); - } - } - idx += blockSize; - } -} - 
-template -__device__ __forceinline__ void getTopK( - Pair topK[], real* val, int* col, int idx, int dim, int beamSize) { - while (idx < dim) { - if (topK[beamSize - 1] < val[idx]) { - Pair tmp(val[idx], col[idx]); - addTo(topK, tmp, beamSize); - } - idx += blockSize; - } -} - -template -__device__ __forceinline__ void getTopK(Pair topK[], - real* val, - int* col, - int idx, - int dim, - const Pair& max, - int beamSize) { - while (idx < dim) { - if (topK[beamSize - 1] < val[idx]) { - Pair tmp(val[idx], col[idx]); - if (tmp < max) { - addTo(topK, tmp, beamSize); - } - } - idx += blockSize; - } -} - -template -__device__ __forceinline__ void threadGetTopK(Pair topK[], - int& beam, - int beamSize, - real* src, - bool& firstStep, - bool& isEmpty, - Pair& max, - int dim, - const int tid) { - if (beam > 0) { - int length = beam < beamSize ? beam : beamSize; - if (firstStep) { - firstStep = false; - getTopK(topK, src, tid, dim, length); - } else { - for (int k = 0; k < maxLength; k++) { - if (k < maxLength - beam) { - topK[k] = topK[k + beam]; - } else { - topK[k].set(-HL_FLOAT_MAX, -1); - } - } - if (!isEmpty) { - getTopK(topK + maxLength - beam, src, tid, dim, max, length); - } - } - - max = topK[maxLength - 1]; - if (max.id_ == -1) isEmpty = true; - beam = 0; - } -} - -template -__device__ __forceinline__ void threadGetTopK(Pair topK[], - int& beam, - int beamSize, - real* val, - int* col, - bool& firstStep, - bool& isEmpty, - Pair& max, - int dim, - const int tid) { - if (beam > 0) { - int length = beam < beamSize ? 
beam : beamSize; - if (firstStep) { - firstStep = false; - getTopK(topK, val, col, tid, dim, length); - } else { - for (int k = 0; k < maxLength; k++) { - if (k < maxLength - beam) { - topK[k] = topK[k + beam]; - } else { - topK[k].set(-HL_FLOAT_MAX, -1); - } - } - if (!isEmpty) { - getTopK( - topK + maxLength - beam, val, col, tid, dim, max, length); - } - } - - max = topK[maxLength - 1]; - if (max.id_ == -1) isEmpty = true; - beam = 0; - } -} - -template -__device__ __forceinline__ void blockReduce(Pair* shTopK, - int* maxId, - Pair topK[], - real** topVal, - int** topIds, - int& beam, - int& beamSize, - const int tid, - const int warp) { - while (true) { - __syncthreads(); - if (tid < blockSize / 2) { - if (shTopK[tid] < shTopK[tid + blockSize / 2]) { - maxId[tid] = tid + blockSize / 2; - } else { - maxId[tid] = tid; - } - } - __syncthreads(); - for (int stride = blockSize / 4; stride > 0; stride = stride / 2) { - if (tid < stride) { - if (shTopK[maxId[tid]] < shTopK[maxId[tid + stride]]) { - maxId[tid] = maxId[tid + stride]; - } - } - __syncthreads(); - } - __syncthreads(); - - if (tid == 0) { - **topVal = shTopK[maxId[0]].v_; - **topIds = shTopK[maxId[0]].id_; - (*topVal)++; - (*topIds)++; - } - if (tid == maxId[0]) beam++; - if (--beamSize == 0) break; - __syncthreads(); - - // NOTE(zcd): temporary solution - unsigned mask = 0u; - CREATE_SHFL_MASK(mask, true); - - if (tid == maxId[0]) { - if (beam < maxLength) { - shTopK[tid] = topK[beam]; - } - } - if (maxId[0] / 32 == warp) { - if (__shfl_sync(mask, beam, (maxId[0]) % 32, 32) == maxLength) break; - } - } -} - -/** - * Each block compute one sample. - * In a block: - * 1. every thread get top maxLength value; - * 2. merge to shTopK, block reduce and get max value; - * 3. go to the second setp, until one thread's topK value is null; - * 4. go to the first setp, until get the topK value. 
- */ -template -__global__ void KeMatrixTopK(real* topVal, - int ldv, - int* topIds, - real* src, - int lds, - int dim, - int beamSize) { - __shared__ Pair shTopK[blockSize]; - __shared__ int maxId[blockSize / 2]; - const int tid = threadIdx.x; - const int warp = threadIdx.x / 32; - src += blockIdx.x * lds; - topVal += blockIdx.x * ldv; - topIds += blockIdx.x * beamSize; - - Pair topK[maxLength]; // NOLINT - int beam = maxLength; - Pair max; - bool isEmpty = false; - bool firstStep = true; - - for (int k = 0; k < maxLength; k++) { - topK[k].set(-HL_FLOAT_MAX, -1); - } - while (beamSize) { - threadGetTopK( - topK, beam, beamSize, src, firstStep, isEmpty, max, dim, tid); - - shTopK[tid] = topK[0]; - blockReduce( - shTopK, maxId, topK, &topVal, &topIds, beam, beamSize, tid, warp); - } -} - -template -__global__ void KeSMatrixTopK(real* topVal, - int ldv, - int* topIds, - real* val, - int* row, - int* col, - int beamSize) { - __shared__ Pair shTopK[blockSize]; - __shared__ int maxId[blockSize / 2]; - const int tid = threadIdx.x; - const int warp = threadIdx.x / 32; - topVal += blockIdx.x * ldv; - topIds += blockIdx.x * beamSize; - - Pair topK[maxLength]; // NOLINT - int beam = maxLength; - Pair max; - bool isEmpty = false; - bool firstStep = true; - - int start = row[blockIdx.x]; - int end = row[blockIdx.x + 1]; - int dim = end - start; - val += start; - col += start; - - if (beamSize > dim) { - // if the number of values to sort are less than the output size, - // use -1 to indicate the end of valid sorted values. 
- if (tid == 0) { - topIds[dim] = -1; - } - - beamSize = dim; - } - - for (int k = 0; k < maxLength; k++) { - topK[k].set(-HL_FLOAT_MAX, -1); - } - while (beamSize) { - threadGetTopK( - topK, beam, beamSize, val, col, firstStep, isEmpty, max, dim, tid); - - shTopK[tid] = topK[0]; - blockReduce( - shTopK, maxId, topK, &topVal, &topIds, beam, beamSize, tid, warp); - } -} - -void hl_matrix_top_k(real* topVal, - int ldv, - int* topIds, - real* src, - int lds, - int dim, - int beamSize, - int numSamples) { - CHECK_NOTNULL(topVal); - CHECK_NOTNULL(topIds); - CHECK_NOTNULL(src); - - if (beamSize > dim) beamSize = dim; - - dim3 threads(256, 1); - dim3 grid(numSamples, 1); - KeMatrixTopK<5, 256><<>>( - topVal, ldv, topIds, src, lds, dim, beamSize); - - CHECK_SYNC("hl_matrix_top_k failed"); -} - -void hl_sparse_matrix_top_k(real* topVal, - int ldv, - int* topIds, - hl_sparse_matrix_s src, - int beamSize, - int numSamples) { - CHECK_NOTNULL(topVal); - CHECK_NOTNULL(topIds); - CHECK_NOTNULL(src); - CHECK_EQ(src->format, HL_SPARSE_CSR) << "sparse matrix format error!"; - - hl_csr_matrix csr = (hl_csr_matrix)src->matrix; - if (csr->csr_val == NULL || csr->csr_row == NULL || csr->csr_col == NULL) { - LOG(FATAL) << "parameter src is null!"; - } - - dim3 threads(256, 1); - dim3 grid(numSamples, 1); - KeSMatrixTopK<5, 256><<>>( - topVal, ldv, topIds, csr->csr_val, csr->csr_row, csr->csr_col, beamSize); - - CHECK_SYNC("hl_sparse_matrix_top_k failed"); -} - -/** - * Each block compute one sample. - * In a block: - * 1. every thread get top maxLength value; - * 2. merge to shTopK, block reduce and get max value; - * 3. go to the second setp, until one thread's topK value is null; - * 4. go to the first setp, until get the topK value. 
- */ -template -__global__ void KeMatrixTopKClassificationError(real* topVal, - int ldv, - int* topIds, - real* src, - int lds, - int dim, - int beamSize, - int* label, - real* recResult) { - __shared__ Pair shTopK[blockSize]; - __shared__ int maxId[blockSize / 2]; - const int tid = threadIdx.x; - const int warp = threadIdx.x / 32; - src += blockIdx.x * lds; - topVal += blockIdx.x * ldv; - topIds += blockIdx.x * beamSize; - - Pair topK[maxLength]; // NOLINT - int beam = maxLength; - Pair max; - bool isEmpty = false; - bool firstStep = true; - int topkSize = beamSize; - - for (int k = 0; k < maxLength; k++) { - topK[k].set(-HL_FLOAT_MAX, -1); - } - - while (beamSize) { - threadGetTopK( - topK, beam, beamSize, src, firstStep, isEmpty, max, dim, tid); - - shTopK[tid] = topK[0]; - blockReduce( - shTopK, maxId, topK, &topVal, &topIds, beam, beamSize, tid, warp); - } - - __syncthreads(); - if (tid == 0) { - for (int i = 0; i < topkSize; i++) { - if (*--topIds == label[blockIdx.x]) { - recResult[blockIdx.x] = 0; - break; - } - recResult[blockIdx.x] = 1.0f; - } - } -} - -void hl_matrix_classification_error(real* topVal, - int ldv, - int* topIds, - real* src, - int lds, - int dim, - int topkSize, - int numSamples, - int* label, - real* recResult) { - CHECK_NOTNULL(topVal); - CHECK_NOTNULL(topIds); - CHECK_NOTNULL(src); - - if (topkSize > dim) topkSize = dim; - - dim3 threads(256, 1); - dim3 grid(numSamples, 1); - KeMatrixTopKClassificationError<5, 256><<>>( - topVal, ldv, topIds, src, lds, dim, topkSize, label, recResult); - - CHECK_SYNC("hl_matrix_top_k classification error failed"); -} diff --git a/paddle/legacy/cuda/src/hl_warpctc_wrap.cc b/paddle/legacy/cuda/src/hl_warpctc_wrap.cc deleted file mode 100644 index 31a8652f1f55387ae48cb516cd092442be784cbb..0000000000000000000000000000000000000000 --- a/paddle/legacy/cuda/src/hl_warpctc_wrap.cc +++ /dev/null @@ -1,151 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "hl_warpctc_wrap.h" -#include -#include "paddle/legacy/utils/DynamicLoader.h" -#include "paddle/legacy/utils/Logging.h" - -namespace dynload { - -std::once_flag warpctc_dso_flag; -void* warpctc_dso_handle = nullptr; - -/** - * The following macro definition can generate structs - * (for each function) to dynamic load warpctc routine - * via operator overloading. When PADDLE_USE_DSO is - * false, you need to add the path of libwarp-ctc.so to - * the linked-libs of paddle or to LD_PRELOAD. - */ -#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... 
args) -> decltype(__name(args...)) { \ - using warpctcFunc = decltype(__name(args...)) (*)(Args...); \ - std::call_once( \ - warpctc_dso_flag, GetWarpCTCDsoHandle, &warpctc_dso_handle); \ - void* p_##_name = dlsym(warpctc_dso_handle, #__name); \ - return reinterpret_cast(p_##_name)(args...); \ - } \ - } __name; // struct DynLoad__##__name - -// include all needed warp-ctc functions -DYNAMIC_LOAD_WARPCTC_WRAP(get_warpctc_version) -DYNAMIC_LOAD_WARPCTC_WRAP(ctcGetStatusString) -DYNAMIC_LOAD_WARPCTC_WRAP(compute_ctc_loss) -DYNAMIC_LOAD_WARPCTC_WRAP(get_workspace_size) - -#undef DYNAMIC_LOAD_WARPCTC_WRAP - -} /* namespace dynload */ - -#define WARPCTC_GET_VERSION dynload::get_warpctc_version -#define WARPCTC_GET_STATUS_STRING dynload::ctcGetStatusString - -static int g_warpctcVersion = -1; -#ifndef PADDLE_TYPE_DOUBLE -#define WARPCTC_COMPUTE_LOSS dynload::compute_ctc_loss -#define WARPCTC_GET_WORKSPACE_SIZE dynload::get_workspace_size -#else -hl_warpctc_status_t fatal(...) { - LOG(FATAL) << "warp-ctc [version " << g_warpctcVersion - << "] Error: not support double precision."; - // both of get_warpctc_version() and get_workspace_size() return an ctcStatus - // type value - return CTC_STATUS_EXECUTION_FAILED; -} -#define WARPCTC_COMPUTE_LOSS fatal -#define WARPCTC_GET_WORKSPACE_SIZE fatal -#endif - -/** - * Check build-in warp-ctc function using glog and it also - * support << operator for more details error info. 
- */ -#define CHECK_WARPCTC(warpctcStat) \ - CHECK_EQ(CTC_STATUS_SUCCESS, warpctcStat) \ - << "warp-ctc [version " << g_warpctcVersion \ - << "] Error: " << WARPCTC_GET_STATUS_STRING(warpctcStat) << " " - -void hl_warpctc_init(const size_t blank, - bool useGpu, - hl_warpctc_options_t* options) { - CHECK_NOTNULL(options); - - g_warpctcVersion = WARPCTC_GET_VERSION(); - - if (useGpu) { -#ifdef __NVCC__ - options->loc = CTC_GPU; - options->stream = STREAM_DEFAULT; -#else - LOG(FATAL) << "[warpctc init] GPU is not enabled."; -#endif - } else { - options->loc = CTC_CPU; - options->num_threads = 1; - } - - options->blank_label = blank; -} - -void hl_warpctc_compute_loss(const real* batchInput, - real* batchGrad, - const int* cpuLabels, - const int* cpuLabelLengths, - const int* cpuInputLengths, - const size_t numClasses, - const size_t numSequences, - real* cpuCosts, - void* workspace, - hl_warpctc_options_t* options) { - CHECK_NOTNULL(batchInput); - CHECK_NOTNULL(cpuLabels); - CHECK_NOTNULL(cpuLabelLengths); - CHECK_NOTNULL(cpuInputLengths); - CHECK_NOTNULL(cpuCosts); - CHECK_NOTNULL(workspace); - CHECK_NOTNULL(options); - - CHECK_WARPCTC(WARPCTC_COMPUTE_LOSS(batchInput, - batchGrad, - cpuLabels, - cpuLabelLengths, - cpuInputLengths, - numClasses, - numSequences, - cpuCosts, - workspace, - *options)); -} - -void hl_warpctc_get_workspace_size(const int* cpuLabelLengths, - const int* cpuInputLengths, - const size_t numClasses, - const size_t numSequences, - hl_warpctc_options_t* options, - size_t* bytes) { - CHECK_NOTNULL(cpuLabelLengths); - CHECK_NOTNULL(cpuInputLengths); - CHECK_NOTNULL(options); - CHECK_NOTNULL(bytes); - - CHECK_WARPCTC(WARPCTC_GET_WORKSPACE_SIZE(cpuLabelLengths, - cpuInputLengths, - numClasses, - numSequences, - *options, - bytes)); -} diff --git a/paddle/legacy/function/BlockExpandOp.cpp b/paddle/legacy/function/BlockExpandOp.cpp deleted file mode 100644 index f01f89a7277acc5fe494b92a3e7ca3ca18498c97..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/function/BlockExpandOp.cpp +++ /dev/null @@ -1,202 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Function.h" -#include "Im2Col.h" - -namespace paddle { - -/* - * \brief Converts the image data of four dimensions(NCHW) into - * a sequence data of three dimensions(NST) in the forward calculation, - * which is reversed in the backward calculation. - * Where N is batch size, S is the length of the sequence after each - * image is expanded, T is the size of each time step in the sequence. - * - * Arguments in forward function: - * \param inputs[0] Image data of NCHW format. - * \param outputs[0] Sequence data of NST format. - * - * Arguments in backward function: - * \param inputs[0] Sequence data of NST format. - * \param outputs[0] Image data of NCHW format. - */ -class BlockExpandFunction : public FunctionBase { - public: - void init(const FuncConfig& config) override { - // function arguments - strides_ = config.get>("strides"); - paddings_ = config.get>("paddings"); - blocks_ = config.get>("blocks"); - - // number of inputs and outputs - numInputs_ = 1; - numOutputs_ = 1; - } - - void checkShape(const TensorShape& image, const TensorShape& sequence) const { - // image shape should be 4-dimensional. - CHECK_EQ(image.ndims(), (size_t)4); - // sequence shape should be 3-dimensional. 
- CHECK_EQ(sequence.ndims(), (size_t)3); - // The batchSize of the image needs to be equal to - // the batchSize of the sequence. - CHECK_EQ(image[0], sequence[0]); - } - - // Calculate the shape of colData based on the shape of the image - // and the shape of the sequence. - TensorShape getColShape(const TensorShape& image, - const TensorShape& sequence) const { - size_t inputChannels = image[1]; - size_t inputHeight = image[2]; - size_t inputWidth = image[3]; - size_t seqLength = sequence[1]; - size_t stepSize = sequence[2]; - size_t outputHeight = - 1 + - (inputHeight + 2 * paddingH() - blockH() + strideH() - 1) / strideH(); - size_t outputWidth = - 1 + - (inputWidth + 2 * paddingW() - blockW() + strideW() - 1) / strideW(); - CHECK_EQ(seqLength, outputHeight * outputWidth); - CHECK_EQ(stepSize, inputChannels * blockH() * blockW()); - - // [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] - return TensorShape({outputHeight, - outputWidth, - inputChannels, - (size_t)blockH(), - (size_t)blockW()}); - } - - protected: - std::vector strides_; - std::vector paddings_; - std::vector blocks_; - - inline int strideH() const { return strides_[0]; } - - inline int strideW() const { return strides_[1]; } - - inline int paddingH() const { return paddings_[0]; } - - inline int paddingW() const { return paddings_[1]; } - - inline int blockH() const { return blocks_[0]; } - - inline int blockW() const { return blocks_[1]; } -}; - -template -class BlockExpandForward : public BlockExpandFunction { - public: - void init(const FuncConfig& config) override { - BlockExpandFunction::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& image = inputs[0].shape(); - const TensorShape& sequence = outputs[0].shape(); - checkShape(image, sequence); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); 
- check(inputs, outputs); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - const TensorShape& image = inputs[0].shape(); - const TensorShape& sequence = outputs[0].shape(); - - TensorShape imShape = TensorShape({image[1], image[2], image[3]}); - TensorShape colShape = getColShape(image, sequence); - size_t batchSize = image[0]; - - real* imageData = inputs[0].data(); - real* seqData = outputs[0].data(); - Im2ColFunctor im2col; - for (size_t i = 0; i < batchSize; i++) { - // The result of im2col is [outputHeight, outputWidth, - // inputChannels, filterHeight, filterWidth], and it is easy to - // reshape into [seqLength, stepSize], where seqLength is equal - // output_height * output_width, stepSize is equal - // input_channels * filter_height * filter_width - im2col(imageData, - imShape, - seqData, - colShape, - strideH(), - strideW(), - paddingH(), - paddingW()); - imageData += imShape.getElements(); - seqData += colShape.getElements(); - } - } -}; - -template -class BlockExpandBackward : public BlockExpandFunction { - public: - void init(const FuncConfig& config) override { - BlockExpandFunction::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& image = outputs[0].shape(); - const TensorShape& sequence = inputs[0].shape(); - checkShape(image, sequence); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - check(inputs, outputs); - // Since the implementation of Col2ImFunctor is ADD_TO, - // this function only supports ADD_TO mode. 
- CHECK_EQ(outputs[0].getArgType(), ADD_TO); - const TensorShape& image = outputs[0].shape(); - const TensorShape& sequence = inputs[0].shape(); - - TensorShape imShape = TensorShape({image[1], image[2], image[3]}); - TensorShape colShape = getColShape(image, sequence); - size_t batchSize = image[0]; - - real* imageData = outputs[0].data(); - real* seqData = inputs[0].data(); - Col2ImFunctor col2im; - for (size_t i = 0; i < batchSize; i++) { - col2im(imageData, - imShape, - seqData, - colShape, - strideH(), - strideW(), - paddingH(), - paddingW()); - imageData += imShape.getElements(); - seqData += colShape.getElements(); - } - } -}; - -REGISTER_TYPED_FUNC(BlockExpand, CPU, BlockExpandForward); -REGISTER_TYPED_FUNC(BlockExpandGrad, CPU, BlockExpandBackward); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(BlockExpand, GPU, BlockExpandForward); -REGISTER_TYPED_FUNC(BlockExpandGrad, GPU, BlockExpandBackward); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/BlockExpandOpTest.cpp b/paddle/legacy/function/BlockExpandOpTest.cpp deleted file mode 100644 index 8fca4f6fdc82082986b2ec3469aa8f78990bc8a7..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/BlockExpandOpTest.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "FunctionTest.h" - -namespace paddle { - -TEST(BlockExpandForward, real) { - for (size_t batchSize : {5}) { - for (size_t channels : {1, 5}) { - for (size_t inputHeight : {5, 33}) { - for (size_t inputWidth : {5, 32}) { - for (size_t block : {1, 3, 5}) { - for (size_t stride : {1, 2}) { - for (size_t padding : {0, 1}) { - // init Test object - std::vector strides = {stride, stride}; - std::vector paddings = {padding, padding}; - std::vector blocks = {block, block}; - CpuGpuFuncCompare test("BlockExpand", - FuncConfig() - .set("strides", strides) - .set("paddings", paddings) - .set("blocks", blocks)); - - size_t outputHeight = - 1 + - (inputHeight + 2 * padding - block + stride - 1) / stride; - size_t outputWidth = - 1 + - (inputWidth + 2 * padding - block + stride - 1) / stride; - TensorShape inputShape = - TensorShape({batchSize, channels, inputHeight, inputWidth}); - TensorShape outputShape = - TensorShape({batchSize, - outputHeight * outputWidth, - channels * block * block}); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, inputShape)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, outputShape)); - // run Function - test.run(); - } - } - } - } - } - } - } -} - -TEST(BlockExpandBackward, real) { - for (size_t batchSize : {5}) { - for (size_t channels : {1, 5}) { - for (size_t inputHeight : {5, 33}) { - for (size_t inputWidth : {5, 32}) { - for (size_t block : {1, 3, 5}) { - for (size_t stride : {1, 2}) { - for (size_t padding : {0, 1}) { - // init Test object - std::vector strides = {stride, stride}; - std::vector paddings = {padding, padding}; - std::vector blocks = {block, block}; - CpuGpuFuncCompare test("BlockExpandGrad", - FuncConfig() - .set("strides", strides) - .set("paddings", paddings) - .set("blocks", blocks)); - - size_t outputHeight = - 1 + - (inputHeight + 2 * padding - block + stride - 1) / stride; - size_t outputWidth = - 1 + - (inputWidth + 2 * padding - block + stride - 1) / stride; - TensorShape inputShape = - 
TensorShape({batchSize, channels, inputHeight, inputWidth}); - TensorShape outputShape = - TensorShape({batchSize, - outputHeight * outputWidth, - channels * block * block}); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, outputShape)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, inputShape), - ADD_TO); - // run Function - test.run(); - } - } - } - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/BufferArg.cpp b/paddle/legacy/function/BufferArg.cpp deleted file mode 100644 index 1f3d505c31bf8d50503032a4baae6230b9f7241d..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/BufferArg.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "BufferArg.h" -#include "paddle/legacy/math/SparseMatrix.h" - -namespace paddle { - -const SequenceArg& BufferArg::sequence() const { - CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA); - return dynamic_cast(*this); -} - -const SparseMatrixArg& BufferArg::sparse() const { - CHECK_EQ(bufferType_, TENSOR_SPARSE); - return dynamic_cast(*this); -} - -SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType) - : BufferArg(sparse, argType), - row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), - col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32), - nnz_(sparse.getElementCnt()), - format_(static_cast(sparse.getFormat())), - type_(static_cast(sparse.getValueType())) { - bufferType_ = TENSOR_SPARSE; -} - -SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType) - : BufferArg(sparse, argType), - row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), - col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32), - nnz_(sparse.getElementCnt()), - format_(static_cast(sparse.getFormat())), - type_(static_cast(sparse.getValueType())) { - bufferType_ = TENSOR_SPARSE; -} - -} // namespace paddle diff --git a/paddle/legacy/function/BufferArg.h b/paddle/legacy/function/BufferArg.h deleted file mode 100644 index 1f47ad556d29363d784fde718fdacdf0658ef010..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/BufferArg.h +++ /dev/null @@ -1,364 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "TensorShape.h" -#include "TensorType.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -enum BufferType { - TENSOR_UNKNOWN = 0, - TENSOR_NORMAL = 1, - TENSOR_SEQUENCE_ID = 2, - TENSOR_SEQUENCE_DATA = 3, - TENSOR_SPARSE = 4 -}; - -class BufferArg; -class SequenceArg; -class SparseMatrixArg; - -/** - * \brief BufferArg used as the argument type of Function. - * - * The arguments of the Paddle Function have four Buffer types. - * 1. BufferArg for a dense Buffer of any dimension. - * 2. SequenceIdArg for a Buffer of sequence start positions. - * 3. SequenceArg for a Buffer of sequence data. - * 4. SparseMatrixArg for a Buffer of sparse matrix. - * - * Buffer shape - * For most buffers, the first dimension `shape()[0]` represents - * the size of the mini-batch. - * - * Buffer argType - * There is an ArgType property for the BufferArg used as Function Output. - * Whether the result of the Function calculation is assigned to the - * output Buffer or added to the output Buffer is determined by the - * argType_ property of the output BufferArg. - */ - -// ArgType is only used by output BufferArg. -// For input argument, argType_ is ignored. -// For output argument, need to set the argType_ of the BufferArg. 
-enum ArgType { - UNSPECIFIED = 0, - ASSIGN_TO = 1, - ADD_TO = 2, -}; -class BufferArg { - public: - void setArgType(ArgType argType) { argType_ = argType; } - - ArgType getArgType() const { return argType_; } - - public: - BufferArg(ValueType valueType, - const TensorShape& shape, - ArgType argType = UNSPECIFIED) - : buf_(nullptr), valueType_(valueType), shape_(shape), argType_(argType) { - bufferType_ = TENSOR_NORMAL; - } - - BufferArg(void* buf, - ValueType valueType, - const TensorShape& shape, - ArgType argType = UNSPECIFIED) - : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) { - bufferType_ = TENSOR_NORMAL; - } - - BufferArg(void* buf, ValueType valueType) : buf_(buf), valueType_(valueType) { - bufferType_ = TENSOR_NORMAL; - } - - BufferArg(const Matrix& matrix, ArgType argType = UNSPECIFIED) - : buf_( - const_cast(reinterpret_cast(matrix.getData()))), - valueType_(DataType::value), - shape_(2), - argType_(argType) { - bufferType_ = TENSOR_NORMAL; - shape_.setDim(0, matrix.getHeight()); - shape_.setDim(1, matrix.getWidth()); - } - - BufferArg(const Matrix& matrix, - const TensorShape& shape, - ArgType argType = UNSPECIFIED) - : buf_( - const_cast(reinterpret_cast(matrix.getData()))), - valueType_(DataType::value), - shape_(shape), - argType_(argType) { - bufferType_ = TENSOR_NORMAL; - CHECK_EQ(matrix.getElementCnt(), shape.getElements()); - } - - BufferArg(const Vector& vector, ArgType argType = UNSPECIFIED) - : buf_( - const_cast(reinterpret_cast(vector.getData()))), - valueType_(DataType::value), - shape_(1), - argType_(argType) { - bufferType_ = TENSOR_NORMAL; - shape_.setDim(0, vector.getSize()); - } - - BufferArg(const IVector& vector, ArgType argType = UNSPECIFIED) - : buf_( - const_cast(reinterpret_cast(vector.getData()))), - valueType_(VALUE_TYPE_INT32), - shape_(1), - argType_(argType) { - bufferType_ = TENSOR_NORMAL; - shape_.setDim(0, vector.getSize()); - } - - template - typename Tensor::Matrix matrix() const { - CHECK(buf_); - 
CHECK(valueType_ == DataType::value); - // CHECK(deviceType_ == DType); - CHECK_EQ((size_t)2, shape_.ndims()); - return typename Tensor::Matrix( - reinterpret_cast(buf_), shape_[0], shape_[1]); - } - - template - typename Tensor::Vector vector() const { - CHECK(buf_); - CHECK(valueType_ == DataType::value); - // CHECK(deviceType_ == DType); - CHECK_EQ((size_t)1, shape_.ndims()); - return typename Tensor::Vector( - shape_[0], reinterpret_cast(buf_)); - } - - virtual ~BufferArg() {} - - template - T* data() const { - return reinterpret_cast(buf_); - } - - void* data() const { return buf_; } - ValueType valueType() const { return valueType_; } - BufferType bufferType() const { return bufferType_; } - const TensorShape& shape() const { return shape_; } - bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; } - bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; } - virtual size_t numElements() const { return shape_.getElements(); } - - const SequenceArg& sequence() const; - const SparseMatrixArg& sparse() const; - - protected: - void* buf_; - ValueType valueType_; - TensorShape shape_; - BufferType bufferType_{TENSOR_UNKNOWN}; - ArgType argType_{UNSPECIFIED}; - // TODO(tianbing), add deviceType_ - // leading dimensions. 
The size is dims_.size() - // Dims lds_; -}; - -// sequence start positions in a mini-batch of sequences -// shape_.ndims() == 1 -// valueType_ = int32 -// if a < b then value_.buf_[a] < value_.buf_[b] -class SequenceIdArg : public BufferArg { - public: - SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED) - : BufferArg(VALUE_TYPE_INT32, shape, argType) { - bufferType_ = TENSOR_SEQUENCE_ID; - CHECK_EQ(shape_.ndims(), 1UL); - CHECK_GE(shape_[0], 1UL); - numSeqs_ = shape_[0] - 1; - } - - SequenceIdArg(void* buf, - const TensorShape& shape, - ArgType argType = UNSPECIFIED) - : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) { - bufferType_ = TENSOR_SEQUENCE_ID; - CHECK_EQ(shape_.ndims(), 1UL); - numSeqs_ = shape_[0] - 1; - } - - SequenceIdArg(const IVector& vector) : BufferArg(vector) { - bufferType_ = TENSOR_SEQUENCE_ID; - numSeqs_ = shape_[0] - 1; - } - - ~SequenceIdArg() {} - - size_t numSeqs() const { return numSeqs_; } - - private: - size_t numSeqs_; -}; - -// sequences data -// For mini-batch calculate, -// one batch can contain more than one sequence of data. -// SequenceArg can be used to represent sequences that contain multiple -// unequal lengths. 
-class SequenceArg : public BufferArg { - public: - SequenceArg(ValueType valueType, - const TensorShape& shape, - ArgType argType = UNSPECIFIED) - : BufferArg(valueType, shape, argType), - startPositions_(TensorShape({shape[0]})) { - bufferType_ = TENSOR_SEQUENCE_DATA; - } - - SequenceArg(void* buf, - ValueType valueType, - const TensorShape& shape, - const SequenceIdArg& startPositions, - ArgType argType = UNSPECIFIED) - : BufferArg(buf, valueType, shape, argType), - startPositions_(startPositions) { - bufferType_ = TENSOR_SEQUENCE_DATA; - } - - SequenceArg(const Matrix& matrix, - const IVector& vector, - ArgType argType = UNSPECIFIED) - : BufferArg(matrix, argType), startPositions_(vector) { - bufferType_ = TENSOR_SEQUENCE_DATA; - } - - ~SequenceArg() {} - - void* getIdBuf() const { return startPositions_.data(); } - size_t numSeqs() const { return startPositions_.numSeqs(); } - SequenceIdArg& getSequenceId() { return startPositions_; } - const SequenceIdArg& getSequenceId() const { return startPositions_; } - - private: - SequenceIdArg startPositions_; -}; - -// sparse matrix -// valueType_ == float or double -// shape_.ndims() == 2 -class SparseMatrixArg : public BufferArg { - public: - SparseMatrixArg(void* buf, - ValueType valueType, - const TensorShape& shape, - const BufferArg& row, - const BufferArg& col, - size_t nnz, - SparseFormat format, - SparseValueType type, - ArgType argType = UNSPECIFIED) - : BufferArg(buf, valueType, shape, argType), - row_(row), - col_(col), - nnz_(nnz), - format_(static_cast(format)), - type_(static_cast(type)) { - bufferType_ = TENSOR_SPARSE; - CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); - CHECK_EQ(shape_.ndims(), 2UL); - CHECK_EQ(row_.shape().ndims(), 1UL); - CHECK_EQ(col_.shape().ndims(), 1UL); - if (format_ == T_SPARSE_CSR) { - CHECK_EQ(nnz, col.shape()[0]); - } else if (format_ == T_SPARSE_CSC) { - CHECK_EQ(nnz, row.shape()[0]); - } - } - - SparseMatrixArg(ValueType valueType, - const 
TensorShape& shape, - size_t nnz, - SparseFormat format, - SparseValueType type, - ArgType argType = UNSPECIFIED) - : BufferArg(valueType, shape, argType), - row_(BufferArg(nullptr, VALUE_TYPE_INT32)), - col_(BufferArg(nullptr, VALUE_TYPE_INT32)), - nnz_(nnz), - format_(static_cast(format)), - type_(static_cast(type)) { - bufferType_ = TENSOR_SPARSE; - CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); - CHECK_EQ(shape_.ndims(), 2UL); - - /// len of row_ : height + 1 (CSR) or nnz (CSC), buf_ == nullptr - row_ = (format_ == T_SPARSE_CSR - ? BufferArg(VALUE_TYPE_INT32, TensorShape{shape_[0] + 1}) - : BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})); - /// len of col_ : width + 1 (CSC) or nnz (CSR), buf_ == nullptr - col_ = (format_ == T_SPARSE_CSR - ? BufferArg(VALUE_TYPE_INT32, TensorShape{nnz}) - : BufferArg(VALUE_TYPE_INT32, TensorShape{shape_[1] + 1})); - } - - SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED); - - SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED); - - template - typename Tensor::SparseMatrix SparseMatrix() const { - CHECK(buf_); - CHECK(valueType_ == DataType::value); - // CHECK(deviceType_ == DType); - CHECK_EQ(2UL, shape_.ndims()); - return typename Tensor::SparseMatrix( - reinterpret_cast(buf_), - reinterpret_cast(row_.data()), - reinterpret_cast(col_.data()), - shape_[0], - shape_[1], - nnz_, - static_cast(type_), - static_cast(format_), - false); - } - - ~SparseMatrixArg() {} - - void* getRowBuf() const { return row_.data(); } - - void* getColBuf() const { return col_.data(); } - - size_t nnz() const { return nnz_; } - - size_t numElements() const override { return nnz_; } - - SparseDataFormat dataFormat() const { return format_; } - - SparseDataType dataType() const { return type_; } - - private: - BufferArg row_; - BufferArg col_; - size_t nnz_; - SparseDataFormat format_; - SparseDataType type_; -}; - -} // namespace paddle diff --git 
a/paddle/legacy/function/BufferArgTest.cpp b/paddle/legacy/function/BufferArgTest.cpp deleted file mode 100644 index 1ec153bea89f25414b0df3088ab0c366c92ecbe0..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/BufferArgTest.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "BufferArg.h" -#include -#include "paddle/legacy/math/MemoryHandle.h" - -namespace paddle { - -TEST(BufferTest, BufferArg) { - TensorShape shape({8, 10}); - CpuMemoryHandle memory(shape.getElements() * - sizeOfValuType(VALUE_TYPE_FLOAT)); - BufferArg buffer(memory.getBuf(), VALUE_TYPE_FLOAT, shape); - EXPECT_EQ(buffer.data(), memory.getBuf()); -} - -TEST(BufferTest, SequenceIdArg) { - TensorShape shape({10}); - CpuMemoryHandle memory(shape.getElements() * - sizeOfValuType(VALUE_TYPE_INT32)); - SequenceIdArg buffer(memory.getBuf(), shape); - EXPECT_EQ(buffer.data(), memory.getBuf()); - EXPECT_EQ(buffer.numSeqs(), 9U); -} - -} // namespace paddle diff --git a/paddle/legacy/function/CMakeLists.txt b/paddle/legacy/function/CMakeLists.txt deleted file mode 100644 index 29b4ac098e21ee315d5c9b2f2499521d1aa1c322..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -file(GLOB h_files . *Op.h) -file(GLOB cpp_files . 
*Op.cpp) - -list(APPEND h_files Function.h) -list(APPEND cpp_files Function.cpp) -list(APPEND cpp_files BufferArg.cpp) -list(APPEND cpp_files GemmFunctor.cpp) -if(USE_EIGEN_FOR_BLAS) - list(APPEND cpp_files EigenGemm.cpp) -endif(USE_EIGEN_FOR_BLAS) - -if(WITH_GPU) - file(GLOB cu_files . *OpGpu.cu) - cuda_compile(cu_objs ${cu_files}) -endif() - -if(USE_NNPACK) - list(APPEND cpp_files nnpack/NNPACKConvOp.cpp) - if(WITH_TESTING) - add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp) - endif() -endif() - -list(APPEND cpp_files neon/NeonDepthwiseConv.cpp) - -add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) -add_dependencies(paddle_function ${external_project_dependencies}) -add_dependencies(paddle_function paddle_proto) - -if(WITH_TESTING) -if(WITH_GPU) - # TODO: - # file(GLOB test_files . *OpTest.cpp) - # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files}) - add_simple_unittest(CrossMapNormalOpTest) - add_simple_unittest(TensorShapeTest) - add_simple_unittest(TensorTypeTest) - add_simple_unittest(BufferArgTest) - add_simple_unittest(FunctionTest) - add_simple_unittest(ContextProjectionOpTest) - add_simple_unittest(PadOpTest) - add_simple_unittest(MulOpTest) - add_simple_unittest(CosSimOpTest) - add_simple_unittest(RowConvOpTest) - add_simple_unittest(BlockExpandOpTest) - add_simple_unittest(CropOpTest) - add_simple_unittest(SwitchOpTest) - add_simple_unittest(ScaleSubRegionOpTest) -endif() - -add_simple_unittest(Im2ColTest) -add_simple_unittest(GemmConvOpTest) -add_simple_unittest(DepthwiseConvOpTest) -endif() diff --git a/paddle/legacy/function/ContextProjectionOp.cpp b/paddle/legacy/function/ContextProjectionOp.cpp deleted file mode 100644 index 05a3f915862b6657fc0a4300cbbea36721219e10..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ContextProjectionOp.cpp +++ /dev/null @@ -1,412 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ContextProjectionOp.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" - -namespace paddle { -/** - * Context Projection Forward with CPU Matrix Device. - * - */ -template <> -void ContextProjectionForward(CpuMatrix& out_mat, - const CpuMatrix& input_mat, - const CpuMatrix& weight_mat, - const CpuIVector& seq_vec, - size_t context_length, - int context_start, - size_t begin_pad) { - const int* starts = seq_vec.getData(); - const size_t num_sequences = seq_vec.getSize() - 1; - for (size_t i = 0; i < num_sequences; ++i) { - for (size_t j = 0; j < context_length; ++j) { - int begin = starts[i] + context_start + j; - int end = starts[i + 1] + context_start + j; - int dst_begin = starts[i]; - int dst_end = starts[i + 1]; - if (begin < starts[i]) { - int64_t pad_size = - std::min(starts[i] - begin, starts[i + 1] - starts[i]); - MatrixPtr mat = out_mat.subMatrix(starts[i], pad_size); - if (weight_mat) { - MatrixPtr sub = - const_cast(weight_mat).subMatrix(j, pad_size); - mat->addAtOffset(*sub, j * input_mat.getWidth()); - } - dst_begin = starts[i] + pad_size; - begin = starts[i]; - } - if (end > starts[i + 1]) { - int64_t pad_size = - std::min(end - starts[i + 1], starts[i + 1] - starts[i]); - MatrixPtr mat = out_mat.subMatrix(starts[i + 1] - pad_size, pad_size); - if (weight_mat) { - MatrixPtr sub = - const_cast(weight_mat) - .subMatrix(begin_pad + context_start + j - pad_size, - pad_size); - 
mat->addAtOffset(*sub, j * input_mat.getWidth()); - } - dst_end = starts[i + 1] - pad_size; - end = starts[i + 1]; - } - if (end <= begin) continue; - MatrixPtr src = - const_cast(input_mat).subMatrix(begin, end - begin); - MatrixPtr dst = out_mat.subMatrix(dst_begin, dst_end - dst_begin); - dst->addAtOffset(*src, j * input_mat.getWidth()); - } - } -} - -/** - * Paddle Function for Context Projection Forward. - * Calculate the output layer value sequence after context projection. - * - * What is Context Projection for a sequence? - * For example, assumed input (x) has 4 words and the dimension of each word - * representation is 2. If we use zero to pad instead of learned weight to pad, - * and the context_lenth is 3, the output (y) is: - * - * @code - * x = [a1, a2; - * b1, b2; - * c1, c2; - * d1, d2] - * y = [0, 0, a1, a2, b1, b2; - * a1, a2, b1, b2, c1, c2; - * b1, b2, c1, c2, d1, d2; - * c1, c2, d1, d2, 0, 0] - * @endcode - * - * \param outputs[0].matrix output layer value, n * (d * l) - * \param outputs[0].vector start position sequence, n * 1 - * \param inputs[0].matrix input layer value, n * d - * \param inputs[0].vector start position sequence, n * 1 - * \param inputs[1].matrix input layer weight, pad * d - */ -template -class ContextProjectionForwardFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { - context_length_ = config.get("context_length"); - context_start_ = config.get("context_start"); - begin_pad_ = config.get("begin_pad"); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK(1UL == inputs.size() || 2UL == inputs.size()); - CHECK_EQ(1UL, outputs.size()); - CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) - << "SequenceArg required here"; - const auto val_seqs = dynamic_cast(inputs[0]); - auto out_seq = dynamic_cast(outputs[0]); - - CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data()); - CHECK_EQ(out_seq.shape().ndims(), 2UL); - 
CHECK_EQ(val_seqs.shape().ndims(), 2UL); - /// dim of output = dim of input * context_length - CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_); - /// input and output has the same batch_size - CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]); - if (2UL == inputs.size()) { - CHECK_EQ(inputs[1].shape().ndims(), 2UL); - /// dim of input == dim of weight - CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]); - } - - CHECK_EQ(out_seq.getArgType(), ADD_TO); - auto out_mat = out_seq.matrix(); - const auto in_mat = val_seqs.matrix(); - const auto w_mat = - (2UL == inputs.size() && inputs[1].data()) - ? inputs[1].matrix() - : typename Tensor::Matrix(nullptr, 0, 0); - const auto seq_vec = val_seqs.getSequenceId().vector(); - - ContextProjectionForward(out_mat, - in_mat, - w_mat, - seq_vec, - context_length_, - context_start_, - begin_pad_); - } - - private: - size_t context_length_; - int context_start_; - size_t begin_pad_; -}; - -/** - * Context Projection Backward with CPU Matrix Device. - * - */ -template <> -void ContextProjectionBackward(const CpuMatrix& out_grad_mat, - CpuMatrix& in_grad_mat, - CpuMatrix& w_grad_mat, - const CpuIVector& seq_vec, - size_t context_length, - int context_start, - size_t begin_pad, - bool is_padding, - size_t total_pad) { - size_t input_dim = in_grad_mat ? in_grad_mat.getWidth() - : w_grad_mat ? 
w_grad_mat.getWidth() : 0; - const int* starts = seq_vec.getData(); - size_t num_sequences = seq_vec.getSize() - 1; - for (size_t i = 0; i < num_sequences; ++i) { - for (size_t j = 0; j < context_length; ++j) { - int begin = starts[i] + context_start + j; - int end = starts[i + 1] + context_start + j; - int dst_begin = starts[i]; - int dst_end = starts[i + 1]; - if (begin < starts[i]) { - int64_t pad_size = - std::min(starts[i] - begin, starts[i + 1] - starts[i]); - if (is_padding && w_grad_mat) { - MatrixPtr mat = const_cast(out_grad_mat) - .subMatrix(starts[i], pad_size); - MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size); - sub->addAtOffset(*mat, j * input_dim); - } - dst_begin = starts[i] + pad_size; - begin = starts[i]; - } - if (end > starts[i + 1]) { - int64_t pad_size = - std::min(end - starts[i + 1], starts[i + 1] - starts[i]); - if (is_padding && w_grad_mat) { - MatrixPtr mat = const_cast(out_grad_mat) - .subMatrix(starts[i + 1] - pad_size, pad_size); - MatrixPtr sub = w_grad_mat.subMatrix( - begin_pad + context_start + j - pad_size, pad_size); - sub->addAtOffset(*mat, j * input_dim); - } - dst_end = starts[i + 1] - pad_size; - end = starts[i + 1]; - } - if (end <= begin) continue; - if (!in_grad_mat) continue; - MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin); - MatrixPtr dst = const_cast(out_grad_mat) - .subMatrix(dst_begin, dst_end - dst_begin); - src->addAtOffset(*dst, j * input_dim); - } - } -} - -/** - * Context Projection Backward Function. 
- * Update the weight gradient and input layer gradient with backprop - * - * \param inputs[0].matrix output layer grad, n * (d * l) - * \param inputs[0].vector start position sequence, n * 1 - * \param outputs[0].matrix input layer grad, n * d - * \param outputs[0].vector start position sequence, n * 1 - * \param outputs[1] weight grad, pad * d - */ -template -class ContextProjectionBackwardFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { - context_length_ = config.get("context_length"); - context_start_ = config.get("context_start"); - begin_pad_ = config.get("begin_pad"); - is_padding_ = config.get("is_padding"); - total_pad_ = config.get("total_pad"); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK(1UL == outputs.size() || 2UL == outputs.size()); - CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) - << "SequenceArg required here"; - const auto in_seq = dynamic_cast(inputs[0]); - auto out_seq = dynamic_cast(outputs[0]); - CHECK(in_seq.data() && in_seq.getSequenceId().data()); - CHECK_EQ(in_seq.shape().ndims(), 2UL); - CHECK_EQ(out_seq.shape().ndims(), 2UL); - CHECK_EQ(out_seq.getSequenceId().shape().ndims(), 1UL); - - /// input and output grad has the same batch_size - CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]); - /// dim of output grad = dim of input grad * context_length - CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_); - CHECK_EQ(out_seq.getArgType(), ADD_TO); - - if (2UL == outputs.size()) { - CHECK_EQ(outputs[1].shape().ndims(), 2UL); - /// dim of input grad == dim of weight - CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]); - CHECK_EQ(outputs[1].getArgType(), ADD_TO); - } - - const auto seq_vec = in_seq.getSequenceId().vector(); - const auto out_grad_mat = in_seq.matrix(); - auto in_grad_mat = - !out_seq.data() ? 
typename Tensor::Matrix(nullptr, 0, 0) - : out_seq.matrix(); - auto w_grad_mat = - (2UL == outputs.size() && outputs[1].data()) - ? outputs[1].matrix() - : typename Tensor::Matrix(nullptr, 0, 0); - - ContextProjectionBackward(out_grad_mat, - in_grad_mat, - w_grad_mat, - seq_vec, - context_length_, - context_start_, - begin_pad_, - is_padding_, - total_pad_); - } - - private: - size_t context_length_; - int context_start_; - size_t begin_pad_; - bool is_padding_; - size_t total_pad_; -}; - -/** - * Context Projection Backward Data Function - * Update input layer grad - * input: sequence of output layer grad - * output: sequence of input layer grad - * - * \param outputs[0].matrix input layer grad, n * d - * \param outputs[0].vector start position sequence, n * 1 - * \param inputs[0].matrix output layer grad, n * (d * l) - * \param inputs[0].vector start positon sequence, n * 1 - */ -template -class ContextProjectionBackwardDataFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { - context_length_ = config.get("context_length"); - context_start_ = config.get("context_start"); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) - << "SequenceArg required here"; - const auto in_seq = dynamic_cast(inputs[0]); - const auto out_seq = dynamic_cast(outputs[0]); - - CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data()); - CHECK_EQ(out_seq.shape().ndims(), 2UL); - CHECK_EQ(in_seq.shape().ndims(), 2UL); - CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL); - /// output layer grad dim == input layer grad dim * context_length_ - CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_); - /// input and output has the same batch_size - CHECK_EQ(in_seq.shape()[0], out_seq.shape()[0]); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - - const auto 
out_grad_mat = in_seq.matrix(); - const auto seq_vec = in_seq.getSequenceId().vector(); - auto in_grad_mat = out_seq.matrix(); - - ContextProjectionBackwardData( - out_grad_mat, in_grad_mat, seq_vec, context_length_, context_start_); - } - - private: - size_t context_length_; - int context_start_; -}; - -/** - * Context Projection Backward Weight Function - * Update weight grad by backprop - * input: sequence of output layer grad - * output: weight grad - * - * \param outputs[0] weight grad, pad * d - * \param inputs[0].matrix output layer grad, n * (d * l) - * \param inputs[0].vecotr start positon sequence, n * 1 - */ -template -class ContextProjectionBackwardWeightFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { - context_length_ = config.get("context_length"); - context_start_ = config.get("context_start"); - begin_pad_ = config.get("begin_pad"); - total_pad_ = config.get("total_pad"); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here"; - const auto in_seq = dynamic_cast(inputs[0]); - CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data()); - CHECK_EQ(outputs[0].shape().ndims(), 2UL); - CHECK_EQ(in_seq.shape().ndims(), 2UL); - CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL); - CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]); - /// output layer grad dim == weight dim * context_length_ - CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_); - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - - const auto seq_vec = in_seq.getSequenceId().vector(); - const auto out_grad_mat = in_seq.matrix(); - auto w_grad_mat = outputs[0].matrix(); - ContextProjectionBackwardWeight(out_grad_mat, - w_grad_mat, - seq_vec, - context_length_, - context_start_, - total_pad_, - begin_pad_); - } - - private: - size_t context_length_; - int 
context_start_; - size_t begin_pad_; - size_t total_pad_; -}; - -REGISTER_TYPED_FUNC(ContextProjectionForward, - CPU, - ContextProjectionForwardFunc); -REGISTER_TYPED_FUNC(ContextProjectionBackward, - CPU, - ContextProjectionBackwardFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(ContextProjectionForward, - GPU, - ContextProjectionForwardFunc); -REGISTER_TYPED_FUNC(ContextProjectionBackward, - GPU, - ContextProjectionBackwardFunc); -REGISTER_TYPED_FUNC(ContextProjectionBackwardData, - GPU, - ContextProjectionBackwardDataFunc); -REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight, - GPU, - ContextProjectionBackwardWeightFunc); -#endif -} // namespace paddle diff --git a/paddle/legacy/function/ContextProjectionOp.h b/paddle/legacy/function/ContextProjectionOp.h deleted file mode 100644 index 822734a78e6732a3441f571c71d910a76241055b..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ContextProjectionOp.h +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "Function.h" - -namespace paddle { - -/** - * \brief Context Projection Forward. - * - * \param[in/out] outputs output data. - * \param[in] input input data. - * \param[in] weight input weight. - * \param[in] sequence input data. - * \param[in] context_length consecutive rows for concatenation. - * \param[in] context_start context start position. - * \param[in] begin_pad begining pad position. 
- * \param[in] is_padding whether padding 0 or not. - * - */ -template -void ContextProjectionForward( - typename Tensor::Matrix& output, - const typename Tensor::Matrix& input, - const typename Tensor::Matrix& weight, - const typename Tensor::Vector& sequence, - size_t context_length, - int context_start, - size_t begin_pad); - -/** - * \brief Context Projection Backward. - * - * \param[out] outputs output gradient. - * \param[in] input input gradient. - * \param[in] weight input weight gradient. - * \param[in] sequence input data. - * \param[in] context_length consecutive rows for concatenation. - * \param[in] context_start context start position. - * \param[in] begin_pad begining pad position. - * \param[in] is_padding whether padding 0 or not. - * - */ -template -void ContextProjectionBackward( - const typename Tensor::Matrix& out_grad, - typename Tensor::Matrix& in_grad, - typename Tensor::Matrix& w_grad, - const typename Tensor::Vector& seq_vec, - size_t context_length, - int context_start, - size_t begin_pad, - bool is_padding, - size_t total_pad); - -template -void ContextProjectionBackwardData( - const typename Tensor::Matrix& out_grad, - typename Tensor::Matrix& in_grad, - const typename Tensor::Vector& sequence, - size_t context_length, - int context_start); - -template -void ContextProjectionBackwardWeight( - const typename Tensor::Matrix& out_grad, - typename Tensor::Matrix& w_grad, - const typename Tensor::Vector& seq_vec, - size_t context_length, - int context_start, - size_t total_pad, - size_t begin_pad); - -} // namespace paddle diff --git a/paddle/legacy/function/ContextProjectionOpGpu.cu b/paddle/legacy/function/ContextProjectionOpGpu.cu deleted file mode 100644 index 0a4d865e2c4fb0f5f802d4ba8a9c48cdf09d4cea..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ContextProjectionOpGpu.cu +++ /dev/null @@ -1,413 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ContextProjectionOp.h" -#include "hl_base.h" - -namespace paddle { - -template -__global__ void KeContextProjectionForward(const real* input, - const int* sequence, - const real* weight, - real* output, - int input_dim, - int context_length, - int context_start, - int begin_pad) { - int idx = threadIdx.x; - int block_size = blockDim.x; - int sequenceId = blockIdx.x; - int seq_start = sequence[sequenceId]; - int seq_end = sequence[sequenceId + 1]; - real value = 0; - - int instances = seq_end - seq_start + context_length - 1; - output += seq_start * input_dim * context_length; - input += seq_start * input_dim; - for (int k = 0; k <= input_dim / block_size; k++) { - if (idx < input_dim) { - for (int i = 0; i < instances; i++) { - // i + context_start; - if ((i + context_start) < 0) { - if (padding) { - value = weight[i * input_dim + idx]; - } else { - continue; - } - } else if ((i + context_start) >= (seq_end - seq_start)) { - if (padding) { - value = - weight[(begin_pad + i + context_start - (seq_end - seq_start)) * - input_dim + - idx]; - } else { - continue; - } - } else { - value = input[(i + context_start) * input_dim + idx]; - } - - int outx = (i - context_length) < 0 ? i : (context_length - 1); - int outy = (i - context_length) < 0 ? 
0 : (i - (context_length - 1)); - real* output_r = - output + outy * input_dim * context_length + outx * input_dim; - for (int j = outy; j < seq_end - seq_start; j++) { - output_r[idx] += value; - if (j - outy == outx) break; - output_r += (context_length - 1) * input_dim; - } - } - } - idx += block_size; - } -} - -/** - * @brief Context projection forward. - * - * @param[in] input input sequence. - * @param[in] sequence sequence index. - * @param[in] weight padding data. - * @param[out] output output sequence. - * @param[in] num_sequences number of sequences. - * @param[in] input_dim input sequence dimension. - * @param[in] context_length context length. - * @param[in] context_start context start. - * @param[in] begin_pad number of extra timesteps added at the - * beginning. - * - */ -void hl_context_projection_forward(const real* input, - const int* sequence, - const real* weight, - real* output, - size_t num_sequences, - size_t input_dim, - size_t context_length, - int context_start, - size_t begin_pad) { - CHECK_NOTNULL(input); - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(output); - - int block_size = 128; - int blocks_x = num_sequences; - int blocks_y = 1; - dim3 threads(block_size, 1); - dim3 grid(blocks_x, blocks_y); - - if (weight) { - KeContextProjectionForward<<>>( - input, - sequence, - weight, - output, - input_dim, - context_length, - context_start, - begin_pad); - } else { - KeContextProjectionForward<<>>( - input, - sequence, - weight, - output, - input_dim, - context_length, - context_start, - begin_pad); - } - CHECK_SYNC("hl_context_projection_forward failed"); -} - -template <> -void ContextProjectionForward(GpuMatrix& output, - const GpuMatrix& input, - const GpuMatrix& weight, - const GpuIVector& sequence, - size_t context_length, - int context_start, - size_t begin_pad) { - hl_context_projection_forward(input.getData(), - sequence.getData(), - weight ? 
weight.getData() : nullptr, - output.getData(), - sequence.getSize() - 1, - input.getWidth(), - context_length, - context_start, - begin_pad); -} - -__global__ void KeContextProjectionBackwardData(const real* out_grad, - const int* sequence, - real* in_grad, - size_t input_dim, - int context_length, - int context_start) { - int idx = threadIdx.x; - int block_size = blockDim.x; - int sequenceId = blockIdx.x; - int seq_start = sequence[sequenceId]; - int seq_end = sequence[sequenceId + 1]; - real value = 0; - - int instances = seq_end - seq_start + context_length - 1; - auto out = const_cast(out_grad); - out += seq_start * input_dim * context_length; - in_grad += seq_start * input_dim; - for (int k = 0; k <= input_dim / block_size; k++) { - if (idx < input_dim) { - for (int i = 0; i < instances; i++) { - if ((i + context_start) < 0) { - continue; - } else if ((i + context_start) >= (seq_end - seq_start)) { - continue; - } else { - // value = 0; - value = in_grad[(i + context_start) * input_dim + idx]; - } - - int outx = (i - context_length) < 0 ? i : (context_length - 1); - int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1)); - real* output_r = - out + outy * input_dim * context_length + outx * input_dim; - for (int j = outy; j < seq_end - seq_start; j++) { - value += output_r[idx]; - if (j - outy == outx) break; - output_r += (context_length - 1) * input_dim; - } - in_grad[(i + context_start) * input_dim + idx] = value; - } - } - idx += block_size; - } -} - -/** - * @brief Context projection backward data. - * - * @param[in] out_grad output gradient. - * @param[in] sequence sequence index. - * @param[out] input_grad input gradient. - * @param[in] num_sequences number of sequences. - * @param[in] input_dim input sequence dimension. - * @param[in] context_length context length. - * @param[in] context_start context start. 
- * - */ -void hl_context_projection_backward_data(const real* out_grad, - const int* sequence, - real* input_grad, - size_t num_sequences, - size_t input_dim, - size_t context_length, - int context_start) { - CHECK_NOTNULL(out_grad); - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(input_grad); - - int block_size = 128; - int blocks_x = num_sequences; - int blocks_y = 1; - dim3 threads(block_size, 1); - dim3 grid(blocks_x, blocks_y); - KeContextProjectionBackwardData<<>>( - out_grad, sequence, input_grad, input_dim, context_length, context_start); - CHECK_SYNC("hl_context_projection_backward_data failed"); -} - -template <> -void ContextProjectionBackwardData(const GpuMatrix& out_grad, - GpuMatrix& in_grad, - const GpuIVector& sequence, - size_t context_length, - int context_start) { - hl_context_projection_backward_data(out_grad.getData(), - sequence.getData(), - in_grad.getData(), - sequence.getSize() - 1, - in_grad.getWidth(), - context_length, - context_start); -} - -template -__global__ void KeContextProjectionBackwardWeight(const real* out_grad, - const int* sequence, - real* w_grad, - int num_sequences, - int w_dim, - int context_length, - int context_start, - int begin_pad) { - __shared__ real sum_s[THREADS_Y][THREADS_X]; - int pad_of_block = (w_dim + THREADS_X - 1) / THREADS_X; - const int idx = threadIdx.x; - const int idy = threadIdx.y; - int padId = blockIdx.x / pad_of_block; - int weight_idx = idx + THREADS_X * (blockIdx.x % pad_of_block); - int instanceId; - real value = 0; - real* output_r; - - sum_s[idy][idx] = 0.0f; - if (weight_idx < w_dim) { - for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) { - int seq_start = sequence[seqId]; - int seq_end = sequence[seqId + 1]; - output_r = - const_cast(out_grad) + seq_start * w_dim * context_length; - - if (context_start < 0) { - if (padId + context_start < 0) { - instanceId = padId; - } else { - // begin_pad > 0; - instanceId = - (padId - begin_pad) + (seq_end - seq_start) - context_start; - } - } 
else { - if (padId + (seq_end - seq_start) < context_start) { - continue; - } else { - // begin_pad == 0; - instanceId = padId + (seq_end - seq_start) - context_start; - } - } - - int outx = - (instanceId - context_length) < 0 ? instanceId : (context_length - 1); - int outy = (instanceId - context_length) < 0 - ? 0 - : (instanceId - (context_length - 1)); - output_r += outy * w_dim * context_length + outx * w_dim; - for (int j = outy; j < seq_end - seq_start; j++) { - value += output_r[weight_idx]; - if (j - outy == outx) break; - output_r += (context_length - 1) * w_dim; - } - } - sum_s[idy][idx] = value; - } - __syncthreads(); - - for (int stride = THREADS_Y / 2; stride > 0; stride = stride / 2) { - if (idy < stride) { - sum_s[idy][idx] += sum_s[idy + stride][idx]; - } - __syncthreads(); - } - __syncthreads(); - - if (weight_idx < w_dim) { - if (idy == 0) { - w_grad[padId * w_dim + weight_idx] += sum_s[0][idx]; - } - } -} - -/** - * @brief Context projection backward weight. - * - * @param[in] out_grad output gradient. - * @param[in] sequence sequence index. - * @param[out] w_grad weight gradient. - * @param[in] num_sequences number of sequences. - * @param[in] w_dim input sequence dimension. - * @param[in] total_pad number of extra timesteps. - * @param[in] context_length context length. - * @param[in] context_start context start. - * @param[in] begin_pad number of extra timesteps added at the - * beginning. 
- * - */ -void hl_context_projection_backward_weight(const real* out_grad, - const int* sequence, - real* w_grad, - size_t num_sequences, - size_t w_dim, - size_t total_pad, - size_t context_length, - int context_start, - size_t begin_pad) { - CHECK_NOTNULL(out_grad); - CHECK_NOTNULL(sequence); - CHECK_NOTNULL(w_grad); - - int threads_x = 32; - int threads_y = 32; - int blocks_x = total_pad * ((w_dim + threads_x - 1) / threads_x); - dim3 threads(threads_x, threads_y); - dim3 grid(blocks_x, 1); - - KeContextProjectionBackwardWeight<32, - 32><<>>( - out_grad, - sequence, - w_grad, - num_sequences, - w_dim, - context_length, - context_start, - begin_pad); - CHECK_SYNC("hl_context_projection_backward_weight failed"); -} - -template <> -void ContextProjectionBackwardWeight(const GpuMatrix& out_grad, - GpuMatrix& w_grad, - const GpuIVector& seq_vec, - size_t context_length, - int context_start, - size_t total_pad, - size_t begin_pad) { - hl_context_projection_backward_weight(out_grad.getData(), - seq_vec.getData(), - w_grad.getData(), - seq_vec.getSize() - 1, - w_grad.getWidth(), - total_pad, - context_length, - context_start, - begin_pad); -} - -template <> -void ContextProjectionBackward(const GpuMatrix& out_grad, - GpuMatrix& in_grad, - GpuMatrix& w_grad, - const GpuIVector& sequence, - size_t context_length, - int context_start, - size_t begin_pad, - bool is_padding, - size_t total_pad) { - if (in_grad) { - ContextProjectionBackwardData( - out_grad, in_grad, sequence, context_length, context_start); - } - if (is_padding && w_grad) { - ContextProjectionBackwardWeight(out_grad, - w_grad, - sequence, - context_length, - context_start, - total_pad, - begin_pad); - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/ContextProjectionOpTest.cpp b/paddle/legacy/function/ContextProjectionOpTest.cpp deleted file mode 100644 index 3b0a34567fe17b466de6186e537243fe8166a77a..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/function/ContextProjectionOpTest.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "FunctionTest.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT - -void testMatrixProjectionForward(int context_start, - size_t context_length, - bool is_padding, - size_t batch_size, - size_t input_dim) { - size_t pad = std::max(0, -context_start) + - std::max(0, (int)(context_start + context_length - 1)); - if (pad == 0) is_padding = false; - - CpuGpuFuncCompare test( - "ContextProjectionForward", - FuncConfig() - .set("context_length", context_length) - .set("context_start", context_start) - .set("begin_pad", (size_t)std::max(0, -context_start))); - - // prepare input arguments - test.addSequence(SequenceIdArg(TensorShape{batch_size})); - test.addInputs( - SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim})); - if (is_padding) { // weight - test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim})); - } - test.addOutputs( - SequenceArg(VALUE_TYPE_FLOAT, - TensorShape{batch_size, input_dim * context_length}), - ADD_TO); - - // run Function - test.run(); -} - -void testMatrixProjectionBackward(int context_start, - size_t context_length, - bool is_padding, - size_t batch_size, - size_t input_dim) { - size_t pad = std::max(0, -context_start) + - std::max(0, 
(int)(context_start + context_length - 1)); - if (pad == 0) is_padding = false; - - CpuGpuFuncCompare test( - "ContextProjectionBackward", - FuncConfig() - .set("context_length", context_length) - .set("context_start", context_start) - .set("begin_pad", (size_t)std::max(0, -context_start)) - .set("is_padding", is_padding) - .set("total_pad", pad)); - - // prepare input arguments - test.addSequence(SequenceIdArg(TensorShape{batch_size})); - test.addInputs(SequenceArg( - VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim * context_length})); - test.addOutputs( - SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim}), - ADD_TO); - if (is_padding) { // weight - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim}), - ADD_TO); - } - - // run Function - test.run(); -} - -TEST(ContextProjection, Projection) { - for (auto context_start : {-5, -3, -1, 0, 3}) { - for (auto context_length : {1, 2, 5, 7}) { - for (auto trainable_padding : {false, true}) { - for (auto batch_size : {1, 2, 5, 20, 100}) { - for (auto input_dim : {15, 32, 63, 128, 200}) { - VLOG(3) << " context_start=" << context_start - << " context_length=" << context_length - << " trainable_padding=" << trainable_padding - << " batch_size=" << batch_size - << " input_dim=" << input_dim; - testMatrixProjectionForward(context_start, - context_length, - trainable_padding, - batch_size, - input_dim); - testMatrixProjectionBackward(context_start, - context_length, - trainable_padding, - batch_size, - input_dim); - } - } - } - } - } -} diff --git a/paddle/legacy/function/ConvOp.h b/paddle/legacy/function/ConvOp.h deleted file mode 100644 index 2d8437bcfe60d1d81897f1c4be1cbfecb5b27fe0..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ConvOp.h +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Function.h" - -namespace paddle { - -/* - * \brief Based on the ConvFunctionBase class, the forward calculation, - * backward input calculation and backward filter calculation - * of convolution operations can be implemented. - * - * Arguments of forward and backward calculation: - * 1. Forward calculation of convolution. - * inputs = {INPUT, FILTER}, outputs = {OUTPUT} - * The first and second input arguments are input image and filter data. - * The output argument is output image. - * - * 2. Backward input calculation of convolution. - * inputs = {OUTPUT_GRAD, FILTER}, outputs = {INPUT_GRAD} - * The first and second input arguments are output grad image - * and filter data. - * The output argument is input grad image. - * - * 3. Backward filter calculation of convolution. - * inputs = {OUTPUT_GRAD, INPUT}, outputs = {FILTER_GRAD} - * The first and second input arguments are output grad image - * and input image. - * The output argument is filter grad. - * - * Arguments format of input, filter and output: - * 1. Input image, output image, input image gradient, output image gradient - * are all NCHW format. Where N is batch size, C is the number of channels, - * H and W is the height and width of image or image gradient. - * - * 2. The format of the filter data is MCHW, where M is the number of output - * image channels, C is the number of input image channels, - * H and W is height and width of filter. 
- * - * If `groups` is greater than 1, the filter's data format should be GMCHW, - * where G is the `groups`, and G * M is the number of output image - * channels, G * C is the number of input image channels, - * H and W is height and width of filter. - */ -class ConvFunctionBase : public FunctionBase { - public: - void init(const FuncConfig& config) override { - // function arguments - strides_ = config.get>("strides"); - paddings_ = config.get>("paddings"); - dilations_ = config.get>("dilations"); - groups_ = config.get("groups"); - - // number of inputs and outputs - numInputs_ = 2; - numOutputs_ = 1; - } - - // input can be INPUT and INPUT_GRAD - // filter can be FILTER and FILTER_GRAD - // output can be OUTPUT and OUTPUT_GRAD - void checkShape(const TensorShape& input, - const TensorShape& filter, - const TensorShape& output) { - // inputs and outputs arguments should be 4-dimensional. - CHECK_EQ(input.ndims(), (size_t)4); - CHECK_EQ(output.ndims(), (size_t)4); - // The batchSize of the input needs to be equal to - // the batchSize of the output. - CHECK_EQ(input[0], output[0]); - - if (filter.ndims() == (size_t)4) { - // If the filter's dimension is 4, groups convolution is not supported. - CHECK_EQ(groups_, (size_t)1); - // The input and output channel dimensions are the second and first - // dimensions of the filter shape. - CHECK_EQ(input[1], filter[1]); - CHECK_EQ(output[1], filter[0]); - } else { - // filter argument should be 5-dimensional. - CHECK_EQ(filter.ndims(), (size_t)5); - // The first dimension of the filter is the size of the group - CHECK_EQ(filter[0], groups_); - // The input and output channel dimensions are the third and second - // dimensions of the filter shape. 
- CHECK_EQ(input[1], filter[2] * groups_); - CHECK_EQ(output[1], filter[1] * groups_); - } - } - - protected: - size_t getFilterHeight(const TensorShape& filter) const { - return filter[filter.ndims() - 2]; - } - - size_t getFilterWidth(const TensorShape& filter) const { - return filter[filter.ndims() - 1]; - } - - // determine whether im2col needs to be performed - inline bool isNeedIm2col(const TensorShape& filter) const { - return !(getFilterHeight(filter) == 1 && getFilterWidth(filter) == 1 && - strideH() == 1 && strideW() == 1 && paddingH() == 0 && - paddingW() == 0); - } - - std::vector strides_; - std::vector paddings_; - std::vector dilations_; - - /// Group size, refer to grouped convolution in - /// Alex Krizhevsky's paper: when group=2, the first half of the - /// filters are only connected to the first half of the input channels, - /// and the second half only connected to the second half. - size_t groups_; - - inline int strideH() const { return strides_[0]; } - - inline int strideW() const { return strides_[1]; } - - inline int paddingH() const { return paddings_[0]; } - - inline int paddingW() const { return paddings_[1]; } - - inline int dilationH() const { return dilations_[0]; } - - inline int dilationW() const { return dilations_[1]; } - - // A temporary memory in convolution calculation. - MemoryHandlePtr memory_; - - template - void resizeBuffer(size_t newSize) { - if (!memory_ || newSize * sizeof(real) > memory_->getAllocSize()) { - if (Device == DEVICE_TYPE_CPU) { - memory_ = std::make_shared(newSize * sizeof(real)); - } else { - memory_ = std::make_shared(newSize * sizeof(real)); - } - } - } -}; - -} // namespace paddle diff --git a/paddle/legacy/function/ConvOpTest.h b/paddle/legacy/function/ConvOpTest.h deleted file mode 100644 index 5eac6089786e65c69a37bd3521e86f5dc836f0eb..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ConvOpTest.h +++ /dev/null @@ -1,275 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "FunctionTest.h" - -namespace paddle { - -template -void forward(Compare2Function& test, - const TensorShape& input, - const TensorShape& filter, - const TensorShape& output) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.run(); -} - -template -void backward_input(Compare2Function& test, - const TensorShape& input, - const TensorShape& filter, - const TensorShape& output) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); - test.run(); -} - -template -void backward_filter(Compare2Function& test, - const TensorShape& input, - const TensorShape& filter, - const TensorShape& output) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), ADD_TO); - test.run(); -} - -template -using Function = void (*)(Compare2Function& test, - const TensorShape& input, - const TensorShape& filter, - const TensorShape& output); - -/** - * \brief A basic convolution function test interface. - * - * \param conv1 type name of convolution function 1. - * \param conv2 type name of convolution function 2. 
- * \param function test function, can be one of the forward, backward_input - * backward_filter function. - * Example: - * 1. Compare GemmConv's CPU and GPU implementation: - * Convolution( - * "GemmConv-CPU", "GemmConv-GPU", forward); - */ -template -void Convolution(const std::string& conv1, - const std::string& conv2, - Function function) { - for (size_t batchSize : {1, 5}) { - for (size_t inputSize : {7, 14, 31}) { - for (size_t filterSize : {1, 3, 5}) { - for (size_t inputChannels : {3, 16}) { - for (size_t outputChannels : {3, 16}) { - if (outputChannels < inputChannels) continue; - for (size_t stride : {1, 2}) { - for (size_t padding : {0, 1}) { - for (size_t dilation : {1, 3}) { - if (padding >= filterSize) break; - size_t filterS = (filterSize - 1) * dilation + 1; - - if (inputSize + 2 * padding < filterS) break; - - if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" || - conv1 == "NNPACKConv-CPU" || - conv2 == "NNPACKConv-CPU") && - dilation > 1) - break; - - // NNPACK only supports stride = 1 if batchSize > 1 - if ((conv1 == "NNPACKConv-CPU" || - conv2 == "NNPACKConv-CPU") && - batchSize > 1 && stride > 1) - break; - - size_t outputSize = - (inputSize - filterS + 2 * padding + stride) / stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - std::vector dilations = {dilation, dilation}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("dilations", dilations) - .set("groups", (size_t)1) - .set("algo", (std::string) "auto")); - - TensorShape input{ - 
batchSize, inputChannels, inputSize, inputSize}; - TensorShape filter{ - outputChannels, inputChannels, filterSize, filterSize}; - TensorShape output{ - batchSize, outputChannels, outputSize, outputSize}; - - function(test, input, filter, output); - } - } - } - } - } - } - } - } -} - -/** - * \brief A convolution function test interface for - * image height is not equal image width. - */ -template -void Convolution2(const std::string& conv1, - const std::string& conv2, - Function function) { - for (size_t batchSize : {4}) { - for (size_t inputHeight : {7, 31}) { - for (size_t inputWidth : {10, 54}) { - for (size_t filterHeight : {1, 5}) { - for (size_t filterWidth : {3, 7}) { - for (size_t inputChannels : {7}) { - for (size_t outputChannels : {7}) { - size_t stride = 1; - size_t padding = 0; - size_t dilation = 1; - size_t outputHeight = - (inputHeight - filterHeight + 2 * padding + stride) / - stride; - size_t outputWidth = - (inputWidth - filterWidth + 2 * padding + stride) / stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputHeight - << " inputWidth=" << inputWidth - << " outputChannels=" << outputChannels - << " filterHeight=" << filterHeight - << " filterWidth=" << filterWidth - << " outputHeight=" << outputHeight - << " outputWidth=" << outputWidth - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - std::vector dilations = {dilation, dilation}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", (size_t)1) - .set("dilations", dilations) - .set("algo", (std::string) "auto")); - - TensorShape input{ - batchSize, inputChannels, inputHeight, inputWidth}; - TensorShape filter{ - outputChannels, inputChannels, filterHeight, filterWidth}; - TensorShape output{ - batchSize, outputChannels, outputHeight, outputWidth}; - - 
function(test, input, filter, output); - } - } - } - } - } - } - } -} - -/** - * \brief A convolution function test interface for depthwise convolution. - */ -template -void DepthwiseConvolution(const std::string& conv1, - const std::string& conv2, - Function function) { - for (size_t batchSize : {1, 32}) { - for (size_t inputSize : {7, 14, 54}) { - for (size_t filterSize : {3, 4}) { - for (size_t inputChannels : {32}) { - for (size_t outputChannels : {32, 64}) { - for (size_t stride : {1, 2}) { - for (size_t padding : {0, 1}) { - // NNPACK only supports stride = 1 if batchSize > 1, - // and there has some bug when batchSize > 1 and groups != 1 - if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && - batchSize > 1) - break; - - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize << " stride=" << stride - << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - std::vector dilations = {1, 1}; - size_t groups = inputChannels; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", groups) - .set("dilations", dilations) - .set("algo", (std::string) "auto")); - - TensorShape input{ - batchSize, inputChannels, inputSize, inputSize}; - TensorShape filter{groups, - outputChannels / groups, - inputChannels / groups, - filterSize, - filterSize}; - TensorShape output{ - batchSize, outputChannels, outputSize, outputSize}; - - function(test, input, filter, output); - } - } - } - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/CosSimOp.cpp 
b/paddle/legacy/function/CosSimOp.cpp deleted file mode 100644 index d04f4396caade803aa846fa81388f95a194845e6..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CosSimOp.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "CosSimOp.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" - -namespace paddle { -/** - * Cosine Similarity for CpuMatrix - * - * \param out_mat, output value, size: nSamples * 1. - * \param in1_mat, input value 1, size: nSamples * dim. - * \param in2_mat, input value 2, size: n2 * dim (n2 == 1 or n2 == nSamples). - * \param scale, default 1.0 - * - */ -template <> -void CosSimForward(CpuMatrix& out_mat, - const CpuMatrix& in1_mat, - const CpuMatrix& in2_mat, - real scale) { - CHECK(out_mat.getData() && in1_mat.getData() && in2_mat.getData()); - size_t num_samples = out_mat.getHeight(); - size_t dim = in1_mat.getWidth(); - /// column vector [nSamples, 1] - real* out = out_mat.getData(); - const real* x = in1_mat.getData(); - const real* y = in2_mat.getData(); - - /// in2 might only have one row or full rows - CHECK(in2_mat.getHeight() == 1LU || in2_mat.getHeight() == num_samples); - size_t inc = (in2_mat.getHeight() == 1LU) ? 
0 : dim; - for (size_t i = 0; i < num_samples; ++i, x += dim, y += inc) { - real square_sum_x = 0; - real square_sum_y = 0; - real xy = 0; - for (size_t j = 0; j < dim; ++j) { - square_sum_x += x[j] * x[j]; - square_sum_y += y[j] * y[j]; - xy += x[j] * y[j]; - } - CHECK(square_sum_x > 0 && square_sum_y > 0); - out[i] = scale * xy / (std::sqrt(square_sum_x) * std::sqrt(square_sum_y)); - } -} - -/** - * Cosine Similarity - * for each row i, - * out[i] = scale * cos(input1[i], input2[i]) - * = scale * /sqrt(|input1[i]|^2 * |input2[i]|^2) - * when input2 only has one row, then for each row i, - * out[i] = cos(input1[i], input2[0]) - * - * \param inputs[0] input matrix 1, size: nSamples * dim. - * \param inputs[1] input matrix 2, size: n2 * dim (n2 == 1 or n2 == nSamples). - * \param outputs[0] output matrix, size : nSamples * 1. - */ - -template -class CosSimForwardFunc : public FunctionBase { - void init(const FuncConfig& config) override { - scale_ = config.get("scale"); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(inputs.size(), 2UL); - CHECK_EQ(outputs.size(), 1UL); - - CHECK_EQ(inputs[0].shape().ndims(), 2UL); - CHECK_EQ(inputs[1].shape().ndims(), 2UL); - CHECK_EQ(outputs[0].shape().ndims(), 2UL); - - CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); - CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); - CHECK_EQ(outputs[0].shape()[1], 1UL); - - CHECK(outputs[0].data() && inputs[0].data() && inputs[1].data()); - - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - auto out_mat = outputs[0].matrix(); - const auto in1_mat = inputs[0].matrix(); - const auto in2_mat = inputs[1].matrix(); - - CosSimForward(out_mat, in1_mat, in2_mat, scale_); - } - - private: - real scale_; -}; - -/** - * Cosine Similarity Derivative for CpuMatrix - * - * \param in1_grad forward input grad 1, size: nSamples * dim. - * \param in2_grad forward input grad 2, - * size: n2 * dim (n2 == 1 or n2 == nSamples). 
- * - * \param out_grad backward loss output grad, size : nSamples * 1. - * \param out_val forward output value, size: nSamples * 1. - * \param in1_val forward input value 1, size: nSamples * dim. - * \param in2_val forward input value 2, - * size: n2 * dim (n2 == 1 or n2 == nSamples). - * \param scale, default 1.0 - */ -template <> -void CosSimBackward(const CpuMatrix& out_grad, - const CpuMatrix& out_val, - const CpuMatrix& in1_val, - const CpuMatrix& in2_val, - CpuMatrix& in1_grad, - CpuMatrix& in2_grad, - real scale) { - CHECK(out_grad.getData() && out_val.getData() && in1_val.getData() && - in2_val.getData() && in1_grad.getData() && in2_grad.getData()); - CHECK_EQ(out_val.useGpu_, false) << "Matrix type are GPU, CPU required"; - - const real* grad = out_grad.getData(); - const real* out = out_val.getData(); - const real* prev_out_x = in1_val.getData(); - const real* prev_out_y = in2_val.getData(); - real* prev_grad_x = in1_grad.getData(); - real* prev_grad_y = in2_grad.getData(); - - size_t num_samples = out_grad.getHeight(); - size_t dim = in1_val.getWidth(); - CHECK_EQ(in2_val.getHeight(), in2_grad.getHeight()); - CHECK(in2_val.getHeight() == 1LU || in2_val.getHeight() == num_samples); - size_t inc = (in2_val.getHeight() == 1LU) ? 
0 : dim; - for (size_t i = 0; i < num_samples; ++i, - prev_out_x += dim, - prev_out_y += inc, - prev_grad_x += dim, - prev_grad_y += inc) { - real square_sum_x = 0; - real square_sum_y = 0; - real xy = 0; - for (size_t j = 0; j < dim; ++j) { - square_sum_x += prev_out_x[j] * prev_out_x[j]; - square_sum_y += prev_out_y[j] * prev_out_y[j]; - xy += prev_out_x[j] * prev_out_y[j]; - } - CHECK(square_sum_x > 0 && square_sum_y > 0); - if (xy == 0) { - real reciprocal = - 1.0f / (std::sqrt(square_sum_x) * std::sqrt(square_sum_y)); - for (size_t j = 0; j < dim; ++j) { - prev_grad_x[j] += scale * grad[i] * prev_out_y[j] * reciprocal; - prev_grad_y[j] += scale * grad[i] * prev_out_x[j] * reciprocal; - } - } else { - real reciprocal_xy = 1.0f / xy; - real reciprocal_square_sum_x = 1.0f / square_sum_x; - real reciprocal_square_sum_y = 1.0f / square_sum_y; - for (size_t j = 0; j < dim; ++j) { - prev_grad_x[j] += - out[i] * grad[i] * (prev_out_y[j] * reciprocal_xy - - prev_out_x[j] * reciprocal_square_sum_x); - prev_grad_y[j] += - out[i] * grad[i] * (prev_out_x[j] * reciprocal_xy - - prev_out_y[j] * reciprocal_square_sum_y); - } - } - } -} - -/** - * Cosine Similarity backward Derivative - * - * \param outputs[0] forward input grad 1, size: nSamples * dim. - * \param outputs[1] forward input grad 2, - * size: n2 * dim (n2 == 1 or n2 == nSamples). - * - * \param inputs[0] backward loss output grad, size : nSamples * 1. - * \param inputs[1] forward output value, size: nSamples * 1. - * \param inputs[2] forward input value 1, size: nSamples * dim. - * \param inputs[3] forward input value 2, - * size: n2 * dim (n2 == 1 or n2 == nSamples). 
- */ -template -class CosSimBackwardFunc : public FunctionBase { - void init(const FuncConfig& config) override { - scale_ = config.get("scale"); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(inputs.size(), 4UL); - CHECK_EQ(outputs.size(), 2UL); - /// dim of out_grad and out_val == 1, column vector - CHECK_EQ(inputs[0].shape()[1], 1UL); - CHECK_EQ(inputs[1].shape()[1], 1UL); - /// nSamples of out_grad == out_val == in_val1 == in_grad1 - CHECK_EQ(inputs[1].shape()[0], inputs[0].shape()[0]); - CHECK_EQ(inputs[0].shape()[0], inputs[0].shape()[0]); - CHECK_EQ(outputs[0].shape()[0], inputs[0].shape()[0]); - /// dim of in1_val1 == in_val2 == in_grad1 == in_grad2 - CHECK_EQ(inputs[3].shape()[1], inputs[2].shape()[1]); - CHECK_EQ(outputs[0].shape()[1], inputs[2].shape()[1]); - CHECK_EQ(outputs[1].shape()[1], inputs[2].shape()[1]); - - CHECK(inputs[0].data() && inputs[1].data() && inputs[2].data() && - inputs[3].data() && outputs[0].data() && outputs[1].data()); - - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - CHECK_EQ(outputs[1].getArgType(), ADD_TO); - - const auto out_grad = inputs[0].matrix(); - const auto out_val = inputs[1].matrix(); - const auto in1_val = inputs[2].matrix(); - const auto in2_val = inputs[3].matrix(); - auto in1_grad = outputs[0].matrix(); - auto in2_grad = outputs[1].matrix(); - - CosSimBackward( - out_grad, out_val, in1_val, in2_val, in1_grad, in2_grad, scale_); - } - - private: - real scale_; -}; - -REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc); -REGISTER_TYPED_FUNC(CosSimBackward, CPU, CosSimBackwardFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc); -REGISTER_TYPED_FUNC(CosSimBackward, GPU, CosSimBackwardFunc); -#endif -} // namespace paddle diff --git a/paddle/legacy/function/CosSimOp.h b/paddle/legacy/function/CosSimOp.h deleted file mode 100644 index 2d377eb3bef4f6cf79945746c7dea4ff6f754fbd..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/function/CosSimOp.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Function.h" - -namespace paddle { - -/** - * \brief Cosine Similarity Forward. - * for each row i, - * out[i] = scale * cos(in1[i], in2[i]) - * = scale * \sum_j (in1[i][j] * in2[i][j]) / - * sqrt(sum_j (in1[i][j]^2) * sum_j (in2[i][j])^2) - * - * \param[out] output output value. - * \param[in] intput1 input value. - * \param[in] intput2 input value. - * \param[in] scale default 1.0. - * - */ -template -void CosSimForward(typename Tensor::Matrix& output, - const typename Tensor::Matrix& input1, - const typename Tensor::Matrix& input2, - real scale); - -/** - * \brief Cosine Similarity BackWard for Derivative. - * - * \param[in] output grad backward loss output grad. - * \param[in] output val forward-output value. - * \param[in] input val1 forward input value 1. - * \param[in] input val2 forward input value 2. - * \param[in/out] input grad forward input grad 1. - * \param[in/out] input grad forward input grad 2. - * \param[in] scale default 1.0. 
- * - */ -template -void CosSimBackward(const typename Tensor::Matrix& out_grad, - const typename Tensor::Matrix& out_value, - const typename Tensor::Matrix& in1_value, - const typename Tensor::Matrix& in2_value, - typename Tensor::Matrix& in1_grad, - typename Tensor::Matrix& in2_grad, - real scale); - -} // namespace paddle diff --git a/paddle/legacy/function/CosSimOpGpu.cu b/paddle/legacy/function/CosSimOpGpu.cu deleted file mode 100644 index 9fe50529ac4daeec4c7fe69d667c51dfc7512197..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CosSimOpGpu.cu +++ /dev/null @@ -1,248 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CosSimOp.h" -#include "hl_base.h" -#include "hl_device_functions.cuh" - -namespace paddle { - -template -__global__ void KeCosSim(real* output, - const real* input1, - const real* input2, - int width, - int input1_height, - int input2_height, - real scale) { - const int ty = blockIdx.y; - int tid = threadIdx.x; - - __shared__ real xx[block_size]; - __shared__ real yy[block_size]; - __shared__ real xy[block_size]; - - xx[tid] = 0.0; - yy[tid] = 0.0; - xy[tid] = 0.0; - __syncthreads(); - - input1 += ty * width; - if (input2_height > 1) { - input2 += ty * width; - } - for (int index = tid; index < width; index += block_size) { - real x = input1[index]; - real y = input2[index]; - xx[tid] += x * x; - yy[tid] += y * y; - xy[tid] += x * y; - } - __syncthreads(); - - for (int s = block_size / 2; s > 0; s >>= 1) { - if (tid < s) { - xx[tid] += xx[tid + s]; - yy[tid] += yy[tid + s]; - xy[tid] += xy[tid + s]; - } - __syncthreads(); - } - if (tid == 0) { - output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0])); - } -} - -void hlCossim(real* output, - const real* input1, - const real* input2, - size_t width, - size_t input1_height, - size_t input2_height, - real scale) { - CHECK_NOTNULL(output); - CHECK_NOTNULL(input1); - CHECK_NOTNULL(input2); - const int block_size = 256; - dim3 threads(block_size, 1); - dim3 grid(1, input1_height); - - KeCosSim<<>>( - output, input1, input2, width, input1_height, input2_height, scale); - CHECK_SYNC("hlCossim failed"); -} - -template <> -void CosSimForward(GpuMatrix& out_mat, - const GpuMatrix& in1_mat, - const GpuMatrix& in2_mat, - real scale) { - CHECK(out_mat.getData() && in1_mat.getData() && in2_mat.getData()); - CHECK(in1_mat.useGpu_ == true && in2_mat.useGpu_ == true) - << "Matrix type are not GPU"; - - size_t dim = in1_mat.getWidth(); - real* out = out_mat.getData(); - const real* x = in1_mat.getData(); - const real* y = in2_mat.getData(); - hlCossim(out, x, y, dim, in1_mat.getHeight(), in2_mat.getHeight(), scale); 
-} - -template -__global__ void KeCosSimDerivative(const real* grad, - const real* output, - const real* prev_out_x, - const real* prev_out_y, - real* prev_grad_x, - real* prev_grad_y, - size_t width, - size_t input1_height, - size_t input2_height, - real scale) { - const int ty = blockIdx.y; - int tid = threadIdx.x; - - __shared__ real xx[block_size]; - __shared__ real yy[block_size]; - __shared__ real xy[block_size]; - - xx[tid] = 0.0; - yy[tid] = 0.0; - xy[tid] = 0.0; - __syncthreads(); - - prev_out_x += ty * width; - prev_grad_x += ty * width; - if (input2_height > 1) { - prev_out_y += ty * width; - prev_grad_y += ty * width; - } - for (int index = tid; index < width; index += block_size) { - real x = prev_out_x[index]; - real y = prev_out_y[index]; - xx[tid] += x * x; - yy[tid] += y * y; - xy[tid] += x * y; - } - __syncthreads(); - - for (int s = block_size / 2; s > 0; s >>= 1) { - if (tid < s) { - xx[tid] += xx[tid + s]; - yy[tid] += yy[tid + s]; - xy[tid] += xy[tid + s]; - } - __syncthreads(); - } - if (xy[0] == 0) { - real reciprocal = 1.0 / (sqrt(xx[0]) * sqrt(yy[0])); - for (int index = tid; index < width; index += block_size) { - prev_grad_x[index] += scale * grad[ty] * prev_out_y[index] * reciprocal; - if (input2_height > 1) { - prev_grad_y[index] += scale * grad[ty] * prev_out_x[index] * reciprocal; - } else { - paddle::paddleAtomicAdd( - prev_grad_y + index, - scale * grad[ty] * prev_out_x[index] * reciprocal); - } - } - } else { - real reciprocalXY = 1.0 / xy[0]; - real reciprocalSquareSumX = 1.0 / xx[0]; - real reciprocalSquareSumY = 1.0 / yy[0]; - for (int index = tid; index < width; index += block_size) { - prev_grad_x[index] += - output[ty] * grad[ty] * (prev_out_y[index] * reciprocalXY - - prev_out_x[index] * reciprocalSquareSumX); - if (input2_height > 1) { - prev_grad_y[index] += - output[ty] * grad[ty] * (prev_out_x[index] * reciprocalXY - - prev_out_y[index] * reciprocalSquareSumY); - } else { - paddle::paddleAtomicAdd( - prev_grad_y + 
index, - output[ty] * grad[ty] * (prev_out_x[index] * reciprocalXY - - prev_out_y[index] * reciprocalSquareSumY)); - } - } - } -} - -void hlCossimDerivative(const real* grad, - const real* output, - const real* prev_out_x, - const real* prev_out_y, - real* prev_grad_x, - real* prev_grad_y, - size_t width, - size_t input1_height, - size_t input2_height, - real scale) { - CHECK_NOTNULL(grad); - CHECK_NOTNULL(output); - CHECK_NOTNULL(prev_out_x); - CHECK_NOTNULL(prev_out_y); - CHECK_NOTNULL(prev_grad_x); - CHECK_NOTNULL(prev_grad_y); - const int block_size = 256; - dim3 threads(block_size, 1); - dim3 grid(1, input1_height); - KeCosSimDerivative<<>>( - grad, - output, - prev_out_x, - prev_out_y, - prev_grad_x, - prev_grad_y, - width, - input1_height, - input2_height, - scale); - CHECK_SYNC("hlCossimDerivate failed"); -} - -template <> -void CosSimBackward(const GpuMatrix& out_grad, - const GpuMatrix& out_val, - const GpuMatrix& in1_val, - const GpuMatrix& in2_val, - GpuMatrix& in1_grad, - GpuMatrix& in2_grad, - real scale) { - CHECK(out_grad.getData() && out_val.getData() && in1_val.getData() && - in2_val.getData() && in1_grad.getData() && in2_grad.getData()); - CHECK(out_grad.useGpu_ && out_val.useGpu_ && in1_val.useGpu_ && - in2_val.useGpu_ && in1_grad.useGpu_ && in2_grad.useGpu_) - << "Matrix types are not equally GPU"; - - size_t dim = in1_val.getWidth(); - const real* grad = out_grad.getData(); - const real* out = out_val.getData(); - const real* prev_out_x = in1_val.getData(); - const real* prev_out_y = in2_val.getData(); - real* prev_grad_x = in1_grad.getData(); - real* prev_grad_y = in2_grad.getData(); - hlCossimDerivative(grad, - out, - prev_out_x, - prev_out_y, - prev_grad_x, - prev_grad_y, - dim, - in1_val.getHeight(), - in2_val.getHeight(), - scale); -} - -} // namespace paddle diff --git a/paddle/legacy/function/CosSimOpTest.cpp b/paddle/legacy/function/CosSimOpTest.cpp deleted file mode 100644 index 
31bb43e1baa9a6d890d1b8fe2abf15a07a7094c6..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CosSimOpTest.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "FunctionTest.h" -#include "paddle/legacy/math/Matrix.h" - -using namespace paddle; // NOLINT - -void testCosSimForward(size_t height_x, - size_t height_y, - size_t width, - real scale) { - CpuGpuFuncCompare test("CosSimForward", FuncConfig().set("scale", scale)); - // prepare input arguments - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width})); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width})); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}), - ASSIGN_TO); - // run Function - test.run(); -} - -void testCosSimBackward(size_t height_x, - size_t height_y, - size_t width, - real scale) { - CpuGpuFuncCompare test("CosSimBackward", FuncConfig().set("scale", scale)); - // prepare input arguments - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1})); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1})); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width})); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width})); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width}), - ADD_TO); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, 
TensorShape{height_y, width}), - ADD_TO); - // run Function - test.run(); -} - -TEST(Matrix, cosSim) { - for (auto height_x : {10, 100, 1000}) { - for (auto height_y : {1, height_x}) { - for (auto width : {10, 100, 1000}) { - for (auto scale : {1.0, 2.0}) { - testCosSimForward(height_x, height_y, width, scale); - testCosSimBackward(height_x, height_y, width, scale); - } - } - } - } -} diff --git a/paddle/legacy/function/CropOp.cpp b/paddle/legacy/function/CropOp.cpp deleted file mode 100644 index e22678822f06a323d1e6c17dce63d44d143484a3..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CropOp.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CropOp.h" -#include "paddle/legacy/function/TensorShape.h" -#include "paddle/legacy/math/Vector.h" - -namespace paddle { - -template <> -void Crop(real* outputs, - const real* inputs, - const TensorShape inShape, - const TensorShape outShape, - const FuncConfig& conf) { - std::vector crop_corner = - conf.get>("crop_corner"); - int cCrop = crop_corner[1]; - int hCrop = crop_corner[2]; - int wCrop = crop_corner[3]; - - int num = inShape[0]; - int inC = inShape[1]; - int inH = inShape[2]; - int inW = inShape[3]; - - int outC = outShape[1]; - int outH = outShape[2]; - int outW = outShape[3]; - - for (int n = 0; n < num; n++) { - for (int c = 0; c < outC; c++) { - for (int h = 0; h < outH; h++) { - int outoff = ((n * outC + c) * outH + h) * outW; - int inoff = ((n * inC + c + cCrop) * inH + h + hCrop) * inW + wCrop; - memcpy(outputs + outoff, inputs + inoff, outW * sizeof(real)); - } - } - } -} - -template <> -void CropGrad(const real* inGrad, - real* outGrad, - const TensorShape inShape, - const TensorShape outShape, - const FuncConfig& conf) { - std::vector crop_corner = - conf.get>("crop_corner"); - int cCrop = crop_corner[1]; - int hCrop = crop_corner[2]; - int wCrop = crop_corner[3]; - - int num = outShape[0]; - int outC = outShape[1]; - int outH = outShape[2]; - int outW = outShape[3]; - - int inC = inShape[1]; - int inH = inShape[2]; - int inW = inShape[3]; - - for (int n = 0; n < num; n++) { - for (int c = 0; c < inC; c++) { - for (int h = 0; h < inH; h++) { - int outoff = ((n * outC + c + cCrop) * outH + h + hCrop) * outW + wCrop; - int inoff = ((n * inC + c) * inH + h) * inW; - CpuVector inG = CpuVector(inW, const_cast(inGrad + inoff)); - CpuVector outG = CpuVector(inW, outGrad + outoff); - outG += inG; - } - } - } -} - -/** - * \brief Crop input according to the specify corner and shape. - * The input and output is a 4D tensor. In CropFunc, we only - * crop the 2nd to 4th dimension. 
- * - * Argument in this Function: - * \param pad_ A struct object contains the cropping corner and shape. - * \param inputs A 4D tensor, only one input. - * \param outputs A 4D tensor, the output value after cropping. - * - * For example, - * Input(2,2,2,3) = [ - * [ [[1,2,3], [3,4,5]], - * [[2,3,5], [1,6,7]] ], - * [ [[4,3,1], [1,8,7]], - * [[3,8,9], [2,3,5]] ] - * ] # the input shape is (2,2,2,3) - * - * pad_: if corner = (0,1,1) and crop_shape = (2,1,2) - * Output(2,2,1,2) = [ - * [ [[4,5]], - * [[6,7]] ], - * [ [[8,7]], - * [[3,5]] ] - * ] # the input shape is (2,2,2,3) - */ -template -class CropFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { conf_ = config; } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - - TensorShape inShape = inputs[0].shape(); - TensorShape outShape = outputs[0].shape(); - - Crop(outputs[0].data(), - inputs[0].data(), - inShape, - outShape, - conf_); - } - - private: - FuncConfig conf_; -}; - -/** - * \brief The backward propagation of cropping Function. - * - * Argument in this Function: - * \param crop_ The same meaning as it in CropFunc. - * \param inputs The gradient with respect to the output value of CropFunc. - * \param outputs The gradient with respect to the input value of CropFunc. 
- */ - -template -class CropGradFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { conf_ = config; } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - - TensorShape outShape = outputs[0].shape(); - TensorShape inShape = inputs[0].shape(); - - CropGrad(inputs[0].data(), - outputs[0].data(), - inShape, - outShape, - conf_); - } - - private: - FuncConfig conf_; -}; - -REGISTER_TYPED_FUNC(Crop, CPU, CropFunc); -REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(Crop, GPU, CropFunc); -REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/CropOp.h b/paddle/legacy/function/CropOp.h deleted file mode 100644 index 05d4b163b37d5434184924552255cfc1e4e6f061..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CropOp.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Function.h" - -namespace paddle { - -/** - * \brief This funtion crops inputs according to the specify start point and - *shape. - * - * \param[out] outputs save results. - * \param[in] inputs input data. - * \param[in] inShape the shape of input tensor. 
- * \param[in] conf the cropping config - */ -template -void Crop(real* outputs, - const real* inputs, - const TensorShape inShape, - const TensorShape outShape, - const FuncConfig& conf); - -/** - * \brief Cropping operation backward. - * - * \param[out] inGrad gradients of previous layer - * \param[in] outGrad output gradient - * \param[in] inShape the shape of input tensor. - * \param[in] conf the cropping config - */ -template -void CropGrad(const real* inGrad, - real* outGrad, - const TensorShape inShape, - const TensorShape outShape, - const FuncConfig& conf); -} // namespace paddle diff --git a/paddle/legacy/function/CropOpGpu.cu b/paddle/legacy/function/CropOpGpu.cu deleted file mode 100644 index 5615062433717911f147ecc65fb844a24a4ced4f..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CropOpGpu.cu +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CropOp.h" -#include "hl_base.h" - -namespace paddle { - -__global__ void KeCrop(real* outputs, - const real* inputs, - int inC, - int inH, - int inW, - int cropC, - int cropH, - int cropW, - int outC, - int outH, - int outW, - int nthreads) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < nthreads) { - const int w = idx % outW; - const int h = (idx / outW) % outH; - const int c = (idx / outW / outH) % outC; - const int n = idx / outW / outH / outC; - - const int off = ((n * inC + c + cropC) * inH + h + cropH) * inW + cropW + w; - outputs[idx] = inputs[off]; - } -} - -template <> -void Crop(real* outputs, - const real* inputs, - const TensorShape inShape, - const TensorShape outShape, - const FuncConfig& conf) { - std::vector crop_corner = - conf.get>("crop_corner"); - int cropC = crop_corner[1]; - int cropH = crop_corner[2]; - int cropW = crop_corner[3]; - - int num = inShape[0]; - int inC = inShape[1]; - int inH = inShape[2]; - int inW = inShape[3]; - - int outC = outShape[1]; - int outH = outShape[2]; - int outW = outShape[3]; - - size_t nth = num * outC * outH * outW; - int blockSize = 1024; - int gridSize = (nth + blockSize - 1) / blockSize; - - KeCrop<<>>(outputs, - inputs, - inC, - inH, - inW, - cropC, - cropH, - cropW, - outC, - outH, - outW, - nth); - CHECK_SYNC("Crop"); -} - -__global__ void KeCropDiff(const real* inGrad, - real* outGrad, - int inC, - int inH, - int inW, - int cropC, - int cropH, - int cropW, - int outC, - int outH, - int outW, - int nthreads) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < nthreads) { - const int w = idx % inW; - const int h = (idx / inW) % inH; - const int c = (idx / inW / inH) % inC; - const int n = idx / inW / inH / inC; - - const int off = - ((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w; - - outGrad[off] += inGrad[idx]; - } -} - -template <> -void CropGrad(const real* inGrad, - real* outGrad, - const TensorShape inShape, - const TensorShape 
outShape, - const FuncConfig& conf) { - std::vector crop_corner = - conf.get>("crop_corner"); - int cropC = crop_corner[1]; - int cropH = crop_corner[2]; - int cropW = crop_corner[3]; - - int num = outShape[0]; - int outC = outShape[1]; - int outH = outShape[2]; - int outW = outShape[3]; - - int inC = inShape[1]; - int inH = inShape[2]; - int inW = inShape[3]; - - size_t nth = num * inC * inH * inW; - int blockSize = 1024; - int gridSize = (nth + blockSize - 1) / blockSize; - - KeCropDiff<<>>(inGrad, - outGrad, - inC, - inH, - inW, - cropC, - cropH, - cropW, - outC, - outH, - outW, - nth); - CHECK_SYNC("CropGrad"); -} - -} // namespace paddle diff --git a/paddle/legacy/function/CropOpTest.cpp b/paddle/legacy/function/CropOpTest.cpp deleted file mode 100644 index 10c83a0321fd890186aa942ed1beae06814158d6..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CropOpTest.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "FunctionTest.h" - -namespace paddle { - -TEST(Crop, real) { - for (size_t numSamples : {5, 32}) { - for (size_t channels : {5, 5, 32}) { - for (size_t imgSizeH : {5, 33, 100}) { - for (size_t imgSizeW : {5, 32, 96}) { - VLOG(3) << " numSamples=" << numSamples << " channels=" << channels - << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; - for (bool test_grad : {false, true}) { - CpuGpuFuncCompare compare( - test_grad ? 
"CropGrad" : "Crop", - FuncConfig() - .set>("crop_corner", {0, 1, 1, 1}) - .set>("crop_shape", {0, 2, 3, 3})); - TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; - TensorShape outDims{numSamples, 2, 3, 3}; - compare.addInputs( - BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); - compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, - test_grad ? inDims : outDims, - test_grad ? ADD_TO : ASSIGN_TO), - test_grad ? ADD_TO : ASSIGN_TO); - compare.run(); - } - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/CrossMapNormalOp.cpp b/paddle/legacy/function/CrossMapNormalOp.cpp deleted file mode 100644 index f28703af00fa4bd7bebd98839cb077798083b61f..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CrossMapNormalOp.cpp +++ /dev/null @@ -1,344 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CrossMapNormalOp.h" -#include "paddle/legacy/math/Vector.h" - -namespace paddle { - -template <> -void CrossMapNormal(real* outputs, - real* denoms, - const real* inputs, - size_t numSamples, - size_t channels, - size_t height, - size_t width, - size_t size, - real scale, - real pow) { - size_t oneImage = height * width; - size_t oneSample = channels * oneImage; - - CpuVector outputsV(numSamples * oneSample, outputs); - CpuVector inputsV(numSamples * oneSample, const_cast(inputs)); - CpuVector denomsV(numSamples * oneSample, denoms); - - // f(x) = x * ( 1 + scale * SUM((x)^2) )^(-pow) - // x represents inputs - // f(x) represents outputs - // denoms save the intermediate result for backward - denomsV = denomsV.constant(1.0); - const int start = -((int)size - 1) / 2; - const int end = (int)size + start; - for (size_t i = 0; i < numSamples; i++) { - real* oneDenom = denoms + i * oneSample; - real* oneInput = const_cast(inputs) + i * oneSample; - for (int c = 0; c < (int)channels; c++) { - CpuVector denom(oneImage, oneDenom + c * oneImage); - for (int s = start; s < end; s++) { - if (c + s >= 0 && c + s < (int)channels) { - CpuVector input(oneImage, oneInput + (c + s) * oneImage); - denom += input.square() * scale; - } - } - } - } - - outputsV = inputsV * denomsV.pow(-pow); -} - -template <> -void CrossMapNormalGrad(real* inputsGrad, - const real* inputsValue, - const real* outputsValue, - const real* outputsGrad, - const real* denoms, - size_t numSamples, - size_t channels, - size_t height, - size_t width, - size_t size, - real scale, - real pow) { - size_t oneSample = channels * height * width; - std::function oneImage = [=](real* data, - size_t offset) { - return CpuVector(height * width, data + offset); - }; - - const int start = -((int)size) / 2; - const int end = (int)size + start; - const real ratio = -(real)2 * scale * pow; - for (size_t i = 0; i < numSamples; i++) { - size_t sOffset = i * oneSample; - real* oneInputGrad = inputsGrad + sOffset; 
- real* oneInputValue = const_cast(inputsValue) + sOffset; - real* oneDenom = const_cast(denoms) + sOffset; - real* oneOutputGrad = const_cast(outputsGrad) + sOffset; - real* oneOutputValue = const_cast(outputsValue) + sOffset; - - for (int c = 0; c < (int)channels; c++) { - size_t cOffset = c * height * width; - CpuVector inputGrad = oneImage(oneInputGrad, cOffset); - CpuVector inputValue = oneImage(oneInputValue, cOffset); - CpuVector denom = oneImage(oneDenom, cOffset); - CpuVector outputGrad = oneImage(oneOutputGrad, cOffset); - - inputGrad = inputGrad + denom.pow(-pow) * outputGrad; - for (int s = start; s < end; s++) { - if (c + s >= 0 && c + s < (int)channels) { - size_t offset = (c + s) * height * width; - CpuVector output = oneImage(oneOutputValue, offset); - CpuVector outputGrad = oneImage(oneOutputGrad, offset); - CpuVector denom = oneImage(oneDenom, offset); - - inputGrad += ((outputGrad * output * ratio) / denom) * inputValue; - } - } - } - } -} - -/** - * \brief Normalization with across maps. - * - * This Function comes from the paper - * "ImageNet Classification with Deep Convolutional Neural Networks". - * - * The original formula is: - * - * Input(i, x, y) - * Output(i, x, y) = ---------------------------------------------- - * -- upper - * (k + alpha * > (Input(j, x, y))^2) ^ (beta) - * -- j = lower - * - * upper is `min(C, c + N/2)` - * lower if `max(0, c - N/2)` - * - * Function implementation: - * - * inputs and outpus is NCHW format, while input.shape.ndims() is equal 4. - * And the meaning of each dimension(0-3) is respectively batch size, - * feature maps, rows and columns. - * - * Input and Output in the above formula is for each map(i) of one image, and - * Input(i, x, y), Output(i, x, y) represents an element in an image. - * - * C is the number of feature maps of one image, and N is a hyper-parameters - * is configured when Function is initialized. The sum in the denominator - * is the sum of the same position in the neighboring maps. 
- * - * In the implementation of Function, k is equal to 1, - * so Function has no argument for k. - * - * Function Arguments: - * - * \param size_ represent N - * \param scale_ represent alpha - * \param pow_ represent beta - * \param inputs[0] represent Input - * \param outputs[0] represent Output - * \param outputs[1] represent The denominator in the formula(except beta) - * - * Note: - * Save output[1] is to simplify the backward calculation. - * TODO, if only consider the forward calculation, we can optimize to - * remove the output[1]. - */ -template -class CrossMapNormalFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { - // function arguments - size_ = config.get("size"); - scale_ = config.get("scale"); - pow_ = config.get("pow"); - - // number of inputs and outputs - numInputs_ = 1; - numOutputs_ = 2; - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - check(inputs, outputs); - // ArgType check still on here, - // not sure whether it is better to put inside the check. - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - CHECK_EQ(outputs[1].getArgType(), ASSIGN_TO); - size_t batchSize = inputs[0].shape()[0]; - size_t maps = inputs[0].shape()[1]; - size_t rows = inputs[0].shape()[2]; - size_t columns = inputs[0].shape()[3]; - - CrossMapNormal(outputs[0].data(), - outputs[1].data(), - inputs[0].data(), - batchSize, - maps, - rows, - columns, - size_, - scale_, - pow_); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - - CHECK_EQ(inputs[0].shape().ndims(), (size_t)4); - CHECK(inputs[0].shape() == outputs[0].shape()); - CHECK(inputs[0].shape() == outputs[1].shape()); - } - - // Only need the shape of the input, can calculate the - // floating-point operation. 
- size_t ops(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ((size_t)numInputs_, inputs.size()); - size_t batchSize = inputs[0].shape()[0]; - size_t maps = inputs[0].shape()[1]; - size_t rows = inputs[0].shape()[2]; - size_t columns = inputs[0].shape()[3]; - - // number of floating-point operations - // an approximate value - size_t ops = batchSize * maps * rows * columns * (size_ * 2 + 3); - - return ops; - } - - private: - size_t size_; - real scale_; - real pow_; -}; - -/** - * \brief Backward calculation for normalization with across maps. - * - * Function implementation: - * - * The implementation of this Function is derived from the - * CrossMapNormalFunc implementation. - * - * InputGrad = OutputGrad * denoms ^ (-beta) - * -- upper - * + > (OutputGrad * OutputValue * (-2 * alpha * beta) / denoms) * InputValue - * -- lower - * - * The data of inputs/outputs format is the same as the forward interface - * and is NCHW. - * - * The upper and lower is the same as forward. The logic of the sum - * is also the same as forward. - * - * Function Arguments: - * - * \param size_ represent N - * \param scale_ represent alpha - * \param pow_ represent beta - * \param inputs[0] represent InputValue, inputs[0] of CrossMapNormalFunc - * \param inputs[1] represent OutputValue, outputs[0] of CrossMapNormalFunc - * \param inputs[2] represent OutputGrad - * \param inputs[3] represent denoms, outputs[1] of CrossMapNormalFunc - * This is the intermediate result that is - * preserved in the forward calculation. 
- * \param outputs[0] represent InputGrad - */ -template -class CrossMapNormalGradFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { - // function arguments - size_ = config.get("size"); - scale_ = config.get("scale"); - pow_ = config.get("pow"); - - // number of inputs and outputs - numInputs_ = 4; - numOutputs_ = 1; - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - check(inputs, outputs); - if (outputs[0].getArgType() != ADD_TO) { - // Currently, some algorithm implementations are ASSIGN_TO mode, - // if need to support the ADD_TO calculation, need to clear the output. - typename Tensor::Vector tmp( - outputs[0].shape().getElements(), outputs[0].data()); - tmp.zero(); - } - - size_t batchSize = inputs[0].shape()[0]; - size_t maps = inputs[0].shape()[1]; - size_t rows = inputs[0].shape()[2]; - size_t columns = inputs[0].shape()[3]; - - CrossMapNormalGrad(outputs[0].data(), - inputs[0].data(), - inputs[1].data(), - inputs[2].data(), - inputs[3].data(), - batchSize, - maps, - rows, - columns, - size_, - scale_, - pow_); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - - CHECK_EQ(inputs[0].shape().ndims(), (size_t)4); - CHECK(inputs[0].shape() == inputs[1].shape()); - CHECK(inputs[0].shape() == inputs[2].shape()); - CHECK(inputs[0].shape() == inputs[3].shape()); - CHECK(inputs[0].shape() == outputs[0].shape()); - } - - // Only need the shape of one input, can calculate the - // floating-point operation. 
- size_t ops(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_LT((size_t)1, inputs.size()); - size_t batchSize = inputs[0].shape()[0]; - size_t maps = inputs[0].shape()[1]; - size_t rows = inputs[0].shape()[2]; - size_t columns = inputs[0].shape()[3]; - - // number of floating-point operations - // an approximate value - size_t ops = batchSize * maps * rows * columns * (size_ * 4 + 2); - - return ops; - } - - private: - size_t size_; - real scale_; - real pow_; -}; - -REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc); -REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc); -REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/CrossMapNormalOp.h b/paddle/legacy/function/CrossMapNormalOp.h deleted file mode 100644 index bb9cdf20216c6fbd0e61f8098cfe78a4b4580a35..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CrossMapNormalOp.h +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Function.h" - -namespace paddle { - -/** - * \brief Cross map respose normalize forward. - * The data structure of image data is NCHW. - * - * \param[out] outputs output data. - * \param[in] denoms denoms buffer. - * \param[in] inputs input data. 
- * \param[in] numSamples batch size of input image. - * \param[in] channels number of channel. - * \param[in] height image height. - * \param[in] width image width. - * \param[in] size size. - * \param[in] scale scale. - * \param[in] pow scale. - * - */ -template -void CrossMapNormal(real* outputs, - real* denoms, - const real* inputs, - size_t numSamples, - size_t channels, - size_t height, - size_t width, - size_t size, - real scale, - real pow); - -/** - * \brief Cross map respose normalize backward. - * The data structure of image data is NCHW. - * - * \param[out] inputsGrad input grad. - * \param[in] inputsValue input value. - * \param[out] outputsValue output value. - * \param[out] outputsGrad output grad. - * \param[in] denoms denoms buffer. - * \param[in] numSamples batch size of input image. - * \param[in] channels number of channel. - * \param[in] height image height. - * \param[in] width image width. - * \param[in] size size. - * \param[in] scale scale. - * \param[in] pow scale. - * - */ -template -void CrossMapNormalGrad(real* inputsGrad, - const real* inputsValue, - const real* outputsValue, - const real* outputsGrad, - const real* denoms, - size_t numSamples, - size_t channels, - size_t height, - size_t width, - size_t size, - real scale, - real pow); - -} // namespace paddle diff --git a/paddle/legacy/function/CrossMapNormalOpGpu.cu b/paddle/legacy/function/CrossMapNormalOpGpu.cu deleted file mode 100644 index 938827610afbd8c1ea943365ac47b2cd55e2b5b1..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CrossMapNormalOpGpu.cu +++ /dev/null @@ -1,177 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "CrossMapNormalOp.h" -#include "hl_base.h" - -namespace paddle { - -__global__ void KeCMRNormFillScale(size_t imageSize, - const real* in, - real* scale, - size_t channels, - size_t height, - size_t width, - size_t size, - real alpha) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < imageSize) { - const int w = idx % width; - const int h = (idx / width) % height; - const int n = idx / width / height; - const int offset = (n * channels * height + h) * width + w; - - in += offset; - scale += offset; - const int step = height * width; - const int pre_pad = (size - 1) / 2; - const int post_pad = size - pre_pad - 1; - - real accum = 0; - int index = 0; - while (index < channels + post_pad) { - if (index < channels) { - accum += in[index * step] * in[index * step]; - } - if (index >= size) { - accum -= in[(index - size) * step] * in[(index - size) * step]; - } - if (index >= post_pad) { - scale[(index - post_pad) * step] = 1. 
+ accum * alpha; - } - ++index; - } - } -} - -__global__ void KeCMRNormOutput(size_t inputSize, - const real* in, - const real* scale, - real negative_beta, - real* out) { - const int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index < inputSize) { - out[index] = in[index] * pow(scale[index], negative_beta); - } -} - -template <> -void CrossMapNormal(real* outputs, - real* denoms, - const real* inputs, - size_t numSamples, - size_t channels, - size_t height, - size_t width, - size_t size, - real scale, - real pow) { - size_t imageSize = numSamples * height * width; - int blockSize = 1024; - int gridSize = (imageSize + 1024 - 1) / 1024; - KeCMRNormFillScale<<>>( - imageSize, inputs, denoms, channels, height, width, size, scale); - - size_t inputSize = numSamples * height * width * channels; - blockSize = 1024; - gridSize = (inputSize + 1024 - 1) / 1024; - KeCMRNormOutput<<>>( - inputSize, inputs, denoms, -pow, outputs); - - CHECK_SYNC("CrossMapNormal"); -} - -__global__ void KeCMRNormDiff(size_t imageSize, - const real* bottom_data, - const real* top_data, - const real* scale, - const real* top_diff, - size_t channels, - size_t height, - size_t width, - size_t size, - real negative_beta, - real cache_ratio, - real* bottom_diff) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < imageSize) { - const int w = idx % width; - const int h = (idx / width) % height; - const int n = idx / width / height; - const int offset = (n * channels * height + h) * width + w; - bottom_data += offset; - top_data += offset; - scale += offset; - top_diff += offset; - bottom_diff += offset; - - const int step = height * width; - const int pre_pad = size - (size + 1) / 2; - const int post_pad = size - pre_pad - 1; - - int index = 0; - real accum = 0; - while (index < channels + post_pad) { - if (index < channels) { - accum += top_diff[index * step] * top_data[index * step] / - scale[index * step]; - } - if (index >= size) { - accum -= top_diff[(index - size) * 
step] * - top_data[(index - size) * step] / scale[(index - size) * step]; - } - if (index >= post_pad) { - bottom_diff[(index - post_pad) * step] += - top_diff[(index - post_pad) * step] * - pow(scale[(index - post_pad) * step], negative_beta) - - cache_ratio * bottom_data[(index - post_pad) * step] * accum; - } - ++index; - } - } -} - -template <> -void CrossMapNormalGrad(real* inputsGrad, - const real* inputsValue, - const real* outputsValue, - const real* outputsGrad, - const real* denoms, - size_t numSamples, - size_t channels, - size_t height, - size_t width, - size_t size, - real scale, - real pow) { - size_t imageSize = numSamples * height * width; - - int blockSize = 1024; - int gridSize = (imageSize + 1024 - 1) / 1024; - KeCMRNormDiff<<>>(imageSize, - inputsValue, - outputsValue, - denoms, - outputsGrad, - channels, - height, - width, - size, - -pow, - 2.0f * pow * scale, - inputsGrad); - CHECK_SYNC("CrossMapNormalGrad"); -} - -} // namespace paddle diff --git a/paddle/legacy/function/CrossMapNormalOpTest.cpp b/paddle/legacy/function/CrossMapNormalOpTest.cpp deleted file mode 100644 index dec52adde22d57ea074eb4a9ad6a7ac2111751d3..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/CrossMapNormalOpTest.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "FunctionTest.h" - -namespace paddle { - -TEST(CrossMapNormal, real) { - for (size_t numSamples : {5}) { - for (size_t channels : {1, 5}) { - for (size_t imgSizeH : {5, 33}) { - for (size_t imgSizeW : {5, 32}) { - for (size_t size : {1, 3}) { - VLOG(3) << " numSamples=" << numSamples << " channels=" << channels - << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW - << " size=" << size; - - // init Test object - CpuGpuFuncCompare test("CrossMapNormal", - FuncConfig() - .set("size", size) - .set("scale", (real)1.5) - .set("pow", (real)0.5)); - // prepare input arguments - TensorShape shape{numSamples, channels, imgSizeH, imgSizeW}; - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - // run Function - test.run(); - } - } - } - } - } -} - -TEST(CrossMapNormalGrad, real) { - for (size_t numSamples : {5}) { - for (size_t channels : {1, 5}) { - for (size_t imgSizeH : {5, 33}) { - for (size_t imgSizeW : {5, 32}) { - for (size_t size : {1, 3}) { - VLOG(3) << " numSamples=" << numSamples << " channels=" << channels - << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW - << " size=" << size; - - CpuGpuFuncCompare test("CrossMapNormalGrad", - FuncConfig() - .set("size", size) - .set("scale", (real)1.5) - .set("pow", (real)0.5)); - TensorShape shape{numSamples, channels, imgSizeH, imgSizeW}; - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - // run Function - test.run(); - } - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/DepthwiseConvOp.cpp b/paddle/legacy/function/DepthwiseConvOp.cpp deleted file mode 100644 index 
958034e08e60c9a63d1c480bde7c84b760205ae4..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/DepthwiseConvOp.cpp +++ /dev/null @@ -1,305 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "DepthwiseConvOp.h" -#include "ConvOp.h" - -namespace paddle { - -template -class DepthwiseConvFunctor { - public: - void operator()(const T* inputData, - const T* filterData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* outputData) { - // TODO(zhaolong) : cpu implementation of depthwise convolution - } -}; - -template -class DepthwiseConvGradInputFunctor { - public: - void operator()(const T* outputGrad, - const T* filterData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* inputGrad) {} - // TODO(zhaolong) : cpu implementation of depthwise convolution -}; - -template -class DepthwiseConvGradFilterFunctor { - public: - void operator()(const T* outputGrad, - const T* inputData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int 
inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* colData, - T* filterGrad) {} - // TODO(zhaolong) : cpu implementation of depthwise convolution -}; - -/* - * \brief Forward calculation of depthwise convolution. - */ -template -class DepthwiseConvFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - check(inputs, outputs); - - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - size_t filterMultiplier = outputChannels / groups_; - CHECK_EQ(inputChannels, groups_); - - real* inputData = inputs[0].data(); - real* filterData = inputs[1].data(); - real* outputData = outputs[0].data(); - - DepthwiseConvFunctor depthwiseConv; - depthwiseConv(inputData, - filterData, - batchSize, - outputChannels, - outputHeight, - outputWidth, - inputChannels, - inputHeight, - inputWidth, - filterMultiplier, - filterHeight, - filterWidth, - strideH(), - strideW(), - paddingH(), - paddingW(), - outputData); - } -}; - -/* - * \brief Backward 
input calculation of depthwise convolution. - */ -template -class DepthwiseConvGradInputFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& output = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& input = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - check(inputs, outputs); - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - const TensorShape& output = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& input = outputs[0].shape(); - - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - size_t filterMultiplier = outputChannels / groups_; - CHECK_EQ(inputChannels, groups_); - - real* outputGrad = inputs[0].data(); - real* filterData = inputs[1].data(); - real* inputGrad = outputs[0].data(); - - DepthwiseConvGradInputFunctor depthwiseConvGradInput; - depthwiseConvGradInput(outputGrad, - filterData, - batchSize, - outputChannels, - outputHeight, - outputWidth, - inputChannels, - inputHeight, - inputWidth, - filterMultiplier, - filterHeight, - filterWidth, - strideH(), - strideW(), - paddingH(), - paddingW(), - inputGrad); - } -}; - -/* - * \brief Backward filter calculation of depthwise convolution. 
- */ -template -class DepthwiseConvGradFilterFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& output = inputs[0].shape(); - const TensorShape& input = inputs[1].shape(); - const TensorShape& filter = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - check(inputs, outputs); - const TensorShape& output = inputs[0].shape(); - const TensorShape& input = inputs[1].shape(); - const TensorShape& filter = outputs[0].shape(); - - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - size_t filterMultiplier = outputChannels / groups_; - CHECK_EQ(inputChannels, groups_); - - real* outputGrad = inputs[0].data(); - real* inputData = inputs[1].data(); - real* filterGrad = outputs[0].data(); - - int size = outputChannels * filterHeight * filterWidth * outputHeight * - outputWidth; - resizeBuffer(size); - real* colData = reinterpret_cast(memory_->getBuf()); - - DepthwiseConvGradFilterFunctor depthwiseConvGradFilter; - - depthwiseConvGradFilter(outputGrad, - inputData, - batchSize, - outputChannels, - outputHeight, - outputWidth, - inputChannels, - inputHeight, - inputWidth, - filterMultiplier, - filterHeight, - filterWidth, - strideH(), - strideW(), - paddingH(), - paddingW(), - colData, - filterGrad); - } -}; - -REGISTER_TYPED_FUNC(DepthwiseConv, CPU, DepthwiseConvFunction); 
-REGISTER_TYPED_FUNC(DepthwiseConvGradInput, - CPU, - DepthwiseConvGradInputFunction); -REGISTER_TYPED_FUNC(DepthwiseConvGradFilter, - CPU, - DepthwiseConvGradFilterFunction); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction); -REGISTER_TYPED_FUNC(DepthwiseConvGradInput, - GPU, - DepthwiseConvGradInputFunction); -REGISTER_TYPED_FUNC(DepthwiseConvGradFilter, - GPU, - DepthwiseConvGradFilterFunction); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/DepthwiseConvOp.h b/paddle/legacy/function/DepthwiseConvOp.h deleted file mode 100644 index 7837edd1c071980592b1cf36ecb69a3b7c12cc5e..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/DepthwiseConvOp.h +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "TensorType.h" - -namespace paddle { - -/** - *\brief Depthwise convolution forward. The outputData - * of depthwise convolution is same with ExpandConvLayer - * when groups equals inputChannels in ExpandConvLayer. - * - * \param[in] inputData input data. - * \param[in] filterData the Paramters of the depthwise conv layer.. - * \param[in] batchSize batch size of input data. - * \param[in] outputChannels channels of outputData. - * \param[in] outputHeight height of outputData. - * \param[in] outputWidth width of outputData. - * \param[in] inputChannels channels of inputData. 
- * \param[in] inputHeight height of inputData. - * \param[in] inputWidth width of inputData.. - * \param[in] filterMultiplier equals to outputChannels/groups_. - * \param[in] filterHeight height of filter. - * \param[in] filterWidth widht of filter. - * \param[in] strideH stride size in height direction. - * \param[in] strideW stride size in width direction. - * \param[in] paddingH padding size in height direction. - * \param[in] paddingW padding size in width direction. - * \param[out] outputData outputData. - * - */ -template -class DepthwiseConvFunctor { - public: - void operator()(const T* inputData, - const T* filterData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* outputData); -}; - -/** - *\brief Functor tot compute the depthwise convolution backprop w.r.t input. - * - * - * \param[in] outputGradData the grad data of output. - * \param[in] filterData the Paramters of the depthwise conv layer.. - * \param[in] batchSize batch size of input data. - * \param[in] outputChannels channels of outputData. - * \param[in] outputHeight height of outputData. - * \param[in] outputWidth width of outputData. - * \param[in] inputChannels channels of input data. - * \param[in] inputHeight height of inputData. - * \param[in] inputWidth width of inputData. - * \param[in] filterMultiplier equals to outputChannels/groups_. - * \param[in] filterHeight height of filter. - * \param[in] filterWidth widht of filter. - * \param[in] strideH stride size in height direction. - * \param[in] strideW stride size in width direction. - * \param[in] paddingH padding size in height direction. - * \param[in] paddingW padding size in width direction. - * \param[out] inputGrad the grad data of input. 
- * - */ -template -class DepthwiseConvGradInputFunctor { - public: - void operator()(const T* outputGrad, - const T* filterData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* inputGrad); -}; - -/** - *\brief Functor tot compute the depthwise convolution backprop w.r.t filter. - * - * \param[in] outputGradData the grad data of output. - * \param[in] inputData inputData. - * \param[in] batchSize batch size of input data. - * \param[in] outputChannels channels of outputData. - * \param[in] outputHeight height of outputData. - * \param[in] outputWidth width of outputData. - * \param[in] inputChannels channels of input data. - * \param[in] inputHeight height of inputData. - * \param[in] inputWidth width of inputData. - * \param[in] filterMultiplier equals to outputChannels/groups_. - * \param[in] filterHeight height of filter. - * \param[in] filterWidth widht of filter. - * \param[in] strideH stride size in height direction. - * \param[in] strideW stride size in width direction. - * \param[in] paddingH padding size in height direction. - * \param[in] paddingW padding size in width direction. - * \param[in] colData Auxiliary data when calculating filterGrad. - * \param[in] multiplierData Auxiliary data when calculating filterGrad. - * \param[out] filterGrad the grad data of filter. 
- * - */ -template -class DepthwiseConvGradFilterFunctor { - public: - void operator()(const T* outputGrad, - const T* inputData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* colData, - T* filterGrad); -}; - -} // namespace paddle diff --git a/paddle/legacy/function/DepthwiseConvOpGpu.cu b/paddle/legacy/function/DepthwiseConvOpGpu.cu deleted file mode 100644 index 17138cc56390d0fcfb15d4b77a56eda466bcfd3c..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/DepthwiseConvOpGpu.cu +++ /dev/null @@ -1,376 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "DepthwiseConvOp.h" -#include "paddle/legacy/math/BaseMatrix.h" - -namespace paddle { - -// CUDA kernel to compute the depthwise convolution forward pass -template -__global__ void ConvolutionDepthwiseForward(const int nthreads, - const T* const inputData, - const T* const filterData, - const int batchSize, - const int outputChannels, - const int outputHeight, - const int outputWidth, - const int inputChannels, - const int inputHeight, - const int inputWidth, - const int filterMultiplier, - const int filterHeight, - const int filterWidth, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - T* const outputData) { - int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; - - if (index < nthreads) { - const int batch = index / outputChannels / outputHeight / outputWidth; - const int c_out = (index / outputHeight / outputWidth) % outputChannels; - const int h_out = (index / outputWidth) % outputHeight; - const int w_out = index % outputWidth; - - const int c_in = c_out / filterMultiplier; - const T* weight = filterData + c_out * filterHeight * filterWidth; - T value = 0; - const int h_in_start = -paddingH + h_out * strideH; - const int w_in_start = -paddingW + w_out * strideW; - const int h_in_end = -paddingH + h_out * strideH + filterHeight - 1; - const int w_in_end = -paddingW + w_out * strideW + filterWidth - 1; - if ((h_in_start >= 0) && (h_in_end < inputHeight) && (w_in_start >= 0) && - (w_in_end < inputWidth)) { - for (int kh = 0; kh < filterHeight; ++kh) { - for (int kw = 0; kw < filterWidth; ++kw) { - const int h_in = -paddingH + h_out * strideH + kh; - const int w_in = -paddingW + w_out * strideW + kw; - const int offset = - ((batch * inputChannels + c_in) * inputHeight + h_in) * - inputWidth + - w_in; - value += (*weight) * inputData[offset]; - ++weight; - } - } - } else { - for (int kh = 0; kh < filterHeight; ++kh) { - for (int kw = 0; kw < filterWidth; ++kw) { - const int h_in = 
-paddingH + h_out * strideH + kh; - const int w_in = -paddingW + w_out * strideW + kw; - if ((h_in >= 0) && (h_in < inputHeight) && (w_in >= 0) && - (w_in < inputWidth)) { - const int offset = - ((batch * inputChannels + c_in) * inputHeight + h_in) * - inputWidth + - w_in; - value += (*weight) * inputData[offset]; - } - ++weight; - } - } - } - outputData[index] = value; - } -} - -// CUDA kernel to compute the depthwise convolution backprop w.r.t input. -template -__global__ void ConvolutionDepthwiseInputBackward(const int nthreads, - const T* const top_diff, - const T* const weight_data, - const int num, - const int outputChannels, - const int outputHeight, - const int outputWidth, - const int inputChannels, - const int inputHeight, - const int inputWidth, - const int filterMultiplier, - const int filterHeight, - const int filterWidth, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - T* const bottom_diff) { - int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; - if (index < nthreads) { - const int batch = index / inputChannels / inputHeight / inputWidth; - const int c_in = (index / inputHeight / inputWidth) % inputChannels; - const int h_in = (index / inputWidth) % inputHeight; - const int w_in = index % inputWidth; - - const int c_out_start = c_in * filterMultiplier; - - int h_out_start = (h_in - filterHeight + paddingH + strideH) / strideH; - h_out_start = 0 > h_out_start ? 0 : h_out_start; - int h_out_end = (h_in + paddingH) / strideH; - h_out_end = outputHeight - 1 < h_out_end ? outputHeight - 1 : h_out_end; - int w_out_start = (w_in - filterWidth + paddingW + strideW) / strideW; - w_out_start = 0 > w_out_start ? 0 : w_out_start; - int w_out_end = (w_in + paddingW) / strideW; - w_out_end = outputWidth - 1 < w_out_end ? 
outputWidth - 1 : w_out_end; - - T value = 0; - - for (int c_out = c_out_start; c_out < c_out_start + filterMultiplier; - c_out++) { - for (int h_out = h_out_start; h_out <= h_out_end; ++h_out) { - const int filter_h = h_in + paddingH - h_out * strideH; - for (int w_out = w_out_start; w_out <= w_out_end; ++w_out) { - const int filter_w = w_in + paddingW - w_out * strideW; - const int filter_offset = c_out * filterHeight * filterWidth + - filter_h * filterWidth + filter_w; - const int top_diff_offset = - ((batch * outputChannels + c_out) * outputHeight + h_out) * - outputWidth + - w_out; - value += top_diff[top_diff_offset] * weight_data[filter_offset]; - } - } - } - bottom_diff[index] += value; - } -} - -// CUDA kernel to compute the depthwise convolution backprop w.r.t filter. -template -__global__ void ConvolutionDepthwiseFilterBackward(const int num_i, - const int nthreads, - const T* const top_diff, - const T* const inputData, - const int num, - const int outputChannels, - const int outputHeight, - const int outputWidth, - const int inputChannels, - const int inputHeight, - const int inputWidth, - const int filterMultiplier, - const int filterHeight, - const int filterWidth, - const int strideH, - const int strideW, - const int paddingH, - const int paddingW, - T* const buffer_data) { - int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; - if (index < nthreads) { - const int h_out = (index / outputWidth) % outputHeight; - const int w_out = index % outputWidth; - const int kh = - (index / filterWidth / outputHeight / outputWidth) % filterHeight; - const int kw = (index / outputHeight / outputWidth) % filterWidth; - const int h_in = -paddingH + h_out * strideH + kh; - const int w_in = -paddingW + w_out * strideW + kw; - if ((h_in >= 0) && (h_in < inputHeight) && (w_in >= 0) && - (w_in < inputWidth)) { - const int c_out = - index / (filterHeight * filterWidth * outputHeight * outputWidth); - const int c_in = c_out / filterMultiplier; - 
const int batch = num_i; - const int top_offset = - ((batch * outputChannels + c_out) * outputHeight + h_out) * - outputWidth + - w_out; - const int bottom_offset = - ((batch * inputChannels + c_in) * inputHeight + h_in) * inputWidth + - w_in; - buffer_data[index] = top_diff[top_offset] * inputData[bottom_offset]; - } else { - buffer_data[index] = 0; - } - } -} - -template -class DepthwiseConvFunctor { - public: - void operator()(const T* inputData, - const T* filterData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* outputData) { - int outputSize = batchSize * outputChannels * outputHeight * outputWidth; - - size_t blocks = (outputSize + 1024 - 1) / 1024; - size_t blockX = 512; - size_t blockY = (blocks + 512 - 1) / 512; - dim3 threads(1024, 1); - dim3 grid(blockX, blockY); - - ConvolutionDepthwiseForward<<>>( - outputSize, - inputData, - filterData, - batchSize, - outputChannels, - outputHeight, - outputWidth, - inputChannels, - inputHeight, - inputWidth, - filterMultiplier, - filterHeight, - filterWidth, - strideH, - strideW, - paddingH, - paddingW, - outputData); - } -}; - -template -class DepthwiseConvGradInputFunctor { - public: - void operator()(const T* outputGrad, - const T* filterData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* inputGrad) { - int inputSize = batchSize * inputChannels * inputHeight * inputWidth; - - size_t blocks = (inputSize + 1024 - 1) / 1024; - size_t blockX = 512; - size_t blockY = (blocks + 512 - 1) / 512; - dim3 threads(1024, 1); - dim3 grid(blockX, blockY); - - ConvolutionDepthwiseInputBackward - 
// NOLINT_NEXT_LINE(whitespace/operators) - <<>>(inputSize, - outputGrad, - filterData, - batchSize, - outputChannels, - outputHeight, - outputWidth, - inputChannels, - inputHeight, - inputWidth, - filterMultiplier, - filterHeight, - filterWidth, - strideH, - strideW, - paddingH, - paddingW, - inputGrad); - } -}; - -template -class DepthwiseConvGradFilterFunctor { - public: - void operator()(const T* outputGrad, - const T* inputData, - int batchSize, - int outputChannels, - int outputHeight, - int outputWidth, - int inputChannels, - int inputHeight, - int inputWidth, - int filterMultiplier, - int filterHeight, - int filterWidth, - int strideH, - int strideW, - int paddingH, - int paddingW, - T* colData, - T* filterGrad) { - int colDataSize = outputChannels * filterHeight * filterWidth * - outputHeight * outputWidth; - - size_t blocks = (colDataSize + 1024 - 1) / 1024; - size_t blockX = 512; - size_t blockY = (blocks + 512 - 1) / 512; - dim3 threads(1024, 1); - dim3 grid(blockX, blockY); - BaseMatrix filterGradMatrix(outputChannels * filterHeight * filterWidth, - 1, - filterGrad, - false, - true); - - for (int i = 0; i < batchSize; i++) { - ConvolutionDepthwiseFilterBackward< - T><<>>(i, - colDataSize, - outputGrad, - inputData, - batchSize, - outputChannels, - outputHeight, - outputWidth, - inputChannels, - inputHeight, - inputWidth, - filterMultiplier, - filterHeight, - filterWidth, - strideH, - strideW, - paddingH, - paddingW, - colData); - int K = outputHeight * outputWidth; - int M = colDataSize / K; - - BaseMatrix colMatrix(M, K, colData, false, true); - filterGradMatrix.sumRows(colMatrix, (T)1.0, (T)1.0); - } - } -}; - -#ifdef PADDLE_TYPE_DOUBLE -template class DepthwiseConvGradInputFunctor; -template class DepthwiseConvFunctor; -template class DepthwiseConvGradFilterFunctor; -#else -template class DepthwiseConvGradInputFunctor; -template class DepthwiseConvFunctor; -template class DepthwiseConvGradFilterFunctor; -#endif - -} // namespace paddle diff --git 
a/paddle/legacy/function/DepthwiseConvOpTest.cpp b/paddle/legacy/function/DepthwiseConvOpTest.cpp deleted file mode 100644 index caf8f3597ffa283f2ae6fe8eb130df936c22a50c..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/DepthwiseConvOpTest.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "ConvOpTest.h" - -namespace paddle { - -#ifdef PADDLE_WITH_CUDA -TEST(DepthwiseConv, Forward) { - DepthwiseConvolution( - "GemmConv-CPU", "DepthwiseConv-GPU", forward); -} - -TEST(DepthwiseConv, BackwardInput) { - DepthwiseConvolution( - "GemmConvGradInput-CPU", "DepthwiseConvGradInput-GPU", backward_input); -} - -TEST(DepthwiseConv, BackwardFilter) { - DepthwiseConvolution( - "GemmConvGradFilter-CPU", "DepthwiseConvGradFilter-GPU", backward_filter); -} -#endif - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) - -TEST(DepthwiseConv, Forward) { - DepthwiseConvolution( - "GemmConv-CPU", "NeonDepthwiseConv-CPU", forward); -} - -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/EigenGemm.cpp b/paddle/legacy/function/EigenGemm.cpp deleted file mode 100644 index 5929c5c68ec818c2307580b06f76c63f04e0db5f..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/EigenGemm.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "paddle/legacy/function/EigenThreadDevice.h" - -namespace paddle { - -template -struct EigenBlasGemm { - typedef Eigen::TensorMap, - Eigen::Aligned> - EigenMatrix; - - static void compute(const bool transA, - const bool transB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc) { - Eigen::array sizeA; - if (transA) { - sizeA[0] = K; - sizeA[1] = M; - CHECK_EQ(M, lda); - } else { - sizeA[0] = M; - sizeA[1] = K; - CHECK_EQ(K, lda); - } - Eigen::array sizeB; - if (transB) { - sizeB[0] = N; - sizeB[1] = K; - CHECK_EQ(K, ldb); - } else { - sizeB[0] = K; - sizeB[1] = N; - CHECK_EQ(N, ldb); - } - Eigen::array sizeC = {{M, ldc}}; - Eigen::array offsetC = {{0, 0}}; - Eigen::array extentC = {{M, N}}; - - const EigenMatrix a(const_cast(A), sizeA); - const EigenMatrix b(const_cast(B), sizeB); - EigenMatrix c(C, sizeC); - - typedef typename Eigen::Tensor::DimensionPair DimPair; - Eigen::array dims; - dims[0] = DimPair(1, 0); - dims[0].first = transA ? 0 : 1; - dims[0].second = transB ? 
1 : 0; - - auto* device = EigenDeviceWarpper::device(); - if (N == ldc) { - if (alpha == T(1) && beta == T(0)) { - c.device(*device) = a.contract(b, dims); - } else if (alpha == T(1) && beta == T(1)) { - c.device(*device) += a.contract(b, dims); - } else { - c.device(*device) = alpha * a.contract(b, dims) + beta * c; - } - } else { - if (alpha == T(1) && beta == T(0)) { - c.slice(offsetC, extentC).device(*device) = a.contract(b, dims); - } else if (alpha == T(1) && beta == T(1)) { - c.slice(offsetC, extentC).device(*device) += a.contract(b, dims); - } else { - c.slice(offsetC, extentC).device(*device) = - alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC); - } - } - EigenDeviceWarpper::free_device(device); - } -}; - -#ifdef PADDLE_TYPE_DOUBLE -template struct EigenBlasGemm; -#else -template struct EigenBlasGemm; -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/EigenThreadDevice.h b/paddle/legacy/function/EigenThreadDevice.h deleted file mode 100644 index eb92251c827a26d55ca021c4418182bae28dd6a5..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/EigenThreadDevice.h +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
*/ - -#pragma once - -#if defined(__OSX__) || defined(__APPLE__) -#include -#include -#endif -#include "unsupported/Eigen/CXX11/Tensor" - -namespace paddle { - -#if defined(__ANDROID__) -int GetCpuCount() { - FILE* fp = fopen("/sys/devices/system/cpu/possible", "r"); - if (!fp) { - return 1; - } - int rank0, rank1; - int num = fscanf(fp, "%d-%d", &rank0, &rank1); - fclose(fp); - if (num < 2) return 1; - return rank1 + 1; -} -#elif defined(__OSX__) || defined(__APPLE__) -int GetCpuCount() { - int count = 0; - size_t len = sizeof(int); - sysctlbyname("hw.ncpu", &count, &len, NULL, 0); - return count > 0 ? count : 1; -} -#else -int GetCpuCount() { return 1; } -#endif - -class EigenDeviceWarpper { - public: // NOLINT -#if EIGEN_USE_THREADS - static Eigen::ThreadPoolDevice* device() { - const int num_cpus = GetCpuCount(); - const int num_threads = (num_cpus > 2) ? 2 : num_cpus; - static Eigen::ThreadPool tp(num_threads); - static Eigen::ThreadPoolDevice* device = - new Eigen::ThreadPoolDevice(&tp, num_threads); - return device; - } - - static void free_device(Eigen::ThreadPoolDevice* device) { - // do nothing - } -#else - static Eigen::DefaultDevice* device() { - Eigen::DefaultDevice* device = new Eigen::DefaultDevice; - return device; - } - - static void free_device(Eigen::DefaultDevice* device) { delete device; } -#endif -}; - -} // namespace paddle diff --git a/paddle/legacy/function/Function.cpp b/paddle/legacy/function/Function.cpp deleted file mode 100644 index 344358fd3d3d217b5b653d897391538049848858..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/Function.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Function.h" - -namespace paddle { - -void BufferArgs::addArg(const Matrix& arg, - const TensorShape& shape, - ArgType argType) { - _args_.push_back(new BufferArg(arg, shape, argType)); - addArg(*_args_.back()); -} - -void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) { - _args_.push_back(new SparseMatrixArg(arg, argType)); - addArg(*_args_.back()); -} - -void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) { - _args_.push_back(new SparseMatrixArg(arg, argType)); - addArg(*_args_.back()); -} - -void BufferArgs::addArg(const Matrix& matrix, - const IVector& vector, - ArgType argType) { - _args_.push_back(new SequenceArg(matrix, vector, argType)); - addArg(*_args_.back()); -} - -ClassRegistrar FunctionBase::funcRegistrar_; - -} // namespace paddle diff --git a/paddle/legacy/function/Function.h b/paddle/legacy/function/Function.h deleted file mode 100644 index bc5ef7e6f20b63a120a577ded876820aafecff19..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/Function.h +++ /dev/null @@ -1,214 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "BufferArg.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Any.h" -#include "paddle/legacy/utils/ClassRegistrar.h" -#include "paddle/legacy/utils/Error.h" - -namespace paddle { - -/** - * Function Configuration. - * The argument type of Function::init. - */ -class FuncConfig { - public: - template - T get(const std::string& key, Error* err = nullptr) const { - try { - return any_cast(valueMap_.at(key)); - } catch (std::exception& e) { // could be cast or out of range exception. - if (err) { - *err = Error(e.what()); - } else { - LOG(FATAL) << "Cannot get key " << key << " with error " << e.what(); - } - return T(); - } - } - - template - FuncConfig& set(const std::string& key, T v, Error* err = nullptr) { - auto it = valueMap_.find(key); - if (it != valueMap_.end()) { // already contains key. - if (err) { - *err = Error("Key %s is already set in FuncConfig", key.c_str()); - } else { - LOG(FATAL) << "Key " << key << " is already set in FuncConfig."; - } - return *this; - } - valueMap_[key] = any(v); - return *this; - } - - protected: - mutable std::unordered_map valueMap_; -}; - -/** - * Argument type for Function::calc(). - * A BufferArgs contains a set of BufferArg, - * because Function can have multiple inputs and outputs. - * - * addArg() with Matix object used to adapt Layer Argument. - * Will create a BufferArg object in addArg(), - * and free in destructor of BufferArgs. - * - * addArg() with BufferArg object, just save BufferArg object address, - * and the caller needs to guarantee the validity of the BufferArg object - * in the BufferArgs life time. 
- */ -class BufferArgs { - public: - BufferArgs() {} - - ~BufferArgs() { - for (auto arg : _args_) { - delete arg; - } - } - - size_t size() const { return args_.size(); } - - // add argument into BufferArgs - // Tensor can be Matrix, Vector, IVector. - // For inputs, do not need argType. - // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO. - void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) { - _args_.push_back(new BufferArg(arg, argType)); - addArg(*_args_.back()); - } - - void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) { - _args_.push_back(new BufferArg(arg, argType)); - addArg(*_args_.back()); - } - - void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) { - _args_.push_back(new BufferArg(arg, argType)); - addArg(*_args_.back()); - } - - // Add arg into BufferArgs and reshape the arg. - // - // For example, arg represents an image buffer, - // but Matrix can only represent a two-dimensional Tensor. - // So need an extra argument to describe the shape of the image buffer. - void addArg(const Matrix& arg, - const TensorShape& shape, - ArgType argType = UNSPECIFIED); - - void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); - void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); - - void addArg(const Matrix& matrix, - const IVector& vector, - ArgType argType = UNSPECIFIED); - - // get argument - const BufferArg& operator[](size_t num) const { - CHECK_LT(num, args_.size()); - return *args_[num]; - } - - void addArg(BufferArg& arg) { args_.push_back(&arg); } - - void addArg(SequenceIdArg& arg) { args_.push_back(&arg); } - - void addArg(SequenceArg& arg) { args_.push_back(&arg); } - - void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); } - - private: - std::vector args_; - // The BufferArg object is constructed and freed by BufferArgs. - std::vector _args_; -}; - -/** - * \brief Base class for Function. 
- * The basic Function implementation requires override init and calc interfaces. - * - * The caller needs to ensure the validity of the arguments - * during Function execution. - * - * Function inputs are readonly, Function outputs have two modes: ASSIGN_TO - * and ADD_TO. - * If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation - * result of Function assigned to the output BufferArg. - * If output.getArgType() == ADD_TO, this is add mode, and the calculation - * result of Function need added to the output BufferArg. - * - * For example: - * ASSIGN_TO: output = Function(inputs) - * ADD_TO: output += Function(inputs) - * If Function has more than one output, each output can have different modes. - */ -class FunctionBase { - public: - virtual ~FunctionBase() {} - - virtual void init(const FuncConfig& config) {} - - virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {} - - // This member function is used to check whether the BufferType and shape of - // the inputs and outputs arguments of the Function are correct. - // General calc function which will call this check to do arguments check. - // And before the calc called, the caller can also check their own arguments. - virtual void check(const BufferArgs& inputs, const BufferArgs& outputs) {} - - // Calculate the number of floating-point operations of this Function. - // The inputs and outputs arguments do not need to contain the actual data, - // only the shape. - // And some Functions have the same input and output shapes, - // so you may not need to enter the complete number of arguments. - // But entering the full arguments is always correct for this interface. 
- virtual size_t ops(const BufferArgs& inputs, const BufferArgs& outputs) { - return 0; - } - - int getNumInputs() const { return numInputs_; } - - int getNumOutputs() const { return numOutputs_; } - - static ClassRegistrar funcRegistrar_; - - protected: - // numInputs_ and numOutputs_ represents the maximum - // input and output supported by Function. - // Some functions are optimized for input and output, - // so when comparing the number of arguments, for these functions - // inputs.size() <= numInputs_ or outputs.size() <= numOutputs_ - size_t numInputs_; - size_t numOutputs_; -}; - -#define FUNC_NAME(typeName, deviceName) #typeName "-" #deviceName - -#define REGISTER_TYPED_FUNC(typeName, deviceName, className) \ - static InitFunction __reg_type_##typeName##deviceName([]() { \ - FunctionBase::funcRegistrar_ \ - .registerClass>( \ - FUNC_NAME(typeName, deviceName)); \ - }) - -} // namespace paddle diff --git a/paddle/legacy/function/FunctionTest.cpp b/paddle/legacy/function/FunctionTest.cpp deleted file mode 100644 index 1a0993e3135bcad9eb8a431e079ed56a267174ea..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/FunctionTest.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Function.h" -#include -#include "paddle/legacy/math/SparseMatrix.h" - -namespace paddle { - -template -void FunctionApi(typename Tensor::Matrix& output, - const typename Tensor::Matrix& input); - -template <> -void FunctionApi(CpuMatrix& output, const CpuMatrix& input) { - EXPECT_EQ(output.getHeight(), 100U); - EXPECT_EQ(output.getWidth(), 200U); -} - -template <> -void FunctionApi(GpuMatrix& output, const GpuMatrix& input) { - EXPECT_EQ(output.getHeight(), 10U); - EXPECT_EQ(output.getWidth(), 20U); -} - -template -void Function(const BufferArgs& arguments) { - const auto input = arguments[0].matrix(); - auto output = arguments[1].matrix(); - FunctionApi(output, input); -} - -TEST(Function, BufferArgs) { - CpuMatrix cpuInput = CpuMatrix(100, 200); - CpuMatrix cpuOutput = CpuMatrix(100, 200); - BufferArgs cpuArgments; - cpuArgments.addArg(cpuInput); - cpuArgments.addArg(cpuOutput); - Function(cpuArgments); - - GpuMatrix gpuInput = GpuMatrix(10, 20); - GpuMatrix gpuOutput = GpuMatrix(10, 20); - BufferArgs gpuArgments; - gpuArgments.addArg(gpuInput); - gpuArgments.addArg(gpuOutput); - Function(gpuArgments); -} - -/** - * Some tests case are used to check the consistency between the BufferArg type - * argument received by Function and the original type argument. 
- * - * Use Case: - * TEST() { - * Matrix matrix(...); - * CheckBufferArg lambda = [=](const BufferArg& arg) { - * // check matrix and arg are equivalent - * EXPECT_EQ(matrix, arg); - * } - * - * BufferArgs argments{matrix...}; - * std::vector checkFunc{lambda...}; - * testBufferArgs(argments, checkFunc); - * } - */ -typedef std::function CheckBufferArg; - -void testBufferArgs(const BufferArgs& inputs, - const std::vector& check) { - EXPECT_EQ(inputs.size(), check.size()); - for (size_t i = 0; i < inputs.size(); i++) { - check[i](inputs[i]); - } -} - -void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) { - EXPECT_EQ(inputs.size(), 1U); - check(inputs[0]); -} - -TEST(Arguments, Matrix) { - MatrixPtr matrix = Matrix::create(100, 200); - CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 2U); - EXPECT_EQ(arg.shape()[0], 100U); - EXPECT_EQ(arg.shape()[1], 200U); - EXPECT_EQ(arg.data(), matrix->getData()); - - EXPECT_EQ(arg.matrix().getHeight(), matrix->getHeight()); - EXPECT_EQ(arg.matrix().getWidth(), matrix->getWidth()); - EXPECT_EQ(arg.matrix().getData(), matrix->getData()); - }; - - BufferArgs argments; - argments.addArg(*matrix); - std::vector checkFunc; - checkFunc.push_back(check); - testBufferArgs(argments, checkFunc); -} - -TEST(Arguments, Vector) { - VectorPtr vector = Vector::create(100, false); - CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 1U); - EXPECT_EQ(arg.shape()[0], 100U); - EXPECT_EQ(arg.data(), vector->getData()); - - CpuVector inVector = arg.vector(); - EXPECT_EQ(inVector.getSize(), vector->getSize()); - EXPECT_EQ(inVector.getData(), vector->getData()); - }; - - BufferArgs argments; - argments.addArg(*vector); - std::vector checkFunc; - checkFunc.push_back(check); - testBufferArgs(argments, checkFunc); -} - -TEST(Arguments, CpuSparseMatrix) { - CpuSparseMatrix sparse(200, 300, 50); - CheckBufferArg check = [=](const BufferArg& arg) { - 
EXPECT_EQ(arg.shape().ndims(), 2U); - EXPECT_EQ(arg.shape()[0], 200U); - EXPECT_EQ(arg.shape()[1], 300U); - EXPECT_EQ(arg.data(), sparse.getData()); - // CHECK_EQ(arg.sparse().nnz(), 50); - // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT); - // CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE); - EXPECT_EQ(arg.sparse().getRowBuf(), sparse.getRows()); - EXPECT_EQ(arg.sparse().getColBuf(), sparse.getCols()); - }; - - BufferArgs argments; - argments.addArg(sparse); - std::vector checkFunc; - checkFunc.push_back(check); - testBufferArgs(argments, checkFunc); -} - -TEST(Arguments, BufferArg) { - BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3}); - CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 3U); - EXPECT_EQ(arg.shape()[0], 1U); - EXPECT_EQ(arg.shape()[1], 2U); - EXPECT_EQ(arg.shape()[2], 3U); - }; - - BufferArgs argments; - argments.addArg(arg); - testBufferArgs(argments, check); -} - -} // namespace paddle diff --git a/paddle/legacy/function/FunctionTest.h b/paddle/legacy/function/FunctionTest.h deleted file mode 100644 index 6f01981a34bff0a7d9bb04d0a0012117ecf5f803..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/FunctionTest.h +++ /dev/null @@ -1,410 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Function.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/math/tests/TensorCheck.h" -#include "paddle/testing/TestUtil.h" - -namespace paddle { - -typedef std::shared_ptr BufferArgPtr; - -namespace test { -template -struct Allocator; - -template <> -struct Allocator { - using type = CpuMemoryHandle; -}; - -template <> -struct Allocator { - using type = GpuMemoryHandle; -}; - -// Copy argument1 to argument2 -template -class CopyArgument { - public: - void operator()(const BufferArg& arg1, BufferArg& arg2) { - CHECK_EQ(arg1.valueType(), arg2.valueType()); - CHECK_LE(arg1.shape().getElements(), arg2.shape().getElements()); - - if (arg1.valueType() == VALUE_TYPE_INT32) { - IVectorPtr vector1 = - IVector::create((int*)arg1.data(), - arg1.shape().getElements(), - DType1 == DEVICE_TYPE_CPU ? false : true); - IVectorPtr vector2 = - IVector::create((int*)arg2.data(), - arg2.shape().getElements(), - DType2 == DEVICE_TYPE_CPU ? false : true); - vector2->copyFrom(*vector1); - } else { - VectorPtr vector1 = - Vector::create((real*)arg1.data(), - arg1.shape().getElements(), - DType1 == DEVICE_TYPE_CPU ? false : true); - VectorPtr vector2 = - Vector::create((real*)arg2.data(), - arg2.shape().getElements(), - DType2 == DEVICE_TYPE_CPU ? false : true); - vector2->copyFrom(*vector1); - } - } -}; -} // namespace test - -/** - * \brief A class for comparing two Functions of different implementations. - * For example, can be used to compare the CPU and GPU implementation - * of the function is consistent. - * - * Use case: - * // Initializes a test object, the corresponding cpu and gpu Function - * // are constructed according to FunctionName and FuncConfig. - * CpuGpuFuncCompare test(FunctionName, FuncConfig); - * // Prepare inputs and outputs arguments. - * // Here the input and output can not contain real data, - * // only contains the argument type and shape. 
- * test.addInputs(input1); - * test.addInputs(input2); - * test.addOutputs(output1); - * test.addOutputs(output2); - * // Run. - * // Will according to the type and shape of arguments(inputs_/outputs_), - * // automatic initialization cpu and gpu function required arguments - * // (cpuInputs_/cpuOutputs_/gpuInputs_/gpuOutputs_). - * // Call the CPU and GPU Function calculation results. - * // Compares CPU and GPU calculation results for consistency. - * test.run(); - */ -template -class Compare2Function { - public: - typedef typename test::Allocator::type Allocator1; - typedef typename test::Allocator::type Allocator2; - typedef typename Tensor::Vector Vector1; - typedef typename Tensor::Vector Vector2; - typedef typename Tensor::SparseMatrix SparseMatrix1; - typedef typename Tensor::SparseMatrix SparseMatrix2; - - Compare2Function(const std::string& name1, - const std::string& name2, - const FuncConfig& config) - : function1_(FunctionBase::funcRegistrar_.createByType(name1)), - function2_(FunctionBase::funcRegistrar_.createByType(name2)) { - function1_->init(config); - function2_->init(config); - initArgsCallback_ = nullptr; - } - - ~Compare2Function() {} - - // input need only contains shape, do not contains data. 
- void addInputs(const BufferArg& input) { - size_t size = - input.shape().getElements() * sizeOfValuType(input.valueType()); - func1Memory_.emplace_back(std::make_shared(size)); - func2Memory_.emplace_back(std::make_shared(size)); - - func1Inputs_.emplace_back(std::make_shared( - func1Memory_.back()->getBuf(), input.valueType(), input.shape())); - func2Inputs_.emplace_back(std::make_shared( - func2Memory_.back()->getBuf(), input.valueType(), input.shape())); - } - - // assume one copy of sequence is shared by different SequenceArgs - void addSequence(const SequenceIdArg& input) { - CHECK_EQ(input.shape().ndims(), 1UL); - size_t batchSize = input.shape()[0]; - size_t numSeqs = batchSize / 10 + 1; - size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); - func1Memory_.emplace_back(std::make_shared(sizeId)); - func2Memory_.emplace_back(std::make_shared(sizeId)); - seq1_ = std::make_shared(func1Memory_.back()->getBuf(), - TensorShape{numSeqs + 1}); - seq2_ = std::make_shared(func2Memory_.back()->getBuf(), - TensorShape{numSeqs + 1}); - /// init sequence Id - initArg(*seq1_, batchSize); - - copyArg_(*seq1_, *seq2_); - } - - void addInputs(const SequenceArg& input) { - CHECK_EQ(input.shape().ndims(), 2UL); - size_t batchSize = input.shape()[0]; - if (!seq1_ || !seq2_) { // sequence not exist - addSequence(SequenceIdArg(TensorShape{batchSize})); - } - - size_t size = - input.shape().getElements() * sizeOfValuType(input.valueType()); - func1Memory_.emplace_back(std::make_shared(size)); - func2Memory_.emplace_back(std::make_shared(size)); - - /// SequenceArg - func1Inputs_.emplace_back( - std::make_shared(func1Memory_.back()->getBuf(), - input.valueType(), - input.shape(), - *seq1_)); - func2Inputs_.emplace_back( - std::make_shared(func2Memory_.back()->getBuf(), - input.valueType(), - input.shape(), - *seq2_)); - } - - void registerInitCallback(std::function callback) { - initArgsCallback_ = callback; - } - - // output need only contains shape, do not contains 
data. - void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) { - size_t size = - output.shape().getElements() * sizeOfValuType(output.valueType()); - func1Memory_.emplace_back(std::make_shared(size)); - func2Memory_.emplace_back(std::make_shared(size)); - - func1Outputs_.emplace_back( - std::make_shared(func1Memory_.back()->getBuf(), - output.valueType(), - output.shape(), - argType)); - func2Outputs_.emplace_back( - std::make_shared(func2Memory_.back()->getBuf(), - output.valueType(), - output.shape(), - argType)); - } - - /// add and init output sparse matrix - void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) { - sparse1_ = std::make_shared( - output.shape()[0], - output.shape()[1], - output.nnz(), - static_cast(output.dataType()), - static_cast(output.dataFormat())); - - sparse2_ = std::make_shared( - output.shape()[0], - output.shape()[1], - output.nnz(), - static_cast(output.dataType()), - static_cast(output.dataFormat())); - - /// init sparse matrix - hl_stream_t stream(HPPL_STREAM_1); - sparse1_->randomizeUniform(); - sparse2_->copyFrom(*sparse1_, stream); - hl_stream_synchronize(stream); - - func1Outputs_.emplace_back( - std::make_shared(*sparse1_, argType)); - func2Outputs_.emplace_back( - std::make_shared(*sparse2_, argType)); - } - - void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) { - CHECK_EQ(output.shape().ndims(), 2UL); - size_t batchSize = output.shape()[0]; - - if (!seq1_ || !seq2_) { // sequence not exist - addSequence(SequenceIdArg(TensorShape{batchSize})); - } - size_t size = - output.shape().getElements() * sizeOfValuType(output.valueType()); - func1Memory_.emplace_back(std::make_shared(size)); - func2Memory_.emplace_back(std::make_shared(size)); - - /// SequenceArg - func1Outputs_.emplace_back( - std::make_shared(func1Memory_.back()->getBuf(), - output.valueType(), - output.shape(), - *seq1_, - argType)); - func2Outputs_.emplace_back( - 
std::make_shared(func2Memory_.back()->getBuf(), - output.valueType(), - output.shape(), - *seq2_, - argType)); - } - - void addInputs(const SparseMatrixArg& input) { - sparse1_ = std::make_shared( - input.shape()[0], - input.shape()[1], - input.nnz(), - static_cast(input.dataType()), - static_cast(input.dataFormat())); - - sparse2_ = std::make_shared( - input.shape()[0], - input.shape()[1], - input.nnz(), - static_cast(input.dataType()), - static_cast(input.dataFormat())); - - /// init sparse matrix - hl_stream_t stream(HPPL_STREAM_1); - sparse1_->randomizeUniform(); - sparse2_->copyFrom(*sparse1_, stream); - hl_stream_synchronize(stream); - - func1Inputs_.emplace_back(std::make_shared(*sparse1_)); - func2Inputs_.emplace_back(std::make_shared(*sparse2_)); - } - - void run() { - // prepare cpu/gpu arguments - initInputs(); - - initOutputs(); - // function calculate - auto callFunction = [](FunctionBase* function, - std::vector& inputs, - std::vector& outputs) { - BufferArgs inArgs; - BufferArgs outArgs; - for (auto arg : inputs) { - inArgs.addArg(*arg); - } - for (auto arg : outputs) { - outArgs.addArg(*arg); - } - function->calc(inArgs, outArgs); - }; - - callFunction(function1_.get(), func1Inputs_, func1Outputs_); - callFunction(function2_.get(), func2Inputs_, func2Outputs_); - - // check outputs - compareOutputs(); - } - - std::shared_ptr getFunction1() const { return function1_; } - - std::shared_ptr getFunction2() const { return function2_; } - - protected: - // only init cpu argument, gpu argument copy from cpu argument. 
- void initArg(BufferArg& arg) { - Vector1 vector(arg.shape().getElements(), (real*)arg.data()); - vector.uniform(0.001, 1); - } - - void initArg(SequenceArg& arg) { - /// init only matrix - Vector1 vector(arg.shape().getElements(), (real*)arg.data()); - vector.uniform(0.001, 1); - } - - void initArg(SequenceIdArg& arg, size_t batchSize) { - size_t numSeqs = arg.numSeqs(); - int* buf = reinterpret_cast(arg.data()); - int pos = 0; - size_t maxLen = 2 * batchSize / numSeqs; - for (int i = 0; i < (int)numSeqs; ++i) { - int len = 1 + uniformRandom(std::min( - maxLen, batchSize - pos - numSeqs + i)); - buf[i] = pos; - pos += len; - VLOG(1) << " len=" << len; - } - buf[numSeqs] = batchSize; - } - - void initInputs() { - for (size_t i = 0; i < func1Inputs_.size(); i++) { - if (func1Inputs_[i]->isSparseArg()) { - continue; /// sparse matrix already init - } - - if (func1Inputs_[i]->isSequenceArg()) { - initArg(dynamic_cast(*func1Inputs_[i])); - } else { - initArg(*func1Inputs_[i]); - } - - if (initArgsCallback_ != nullptr) { - initArgsCallback_(*func1Inputs_[i], i); - } - - copyArg_(*func1Inputs_[i], *func2Inputs_[i]); - } - } - - void initOutputs() { - for (size_t i = 0; i < func1Outputs_.size(); i++) { - if (func1Outputs_[i]->isSparseArg()) { - continue; /// sparse matrix already init - } - - if (func1Outputs_[i]->isSequenceArg()) { - initArg(dynamic_cast(*func1Outputs_[i])); - } else { - initArg(*func1Outputs_[i]); - } - - copyArg_(*func1Outputs_[i], *func2Outputs_[i]); - } - } - - void compareOutputs() { - for (size_t i = 0; i < func1Outputs_.size(); i++) { - // TODO, Need a BufferCheck used to compare the two buffers. 
- const auto cpu = func1Outputs_[i]; - const auto gpu = func2Outputs_[i]; - CHECK_EQ(cpu->numElements(), gpu->numElements()); - Vector1 cpuVector(cpu->numElements(), (real*)cpu->data()); - Vector2 gpuVector(gpu->numElements(), (real*)gpu->data()); - autotest::TensorCheckErr(cpuVector, gpuVector); - } - } - - protected: - std::shared_ptr function1_; - std::shared_ptr function2_; - std::vector> func1Memory_; - std::vector> func2Memory_; - std::vector func1Inputs_; - std::vector func1Outputs_; - std::vector func2Inputs_; - std::vector func2Outputs_; - std::shared_ptr sparse1_; - std::shared_ptr sparse2_; - std::shared_ptr seq1_; - std::shared_ptr seq2_; - test::CopyArgument copyArg_; - std::function initArgsCallback_; -}; - -class CpuGpuFuncCompare - : public Compare2Function { - public: - CpuGpuFuncCompare(const std::string& name, const FuncConfig& config) - : Compare2Function(name + "-CPU", name + "-GPU", config) {} - - ~CpuGpuFuncCompare() {} -}; - -} // namespace paddle diff --git a/paddle/legacy/function/GemmConvOp.cpp b/paddle/legacy/function/GemmConvOp.cpp deleted file mode 100644 index 5a81315661dc2843a648315ca4a6b590f217a657..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/GemmConvOp.cpp +++ /dev/null @@ -1,522 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ConvOp.h" -#include "GemmFunctor.h" -#include "Im2Col.h" -#include "paddle/legacy/math/MemoryHandle.h" - -namespace paddle { - -/* - * \brief Forward calculation of convolution. - */ -template -class GemmConvFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - check(inputs, outputs); - // TODO(hedaoyuan): Need to define some index macros, - // to avoid useing 0 and 1. - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - - real beta; - if (outputs[0].getArgType() == ADD_TO) { - beta = 1.0; - } else { - beta = 0.0; - } - - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - - real* inputData = inputs[0].data(); - real* filterData = inputs[1].data(); - real* outputData = outputs[0].data(); - bool needIm2col = isNeedIm2col(filter); - - TensorShape imShape = - TensorShape({inputChannels / groups_, inputHeight, inputWidth}); - - TensorShape colShape; - real* colData = NULL; - - if (needIm2col) { - colShape = TensorShape({inputChannels / groups_, - filterHeight, - filterWidth, - outputHeight, - outputWidth}); - resizeBuffer(colShape.getElements()); - colData = 
reinterpret_cast(memory_->getBuf()); - } - - Im2ColFunctor im2col; - size_t inputOffset = imShape.getElements(); - size_t outputOffset = - (outputChannels / groups_) * outputHeight * outputWidth; - size_t filterOffset = filter.getElements() / groups_; - - for (size_t i = 0; i < batchSize; i++) { - for (size_t g = 0; g < groups_; g++) { - if (needIm2col) { - im2col(inputData + g * inputOffset, - imShape, - colData, - colShape, - strideH(), - strideW(), - paddingH(), - paddingW(), - dilationH(), - dilationW()); - } else { - colData = inputData + g * inputOffset; - } - int M = outputChannels / groups_; - int N = outputHeight * outputWidth; - int K = inputChannels / groups_ * filterHeight * filterWidth; - BlasGemm::compute(false, - false, - M, - N, - K, - 1.0f, - filterData + g * filterOffset, - K, - colData, - N, - beta, - outputData + g * outputOffset, - N); - } - inputData += inputChannels * inputHeight * inputWidth; - outputData += outputChannels * outputHeight * outputWidth; - } - } -}; - -#ifdef PADDLE_MOBILE_INFERENCE - -/* - * \brief Forward calculation of convolution, optimized for mobile. - */ -template -class GemmConvMobileFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - check(inputs, outputs); - // TODO(hedaoyuan): Need to define some index macros, - // to avoid useing 0 and 1. 
- const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - - real beta; - if (outputs[0].getArgType() == ADD_TO) { - beta = 1.0; - } else { - beta = 0.0; - } - - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - - real* inputData = inputs[0].data(); - real* filterData = inputs[1].data(); - real* outputData = outputs[0].data(); - real* colData = NULL; - bool needIm2col = isNeedIm2col(filter); - - TensorShape imShape = - TensorShape({inputChannels / groups_, inputHeight, inputWidth}); - TensorShape colShape; - - // Max col matrix width 4096, Max col matrix size 4M. - size_t outputHeightSteps = - std::min(std::max(4096 / outputWidth, (size_t)1), outputHeight); - size_t maxColWidth = outputHeightSteps * outputWidth; - size_t channelSteps = - std::min(std::max((1048576 / maxColWidth) / filterHeight * filterWidth, - (size_t)1), - inputChannels / groups_); - size_t maxColHeight = channelSteps * filterHeight * filterWidth; - - if (needIm2col) { - colShape = TensorShape({inputChannels / groups_, - filterHeight, - filterWidth, - outputHeight, - outputWidth}); - - resizeBuffer(maxColHeight * maxColWidth * sizeof(real)); - colData = reinterpret_cast(memory_->getBuf()); - } - - Im2ColMobileFunctor im2col; - size_t inputOffset = imShape.getElements(); - size_t outputOffset = - (outputChannels / groups_) * outputHeight * outputWidth; - size_t filterOffset = filter.getElements() / groups_; - - int nStride = outputHeight * outputWidth; - int kStride = inputChannels / groups_ * filterHeight * filterWidth; - for (size_t i = 0; i < batchSize; i++) { - filterData = inputs[1].data(); - for (size_t g = 0; g < groups_; 
g++) { - if (needIm2col) { - real beta_ = beta; - for (size_t ic = 0; ic < inputChannels / groups_; - ic += channelSteps) { - int channels = std::min(inputChannels / groups_ - ic, channelSteps); - for (size_t oh = 0; oh < outputHeight; oh += outputHeightSteps) { - int height = std::min(outputHeight - oh, outputHeightSteps); - - int M = outputChannels / groups_; - int N = height * outputWidth; - int K = channels * filterHeight * filterWidth; - // im2col - im2col(inputData, - imShape, - colData, - colShape, - strideH(), - strideW(), - paddingH(), - paddingW(), - dilationH(), - dilationW(), - channels, - oh, - height, - N); - - // gemm - BlasGemm::compute( - false, - false, - M, - N, - K, - 1.0f, - filterData + ic * filterHeight * filterWidth, - kStride, - colData, - N, - beta_, - outputData + oh * outputWidth, - nStride); - } - beta_ = 1.0; - } - } else { - int M = outputChannels / groups_; - int N = outputHeight * outputWidth; - int K = inputChannels / groups_ * filterHeight * filterWidth; - BlasGemm::compute(false, - false, - M, - N, - K, - 1.0f, - filterData, - K, - inputData, - N, - beta, - outputData, - N); - } - inputData += inputOffset; - outputData += outputOffset; - filterData += filterOffset; - } - } - - memory_.reset(); - } -}; - -#endif - -/* - * \brief Backward input calculation of convolution. 
- */ -template -class GemmConvGradInputFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& output = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& input = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - check(inputs, outputs); - // Since the implementation of Col2ImFunctor is ADD_TO, - // this function only supports ADD_TO mode. - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - const TensorShape& output = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& input = outputs[0].shape(); - - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - - real* outputGrad = inputs[0].data(); - real* filterData = inputs[1].data(); - real* inputGrad = outputs[0].data(); - bool needIm2col = isNeedIm2col(filter); - - TensorShape imShape = - TensorShape({inputChannels / groups_, inputHeight, inputWidth}); - - TensorShape colShape; - real* colData = NULL; - - if (needIm2col) { - colShape = TensorShape({inputChannels / groups_, - filterHeight, - filterWidth, - outputHeight, - outputWidth}); - resizeBuffer(colShape.getElements()); - colData = reinterpret_cast(memory_->getBuf()); - } - - Col2ImFunctor col2im; - size_t inputOffset = imShape.getElements(); - size_t outputOffset = - (outputChannels / groups_) * outputHeight * outputWidth; - size_t filterOffset = filter.getElements() / groups_; 
- - for (size_t i = 0; i < batchSize; i++) { - for (size_t g = 0; g < groups_; g++) { - int K = outputChannels / groups_; - int N = outputHeight * outputWidth; - int M = inputChannels / groups_ * filterHeight * filterWidth; - real scale = 0.0f; - if (!needIm2col) { - colData = inputGrad + g * inputOffset; - scale = 1.0f; - } - BlasGemm::compute(true, - false, - M, - N, - K, - 1.0f, - filterData + g * filterOffset, - M, - outputGrad + g * outputOffset, - N, - scale, - colData, - N); - if (needIm2col) { - col2im(inputGrad + g * inputOffset, - imShape, - colData, - colShape, - strideH(), - strideW(), - paddingH(), - paddingW(), - dilationH(), - dilationW()); - } - } - inputGrad += inputChannels * inputHeight * inputWidth; - outputGrad += outputChannels * outputHeight * outputWidth; - } - } -}; - -/* - * \brief Backward filter calculation of convolution. - */ -template -class GemmConvGradFilterFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& output = inputs[0].shape(); - const TensorShape& input = inputs[1].shape(); - const TensorShape& filter = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - check(inputs, outputs); - const TensorShape& output = inputs[0].shape(); - const TensorShape& input = inputs[1].shape(); - const TensorShape& filter = outputs[0].shape(); - - real beta; - if (outputs[0].getArgType() == ADD_TO) { - beta = 1.0; - } else { - beta = 0.0; - } - - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = 
output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - - real* outputGrad = inputs[0].data(); - real* inputData = inputs[1].data(); - real* filterGrad = outputs[0].data(); - bool needIm2col = isNeedIm2col(filter); - - TensorShape imShape = - TensorShape({inputChannels / groups_, inputHeight, inputWidth}); - - TensorShape colShape; - real* colData = NULL; - - if (needIm2col) { - colShape = TensorShape({inputChannels / groups_, - filterHeight, - filterWidth, - outputHeight, - outputWidth}); - resizeBuffer(colShape.getElements()); - colData = reinterpret_cast(memory_->getBuf()); - } - - Im2ColFunctor im2col; - size_t inputOffset = imShape.getElements(); - size_t outputOffset = - (outputChannels / groups_) * outputHeight * outputWidth; - size_t filterOffset = filter.getElements() / groups_; - for (size_t i = 0; i < batchSize; i++) { - for (size_t g = 0; g < groups_; g++) { - if (needIm2col) { - im2col(inputData + g * inputOffset, - imShape, - colData, - colShape, - strideH(), - strideW(), - paddingH(), - paddingW(), - dilationH(), - dilationW()); - } else { - colData = inputData + g * inputOffset; - } - int M = outputChannels / groups_; - int K = outputHeight * outputWidth; - int N = inputChannels / groups_ * filterHeight * filterWidth; - BlasGemm::compute(false, - true, - M, - N, - K, - 1.0f, - outputGrad + g * outputOffset, - K, - colData, - K, - i == 0 ? 
beta : 1.0f, - filterGrad + g * filterOffset, - N); - } - inputData += inputChannels * inputHeight * inputWidth; - outputGrad += outputChannels * outputHeight * outputWidth; - } - } -}; - -#ifdef PADDLE_MOBILE_INFERENCE -REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvMobileFunction); -#else -REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvFunction); -#endif -REGISTER_TYPED_FUNC(GemmConvGradInput, CPU, GemmConvGradInputFunction); -REGISTER_TYPED_FUNC(GemmConvGradFilter, CPU, GemmConvGradFilterFunction); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(GemmConv, GPU, GemmConvFunction); -REGISTER_TYPED_FUNC(GemmConvGradInput, GPU, GemmConvGradInputFunction); -REGISTER_TYPED_FUNC(GemmConvGradFilter, GPU, GemmConvGradFilterFunction); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/GemmConvOpTest.cpp b/paddle/legacy/function/GemmConvOpTest.cpp deleted file mode 100644 index a30b7c90bb082a1f256fe37f09048d6fdc45804c..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/GemmConvOpTest.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "ConvOpTest.h" - -namespace paddle { - -TEST(GemmConv, NaiveConv) { - Convolution( - "NaiveConv-CPU", "GemmConv-CPU", forward); - Convolution2( - "NaiveConv-CPU", "GemmConv-CPU", forward); -} - -#ifdef PADDLE_WITH_CUDA -TEST(GemmConv, Forward) { - Convolution( - "GemmConv-CPU", "GemmConv-GPU", forward); - Convolution2( - "GemmConv-CPU", "GemmConv-GPU", forward); -} - -TEST(GemmConv, BackwardInput) { - Convolution( - "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input); - Convolution2( - "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input); -} - -TEST(GemmConv, BackwardFilter) { - Convolution( - "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter); - Convolution2( - "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter); -} -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/GemmFunctor.cpp b/paddle/legacy/function/GemmFunctor.cpp deleted file mode 100644 index 450293dfeea170e287cfc90226dabad25c76e537..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/GemmFunctor.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "GemmFunctor.h" -#include "paddle/legacy/math/MathFunctions.h" - -namespace paddle { - -template -struct BlasGemm { - static void compute(const bool transA, - const bool transB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc) { -#ifdef PADDLE_USE_EIGEN_FOR_BLAS - EigenBlasGemm::compute( - transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); -#else - gemm(transA == false ? CblasNoTrans : CblasTrans, - transB == false ? CblasNoTrans : CblasTrans, - M, - N, - K, - alpha, - A, - lda, - B, - ldb, - beta, - C, - ldc); -#endif - } -}; - -template -struct BlasGemm { - static void compute(const bool transA, - const bool transB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc) { - hl_matrix_mul((T*)A, - transA == false ? HPPL_OP_N : HPPL_OP_T, - (T*)B, - transB == false ? HPPL_OP_N : HPPL_OP_T, - C, - M, - N, - K, - alpha, - beta, - lda, - ldb, - ldc); - } -}; - -template struct BlasGemm; -template struct BlasGemm; - -} // namespace paddle diff --git a/paddle/legacy/function/GemmFunctor.h b/paddle/legacy/function/GemmFunctor.h deleted file mode 100644 index df63fc64f84a12ea3558005f5b3cac3d6ac6ede1..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/GemmFunctor.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "TensorType.h" - -namespace paddle { - -// TODO(hedaoyuan): Since the hl_matrix_mul interface does not conform to the -// cblas_dgemm interface's parameter format, it is necessary to introduce -// GemmFunctor as a new interface. Later, when considering the implementation -// of MatMulFunction, we need to consider the reconstruction of hl_matrix_mul -// interface. -template -struct BlasGemm { - static void compute(const bool transA, - const bool transB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc); -}; - -// TODO(hedaoyuan): Since the definition of the real type in the Paddle -// conflicts with the Eigen library, so compile the Eigen code can not -// include the Paddle header file. And need an EigenBlasGemm template class -// that does not contain the DeviceType parameter. -// I will fix this problem and merge BlasGemm and EigenBlasGemm into one. -template -struct EigenBlasGemm { - static void compute(const bool transA, - const bool transB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc); -}; - -} // namespace paddle diff --git a/paddle/legacy/function/GruFunctor.h b/paddle/legacy/function/GruFunctor.h deleted file mode 100644 index d5a30c332764f3dfb4f9abe885f989f3202205b1..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/GruFunctor.h +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "GemmFunctor.h" -#include "hl_cpu_gru.cuh" - -namespace paddle { - -template -struct GruFunctor { - template - static void compute(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { -#ifndef __NVCC__ - if (value.prevOutValue) { - BlasGemm::compute(false, - false, - batchSize, - 2 * frameSize, - frameSize, - 1, - value.prevOutValue, - frameSize, - value.gateWeight, - frameSize * 2, - 1, - value.gateValue, - frameSize * 3); - } - - forward_reset_output( - opResetOutput, value, frameSize, batchSize, active_gate); - - if (value.prevOutValue) { - BlasGemm::compute(false, - false, - batchSize, - frameSize, - frameSize, - 1, - value.resetOutputValue, - frameSize, - value.stateWeight, - frameSize, - 1, - value.gateValue + frameSize * 2, - frameSize * 3); - } - - forward_final_output( - opFinalOutput, value, frameSize, batchSize, active_node); -#endif - } -}; - -template -struct GruGradFunctor { - template - static void compute(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { -#ifndef __NVCC__ - backward_state_grad( - opStateGrad, value, grad, frameSize, batchSize, active_node); - - if (value.prevOutValue && grad.prevOutGrad) { - BlasGemm::compute(false, - true, - batchSize, - frameSize, - frameSize, - 1, - grad.gateGrad + frameSize * 2, - frameSize * 3, - 
value.stateWeight, - frameSize, - 0, - grad.resetOutputGrad, - frameSize); - - if (grad.stateWeightGrad) { - BlasGemm::compute(true, - false, - frameSize, - frameSize, - batchSize, - 1, - value.resetOutputValue, - frameSize, - grad.gateGrad + frameSize * 2, - frameSize * 3, - 1, - grad.stateWeightGrad, - frameSize); - } - } - - backward_reset_grad( - opResetGrad, value, grad, frameSize, batchSize, active_gate); - - if (grad.prevOutGrad && value.prevOutValue) { - BlasGemm::compute(false, - true, - batchSize, - frameSize, - frameSize * 2, - 1, - grad.gateGrad, - frameSize * 3, - value.gateWeight, - frameSize * 2, - 1, - grad.prevOutGrad, - frameSize); - - if (grad.gateWeightGrad) { - BlasGemm::compute(true, - false, - frameSize, - frameSize * 2, - batchSize, - 1, - value.prevOutValue, - frameSize, - grad.gateGrad, - frameSize * 3, - 1, - grad.gateWeightGrad, - frameSize * 2); - } - } -#endif - } -}; - -} // namespace paddle diff --git a/paddle/legacy/function/Im2Col.h b/paddle/legacy/function/Im2Col.h deleted file mode 100644 index e0ce6918a2a5324a396ade734945cf426b81ab56..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/Im2Col.h +++ /dev/null @@ -1,154 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "TensorShape.h" -#include "TensorType.h" -#include "neon/neon_util.h" - -namespace paddle { - -/* The storage format of the coldata in the Im2ColFunctor and Col2ImFunctor. */ -enum ColFormat { kCFO = 0, kOCF = 1 }; - -/* - * \brief Converts the image data of three dimensions(CHW) into a colData of - * five dimensions in the Im2ColFunctor calculation, - * And in the Col2ImFunctor calculation, it is reversed. - * - * \param imData Image data. - * \param imShape The shape of imData, - * [inputChannels, inputHeight, inputWidth]. - * \param colData Column data. - * \param colShape The shape of colData. - * - * If the template argument Format is kCFO, the shape of colData is: - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] - * So, it is easy to reshape into a convolution matrix for convolution - * calculation based on matrix multiplication. - * The shape of convolution matrix is [height, width], where the height is equal - * inputChannels * filterHeight * filterWidth, and the width is equal - * outputHeight * outputWidth. - * - * Reshape: - * shape of colData shape of convolution matrix - * [inputChannels, - * filterHeight, - * filterWidth, ======> [height, width] - * outputHeight, - * outputWidth] - * - * If the template argument Format is kOCF, the shape of colData is: - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] - * So, it is easy to reshape into a sequence matrix for rnn calculation. - * The shape of sequence matrix is [seqLength, stepSize], where the seqLength - * is equal outputHeight * outputWidth, and the stepSize is equal - * inputChannels * filterHeight * filterWidth. - * - * Reshape: - * shape of colData shape of sequence matrix - * [outputHeight, - * outputWidth, - * inputChannels, ======> [seqLength, stepSize] - * filterHeight, - * filterWidth] - * - * \note The caller needs to ensure that imShape.inputChannels is equal to - * colShape.inputChannels. 
- */ -template -class Im2ColFunctor { - public: - void operator()(const T* imData, - const TensorShape& imShape, - T* colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight = 1, - int dilationWidth = 1); -}; - -template -class Col2ImFunctor { - public: - void operator()(T* imData, - const TensorShape& imShape, - const T* colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight = 1, - int dilationWidth = 1); -}; - -template -class Im2ColMobileFunctor { - public: - void operator()(const T* imData, - const TensorShape& imShape, - T* colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth, - int inputChannels, - int colOffset, - int colOutputHeight, - int colWidth) { - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputWidth = colShape[4]; - - for (int ic = 0; ic < inputChannels; ic++) { - for (int oh = 0; oh < colOutputHeight; oh++) { - T* dstData = colData + oh * outputWidth; - for (int fh = 0; fh < filterHeight; fh++) { - for (int fw = 0; fw < filterWidth; fw++) { - int imRowIdx = (oh + colOffset) * strideHeight + - fh * dilationHeight - paddingHeight; - if (imRowIdx < 0 || imRowIdx >= inputHeight) { - memset(dstData, 0, outputWidth * sizeof(T)); - } else { - for (int ow = 0; ow < outputWidth; ow++) { - int imColIdx = - ow * strideWidth + fw * dilationWidth - paddingWidth; - if (imColIdx < 0 || imColIdx >= inputWidth) { - dstData[ow] = T(0); - } else { - dstData[ow] = imData[imRowIdx * inputWidth + imColIdx]; - } - } - } - dstData += colWidth; - } - } - } - colData += filterHeight * filterWidth * colWidth; - imData += inputHeight * inputWidth; - } - } -}; - -} // namespace paddle diff --git 
a/paddle/legacy/function/Im2ColOp.cpp b/paddle/legacy/function/Im2ColOp.cpp deleted file mode 100644 index 55a3ff98db63ede96094a3d3fdeedf03b573294f..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/Im2ColOp.cpp +++ /dev/null @@ -1,245 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Im2Col.h" - -namespace paddle { - -/* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] - */ -template -class Im2ColFunctor { - public: - void operator()(const T* imData, - const TensorShape& imShape, - T* colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputHeight = colShape[3]; - int outputWidth = colShape[4]; - int channelsCol = inputChannels * filterHeight * filterWidth; - - for (int c = 0; c < channelsCol; ++c) { - int wOffset = c % filterWidth; - int hOffset = (c / filterWidth) % filterHeight; - int c_im = c / filterWidth / filterHeight; - for (int h = 0; h < outputHeight; ++h) { - for (int w = 0; w < outputWidth; ++w) { - int imRowIdx = h * strideHeight + hOffset * dilationHeight; - int imColIdx = w * strideWidth + 
wOffset * dilationWidth; - if ((imRowIdx - paddingHeight) < 0 || - (imRowIdx - paddingHeight) >= inputHeight || - (imColIdx - paddingWidth) < 0 || - (imColIdx - paddingWidth) >= inputWidth) { - colData[(c * outputHeight + h) * outputWidth + w] = T(0); - } else { - imRowIdx += c_im * inputHeight - paddingHeight; - imColIdx -= paddingWidth; - colData[(c * outputHeight + h) * outputWidth + w] = - imData[imRowIdx * inputWidth + imColIdx]; - } - } - } - } - } -}; - -/* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] - */ -template -class Col2ImFunctor { - public: - void operator()(T* imData, - const TensorShape& imShape, - const T* colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputHeight = colShape[3]; - int outputWidth = colShape[4]; - int channelsCol = inputChannels * filterHeight * filterWidth; - - for (int c = 0; c < channelsCol; ++c) { - int wOffset = c % filterWidth; - int hOffset = (c / filterWidth) % filterHeight; - int c_im = c / filterWidth / filterHeight; - for (int h = 0; h < outputHeight; ++h) { - for (int w = 0; w < outputWidth; ++w) { - int imRowIdx = h * strideHeight + hOffset * dilationHeight; - int imColIdx = w * strideWidth + wOffset * dilationWidth; - if ((imRowIdx - paddingHeight) >= 0 && - (imRowIdx - paddingHeight) < inputHeight && - (imColIdx - paddingWidth) >= 0 && - (imColIdx - paddingWidth) < inputWidth) { - imRowIdx += c_im * inputHeight - paddingHeight; - imColIdx -= paddingWidth; - imData[imRowIdx * inputWidth + imColIdx] += - colData[(c * outputHeight + h) * outputWidth + w]; - } - } - } - } - } -}; - -template class Im2ColFunctor; -template class 
Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; - -/* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] - */ -template -class Im2ColFunctor { - public: - void operator()(const T* imData, - const TensorShape& imShape, - T* colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight = 1, - int dilationWidth = 1) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[3]; - int filterWidth = colShape[4]; - int outputHeight = colShape[0]; - int outputWidth = colShape[1]; - for (int outputH = 0; outputH < outputHeight; ++outputH) { - for (int outputW = 0; outputW < outputWidth; ++outputW) { - for (int channel = 0; channel < inputChannels; ++channel) { - for (int filterH = 0; filterH < filterHeight; ++filterH) { - for (int filterW = 0; filterW < filterWidth; ++filterW) { - int imRowOffset = outputH * strideHeight + - filterH * dilationHeight - paddingHeight; - int imColOffset = outputW * strideWidth + - filterW * dilationWidth - paddingWidth; - int colDataOffset = - (((outputH * outputWidth + outputW) * inputChannels + - channel) * - filterHeight + - filterH) * - filterWidth + - filterW; - if (imRowOffset < 0 || imRowOffset >= inputHeight || - imColOffset < 0 || imColOffset >= inputWidth) { - colData[colDataOffset] = float(0); - } else { - int imDataOffset = - (channel * inputHeight + imRowOffset) * inputWidth + - imColOffset; - colData[colDataOffset] = imData[imDataOffset]; - } - } - } - } - } - } - } -}; - -/* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] - */ -template -class Col2ImFunctor { - public: - void operator()(T* imData, - const TensorShape& imShape, - const T* colData, - const TensorShape& 
colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight = 1, - int dilationWidth = 1) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[3]; - int filterWidth = colShape[4]; - int outputHeight = colShape[0]; - int outputWidth = colShape[1]; - for (int outputH = 0; outputH < outputHeight; ++outputH) { - for (int outputW = 0; outputW < outputWidth; ++outputW) { - for (int channel = 0; channel < inputChannels; ++channel) { - for (int filterH = 0; filterH < filterHeight; ++filterH) { - for (int filterW = 0; filterW < filterWidth; ++filterW) { - int imRowOffset = outputH * strideHeight + - filterH * dilationHeight - paddingHeight; - int imColOffset = outputW * strideWidth + - filterW * dilationWidth - paddingWidth; - int colDataOffset = - (((outputH * outputWidth + outputW) * inputChannels + - channel) * - filterHeight + - filterH) * - filterWidth + - filterW; - if (imRowOffset >= 0 && imRowOffset < inputHeight && - imColOffset >= 0 && imColOffset < inputWidth) { - int imDataOffset = - (channel * inputHeight + imRowOffset) * inputWidth + - imColOffset; - imData[imDataOffset] += colData[colDataOffset]; - } - } - } - } - } - } - } -}; - -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; - -} // namespace paddle diff --git a/paddle/legacy/function/Im2ColOpGpu.cu b/paddle/legacy/function/Im2ColOpGpu.cu deleted file mode 100644 index 96dd8f528eaa38f9d174ab7c2a5ea5eb96e2a060..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/Im2ColOpGpu.cu +++ /dev/null @@ -1,464 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Im2Col.h" -#include "hl_device_functions.cuh" - -namespace paddle { - -template -__global__ void im2col(const T* data_im, - int numOuts, - int height, - int width, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int dilationH, - int dilationW, - int height_col, - int width_col, - T* data_col) { - int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; - if (index < numOuts) { - int w_out = index % width_col; - index /= width_col; - int h_out = index % height_col; - int channel_in = index / height_col; - int channel_out = channel_in * blockH * blockW; - int h_in = h_out * strideH; - int w_in = w_out * strideW; - - data_col += (channel_out * height_col + h_out) * width_col + w_out; - for (int i = 0; i < blockH; ++i) { - for (int j = 0; j < blockW; ++j) { - int rIdx = int(h_in + i * dilationH); - int cIdx = int(w_in + j * dilationW); - if ((rIdx - (int)paddingH) >= (int)height || - (rIdx - (int)paddingH) < 0 || - (cIdx - (int)paddingW) >= (int)width || - (cIdx - (int)paddingW) < 0) { - *data_col = 0; - } else { - rIdx = rIdx + channel_in * height - paddingH; - cIdx = cIdx - paddingW; - *data_col = data_im[rIdx * width + cIdx]; - } - data_col += height_col * width_col; - } - } - } -} - -/* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] - */ -template -class Im2ColFunctor { - public: - void operator()(const T* imData, - const TensorShape& imShape, - T* colData, - const TensorShape& colShape, - int 
strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputHeight = colShape[3]; - int outputWidth = colShape[4]; - - int numKernels = inputChannels * outputHeight * outputWidth; - int blocks = (numKernels + 1024 - 1) / 1024; - int blockX = 512; - int blockY = (blocks + 512 - 1) / 512; - dim3 threads(1024, 1); - dim3 grid(blockX, blockY); - im2col<<>>(imData, - numKernels, - inputHeight, - inputWidth, - filterHeight, - filterWidth, - strideHeight, - strideWidth, - paddingHeight, - paddingWidth, - dilationHeight, - dilationWidth, - outputHeight, - outputWidth, - colData); - CHECK_SYNC("Im2ColFunctor GPU failed"); - } -}; - -template -__global__ void col2im(size_t n, - const T* data_col, - size_t height, - size_t width, - size_t channels, - size_t blockH, - size_t blockW, - size_t strideH, - size_t strideW, - size_t paddingH, - size_t paddingW, - size_t dilationH, - size_t dilationW, - size_t height_col, - size_t width_col, - T* data_im) { - size_t index = - (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; - if (index < n) { - T val = 0; - int w = int(index % width); - int h = int((index / width) % height); - int c = int(index / (width * height)); - int filterH = (blockH - 1) * dilationH + 1; - int filterW = (blockW - 1) * dilationW + 1; - - if ((w - (int)paddingW) >= 0 && - (w - (int)paddingW) < (width - 2 * paddingW) && - (h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) { - // compute the start and end of the output - int w_col_start = - (w < (int)filterW) ? 0 : (w - int(filterW)) / (int)strideW + 1; - int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col)); - int h_col_start = - (h < (int)filterH) ? 
0 : (h - (int)filterH) / (int)strideH + 1; - int h_col_end = min(int(h / strideH + 1), int(height_col)); - - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - // the col location: [c * width * height + h_out, w_out] - int h_k = (h - h_col * strideH); - int w_k = (w - w_col * strideW); - if (h_k % dilationH == 0 && w_k % dilationW == 0) { - h_k /= dilationH; - w_k /= dilationW; - int c_col = - (((c * blockH + h_k) * blockW + w_k) * height_col + h_col) * - width_col + - w_col; - val += data_col[c_col]; - } - } - } - h -= paddingH; - w -= paddingW; - data_im[c * ((width - 2 * paddingW) * (height - 2 * paddingH)) + - h * (width - 2 * paddingW) + w] += val; - } - } -} - -/* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] - */ -template -class Col2ImFunctor { - public: - void operator()(T* imData, - const TensorShape& imShape, - const T* colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputHeight = colShape[3]; - int outputWidth = colShape[4]; - - size_t numKernels = inputChannels * (inputHeight + 2 * paddingHeight) * - (inputWidth + 2 * paddingWidth); - - size_t blocks = (numKernels + 1024 - 1) / 1024; - size_t blockX = 512; - size_t blockY = (blocks + 512 - 1) / 512; - dim3 threads(1024, 1); - dim3 grid(blockX, blockY); - - // To avoid involving atomic operations, we will launch one kernel per - // bottom dimension, and then in the kernel add up the top dimensions. 
- col2im<<>>( - numKernels, - colData, - inputHeight + 2 * paddingHeight, - inputWidth + 2 * paddingWidth, - inputChannels, - filterHeight, - filterWidth, - strideHeight, - strideWidth, - paddingHeight, - paddingWidth, - dilationHeight, - dilationWidth, - outputHeight, - outputWidth, - imData); - CHECK_SYNC("Col2ImFunctor GPU failed"); - } -}; - -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; - -template -__global__ void im2colOCF(const T* imData, - T* colData, - int inputChannels, - int inputHeight, - int inputWidth, - int filterHeight, - int filterWidth, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth, - int outputHeight, - int outputWidth) { - int swId = blockIdx.x; - int shId = blockIdx.y; - for (int channelId = threadIdx.z; channelId < inputChannels; - channelId += blockDim.z) { - for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { - for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { - int widthOffset = - idx * dilationHeight + swId * strideWidth - paddingWidth; - int heightOffset = - idy * dilationWidth + shId * strideHeight - paddingHeight; - int imOffset = widthOffset + heightOffset * inputWidth + - channelId * inputHeight * inputWidth; - - int colOffset = idx + idy * filterWidth + - channelId * filterHeight * filterWidth + - (shId * outputWidth + swId) * - (inputChannels * filterHeight * filterWidth); - - if (heightOffset >= inputHeight || heightOffset < 0 || - widthOffset >= inputWidth || widthOffset < 0) { - colData[colOffset] = T(0); - } else { - colData[colOffset] = imData[imOffset]; - } - } - } - } -} - -/* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] - */ -template -class Im2ColFunctor { - public: - void operator()(const T* imData, - const TensorShape& imShape, - T* 
colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[3]; - int filterWidth = colShape[4]; - int outputHeight = colShape[0]; - int outputWidth = colShape[1]; - - int blockDimX = 0; - int blockDimY = 0; - if (filterHeight <= 4 && filterWidth <= 4) { - blockDimX = 4; - blockDimY = 4; - } else if (filterHeight <= 8 && filterWidth <= 8) { - blockDimX = 8; - blockDimY = 8; - } else if (filterHeight <= 16 && filterWidth <= 16) { - blockDimX = 16; - blockDimY = 16; - } else { - blockDimX = 32; - blockDimY = 32; - } - - int blockDimZ = 1024 / blockDimX / blockDimY; - dim3 threads(blockDimX, blockDimY, std::min(blockDimZ, inputChannels)); - dim3 grid(outputWidth, outputHeight); - im2colOCF<<>>(imData, - colData, - inputChannels, - inputHeight, - inputWidth, - filterHeight, - filterWidth, - strideHeight, - strideWidth, - paddingHeight, - paddingWidth, - dilationHeight, - dilationWidth, - outputHeight, - outputWidth); - CHECK_SYNC("Im2ColFunctor GPU failed"); - } -}; - -template -__global__ void col2imOCF(T* imData, - const T* colData, - int inputChannels, - int inputHeight, - int inputWidth, - int filterHeight, - int filterWidth, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth, - int outputHeight, - int outputWidth) { - int swId = blockIdx.x; - int shId = blockIdx.y; - for (int channelId = threadIdx.z; channelId < inputChannels; - channelId += blockDim.z) { - for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { - for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { - int widthOffset = - idx * dilationWidth + swId * strideWidth - paddingWidth; - int heightOffset = - idy * dilationHeight + shId * strideHeight - paddingHeight; - int imOffset 
= widthOffset + heightOffset * inputWidth + - channelId * inputHeight * inputWidth; - - int colOffset = idx + idy * filterWidth + - channelId * filterHeight * filterWidth + - (shId * outputWidth + swId) * - (inputChannels * filterHeight * filterWidth); - - if (heightOffset >= 0 && heightOffset < inputHeight && - widthOffset >= 0 && widthOffset < inputWidth) { - paddle::paddleAtomicAdd(imData + imOffset, colData[colOffset]); - } - } - } - } -} - -/* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] - */ -template -class Col2ImFunctor { - public: - void operator()(T* imData, - const TensorShape& imShape, - const T* colData, - const TensorShape& colShape, - int strideHeight, - int strideWidth, - int paddingHeight, - int paddingWidth, - int dilationHeight, - int dilationWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[3]; - int filterWidth = colShape[4]; - int outputHeight = colShape[0]; - int outputWidth = colShape[1]; - - int blockDimX = 0; - int blockDimY = 0; - if (filterHeight <= 4 && filterWidth <= 4) { - blockDimX = 4; - blockDimY = 4; - } else if (filterHeight <= 8 && filterWidth <= 8) { - blockDimX = 8; - blockDimY = 8; - } else if (filterHeight <= 16 && filterWidth <= 16) { - blockDimX = 16; - blockDimY = 16; - } else { - blockDimX = 32; - blockDimY = 32; - } - - int blockDimZ = 1024 / blockDimX / blockDimY; - dim3 threads(blockDimX, blockDimY, std::min(blockDimZ, inputChannels)); - dim3 grid(outputWidth, outputHeight); - col2imOCF<<>>(imData, - colData, - inputChannels, - inputHeight, - inputWidth, - filterHeight, - filterWidth, - strideHeight, - strideWidth, - paddingHeight, - paddingWidth, - dilationHeight, - dilationWidth, - outputHeight, - outputWidth); - CHECK_SYNC("Col2ImFunctor GPU failed"); - } -}; - -template class Im2ColFunctor; -template class Im2ColFunctor; -template 
class Col2ImFunctor; -template class Col2ImFunctor; - -} // namespace paddle diff --git a/paddle/legacy/function/Im2ColTest.cpp b/paddle/legacy/function/Im2ColTest.cpp deleted file mode 100644 index 2c5f06f38991497963cfbe1e12825f1bc39dffa6..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/Im2ColTest.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Im2Col.h" -#include -#include "Function.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/tests/TensorCheck.h" - -namespace paddle { - -template -void TestIm2ColFunctor() { - for (size_t channels : {1, 5, 32}) { - for (size_t inputHeight : {5, 33, 100}) { - for (size_t inputWidth : {5, 32, 96}) { - for (size_t filterHeight : {1, 5}) { - for (size_t filterWidth : {3, 7}) { - for (size_t stride : {1, 2}) { - for (size_t padding : {0, 1}) { - for (size_t dilation : {1, 3}) { - size_t filterSizeH = (filterHeight - 1) * dilation + 1; - size_t filterSizeW = (filterWidth - 1) * dilation + 1; - if (inputHeight + 2 * padding < filterSizeH || - inputWidth + 2 * padding < filterSizeW) - break; - if (padding >= filterSizeH || padding >= filterSizeW) break; - size_t outputHeight = - (inputHeight - filterSizeH + 2 * padding) / stride + 1; - size_t outputWidth = - (inputWidth - filterSizeW + 2 * padding) / stride + 1; - - TensorShape imShape = - TensorShape({channels, inputHeight, inputWidth}); - 
TensorShape colShape1 = TensorShape({channels, - filterHeight, - filterWidth, - outputHeight, - outputWidth}); - TensorShape colShape2 = TensorShape({outputHeight, - outputWidth, - channels, - filterHeight, - filterWidth}); - - size_t height = channels * filterHeight * filterWidth; - size_t width = outputHeight * outputWidth; - VectorPtr input1 = - Vector::create(imShape.getElements(), false); - VectorPtr input2 = - Vector::create(imShape.getElements(), false); - MatrixPtr output1 = - Matrix::create(height, width, false, false); - MatrixPtr output2 = - Matrix::create(width, height, false, false); - input1->uniform(0.001, 1); - input2->copyFrom(*input1); - - Im2ColFunctor im2Col1; - Im2ColFunctor im2Col2; - im2Col1(input1->getData(), - imShape, - output1->getData(), - colShape1, - stride, - stride, - padding, - padding, - dilation, - dilation); - im2Col2(input2->getData(), - imShape, - output2->getData(), - colShape2, - stride, - stride, - padding, - padding, - dilation, - dilation); - - // The transposition of the result of ColFormat == kCFO - // is equal to the result of ColFormat == kOCF. 
- MatrixPtr test; - output2->transpose(test, true); - autotest::TensorCheckErr(*output1, *test); - - Col2ImFunctor col2Im1; - Col2ImFunctor col2Im2; - - col2Im1(input1->getData(), - imShape, - output1->getData(), - colShape1, - stride, - stride, - padding, - padding, - dilation, - dilation); - col2Im2(input2->getData(), - imShape, - output2->getData(), - colShape2, - stride, - stride, - padding, - padding, - dilation, - dilation); - autotest::TensorCheckErr(*input1, *input2); - } - } - } - } - } - } - } - } -} - -TEST(Im2ColFunctor, CPU) { TestIm2ColFunctor(); } - -#ifdef PADDLE_WITH_CUDA - -TEST(Im2ColFunctor, GPU) { TestIm2ColFunctor(); } - -#endif - -template -void TestIm2ColMobileFunctor() { - for (size_t channels : {32}) { - for (size_t inputHeight : {33, 100}) { - for (size_t inputWidth : {32, 96}) { - for (size_t filterHeight : {5}) { - for (size_t filterWidth : {7}) { - for (size_t stride : {2}) { - for (size_t padding : {1}) { - for (size_t dilation : {1, 3}) { - size_t filterSizeH = (filterHeight - 1) * dilation + 1; - size_t filterSizeW = (filterWidth - 1) * dilation + 1; - if (inputHeight + 2 * padding < filterSizeH || - inputWidth + 2 * padding < filterSizeW) - break; - if (padding >= filterSizeH || padding >= filterSizeW) break; - size_t outputHeight = - (inputHeight - filterSizeH + 2 * padding) / stride + 1; - size_t outputWidth = - (inputWidth - filterSizeW + 2 * padding) / stride + 1; - - TensorShape imShape = - TensorShape({channels, inputHeight, inputWidth}); - TensorShape colShape1 = TensorShape({channels, - filterHeight, - filterWidth, - outputHeight, - outputWidth}); - - size_t height = channels * filterHeight * filterWidth; - size_t width = outputHeight * outputWidth; - VectorPtr input1 = - Vector::create(imShape.getElements(), false); - VectorPtr input2 = - Vector::create(imShape.getElements(), false); - MatrixPtr output1 = - Matrix::create(height, width, false, false); - MatrixPtr output2 = - Matrix::create(height, width, false, false); - 
input1->uniform(0.001, 1); - input2->copyFrom(*input1); - - Im2ColFunctor im2Col1; - Im2ColMobileFunctor im2Col2; - im2Col1(input1->getData(), - imShape, - output1->getData(), - colShape1, - stride, - stride, - padding, - padding, - dilation, - dilation); - im2Col2(input2->getData(), - imShape, - output2->getData(), - colShape1, - stride, - stride, - padding, - padding, - dilation, - dilation, - channels, - 0, - outputHeight, - outputHeight * outputWidth); - - autotest::TensorCheckEqual(*output1, *output2); - } - } - } - } - } - } - } - } -} - -TEST(Im2ColFunctor, Mobile) { TestIm2ColMobileFunctor(); } - -} // namespace paddle diff --git a/paddle/legacy/function/MulOp.cpp b/paddle/legacy/function/MulOp.cpp deleted file mode 100644 index 750978fc90201ccdc0a32f93fc01a2170d3f39d5..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/MulOp.cpp +++ /dev/null @@ -1,347 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MulOp.h" -#include "GemmFunctor.h" -#include "paddle/legacy/math/SIMDFunctions.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace { -inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) { - for (unsigned int i = 0; i < len; ++i) { - a[i] += (1.0 == scaleB) ? 
b[i] : scaleB * b[i]; - } -} - -inline void colVecAddTo( - real* a, real* b, real c, size_t len, size_t aWidth, size_t bWidth) { - for (unsigned int i = 0; i < len; ++i) { - a[i * aWidth] += (1.0 == c) ? b[i * bWidth] : b[i * bWidth] * c; - } -} -} // namespace - -namespace paddle { -/// sparse matrix (+)= dense matrix * dense matrix -template <> -void MulOp(CpuSparseMatrix& out, - const CpuMatrix& a, - const CpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans) { - CHECK_EQ(out.getValueType(), FLOAT_VALUE); - if (scaleT == 0) { - out.zeroMem(); - } - const real* A = a.getData(); - const real* B = b.getData(); - real* C = out.getValue(); - int* rows = out.getRows(); - int* cols = out.getCols(); - size_t width = out.getWidth(); - size_t height = out.getHeight(); - - /// SPARSE_CSC, {a any, b not trans} - if (out.getFormat() == SPARSE_CSC) { - /// b not trans and a any - CHECK(!bTrans); - size_t m = !aTrans ? a.getWidth() : a.getHeight(); - for (size_t i = 0; i < width; i++) { - size_t start = out.getColStartIdx(i); - size_t end = out.getColStartIdx(i + 1); - for (size_t j = start; j < end; j++) { - real sum = 0; - size_t rowIdx = rows[j]; - for (size_t k = 0; k < m; k++) { - sum += (!aTrans ? A[rowIdx * m + k] : A[k * height + rowIdx]) * - B[k * width + i]; - } - C[j] = scaleAB * sum + scaleT * C[j]; - } - } - return; - } - - /// SPARSE_CSR, {a any, b not trans} or {a not trans, b trans} - if (out.getFormat() == SPARSE_CSR) { - /// a and b can not both transpose - CHECK(!(aTrans && bTrans)); - size_t m = a.getWidth(); - for (size_t i = 0; i < height; i++) { - size_t start = out.getRowStartIdx(i); - size_t end = out.getRowStartIdx(i + 1); - for (size_t j = start; j < end; j++) { - real sum = 0; - size_t colIdx = cols[j]; - for (size_t k = 0; k < m; k++) { - sum += (!aTrans ? A[i * m + k] : A[k * height + i]) * - (!bTrans ? 
B[k * width + colIdx] : B[colIdx * m + k]); - } - C[j] = scaleAB * sum + scaleT * C[j]; - } - } - return; - } -} - -/// dense matrix (+)= dense matrix * dense matrix -template <> -void MulOp(CpuMatrix& out, - const CpuMatrix& a, - const CpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans) { - BlasGemm::compute( - aTrans, - bTrans, - out.getHeight(), - out.getWidth(), - !aTrans ? a.getWidth() : a.getHeight(), - scaleAB, - a.getData(), - a.getStride(), - b.getData(), - b.getStride(), - scaleT, - out.getData(), - out.getStride()); -} - -/// dense matrix (+)= sparse matrix * dense matrix -template <> -void MulOp(CpuMatrix& out, - const CpuSparseMatrix& a, - const CpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans) { - if (scaleT == 0) { - out.zeroMem(); - } - const real* B = b.getData(); - real* C = out.getData(); - if (out.getWidth() % 32 == 0) { - CHECK_EQ((size_t)B % 32, 0UL); - CHECK_EQ((size_t)C % 32, 0UL); - } - - int* cols = a.getCols(); - real* values = a.getValue(); - for (size_t i = 0; i < a.getHeight(); ++i) { - const int start = a.getRowStartIdx(i); - const int end = a.getRowStartIdx(i + 1); - for (int j = start; j < end; ++j) { - vecAddTo(!aTrans ? out.getRow(i) : out.getRow(cols[j]), - !aTrans ? const_cast(b).getRow(cols[j]) - : const_cast(b).getRow(i), - (a.getValueType() == FLOAT_VALUE) ? 
values[j] : (real)1.0, - out.getWidth()); - } - } -} - -/// dense matrix (+)= dense matrix * sparse matrix -template <> -void MulOp(CpuMatrix& out, - const CpuMatrix& a, - const CpuSparseMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans) { - if (scaleT == 0) { - out.zeroMem(); - } - real* A = const_cast(a.getData()); - real* B = const_cast(b.getValue()); - real* C = out.getData(); - int* rows = b.getRows(); - int* cols = b.getCols(); - - /// SPARSE_CSC format - if (b.getFormat() == SPARSE_CSC) { - for (size_t j = 0; j < b.getWidth(); ++j) { - int start = b.getColStartIdx(j); - int end = b.getColStartIdx(j + 1); - for (int i = start; i < end; ++i) { - colVecAddTo(!bTrans ? C + j : C + rows[i], - !bTrans ? A + rows[i] : A + j, - (b.getValueType() == NO_VALUE) ? (real)1.0 : B[i], - out.getHeight(), - out.getWidth(), - a.getWidth()); - } - } - return; - } - - /// SPARSE_CSR format - if (b.getFormat() == SPARSE_CSR) { - for (size_t j = 0; j < b.getHeight(); ++j) { - int start = b.getRowStartIdx(j); - int end = b.getRowStartIdx(j + 1); - for (int i = start; i < end; ++i) { - colVecAddTo(!bTrans ? C + cols[i] : C + j, - !bTrans ? A + j : A + cols[i], - (b.getValueType() == NO_VALUE) ? 
(real)1.0 : B[i], - out.getHeight(), - out.getWidth(), - a.getWidth()); - } - } - return; - } -} - -/** - * mul operator - * out = scaleT * out + scaleAB * (A * B) - * here, scaleT in {0, 1}, scaleAB == 1, - * out = A * B, ASSIGN_TO - * out += A * B, ADD_TO - * - * - * \param outputs[0] output matrix (out), M * N, - * could be either Sparse or Dense Matrix - * M is num of rows, N is num of columns - * \param inputs[0] first input matrix (A), M * K (if non-trans) - * could be either Sparse or Dense Matrix - * M is num of rows, K is num of columns - * \param inputs[1] second input matrix (B), K * N (if non-trans) - * could be either Sparse or Dense Matrix - * K is num of rows, N is num of columns - * - * Support eight Mul operators, with both GPU and CPU devices - * For each device, four Mul operators are supported: - * 1. dense (out) = dense (A) * dense (B) - * 2. dense (out) = sparse (A) * dense (B) - * sparse matrix only support SPARSE_CSR format - * 3. dense (out) = dense (A) * sparse (B) - * sparse matrix support SPARSE_CSC and SPARSE_CSR formats - * 4. sparse (out) = dense (A) * dense (B) - * sparse matrix support SPARSE_CSC and SPARSE_CSR formats - * - */ -template -class MulFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { - aTrans_ = config.get("aTrans"); - bTrans_ = config.get("bTrans"); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK(!aTrans_ || !bTrans_) - << "Not support both a and b are transpose matrices"; - - CHECK_EQ((size_t)2, inputs.size()); - CHECK_EQ((size_t)1, outputs.size()); - CHECK(inputs[0].data() && inputs[1].data() && outputs[0].data()); - CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); - CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); - CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); - - size_t aRow = !aTrans_ ? inputs[0].shape()[0] : inputs[0].shape()[1]; - size_t aCol = !aTrans_ ? inputs[0].shape()[1] : inputs[0].shape()[0]; - size_t bRow = !bTrans_ ? 
inputs[1].shape()[0] : inputs[1].shape()[1]; - size_t bCol = !bTrans_ ? inputs[1].shape()[1] : inputs[1].shape()[0]; - /// C = A * B, or C += A * B, for matrix format - CHECK_EQ(aCol, bRow); - CHECK_EQ(aRow, outputs[0].shape()[0]); - CHECK_EQ(bCol, outputs[0].shape()[1]); - - /// only support C = A * B (ASSIGN_TO) or C += A * B (ADD_TO) - real scaleT = (outputs[0].getArgType() == ADD_TO) ? 1.0 : 0.0; - - /// support dense = not both sparse * sparse - /// or sparse = dense * dense - CHECK((!outputs[0].isSparseArg() && - !(inputs[0].isSparseArg() && inputs[1].isSparseArg())) || - (outputs[0].isSparseArg() && !inputs[0].isSparseArg() && - !inputs[1].isSparseArg())); - - auto outMat = outputs[0].matrix(); - /// dense matrix = dense matrix * dense matrix - if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() && - !outputs[0].isSparseArg()) { - MulOp(outMat, - inputs[0].matrix(), - inputs[1].matrix(), - 1.0, // scaleAB - scaleT, - aTrans_, - bTrans_); - return; - } - - /// dense matrix = dense matrix * sparse matrix - if (!inputs[0].isSparseArg() && inputs[1].isSparseArg() && - !outputs[0].isSparseArg()) { - CHECK(!aTrans_) << "Not supported a transpose"; - MulOp(outMat, - inputs[0].matrix(), - inputs[1].sparse().SparseMatrix(), - 1.0, // scaleAB - scaleT, - aTrans_, - bTrans_); - return; - } - - /// dense matrix = sparse matrix * dense matrix - if (inputs[0].isSparseArg() && !inputs[1].isSparseArg() && - !outputs[0].isSparseArg()) { - CHECK(!bTrans_) << "Not supported b transpose"; - CHECK_EQ(inputs[0].sparse().dataFormat(), T_SPARSE_CSR) - << "Only supported SPARSE_CSR format for sparse matrix a"; - MulOp(outMat, - inputs[0].sparse().SparseMatrix(), - inputs[1].matrix(), - 1.0, // scaleAB - scaleT, - aTrans_, - bTrans_); - return; - } - - /// sparse matrix = dense matrix * dense matrix - auto outSparseMat = outputs[0].sparse().SparseMatrix(); - if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() && - outputs[0].isSparseArg()) { - MulOp(outSparseMat, - 
inputs[0].matrix(), - inputs[1].matrix(), - 1.0, // scaleAB - scaleT, - aTrans_, - bTrans_); - return; - } - } - - private: - bool aTrans_; - bool bTrans_; -}; - -REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(MulOp, GPU, MulFunc); -#endif -} // namespace paddle diff --git a/paddle/legacy/function/MulOp.h b/paddle/legacy/function/MulOp.h deleted file mode 100644 index ab33bde17296cd2b17ac45c5a936cfd2727919a5..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/MulOp.h +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "Function.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseMatrix.h" - -namespace paddle { -/// CPU, dense matrix (+)= dense matrix * dense matrix -template -void MulOp(CpuMatrix& out, - const CpuMatrix& a, - const CpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans); - -/// CPU, dense matrix (+)= sparse matrix * dense matrix -template -void MulOp(CpuMatrix& out, - const CpuSparseMatrix& a, - const CpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans); - -/// CPU, dense matrix (+)= dense matrix * sparse matrix -template -void MulOp(CpuMatrix& out, - const CpuMatrix& a, - const CpuSparseMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans); - -/// CPU, sparse matrix (+)= dense matrix * dense matrix -template -void MulOp(CpuSparseMatrix& out, - const CpuMatrix& a, - const CpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans); - -/// GPU, dense matrix (+)= dense matrix * dense matrix -template -void MulOp(GpuMatrix& out, - const GpuMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans); - -/// GPU, dense matrix (+)= sparse matrix * dense matrix -template -void MulOp(GpuMatrix& out, - const GpuSparseMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans); - -/// GPU, dense matrix (+)= dense matrix * sparse matrix -template -void MulOp(GpuMatrix& out, - const GpuMatrix& a, - const GpuSparseMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans); - -/// GPU, sparse matrix (+)= dense matrix * dense matrix -template -void MulOp(GpuSparseMatrix& out, - const GpuMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans); - -} // namespace paddle diff --git a/paddle/legacy/function/MulOpGpu.cu b/paddle/legacy/function/MulOpGpu.cu deleted file mode 100644 index 
217c983cb75dfcbc0e17f752a66847c5e92fcc91..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/MulOpGpu.cu +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MulOp.h" -#include "hl_base.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseMatrix.h" - -namespace paddle { -/// dense matrix (+)= dense matrix * dense matrix -template <> -void MulOp(GpuMatrix& out, - const GpuMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans) { - CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match"; - hl_matrix_mul(const_cast(a.getData()), - !aTrans ? HPPL_OP_N : HPPL_OP_T, - const_cast(b.getData()), - !bTrans ? HPPL_OP_N : HPPL_OP_T, - const_cast(out.getData()), - out.getHeight(), - out.getWidth(), - !aTrans ? a.getWidth() : a.getHeight(), - scaleAB, - scaleT, - a.getStride(), - b.getStride(), - out.getStride()); -} - -/// dense matrix (+)= sparse matrix * dense matrix -template <> -void MulOp(GpuMatrix& out, - const GpuSparseMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans) { - CHECK(out.isContiguous()); - CHECK(b.isContiguous()); - CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match"; - hl_matrix_csr_mul_dense(a.sMatrix_.get(), - aTrans ? 
HPPL_OP_T : HPPL_OP_N, - const_cast(b.getData()), - HPPL_OP_N, - const_cast(out.getData()), - out.getHeight(), - out.getWidth(), - b.getHeight(), - scaleAB, - scaleT); -} - -/// dense matrix (+)= dense matrix * sparse matrix -template <> -void MulOp(GpuMatrix& out, - const GpuMatrix& a, - const GpuSparseMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans) { - CHECK(out.isContiguous()); - CHECK(a.isContiguous()); - CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match"; - - if (b.format_ == SPARSE_CSC) { - hl_matrix_dense_mul_csc(const_cast(a.getData()), - HPPL_OP_N, - b.sMatrix_.get(), - bTrans ? HPPL_OP_T : HPPL_OP_N, - const_cast(out.getData()), - out.getHeight(), - out.getWidth(), - a.getWidth(), - scaleAB, - scaleT); - } else { - hl_matrix_dense_mul_csr(const_cast(a.getData()), - HPPL_OP_N, - b.sMatrix_.get(), - bTrans ? HPPL_OP_T : HPPL_OP_N, - const_cast(out.getData()), - out.getHeight(), - out.getWidth(), - a.getWidth(), - scaleAB, - scaleT); - } -} - -/// sparse matrix (+)= dense matrix * dense matrix -template <> -void MulOp(GpuSparseMatrix& out, - const GpuMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT, - bool aTrans, - bool bTrans) { - CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match"; - hl_sparse_matrix_mul(const_cast(a.getData()), - aTrans ? HPPL_OP_T : HPPL_OP_N, - const_cast(b.getData()), - bTrans ? HPPL_OP_T : HPPL_OP_N, - out.sMatrix_.get(), - out.getHeight(), - out.getWidth(), - !bTrans ? b.getHeight() : b.getWidth(), - scaleAB, - scaleT); -} - -} // namespace paddle diff --git a/paddle/legacy/function/MulOpTest.cpp b/paddle/legacy/function/MulOpTest.cpp deleted file mode 100644 index ab08b6f8696ff4aefd2dbdda591b20730b46898c..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/MulOpTest.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "FunctionTest.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/math/tests/test_matrixUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT - -/** - * C += A * B, A, B, C dense matrix - * dense = dense * dense - */ -void testFuncDDDMatrix( - bool transa, bool transb, size_t dimM, size_t dimN, size_t dimK) { - real scaleT = 1.0; - size_t heightA = (transa == false) ? dimM : dimK; - size_t widthA = (transa == false) ? dimK : dimM; - size_t heightB = (transb == false) ? dimK : dimN; - size_t widthB = (transb == false) ? dimN : dimK; - size_t heightC = dimM; - size_t widthC = dimN; - // init Test object - CpuGpuFuncCompare test( - "MulOp", FuncConfig().set("aTrans", transa).set("bTrans", transb)); - // prepare input arguments - /// matrix A : HA * WA - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightA, widthA})); - /// matrix B: HB * WB - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightB, widthB})); - - /// output matrix C: HC * WC - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightC, widthC}), - scaleT == 1.0 ? 
ADD_TO : ASSIGN_TO); - // run Function - test.run(); -} - -TEST(MulOp, DDDMatrixMul) { - LOG(INFO) << "function test for dense = dense * dense matrix"; - for (const auto transa : {false, true}) { - for (const auto transb : {false, true}) { - for (const auto dimM : {1, 10, 100}) { - for (const auto dimN : {1, 10}) { - for (const auto dimK : {8}) { - if (transa && transb) { - continue; - } - VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') - << " transa=" << transa << " transb=" << transb - << " dimM=" << std::setw(5) << dimM - << " dimN=" << std::setw(5) << dimN - << " dimK=" << std::setw(5) << dimK; - testFuncDDDMatrix(transa, transb, dimM, dimN, dimK); - } - } - } - } - } -} - -/** - * C += A * B, B, C dense, A sparse - * dense = sparse * dense - */ -void testFuncDSparseDMatrix( - size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { - real scaleT = 1.0; - // init Test object - CpuGpuFuncCompare test( - "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false)); - // prepare input arguments - /// sparse matrix A : M * K - test.addInputs(SparseMatrixArg( - VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}, nnz, FORMAT, FLOAT_VALUE)); - /// matrix B: K * N - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN})); - - /// output matrix C: M * N - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), - scaleT == 1.0 ? 
ADD_TO : ASSIGN_TO); - // run Function - test.run(); -} - -TEST(MuLOp, DSparseDMul) { - LOG(INFO) << "function test for dense = sparse * dense matrix"; - for (const auto dimM : {10, 100, 1000}) { - for (const auto dimN : {10, 100}) { - for (const auto dimK : {3, 10}) { - for (const auto nnz : {3, 10}) { - for (const auto FORMAT : {SPARSE_CSR}) { - VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') - << " dimM=" << std::setw(5) << dimM - << " dimN=" << std::setw(5) << dimN - << " dimK=" << std::setw(5) << dimK - << " nnz=" << std::setw(5) << nnz - << " format=" << std::setw(5) << FORMAT; - testFuncDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT); - } - } - } - } - } -} - -/** - * C += A * B, A, C dense, B sparse - * dense = dense * sparse - */ -void testFuncDDSparseMatrix( - size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { - real scaleT = 1.0; - // init Test object - CpuGpuFuncCompare test( - "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false)); - // prepare input arguments - /// matrix A : M * K - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK})); - - /// matrix B: K * N - test.addInputs(SparseMatrixArg( - VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}, nnz, FORMAT, FLOAT_VALUE)); - - /// output matrix C: M * N - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), - scaleT == 1.0 ? 
ADD_TO : ASSIGN_TO); - // run Function - test.run(); -} - -TEST(MulOp, DDSparseMul) { - LOG(INFO) << "function test for dense = dense * sparse matrix"; - for (const auto dimM : {10, 100, 1000}) { - for (const auto dimN : {10, 100}) { - for (const auto dimK : {3, 10}) { - for (const auto nnz : {3, 10}) { - for (const auto FORMAT : {SPARSE_CSR, SPARSE_CSC}) { - VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') - << " dimM=" << std::setw(5) << dimM - << " dimN=" << std::setw(5) << dimN - << " dimK=" << std::setw(5) << dimK - << " nnz=" << std::setw(5) << nnz - << " format=" << std::setw(5) << FORMAT; - testFuncDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT); - } - } - } - } - } -} - -/** - * C += A * B, A sparse, B, C dense - * sparse = dense * dense - */ -void testFuncSparseDDMatrix( - size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { - real scaleT = 1.0; - // init Test object - CpuGpuFuncCompare test( - "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false)); - // prepare input arguments - /// matrix A : M * K - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK})); - - /// matrix B: K * N - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN})); - - /// output sparse matrix C: M * N - test.addOutputs( - SparseMatrixArg( - VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}, nnz, FORMAT, FLOAT_VALUE), - scaleT == 1.0 ? 
ADD_TO : ASSIGN_TO); - // run Function - test.run(); -} - -TEST(MulOp, SparseDDMul) { - LOG(INFO) << "function test for sparse = dense * dense matrix"; - for (const auto dimM : {10, 100, 1000}) { - for (const auto dimN : {10, 100}) { - for (const auto dimK : {3, 10}) { - for (const auto nnz : {3, 10}) { - for (const auto FORMAT : {SPARSE_CSC, SPARSE_CSR}) { - VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') - << " dimM=" << std::setw(5) << dimM - << " dimN=" << std::setw(5) << dimN - << " dimK=" << std::setw(5) << dimK - << " nnz=" << std::setw(5) << nnz - << " format=" << std::setw(5) << FORMAT; - testFuncSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT); - } - } - } - } - } -} diff --git a/paddle/legacy/function/NaiveConvOp.cpp b/paddle/legacy/function/NaiveConvOp.cpp deleted file mode 100644 index 99c8b81acbbb16a91bc0faa1c7f2873fa94ab108..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/NaiveConvOp.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ConvOp.h" - -namespace paddle { - -/* - * The three arguments are stored in memory in row major order. 
- * inputData = [batchSize, inputChannels, inputHeight, inputWidth] - * filterData = [outputChannels, inputChannels, filterHeight, filterWidth] - * outputData = [batchSize, outputChannels, outputHeight, outputWidth] - */ -template -class NaiveConvFunctor { - public: - void operator()(const T* inputData, - size_t batchSize, - size_t inputChannels, - size_t inputHeight, - size_t inputWidth, - const T* filterData, - size_t filterHeight, - size_t filterWidth, - T* outputData, - size_t outputChannels, - size_t outputHeight, - size_t outputWidth, - size_t paddingH, - size_t paddingW, - size_t strideH, - size_t strideW) { - for (size_t batch = 0; batch < batchSize; batch++) { - for (size_t outC = 0; outC < outputChannels; outC++) { - for (size_t outH = 0; outH < outputHeight; outH++) { - for (size_t outW = 0; outW < outputWidth; outW++) { - const int inStartH = (outH * strideH) - paddingH; - const int inStartW = (outW * strideW) - paddingW; - T outValue = (T)0; - for (size_t inC = 0; inC < inputChannels; inC++) { - for (size_t fH = 0; fH < filterHeight; fH++) { - for (size_t fW = 0; fW < filterWidth; fW++) { - T inValue; - const int inH = inStartH + fH; - const int inW = inStartW + fW; - if ((inH >= 0 && inH < (int)inputHeight) && - (inW >= 0 && inW < (int)inputWidth)) { - size_t offsetInput = - batch * inputChannels * inputHeight * inputWidth + - inC * inputHeight * inputWidth + inH * inputWidth + inW; - inValue = inputData[offsetInput]; - } else { - inValue = (T)0; - } - size_t offsetFilter = - outC * inputChannels * filterHeight * filterWidth + - inC * filterHeight * filterWidth + fH * filterWidth + fW; - T filterValue = filterData[offsetFilter]; - outValue += (inValue * filterValue); - } - } - } - - size_t offset = - batch * outputChannels * outputHeight * outputWidth + - outC * outputHeight * outputWidth + outH * outputWidth + outW; - outputData[offset] = outValue; - } - } - } - } - } -}; - -template -class NaiveConvFunction : public ConvFunctionBase { - public: - 
void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - check(inputs, outputs); - - size_t batchSize = inputs[0].shape()[0]; - size_t inputChannels = inputs[0].shape()[1]; - size_t inputHeight = inputs[0].shape()[2]; - size_t inputWidth = inputs[0].shape()[3]; - size_t filterHeight = inputs[1].shape()[2]; - size_t filterWidth = inputs[1].shape()[3]; - size_t outputChannels = outputs[0].shape()[1]; - size_t outputHeight = outputs[0].shape()[2]; - size_t outputWidth = outputs[0].shape()[3]; - - real* inputData = inputs[0].data(); - real* filterData = inputs[1].data(); - real* outputData = outputs[0].data(); - NaiveConvFunctor conv; - conv(inputData, - batchSize, - inputChannels, - inputHeight, - inputWidth, - filterData, - filterHeight, - filterWidth, - outputData, - outputChannels, - outputHeight, - outputWidth, - paddingH(), - paddingW(), - strideH(), - strideW()); - } -}; - -REGISTER_TYPED_FUNC(NaiveConv, CPU, NaiveConvFunction); - -} // namespace paddle diff --git a/paddle/legacy/function/PadOp.cpp b/paddle/legacy/function/PadOp.cpp deleted file mode 100644 index 9d011d28e6938fac6980bed88f774abdbf3532d4..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/PadOp.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PadOp.h" -#include "paddle/legacy/math/Vector.h" - -namespace paddle { - -template <> -void Pad(real* outputs, - const real* inputs, - const int num, - const int inC, - const int inH, - const int inW, - const PadConf& pad) { - int cstart = pad.channel[0], cend = pad.channel[1]; - int hstart = pad.height[0], hend = pad.height[1]; - int wstart = pad.width[0], wend = pad.width[1]; - int outC = inC + cstart + cend; - int outH = inH + hstart + hend; - int outW = inW + wstart + wend; - for (int i = 0; i < num; i++) { - for (int c = 0; c < inC; c++) { - for (int h = 0; h < inH; h++) { - int inoff = ((i * inC + c) * inH + h) * inW; - int outoff = - ((i * outC + c + cstart) * outH + h + hstart) * outW + wstart; - memcpy(outputs + outoff, inputs + inoff, inW * sizeof(real)); - } - } - } -} - -template <> -void PadGrad(real* inGrad, - const real* outGrad, - const int num, - const int inC, - const int inH, - const int inW, - const PadConf& pad) { - int cstart = pad.channel[0], cend = pad.channel[1]; - int hstart = pad.height[0], hend = pad.height[1]; - int wstart = pad.width[0], wend = pad.width[1]; - int outC = inC + cstart + cend; - int outH = inH + hstart + hend; - int outW = inW + wstart + wend; - for (int i = 0; i < num; i++) { - for (int c = 0; c < inC; c++) { - for (int h = 0; h < inH; h++) { - int inoff = ((i * inC + c) * inH + h) * inW; - int outoff = - ((i * outC + c + cstart) * outH + h + hstart) * outW + wstart; - CpuVector inG = CpuVector(inW, inGrad + inoff); - CpuVector outG = CpuVector(inW, const_cast(outGrad + outoff)); - inG += outG; - } - } - } -} - 
-static inline PadConf castToPadConf(const FuncConfig& conf) { - return {conf.get>("channel"), - conf.get>("height"), - conf.get>("width")}; -} - -/** - * \brief Padding zeros to input according to the specify dimension. - * The struct pad_ contains the padding size in each dimension. - * The input and output is a 4D tensor. In PadFunc, we only - * pad zeros to the 2nd to 4th dimension. - * - * Argument in this Function: - * \param pad_ A struct object contains the padding size in each dimension. - * It has six integers. The channelStart and channelEnd indicate - * how many zeros to add before and after the input in channel - * dimension. And the heightStart and heightEnd indicate padding - * in height dimension. The widthStart and widthEnd indicate the - * padding in width dimension. - * \param inputs A 4D tensor, only one input. - * \param outputs A 4D tensor, the output value after padding. - * - * For example, - * Input(2,2,2,3) = [ - * [ [[1,2,3], [3,4,5]], - * [[2,3,5], [1,6,7]] ], - * [ [[4,3,1], [1,8,7]], - * [[3,8,9], [2,3,5]] ] - * ] # the shape is (1,2,2,3) - * - * pad_: if channelStart = channelEnd = 1, others are 0. - * Output(2,4,2,3) = [ - * [ [[0,0,0], [0,0,0]], - * [[1,2,3], [3,4,5]], - * [[2,3,5], [1,6,7]], - * [[0,0,0], [0,0,0]] ], - * [ [[0,0,0], [0,0,0]], - * [[4,3,1], [1,8,7]], - * [[3,8,9], [2,3,5]], - * [[0,0,0], [0,0,0]] ] - * ] # the shape is (2,4,2,3) - * - * pad_: if widthStart = 1, widthEnd = 2, others are 0. - * Output(2,2,2,6) = [ - * [ [[0,1,2,3,0,0], [0,3,4,5,0,0]], - * [[0,2,3,5,0,0], [0,1,6,7,0,0]] ], - * [ [[0,4,3,1,0,0], [0,1,8,7,0,0]], - * [[0,3,8,9,0,0], [0,2,3,5,0,0]] ], - * ] # the shape is (2,2,2,6) - * - * pad_: if heightStart = 1, heightEnd = 1, others are 0. 
- * Output(2,2,4,3) = [ - * [ [[0,0,0], [1,2,3], [3,4,5], [0,0,0]], - * [[0,0,0], [2,3,5], [1,6,7], [0,0,0]] ], - * [ [[0,0,0], [4,3,1], [1,8,7], [0,0,0]], - * [[0,0,0], [3,8,9], [2,3,5], [0,0,0]] ], - * ] # the shape is (2,2,4,3) - */ - -template -class PadFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { pad_ = castToPadConf(config); } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - - size_t num = inputs[0].shape()[0]; - size_t inC = inputs[0].shape()[1]; - size_t inH = inputs[0].shape()[2]; - size_t inW = inputs[0].shape()[3]; - typename Tensor::Vector vec(outputs[0].shape().getElements(), - outputs[0].data()); - vec.zero(); - - Pad(outputs[0].data(), - inputs[0].data(), - num, - inC, - inH, - inW, - pad_); - } - - private: - PadConf pad_; -}; - -/** - * \brief The backward propagation of padding Function. Remove the elements - * in the padding positions of forward. - * - * Argument in this Function: - * \param pad_ The same meaning as it in PadFunc. - * \param inputs The gradient with respect to the output value of PadFunc. - * \param outputs The gradient with respect to the input value of PadFunc. 
- */ - -template -class PadGradFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { pad_ = castToPadConf(config); } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - - size_t num = outputs[0].shape()[0]; - size_t inC = outputs[0].shape()[1]; - size_t inH = outputs[0].shape()[2]; - size_t inW = outputs[0].shape()[3]; - - if (outputs[0].getArgType() != ADD_TO) { - // for unit test - typename Tensor::Vector tmp( - outputs[0].shape().getElements(), outputs[0].data()); - tmp.zero(); - } - - PadGrad(outputs[0].data(), - inputs[0].data(), - num, - inC, - inH, - inW, - pad_); - } - - private: - PadConf pad_; -}; - -REGISTER_TYPED_FUNC(Pad, CPU, PadFunc); -REGISTER_TYPED_FUNC(PadGrad, CPU, PadGradFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(Pad, GPU, PadFunc); -REGISTER_TYPED_FUNC(PadGrad, GPU, PadGradFunc); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/PadOp.h b/paddle/legacy/function/PadOp.h deleted file mode 100644 index 4b0aa4014bbb3e94a2f8632a6be7009a99f3fe32..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/PadOp.h +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "Function.h" - -namespace paddle { - -struct PadConf { - /// how many values to add before/after the data along channel dimension. - std::vector channel; - /// how many values to add before/after the data along height dimension. - std::vector height; - /// how many values to add before/after the data along width dimension. - std::vector width; -}; - -/** - * \brief This funtion pads zeros to inputs according to the specify dimension. - * The input and output is a 4D tensor. Padding zeros from the 2nd to - * the 4th dimenstion according argument of pad. - * - * \param[out] outputs save results. - * \param[in] inputs input data. - * \param[in] num batch size of input data. - * \param[in] inC channel number of input data. - * \param[in] inH height of input data. - * \param[in] inH with of input data. - * \param[in] pad the padding config, contains the size along the - * specify dimension. - */ -template -void Pad(real* outputs, - const real* inputs, - const int num, - const int inC, - const int inH, - const int inW, - const PadConf& pad); - -/** - * \brief Padding operation backward. - * - * \param[out] inGrad gradients of previous layer. - * \param[in] outGrad output gradients. - * \param[in] num batch size of input data. - * \param[in] inC channel number of input data. - * \param[in] inH height of input data. - * \param[in] inH with of input data. - * \param[in] pad the padding config, contains the size along the - * specify dimension. - */ -template -void PadGrad(real* inGrad, - const real* outGrad, - const int num, - const int inC, - const int inH, - const int inW, - const PadConf& pad); -} // namespace paddle diff --git a/paddle/legacy/function/PadOpGpu.cu b/paddle/legacy/function/PadOpGpu.cu deleted file mode 100644 index 01d9b5c3b2af60fe82a9b03ac6322a619e7805ef..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/PadOpGpu.cu +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PadOp.h" -#include "hl_base.h" - -namespace paddle { - -__global__ void KePad(real* outputs, - const real* inputs, - int inC, - int inH, - int inW, - int padc, - int padh, - int padw, - int outC, - int outH, - int outW, - int nthreads) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < nthreads) { - const int w = idx % inW; - const int h = (idx / inW) % inH; - const int c = (idx / inW / inH) % inC; - const int n = idx / inW / inH / inC; - - const int off = ((n * outC + c + padc) * outH + h + padh) * outW + padw + w; - outputs[off] = inputs[idx]; - } -} - -template <> -void Pad(real* outputs, - const real* inputs, - const int num, - const int inC, - const int inH, - const int inW, - const PadConf& pad) { - size_t nth = num * inC * inH * inW; - int blockSize = 1024; - int gridSize = (nth + 1024 - 1) / 1024; - int cstart = pad.channel[0], cend = pad.channel[1]; - int hstart = pad.height[0], hend = pad.height[1]; - int wstart = pad.width[0], wend = pad.width[1]; - int outC = inC + cstart + cend; - int outH = inH + hstart + hend; - int outW = inW + wstart + wend; - KePad<<>>(outputs, - inputs, - inC, - inH, - inW, - cstart, - hstart, - wstart, - outC, - outH, - outW, - nth); - CHECK_SYNC("Pad"); -} - -__global__ void KePadDiff(real* inGrad, - const real* outGrad, - int inC, - int inH, - int inW, - int padc, - int padh, - int padw, - int outC, - int outH, - int outW, - int nthreads) { - 
const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < nthreads) { - const int w = idx % inW; - const int h = (idx / inW) % inH; - const int c = (idx / inW / inH) % inC; - const int n = idx / inW / inH / inC; - - const int off = ((n * outC + c + padc) * outH + h + padh) * outW + padw + w; - inGrad[idx] += outGrad[off]; - } -} - -template <> -void PadGrad(real* inGrad, - const real* outGrad, - const int num, - const int inC, - const int inH, - const int inW, - const PadConf& pad) { - int nth = num * inC * inH * inW; - int blockSize = 1024; - int gridSize = (nth + 1024 - 1) / 1024; - int cstart = pad.channel[0], cend = pad.channel[1]; - int hstart = pad.height[0], hend = pad.height[1]; - int wstart = pad.width[0], wend = pad.width[1]; - int outC = inC + cstart + cend; - int outH = inH + hstart + hend; - int outW = inW + wstart + wend; - KePadDiff<<>>(inGrad, - outGrad, - inC, - inH, - inW, - cstart, - hstart, - wstart, - outC, - outH, - outW, - nth); - CHECK_SYNC("PadGrad"); -} - -} // namespace paddle diff --git a/paddle/legacy/function/PadOpTest.cpp b/paddle/legacy/function/PadOpTest.cpp deleted file mode 100644 index a4474f8549887f03f77448eccbe40911379a36ca..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/PadOpTest.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "FunctionTest.h" - -namespace paddle { - -TEST(Pad, real) { - for (size_t numSamples : {5, 32}) { - for (size_t channels : {1, 5, 32}) { - for (size_t imgSizeH : {5, 33, 100}) { - for (size_t imgSizeW : {5, 32, 96}) { - VLOG(3) << " numSamples=" << numSamples << " channels=" << channels - << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; - for (bool test_grad : {false, true}) { - CpuGpuFuncCompare compare( - test_grad ? "PadGrad" : "Pad", - FuncConfig() - .set>("channel", {2, 3}) - .set>("height", {1, 2}) - .set>("width", {3, 2})); - TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; - TensorShape outDims{ - numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5}; - compare.addInputs( - BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); - compare.addOutputs(BufferArg( - VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, ASSIGN_TO)); - compare.run(); - } - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/RowConvOp.cpp b/paddle/legacy/function/RowConvOp.cpp deleted file mode 100644 index 3be50e80d71fabdb3e7a22bfc061da09412c132d..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/RowConvOp.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "RowConvOp.h" -#include -#include "paddle/legacy/math/Vector.h" - -namespace paddle { - -template <> -void RowConv(CpuMatrix& out, - const CpuMatrix& in, - const CpuMatrix& filter, - const CpuIVector& seq) { - const int* starts = seq.getData(); - const size_t numSeq = seq.getSize() - 1; - const size_t contextLength = filter.getHeight(); - for (size_t i = 0; i < numSeq; ++i) { - size_t begin = starts[i]; - size_t end = starts[i + 1]; - for (size_t j = begin; j < end; ++j) { - MatrixPtr x; - MatrixPtr w; - if ((j + contextLength) < end) { - x = (const_cast(in)).subMatrix(j, contextLength); - w = (const_cast(filter)).subMatrix(0, contextLength); - } else { - x = (const_cast(in)).subMatrix(j, end - j); - w = (const_cast(filter)).subMatrix(0, end - j); - } - MatrixPtr y = out.subMatrix(j, 1); - y->addDotMulVMM(*x, *w); - } - } -} - -template <> -void RowConvGrad(const CpuMatrix& outG, - const CpuMatrix& in, - const CpuMatrix& filter, - CpuMatrix& inG, - CpuMatrix& filterG, - const CpuIVector& seq) { - // gradient w.r.t filter - const int* starts = seq.getData(); - const size_t numSeq = seq.getSize() - 1; - const size_t contextLength = filter.getHeight(); - if (filterG) { - for (size_t i = 0; i < numSeq; ++i) { - size_t begin = starts[i]; - size_t end = starts[i + 1]; - size_t steps = end - begin; - for (size_t j = 0; j < contextLength && (begin + j) < end; ++j) { - MatrixPtr x = - (const_cast(in)).subMatrix(begin + j, steps - j); - MatrixPtr dy = - (const_cast(outG)).subMatrix(begin, steps - j); - MatrixPtr dw = filterG.subMatrix(j, 1); - dw->addDotMulVMM(*dy, *x); - } - } - } - - // gradient w.r.t input feature - if (inG) { - for (size_t i = 0; i < numSeq; ++i) { - size_t begin = starts[i]; - size_t end = starts[i + 1]; - size_t steps = end - begin; - for (size_t j = 0; j < steps; ++j) { - MatrixPtr dx = inG.subMatrix(begin + j, 1); - for (size_t t = 0; t < contextLength; ++t) { - if (int(j - t) >= 0) { - MatrixPtr dy = - 
(const_cast(outG)).subMatrix(begin + j - t, 1); - MatrixPtr w = (const_cast(filter)).subMatrix(t, 1); - dx->addDotMul(*dy, *w, 1.0, 1.0); - } - } - } - } - } -} - -/** - * \brief The row convolution is called lookahead convolution. It is firstly - * introduced in deep-speech2 system. The bidirectional RNN that learns - * representation for a sequence by performing a forward and a backward pass - * through the entire sequence. However, unlike unidirectional RNNs, - * bidirectional RNNs are challenging to deploy in an online and low-latency - * setting. The lookahead convolution incorporates information from future - * subsequences in a computationally efficient manner to improve unidirectional - * recurrent neural networks. - * - * The connection of row convolution is different form the 1D sequence - * convolution. Assumed that, the future context-length is k, that is to say, - * it can get the output at timestep t by using the the input feature from t-th - * timestep to (t+k)-th timestep. Assumed that the hidden dim of input - * activations are d, the activations r_t for the new layer at time-step t are: - * - * - * -- k + 1 - * r(t,i) = > W(i,j) * h(t+j-1, i), for (1 <= i <= d) - * -- j = 1 - * - * - * The weight shape is: (k + 1) x d - * Function Arguments: - * - * \param inputs[0] The input activations. - * \param inputs[0] The filter (or weight) and shape is (k+1) x d. - * \param outputs[1] The output activations. - * - * [1] Dario Amodei, etc. Deep Speech 2 : End-to-End Speech Recognition in - * English - * and Mandarin. https://arxiv.org/abs/1512.02595 - */ - -template -class RowConvFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override {} - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - // check - CHECK_EQ(2UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - // TODO(qingqing): support ASSIGN_TO. 
- CHECK_EQ(outputs[0].getArgType(), ADD_TO); - CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) - << "SequenceArg required here."; - const auto in = dynamic_cast(inputs[0]); - auto out = dynamic_cast(outputs[0]); - auto w = inputs[1]; - CHECK(in.data() && out.data() && in.getSequenceId().data()); - CHECK_EQ(in.shape().ndims(), 2UL); - CHECK(in.shape() == out.shape()); - CHECK_EQ(w.shape()[1], in.shape()[1]); - - auto outMat = out.matrix(); - const auto inMat = in.matrix(); - const auto wMat = w.matrix(); - const auto seqId = in.getSequenceId().vector(); - - RowConv(outMat, inMat, wMat, seqId); - } -}; - -/** - * \brief The backward of row convolution function. This function calculated - * the gradient w.r.t filter and the gradient w.r.t input activations(or data). - * - * Argument in this Function: - * - * \param inputs[0] The gradient w.r.t output activations. - * \param inputs[1] The input activations. - * \param inputs[2] The filter (or weight) and shape is (k+1) x d. - * \param outputs[0] The gradient w.r.t input activations. - * \param outputs[1] The gradient w.r.r filter. - * - * Abbreviation: - * w.r.t: with respect to. 
- */ - -template -class RowConvGradFunc : public FunctionBase { - // TODO(qingqing): split into RowConvDataFunc and RowConvWeightFunc - public: - void init(const FuncConfig& config) override {} - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - // check - CHECK_EQ(3UL, inputs.size()); - CHECK_EQ(2UL, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - CHECK_EQ(outputs[1].getArgType(), ADD_TO); - CHECK(inputs[0].isSequenceArg() && inputs[1].isSequenceArg() && - outputs[0].isSequenceArg()) - << "SequenceArg required here."; - - const auto outGrad = dynamic_cast(inputs[0]); - const auto in = dynamic_cast(inputs[1]); - const auto w = inputs[2]; - auto inGrad = dynamic_cast(outputs[0]); - auto wGrad = outputs[1]; - - CHECK_EQ(in.shape().ndims(), 2UL); - CHECK(in.shape() == inGrad.shape()); - CHECK(in.shape() == outGrad.shape()); - CHECK_EQ(wGrad.shape()[1], in.shape()[1]); - - const auto outGMat = outGrad.matrix(); - const auto inMat = in.matrix(); - const auto wMat = w.matrix(); - auto inGMat = inGrad.data() - ? inGrad.matrix() - : typename Tensor::Matrix(nullptr, 0, 0); - auto wGMat = wGrad.data() - ? wGrad.matrix() - : typename Tensor::Matrix(nullptr, 0, 0); - const auto seqId = in.getSequenceId().vector(); - - RowConvGrad(outGMat, inMat, wMat, inGMat, wGMat, seqId); - } -}; - -REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc); -REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc); -REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/RowConvOp.h b/paddle/legacy/function/RowConvOp.h deleted file mode 100644 index bfe775e014d56b574db10da9ca560d9d78888f57..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/RowConvOp.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Function.h" - -namespace paddle { - -/** - * \brief The forward of row convolution. - * - * \param[out] out The output data and shape is h x d. h is the sum of - * time steps of all samples in one mini-batch. - * \param[in] in The input data and shape is h x d. - * \param[in] filter The filter and shape is k x d. The lookahead step - * number plus one equals k. - * \param[in] seq The sequence start positions. - * - */ -template -void RowConv(typename Tensor::Matrix& out, - const typename Tensor::Matrix& in, - const typename Tensor::Matrix& filter, - const typename Tensor::Vector& seq); - -/** - * \brief The backward of row convolution. - * - * \param[in] outG The gradient w.r.t output data. - * \param[in] in The input data. - * \param[in] filter The filter. - * \param[out] inG The gradient w.r.t input data. - * \param[out] filterG The gradient w.r.t filter. - * \param[in] seq The sequence start positions. 
- * - */ -template -void RowConvGrad(const typename Tensor::Matrix& outG, - const typename Tensor::Matrix& in, - const typename Tensor::Matrix& filter, - typename Tensor::Matrix& inG, - typename Tensor::Matrix& filterG, - const typename Tensor::Vector& seq); -} // namespace paddle diff --git a/paddle/legacy/function/RowConvOpGpu.cu b/paddle/legacy/function/RowConvOpGpu.cu deleted file mode 100644 index a6d2e4c7e38b12bcd448a85f9e74df226e6984af..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/RowConvOpGpu.cu +++ /dev/null @@ -1,373 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/cuda/include/hl_base.h" -#include "paddle/legacy/function/RowConvOp.h" - -namespace paddle { - -template -__global__ void KeRowConv(real* y, - const real* x, - const real* w, - const int* starts, - const int height, - const int width, - const int numSeq, - const int context) { - const int tidx = threadIdx.x; - const int tidy = threadIdx.y; - const int blky = blockDim.y; - const int gidx = blockIdx.x * blockDim.x; - - __shared__ real sw[BLOCK_H][BLOCK_W]; - - for (int i = tidy; i < context; i += blky) { - sw[i][tidx] = gidx + tidx < width ? 
w[i * width + gidx + tidx] : 0.0; - } - - __syncthreads(); - - for (int i = 0; i < numSeq; ++i) { - const int start = starts[i]; - const int end = starts[i + 1]; - const int steps = end - start; - for (int j = tidy; j < steps; j += blky) { - real sum = 0; - int off = (start + j) * width; - for (int t = 0; t < context; ++t) { - if ((start + j + t) < end) { - int xoff = off + t * width; - real xVal = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0; - sum += sw[t][tidx] * xVal; - } - } - if (gidx + tidx < width) { - y[off + gidx + tidx] += sum; - } - } - } -} - -__global__ void KeRowConv2(real* y, - const real* x, - const real* w, - const int* starts, - const int height, - const int width, - const int numSeq, - const int context) { - const int tidx = threadIdx.x; - const int tidy = threadIdx.y; - const int blky = blockDim.y; - const int gidx = blockIdx.x * blockDim.x; - - for (int i = 0; i < numSeq; ++i) { - const int start = starts[i]; - const int end = starts[i + 1]; - const int steps = end - start; - for (int j = tidy; j < steps; j += blky) { - int off = (start + j) * width; - real sum = 0; - for (int t = 0; t < context && (start + j + t) < end; ++t) { - int xoff = off + t * width; - real xd = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0; - real wd = gidx + tidx < width ? 
w[t * width + gidx + tidx] : 0.0; - sum += wd * xd; - } - if (gidx + tidx < width) { - y[off + gidx + tidx] += sum; - } - } - } -} - -template <> -void RowConv(GpuMatrix& out, // NOLINT - const GpuMatrix& in, - const GpuMatrix& filter, - const GpuIVector& seq) { - const size_t numSeq = seq.getSize() - 1; - const size_t contextLength = filter.getHeight(); - const size_t height = in.getHeight(); - const size_t width = in.getWidth(); - - real* y = out.getData(); - const real* x = in.getData(); - const real* w = filter.getData(); - const int* starts = seq.getData(); - - dim3 dimBlock(32, 32); - dim3 dimGrid(DIVUP(width, dimBlock.x), 1); - - if (contextLength <= 32) { - KeRowConv<32, 32><<>>( - y, x, w, starts, height, width, numSeq, contextLength); - } else { - KeRowConv2<<>>( - y, x, w, starts, height, width, numSeq, contextLength); - } - CHECK_SYNC("RowConv"); -} - -template -__global__ void KeRowConvBwWeight(real* dw, - const real* x, - const real* dy, - const int* starts, - const int height, - const int width, - const int numSeq, - const int context) { - const int tidx = threadIdx.x; - const int tidy = threadIdx.y; - const int blky = blockDim.y; - const int gidx = blockIdx.x * blockDim.x; - - __shared__ real sh_x[BLOCK_W][BLOCK_H]; - __shared__ real sh_dy[BLOCK_W][BLOCK_H + CONTEXT - 1]; - __shared__ real sh_dw[CONTEXT][BLOCK_W]; - - if (tidy < context) { - sh_dw[tidy][tidx] = 0.0; - } - __syncthreads(); - - // NOTE(zcd): temporary solution - unsigned mask = 0u; - CREATE_SHFL_MASK(mask, true); - - for (int i = 0; i < numSeq; ++i) { - const int start = starts[i]; - const int end = starts[i + 1]; - const int steps = end - start; - const int size = ((steps + BLOCK_H - 1) / BLOCK_H) * BLOCK_H; - for (int j = tidy; j < size; j += BLOCK_H) { - int xoff = gidx + tidx; - int yoff = start + j; - - // transpose - sh_x[tidx][tidy] = - (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; - sh_dy[tidx][tidy + context - 1] = - (xoff < width && yoff < end) ? 
dy[yoff * width + xoff] : 0.0; - __syncthreads(); - if (tidy < (context - 1)) { - yoff = yoff - context + 1; - sh_dy[tidx][tidy] = - (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0; - } - __syncthreads(); - - for (int t = 0; t < context; t++) { - real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx + context - 1 - t]; - __syncthreads(); - // warp size and blockDim.x is 32. - - for (int offset = 16; offset > 0; offset /= 2) - val += __shfl_down_sync(mask, val, offset); - - __syncthreads(); - if (tidx == 0) { - sh_dw[t][tidy] += val; - } - __syncthreads(); - } - } - } - - for (int t = tidy; (t < context) && ((gidx + tidx) < width); t += blky) { - dw[t * width + gidx + tidx] += sh_dw[t][tidx]; - } -} - -template -__global__ void KeRowConvBwWeight2(real* dw, - const real* x, - const real* dy, - const int* starts, - const int height, - const int width, - const int numSeq, - const int context) { - const int tidx = threadIdx.x; - const int tidy = threadIdx.y; - const int gidx = blockIdx.x * blockDim.x; - - __shared__ real sh_x[BLOCK_H][BLOCK_W]; - __shared__ real sh_dy[BLOCK_H][BLOCK_W]; - - // NOTE(zcd): temporary solution - unsigned mask = 0u; - CREATE_SHFL_MASK(mask, true); - - for (int i = 0; i < numSeq; ++i) { - const int start = starts[i]; - const int end = starts[i + 1]; - const int steps = end - start; - - const int size = ((steps + BLOCK_H - 1) / BLOCK_H) * BLOCK_H; - for (int j = tidy; j < size; j += BLOCK_H) { - int xoff = gidx + tidx; - int yoff = start + j; - - // transpose - sh_x[tidx][tidy] = - (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; - __syncthreads(); - - for (int t = 0; t < context; t++) { - sh_dy[tidx][tidy] = - (xoff < width && (yoff - t) >= start && yoff - t < end) - ? dy[(yoff - t) * width + xoff] - : 0.0; - __syncthreads(); - - real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx]; - __syncthreads(); - // warp size and blockDim.x is 32. 
- for (int offset = 16; offset > 0; offset /= 2) - val += __shfl_down_sync(mask, val, offset); - - __syncthreads(); - - if (tidx == 0 && (gidx + tidy) < width) { - dw[t * width + gidx + tidy] += val; - } - } - } - } -} - -template -__global__ void KeRowConvBwData(real* dx, - const real* w, - const real* dy, - const int* starts, - const int height, - const int width, - const int numSeq, - const int context) { - const int tidx = threadIdx.x; - const int tidy = threadIdx.y; - const int blky = blockDim.y; - const int gidx = blockIdx.x * blockDim.x; - - __shared__ real sw[BLOCK_H][BLOCK_W]; - - for (int i = tidy; i < context; i += blky) { - sw[i][tidx] = gidx + tidx < width ? w[i * width + gidx + tidx] : 0.0; - } - - __syncthreads(); - - for (int i = 0; i < numSeq; ++i) { - const int start = starts[i]; - const int end = starts[i + 1]; - const int steps = end - start; - for (int j = tidy; j < steps; j += blky) { - real sum = 0; - int off = (start + j) * width; - for (int t = 0; t < context && (j - t) >= 0; ++t) { - int dyOff = off - t * width; - real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0; - sum += sw[t][tidx] * dyVal; - } - if (gidx + tidx < width) { - dx[off + gidx + tidx] += sum; - } - } - } -} - -__global__ void KeRowConvBwData2(real* dx, - const real* w, - const real* dy, - const int* starts, - const int height, - const int width, - const int numSeq, - const int context) { - const int tidx = threadIdx.x; - const int tidy = threadIdx.y; - const int blky = blockDim.y; - const int gidx = blockIdx.x * blockDim.x; - - for (int i = 0; i < numSeq; ++i) { - const int start = starts[i]; - const int end = starts[i + 1]; - const int steps = end - start; - for (int j = tidy; j < steps; j += blky) { - real sum = 0; - int off = (start + j) * width; - for (int t = 0; t < context && (j - t) >= 0; ++t) { - int dyOff = off - t * width; - real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0; - real wVal = gidx + tidx < width ? 
w[t * width + gidx + tidx] : 0.0; - sum += wVal * dyVal; - } - if (gidx + tidx < width) { - dx[off + gidx + tidx] += sum; - } - } - } -} - -template <> -void RowConvGrad(const GpuMatrix& outG, - const GpuMatrix& in, - const GpuMatrix& filter, - GpuMatrix& inG, // NOLINT - GpuMatrix& filterG, // NOLINT - const GpuIVector& seq) { - const size_t numSeq = seq.getSize() - 1; - const size_t contextLength = filter.getHeight(); - const size_t height = in.getHeight(); - const size_t width = in.getWidth(); - - const real* dy = outG.getData(); - const real* x = in.getData(); - const real* w = filter.getData(); - const int* starts = seq.getData(); - - if (filterG) { - dim3 dimBlock(32, 32); - dim3 dimGrid(DIVUP(width, dimBlock.x), 1); - real* dw = filterG.getData(); - if (contextLength <= 32) { - KeRowConvBwWeight<32, 32, 32><<>>( - dw, x, dy, starts, height, width, numSeq, contextLength); - } else { - KeRowConvBwWeight2<32, 32><<>>( - dw, x, dy, starts, height, width, numSeq, contextLength); - } - } - - if (inG) { - real* dx = inG.getData(); - dim3 dimBlock2(32, 32); - dim3 dimGrid2(DIVUP(width, dimBlock2.x), 1); - if (contextLength <= 64) { - KeRowConvBwData<32, 64><<>>( - dx, w, dy, starts, height, width, numSeq, contextLength); - } else { - KeRowConvBwData2<<>>( - dx, w, dy, starts, height, width, numSeq, contextLength); - } - } - - CHECK_SYNC("RowConvGrad"); -} - -} // namespace paddle diff --git a/paddle/legacy/function/RowConvOpTest.cpp b/paddle/legacy/function/RowConvOpTest.cpp deleted file mode 100644 index bbc29ad6a6b58b5d7619b26a52c07c3e7bb385b2..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/RowConvOpTest.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "FunctionTest.h" - -namespace paddle { - -void testRowConvFw(size_t batchSize, size_t dim, size_t contextLength) { - CpuGpuFuncCompare test("RowConv", FuncConfig()); - - test.addSequence(SequenceIdArg(TensorShape{batchSize})); - test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim})); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim})); - - test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}), - ADD_TO); - - test.run(); -} - -void testRowConvBw(size_t batchSize, size_t dim, size_t contextLength) { - CpuGpuFuncCompare test("RowConvGrad", FuncConfig()); - - test.addSequence(SequenceIdArg(TensorShape{batchSize})); - test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim})); - test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim})); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim})); - - test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}), - ADD_TO); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}), - ADD_TO); - - test.run(); -} - -TEST(RowConv, real) { - for (size_t numSamples : {17, 129, 2020}) { - for (size_t dim : {16, 512, 2560}) { - for (size_t context : {3, 19, 65}) { - VLOG(3) << " numSamples=" << numSamples << " dim=" << dim - << " context length=" << context; - testRowConvFw(numSamples, dim, context); - testRowConvBw(numSamples, dim, context); - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/ScaleSubRegionOp.cpp 
b/paddle/legacy/function/ScaleSubRegionOp.cpp deleted file mode 100644 index 03a422a740dca4499532cdb1bdfbf3d3ab272a9a..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ScaleSubRegionOp.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ScaleSubRegionOp.h" -#include "paddle/legacy/function/TensorShape.h" - -namespace paddle { - -template <> -void ScaleSubRegion(real* outputs, - const real* inputs, - const real* indices, - const TensorShape shape, - const FuncConfig& conf) { - real value = conf.get("value"); - - int number = shape[0]; - int channel = shape[1]; - int height = shape[2]; - int width = shape[3]; - - memcpy(outputs, inputs, number * channel * height * width * sizeof(real)); - - for (int n = 0; n < number; ++n) { - // indices start from 1 - int offset = n * 6; - for (int c = indices[offset] - 1; c < indices[offset + 1]; ++c) { - for (int h = indices[offset + 2] - 1; h < indices[offset + 3]; ++h) { - for (int w = indices[offset + 4] - 1; w < indices[offset + 5]; ++w) { - int idx = ((n * channel + c) * height + h) * width + w; - outputs[idx] *= value; - } - } - } - } -} - -template <> -void ScaleSubRegionGrad(const real* inGrad, - real* outGrad, - const real* indices, - const TensorShape shape, - const FuncConfig& conf) { - real value = conf.get("value"); - - int number = shape[0]; - int channel = shape[1]; - int height = shape[2]; - int width = 
shape[3]; - - for (int n = 0; n < number; ++n) { - for (int c = 0; c < channel; ++c) { - for (int h = 0; h < height; ++h) { - for (int w = 0; w < width; ++w) { - int idx = ((n * channel + c) * height + h) * width + w; - int offset = n * 6; - if (c >= (indices[offset] - 1) && c <= (indices[offset + 1] - 1) && - h >= (indices[offset + 2] - 1) && - h <= (indices[offset + 3] - 1) && - w >= (indices[offset + 4] - 1) && - w <= (indices[offset + 5] - 1)) { - outGrad[idx] += inGrad[idx] * value; - } else { - outGrad[idx] += inGrad[idx]; - } - } - } - } - } -} - -/** - * \brief For each instance, ScaleSubRegion can be used to multiply a value to - * a specified sub continuous region. By providing start index and end - * index for C/H/W, you can specify the location and shape of the region. - * - * Argument in this Function: - * \param inputs A 4-D tensor with shape [N, C, H, W], only one input. - * \param indices A 2-D tensor with shape [N, 6], indicates the sub region. - * \param outputs A 4-D tensor with same shape as inputs, output value. - */ -template -class ScaleSubRegionFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { conf_ = config; } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(2UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - - TensorShape shape = inputs[0].shape(); - - ScaleSubRegion(outputs[0].data(), - inputs[0].data(), - inputs[1].data(), - shape, - conf_); - } - - private: - FuncConfig conf_; -}; - -/** - * \brief The backward propagation of ScaleSubRegion Function. - * - * Argument in this Function: - * \param inputs A 4-D tensor with shape [N, C, H, W], output gradient. - * \param indices A 2-D tensor with shape [N, 6], indicates the sub region. - * \param outputs A 4-D tensor with shape [N, C, H, W], gradient of input value. 
- */ - -template -class ScaleSubRegionGradFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override { conf_ = config; } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(2UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ADD_TO); - - TensorShape shape = inputs[0].shape(); - - ScaleSubRegionGrad(inputs[0].data(), - outputs[0].data(), - inputs[1].data(), - shape, - conf_); - } - - private: - FuncConfig conf_; -}; - -REGISTER_TYPED_FUNC(ScaleSubRegion, CPU, ScaleSubRegionFunc); -REGISTER_TYPED_FUNC(ScaleSubRegionGrad, CPU, ScaleSubRegionGradFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(ScaleSubRegion, GPU, ScaleSubRegionFunc); -REGISTER_TYPED_FUNC(ScaleSubRegionGrad, GPU, ScaleSubRegionGradFunc); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/ScaleSubRegionOp.h b/paddle/legacy/function/ScaleSubRegionOp.h deleted file mode 100644 index ed7d6b8ad3caa14b0379bc9887ff5fd1a83ac1cc..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ScaleSubRegionOp.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Function.h" - -namespace paddle { - -/** - * \brief Function to multiply a value to values in specified sub continuous - * region. 
Indices must be provided to indcate the location and shape of - * the region and the multiplied value is passed by configure variable. - * - * - * \param[out] outputs Output value. - * \param[in] inputs Input data which contains NCHW information. - * \param[in] indices Indices data to indcate the sub region. - * \param[in] shape Tensor shape of input value. - * \param[in] conf Configure variable which contains the multiplied value. - */ -template -void ScaleSubRegion(real* outputs, - const real* inputs, - const real* indices, - const TensorShape shape, - const FuncConfig& conf); - -/** - * \brief Backward propagation function of ScaleSubRegion. - * - * \param[out] inGrad Gradients of previous layer. - * \param[in] outGrad Output gradient. - * \param[in] indices Indices data. - * \param[in] shape The Shape of input tensor. - * \param[in] conf Configure variable. - */ -template -void ScaleSubRegionGrad(const real* inGrad, - real* outGrad, - const real* indices, - const TensorShape shape, - const FuncConfig& conf); -} // namespace paddle diff --git a/paddle/legacy/function/ScaleSubRegionOpGpu.cu b/paddle/legacy/function/ScaleSubRegionOpGpu.cu deleted file mode 100644 index 9784c51ae03d4b67ce52a16be5d6ab98bb1ce4d4..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ScaleSubRegionOpGpu.cu +++ /dev/null @@ -1,116 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ScaleSubRegionOp.h" -#include "hl_base.h" - -namespace paddle { - -__global__ void KeScaleSubRegion(real* outputs, - const real* inputs, - const real* indices, - real value, - int channel, - int height, - int width, - int nthreads) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < nthreads) { - const int w = idx % width; - const int h = (idx / width) % height; - const int c = (idx / width / height) % channel; - const int n = idx / width / height / channel; - - const int offset = n * 6; - if (c >= (indices[offset] - 1) && c <= (indices[offset + 1] - 1) && - h >= (indices[offset + 2] - 1) && h <= (indices[offset + 3] - 1) && - w >= (indices[offset + 4] - 1) && w <= (indices[offset + 5] - 1)) { - outputs[idx] = inputs[idx] * value; - } else { - outputs[idx] = inputs[idx]; - } - } -} - -template <> -void ScaleSubRegion(real* outputs, - const real* inputs, - const real* indices, - const TensorShape shape, - const FuncConfig& conf) { - real value = conf.get("value"); - - int number = shape[0]; - int channel = shape[1]; - int height = shape[2]; - int width = shape[3]; - - size_t nth = number * channel * height * width; - int blockSize = 1024; - int gridSize = (nth + blockSize - 1) / blockSize; - - KeScaleSubRegion<<>>( - outputs, inputs, indices, value, channel, height, width, nth); - CHECK_SYNC("ScaleSubRegion"); -} - -__global__ void KeScaleSubRegionDiff(const real* inGrad, - real* outGrad, - const real* indices, - real value, - int channel, - int height, - int width, - int nthreads) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < nthreads) { - const int w = idx % width; - const int h = (idx / width) % height; - const int c = (idx / width / height) % channel; - const int n = idx / width / height / channel; - - const int offset = n * 6; - if (c >= (indices[offset] - 1) && c <= (indices[offset + 1] - 1) && - h >= (indices[offset + 2] - 1) && h <= (indices[offset + 3] - 1) && - w >= (indices[offset + 4] - 1) && w 
<= (indices[offset + 5] - 1)) { - outGrad[idx] += inGrad[idx] * value; - } else { - outGrad[idx] += inGrad[idx]; - } - } -} - -template <> -void ScaleSubRegionGrad(const real* inGrad, - real* outGrad, - const real* indices, - const TensorShape shape, - const FuncConfig& conf) { - real value = conf.get("value"); - - int number = shape[0]; - int channel = shape[1]; - int height = shape[2]; - int width = shape[3]; - - size_t nth = number * channel * height * width; - int blockSize = 1024; - int gridSize = (nth + blockSize - 1) / blockSize; - - KeScaleSubRegionDiff<<>>( - inGrad, outGrad, indices, value, channel, height, width, nth); - CHECK_SYNC("ScaleSubRegionGrad"); -} - -} // namespace paddle diff --git a/paddle/legacy/function/ScaleSubRegionOpTest.cpp b/paddle/legacy/function/ScaleSubRegionOpTest.cpp deleted file mode 100644 index dd6ee67108948cf1158d7e3788dd67bfef70409a..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/ScaleSubRegionOpTest.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "FunctionTest.h" - -namespace paddle { - -TEST(ScaleSubRegion, real) { - for (size_t numSamples : {5, 32}) { - for (size_t channels : {5, 32}) { - for (size_t imgSizeH : {5, 33}) { - for (size_t imgSizeW : {5, 32}) { - for (real value : {-0.5, 0.0, 0.5}) { - for (bool firstHalf : {false, true}) { - VLOG(3) << " numSamples=" << numSamples - << " channels=" << channels << " imgSizeH=" << imgSizeH - << " imgSizeW=" << imgSizeW; - - for (bool testGrad : {false, true}) { - CpuGpuFuncCompare compare( - testGrad ? "ScaleSubRegionGrad" : "ScaleSubRegion", - FuncConfig().set("value", value)); - - TensorShape shape{numSamples, channels, imgSizeH, imgSizeW}; - TensorShape indicesShape{numSamples, 6}; - - compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); - compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, indicesShape)); - - compare.registerInitCallback([=](BufferArg& arg, size_t index) { - if (index == 1) { - real* data = (real*)arg.data(); - - for (size_t i = 0; i < numSamples; ++i) { - size_t offset = i * 6; - data[offset] = firstHalf ? 1 : channels / 2; - data[offset + 1] = firstHalf ? channels / 2 : channels; - data[offset + 2] = firstHalf ? 1 : imgSizeH / 2; - data[offset + 3] = firstHalf ? imgSizeH / 2 : imgSizeH; - data[offset + 4] = firstHalf ? 1 : imgSizeW / 2; - data[offset + 5] = firstHalf ? imgSizeW / 2 : imgSizeW; - } - } - }); - - compare.addOutputs( - BufferArg( - VALUE_TYPE_FLOAT, shape, testGrad ? ADD_TO : ASSIGN_TO), - testGrad ? ADD_TO : ASSIGN_TO); - compare.run(); - } - } - } - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/SwitchOp.cpp b/paddle/legacy/function/SwitchOp.cpp deleted file mode 100644 index c6accd18039180aa521c18193e576d22e11f5a97..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/SwitchOp.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "SwitchOp.h" -#include "paddle/legacy/math/Vector.h" - -namespace paddle { - -template <> -void NCHW2NHWC(real* outputs, - const real* inputs, - const int num, - const int inC, - const int inH, - const int inW, - const int argType) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < inC; ++c) { - for (int h = 0; h < inH; ++h) { - for (int w = 0; w < inW; ++w) { - if (argType == ADD_TO) { - outputs[((n * inH + h) * inW + w) * inC + c] += *(inputs++); - } else { - outputs[((n * inH + h) * inW + w) * inC + c] = *(inputs++); - } - } - } - } - } -} - -template <> -void NHWC2NCHW(real* outputs, - const real* inputs, - const int num, - const int inH, - const int inW, - const int inC, - const int argType) { - for (int n = 0; n < num; ++n) { - for (int h = 0; h < inH; ++h) { - for (int w = 0; w < inW; ++w) { - for (int c = 0; c < inC; ++c) { - if (argType == ADD_TO) { - outputs[((n * inC + c) * inH + h) * inW + w] += *(inputs++); - } else { - outputs[((n * inC + c) * inH + h) * inW + w] = *(inputs++); - } - } - } - } - } -} - -/** - * \brief Switch dimension order of image input. - * The input and output is a 4D tensor. Switch order - * 'batch_size,channels, height, width' to - * order 'batch_size, height, width, channels'. - * - * Argument in this Function: - * \param inputs input data with order 'batch_size,channels, height, width'. - * \param outputs output data with order 'batch_size, height, width, channels'. 
- */ -template -class NCHW2NHWCFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override {} - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - - size_t num = inputs[0].shape()[0]; - size_t inC = inputs[0].shape()[1]; - size_t inH = inputs[0].shape()[2]; - size_t inW = inputs[0].shape()[3]; - NCHW2NHWC(outputs[0].data(), - inputs[0].data(), - num, - inC, - inH, - inW, - outputs[0].getArgType()); - } -}; - -/** - * \brief Switch dimension order of image input. - * The input and output is a 4D tensor. Switch order - * 'batch_size, height, width, channels' to - * order 'batch_size, channels, height, width'. - * - * Argument in this Function: - * \param inputs input data with order 'batch_size, height, width, channels'. - * \param outputs output data with order 'batch_size, channels, height, width'. - */ -template -class NHWC2NCHWFunc : public FunctionBase { - public: - void init(const FuncConfig& config) override {} - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1UL, inputs.size()); - CHECK_EQ(1UL, outputs.size()); - - size_t num = inputs[0].shape()[0]; - size_t inH = inputs[0].shape()[1]; - size_t inW = inputs[0].shape()[2]; - size_t inC = inputs[0].shape()[3]; - - NHWC2NCHW(outputs[0].data(), - inputs[0].data(), - num, - inH, - inW, - inC, - outputs[0].getArgType()); - } -}; - -REGISTER_TYPED_FUNC(NCHW2NHWC, CPU, NCHW2NHWCFunc); -REGISTER_TYPED_FUNC(NHWC2NCHW, CPU, NHWC2NCHWFunc); -#ifdef PADDLE_WITH_CUDA -REGISTER_TYPED_FUNC(NCHW2NHWC, GPU, NCHW2NHWCFunc); -REGISTER_TYPED_FUNC(NHWC2NCHW, GPU, NHWC2NCHWFunc); -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/SwitchOp.h b/paddle/legacy/function/SwitchOp.h deleted file mode 100644 index b5eb0883cb6d3f1affe6b28bbfe31d1acde88025..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/SwitchOp.h +++ /dev/null @@ -1,66 +0,0 @@ 
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Function.h" - -namespace paddle { - -/** - * \brief This funtion switch dimension order of image input. - * The input and output is a 4D tensor. Switch order 'batch_size, - *channels, height, width' to - * order 'batch_size, height, width, channels'. - * - * \param[out] outputs save results. - * \param[in] inputs input data. - * \param[in] num batch size of input data. - * \param[in] inC channel number of input data. - * \param[in] inH height of input data. - * \param[in] inH with of input data. - * \param[in] argType type of output argument. - */ -template -void NCHW2NHWC(real* outputs, - const real* inputs, - const int num, - const int inC, - const int inH, - const int inW, - const int argtype); - -/** - * \brief This funtion switch dimension order of image input. - * The input and output is a 4D tensor. Switch order 'batch_size, - *height, width, channels' to - * order 'batch_size, channels, height, width'. - * - * \param[out] inGrad gradients of previous layer. - * \param[in] outGrad output gradients. - * \param[in] num batch size of input data. - * \param[in] inH height of input data. - * \param[in] inW with of input data. - * \param[in] inC channel number of input data. - * \param[in] argType type of output argument. 
- */ -template -void NHWC2NCHW(real* inGrad, - const real* outGrad, - const int num, - const int inH, - const int inW, - const int inC, - const int argType); -} // namespace paddle diff --git a/paddle/legacy/function/SwitchOpGpu.cu b/paddle/legacy/function/SwitchOpGpu.cu deleted file mode 100644 index 45390a56c3f776ec18a65a6ba2f7149a7a6ef6c3..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/SwitchOpGpu.cu +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright (c) 2016 Paddle - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "SwitchOp.h" -#include "hl_base.h" - -namespace paddle { - -__global__ void KeNCHW2NHWC(real* outputs, - const real* inputs, - int inC, - int inH, - int inW, - int nthreads, - int argType) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < nthreads) { - const int w = idx % inW; - const int h = (idx / inW) % inH; - const int c = (idx / inW / inH) % inC; - const int n = idx / inW / inH / inC; - - const int off = ((n * inH + h) * inW + w) * inC + c; - if (argType == ADD_TO) { - outputs[off] += inputs[idx]; - } else { - outputs[off] = inputs[idx]; - } - } -} - -template <> -void NCHW2NHWC(real* outputs, - const real* inputs, - const int num, - const int inC, - const int inH, - const int inW, - const int argType) { - size_t nth = num * inC * inH * inW; - int blockSize = 1024; - int gridSize = (nth + 1024 - 1) / 1024; - KeNCHW2NHWC<<>>( - outputs, inputs, inC, inH, inW, nth, argType); - CHECK_SYNC("NCHW2NHWC"); -} - -__global__ void KeNHWC2NCHW(real* outputs, - const real* inputs, - int inH, - int inW, - int inC, - int nthreads, - int argType) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < nthreads) { - const int c = idx % inC; - const int w = (idx / inC) % inW; - const int h = (idx / inC / inW) % inH; - const int n = idx / inW / inH / inC; - - const int off = ((n * inC + c) * inH + h) * inW + w; - if (argType == ADD_TO) { - outputs[off] += inputs[idx]; - } else { - outputs[off] = inputs[idx]; - } - } -} - -template <> -void NHWC2NCHW(real* outputs, - const real* inputs, - const int num, - const int inH, - const int inW, - const int inC, - const int argType) { - int nth = num * inC * inH * inW; - int blockSize = 1024; - int gridSize = (nth + 1024 - 1) / 1024; - KeNHWC2NCHW<<>>( - outputs, inputs, inH, inW, inC, nth, argType); - CHECK_SYNC("NHWC2NCHW"); -} - -} // namespace paddle diff --git a/paddle/legacy/function/SwitchOpTest.cpp b/paddle/legacy/function/SwitchOpTest.cpp deleted file mode 100644 index 
08e5a613c06c9ca4b9ea2aedad225797f6d38039..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/SwitchOpTest.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "FunctionTest.h" - -namespace paddle { - -TEST(Pad, real) { - for (size_t numSamples : {1, 4, 8, 16}) { - for (size_t channels : {1, 4, 8, 16}) { - for (size_t imgSizeH : {1, 4, 8, 16}) { - for (size_t imgSizeW : {1, 4, 8, 16}) { - VLOG(3) << " numSamples=" << numSamples << " channels=" << channels - << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; - for (bool test_grad : {true, false}) { - CpuGpuFuncCompare compare(test_grad ? "NHWC2NCHW" : "NCHW2NHWC", - FuncConfig()); - TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; - TensorShape outDims{numSamples, imgSizeH, imgSizeW, channels}; - compare.addInputs( - BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); - compare.addOutputs(BufferArg( - VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, ASSIGN_TO)); - compare.run(); - } - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/function/TensorShape.h b/paddle/legacy/function/TensorShape.h deleted file mode 100644 index d4d1eae3960c333a2a7dc6099ae7a68677fdcd5f..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/TensorShape.h +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -namespace paddle { - -/** - * TensorShape used to represent shape of normal tensor. - */ -class TensorShape { - public: - TensorShape() : ndims_(0), nelements_(0) { initDims(0); } - - TensorShape(size_t ndims) : ndims_(ndims), nelements_(1) { initDims(ndims); }; - - TensorShape(std::initializer_list dims) { - ndims_ = dims.size(); - initDims(ndims_); - dims_.assign(dims); - numElements(); - }; - - TensorShape(const TensorShape& t) - : ndims_(t.ndims_), nelements_(t.nelements_) { - initDims(ndims_); - dims_.assign(t.dims_.begin(), t.dims_.end()); - }; - - // get the size of specified dimension - size_t operator[](size_t dim) const { - CHECK_GE(dim, (size_t)0); - CHECK_LT(dim, ndims_); - return dims_[dim]; - } - - // set the size of specified dimension - void setDim(size_t dim, size_t size) { - CHECK_GE(dim, (size_t)0); - CHECK_LT(dim, ndims_); - dims_[dim] = size; - numElements(); - } - - void reshape(std::initializer_list dims) { - ndims_ = dims.size(); - if (ndims_ > kMinDims) { - dims_.resize(ndims_); - } - dims_.assign(dims); - numElements(); - } - - // number of dimensions of the tensor - size_t ndims() const { return ndims_; } - - size_t getElements() const { return nelements_; } - - bool operator==(const TensorShape& t) const { - if (ndims() != t.ndims()) return false; - for (size_t i = 0; i < ndims(); i++) { - if (dims_[i] != t.dims_[i]) return false; - } - - return true; - } - - bool operator!=(const 
TensorShape& t) const { return !(*this == t); } - - private: - // compute number of elements - void numElements() { - nelements_ = 1; - for (size_t n = 0; n < ndims_; n++) { - nelements_ *= dims_[n]; - } - } - - // init dims_ - void initDims(size_t ndims) { - size_t count = ndims < kMinDims ? kMinDims : ndims; - dims_.assign(count, 1); - } - - // number of dimensions - // ndims_ may be not equeal dims_.size() - size_t ndims_; - // number of elements - size_t nelements_; - std::vector dims_; - static const size_t kMinDims = 4; -}; - -} // namespace paddle diff --git a/paddle/legacy/function/TensorShapeTest.cpp b/paddle/legacy/function/TensorShapeTest.cpp deleted file mode 100644 index 4d692b9b97acb60f0124f8ac87acced470f16b3a..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/TensorShapeTest.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "TensorShape.h" -#include - -namespace paddle { - -TEST(TensorShape, Constructor) { - TensorShape t1; - EXPECT_EQ(t1.ndims(), 0U); - EXPECT_EQ(t1.getElements(), 0U); - - TensorShape t2(3); - EXPECT_EQ(t2.ndims(), 3U); - EXPECT_EQ(t2.getElements(), 1U); - - TensorShape t3({8, 10}); - EXPECT_EQ(t3.ndims(), 2U); - EXPECT_EQ(t3.getElements(), 80U); - - TensorShape t4(t3); - EXPECT_EQ(t4.ndims(), t3.ndims()); - EXPECT_EQ(t4.getElements(), t3.getElements()); - - TensorShape t5({1, 2, 3, 4, 5}); - EXPECT_EQ(t5.ndims(), 5U); - EXPECT_EQ(t5.getElements(), 120U); -} - -TEST(TensorShape, GetAndSet) { - TensorShape t({1, 2, 3}); - EXPECT_EQ(t.ndims(), 3U); - EXPECT_EQ(t.getElements(), 6U); - - EXPECT_EQ(t[1], 2U); - t.setDim(1, 100); - EXPECT_EQ(t.getElements(), 300U); - EXPECT_EQ(t[1], 100U); -} - -} // namespace paddle diff --git a/paddle/legacy/function/TensorType.h b/paddle/legacy/function/TensorType.h deleted file mode 100644 index 13994821be7ba7264f43d8550e6800cdc5b93875..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/TensorType.h +++ /dev/null @@ -1,149 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -enum ValueType { - VALUE_TYPE_INT32 = 0, - VALUE_TYPE_FLOAT = 1, - VALUE_TYPE_DOUBLE = 2, - VALUE_TYPE_BYTE = 3 -}; - -enum DeviceType { - DEVICE_TYPE_UNSPECIFIED = 0, - DEVICE_TYPE_CPU = 1, - DEVICE_TYPE_GPU = 2 -}; - -enum SparseDataType { T_NO_VALUE = 0, T_FLOAT_VALUE = 1 }; - -enum SparseDataFormat { T_SPARSE_CSR = 0, T_SPARSE_CSC = 1 }; - -inline int sizeOfValuType(ValueType valueType) { - if (valueType == VALUE_TYPE_INT32) { - return 4; - } else if (valueType == VALUE_TYPE_FLOAT) { - return 4; - } else if (valueType == VALUE_TYPE_DOUBLE) { - return 8; - } else { - LOG(FATAL) << "Unknown type: " << valueType; - return 0; - } -} - -template -struct DataType; - -template <> -struct DataType { - static const ValueType value = VALUE_TYPE_FLOAT; -}; - -template <> -struct DataType { - static const ValueType value = VALUE_TYPE_DOUBLE; -}; - -template <> -struct DataType { - static const ValueType value = VALUE_TYPE_INT32; -}; - -namespace detail { - -template -struct MatrixT; - -template <> -struct MatrixT { - using type = CpuMatrix; -}; - -template <> -struct MatrixT { - using type = GpuMatrix; -}; - -template <> -struct MatrixT { - using type = void; // Not implemented -}; - -template <> -struct MatrixT { - using type = void; // Not implemented -}; - -template -struct SparseMatrixT; - -template <> -struct SparseMatrixT { - using type = CpuSparseMatrix; -}; - -template <> -struct SparseMatrixT { - using type = GpuSparseMatrix; -}; - -template <> -struct SparseMatrixT { - using type = void; // Not implemented -}; - -template <> -struct SparseMatrixT { - using type = void; // Not implemented -}; - -template -struct VectorT; - -template <> -struct VectorT { - using type = CpuVector; -}; - -template <> -struct VectorT { - using type = GpuVector; -}; - -template <> -struct VectorT { - using type = CpuIVector; -}; - -template <> -struct VectorT { - using type = GpuIVector; -}; - -} // 
namespace detail - -template -struct Tensor { - typedef typename detail::VectorT::type Vector; - typedef typename detail::MatrixT::type Matrix; - typedef typename detail::SparseMatrixT::type SparseMatrix; -}; - -} // namespace paddle diff --git a/paddle/legacy/function/TensorTypeTest.cpp b/paddle/legacy/function/TensorTypeTest.cpp deleted file mode 100644 index d0cd63147a8e112d5d5fefa4509d398acaf478b8..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/TensorTypeTest.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "TensorType.h" -#include - -namespace paddle { - -TEST(TensorType, Matrix) { - Tensor::Matrix matrix(100, 200); - EXPECT_EQ(matrix.getHeight(), 100U); - EXPECT_EQ(matrix.getWidth(), 200U); - EXPECT_EQ(matrix.getElementCnt(), 100U * 200U); - EXPECT_EQ(matrix.useGpu(), false); - - Tensor::Matrix testGpu(100, 200); - EXPECT_EQ(testGpu.useGpu(), true); -} - -TEST(TensorType, Vector) { - Tensor::Vector cpuVector(100); - Tensor::Vector gpuVector(100); - EXPECT_EQ(cpuVector.useGpu(), false); - EXPECT_EQ(gpuVector.useGpu(), true); - EXPECT_EQ(cpuVector.getSize(), 100U); - EXPECT_EQ(gpuVector.getSize(), 100U); - - Tensor::Vector cpuIVector(100); - Tensor::Vector gpuIVector(100); - EXPECT_EQ(cpuIVector.useGpu(), false); - EXPECT_EQ(gpuIVector.useGpu(), true); - EXPECT_EQ(cpuIVector.getSize(), 100U); - EXPECT_EQ(gpuIVector.getSize(), 100U); -} - -TEST(TensorType, EmptyMatrix) { - CpuMatrix empty(nullptr, 0, 0); - CpuMatrix nonEmpty(10, 10); - EXPECT_EQ(empty.isEmpty(), true); - EXPECT_EQ(nonEmpty.isEmpty(), false); - CHECK(nonEmpty); - auto function = [](const CpuMatrix& matrix) { - if (matrix) { - EXPECT_NE(matrix.getData(), nullptr); - } else { - EXPECT_EQ(matrix.getData(), nullptr); - } - }; - function(empty); - function(nonEmpty); -} - -} // namespace paddle diff --git a/paddle/legacy/function/neon/NeonDepthwiseConv.cpp b/paddle/legacy/function/neon/NeonDepthwiseConv.cpp deleted file mode 100644 index 6179635a9fec4afecf53fabdc6a818588b54c808..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/neon/NeonDepthwiseConv.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "NeonDepthwiseConv.h" -#include "paddle/legacy/function/ConvOp.h" - -namespace paddle { - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) - -template -class NeonDepthwiseConvFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - check(inputs, outputs); - - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - - int batchSize = input[0]; - int inputChannels = input[1]; - int inputHeight = input[2]; - int inputWidth = input[3]; - int filterHeight = getFilterHeight(filter); - int filterWidth = getFilterWidth(filter); - int outputChannels = output[1]; - int outputHeight = output[2]; - int outputWidth = output[3]; - int filterMultiplier = outputChannels / groups_; - CHECK_EQ(static_cast(inputChannels), groups_); - - // only support strideH() == strideW() and filterHeight == filterWidth. 
- CHECK_EQ(strideH(), strideW()); - CHECK_EQ(filterHeight, filterWidth); - - float* inputData = inputs[0].data(); - float* filterData = inputs[1].data(); - float* outputData = outputs[0].data(); - - // padding the input - float* inputPadding = inputData; - int padInputHeight = inputHeight + 2 * paddingH(); - int padInputWidth = inputWidth + 2 * paddingW(); - int newSize = - batchSize * (inputChannels + 1) * padInputHeight * padInputWidth; - - resizeBuffer(newSize); - inputPadding = reinterpret_cast(memory_->getBuf()); - neon::Padding::run(inputData, - inputPadding, - batchSize * inputChannels, - inputHeight, - inputWidth, - padInputHeight, - padInputWidth); - - std::function - DepthWiseConv; - - if (filterWidth == 3 && strideW() == 1) { - DepthWiseConv = neon::DepthwiseConvKernel<3, 1>::run; - } else if (filterWidth == 3 && strideW() == 2) { - DepthWiseConv = neon::DepthwiseConvKernel<3, 2>::run; - } else if (filterWidth == 4 && strideW() == 1) { - DepthWiseConv = neon::DepthwiseConvKernel<4, 1>::run; - } else if (filterWidth == 4 && strideW() == 2) { - DepthWiseConv = neon::DepthwiseConvKernel<4, 2>::run; - } else { - LOG(FATAL) << "Not supported"; - } - - for (int i = 0; i < batchSize; i++) { - DepthWiseConv(inputPadding, - filterData, - padInputHeight, - padInputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - inputPadding += inputChannels * padInputHeight * padInputWidth; - outputData += outputChannels * outputHeight * outputWidth; - } - } -}; - -#ifndef PADDLE_TYPE_DOUBLE -REGISTER_TYPED_FUNC(NeonDepthwiseConv, CPU, NeonDepthwiseConvFunction); -#endif - -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/neon/NeonDepthwiseConv.h b/paddle/legacy/function/neon/NeonDepthwiseConv.h deleted file mode 100644 index 8b2cba263e7b30e9d6a001f40cc74e54541dc882..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/neon/NeonDepthwiseConv.h +++ /dev/null @@ -1,627 +0,0 @@ -/* Copyright (c) 2016 
PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "neon_util.h" - -namespace paddle { -namespace neon { - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) - -template -struct DepthwiseConvKernel {}; - -inline float32_t conv3x3(const float* r0, - const float* r1, - const float* r2, - float32x4_t k0, - float32x4_t k1, - float32x4_t k2) { - float32_t tmp[12]; - vst1q_f32(&(tmp[0]), k0); - vst1q_f32(&(tmp[4]), k1); - vst1q_f32(&(tmp[8]), k2); - float32_t sum0 = r0[0] * tmp[0] + r0[1] * tmp[1] + r0[2] * tmp[2]; - float32_t sum1 = r1[0] * tmp[4] + r1[1] * tmp[5] + r1[2] * tmp[6]; - float32_t sum2 = r2[0] * tmp[8] + r2[1] * tmp[9] + r2[2] * tmp[10]; - return sum0 + sum1 + sum2; -} - -inline float32_t conv4x4(float32x4_t r0, - float32x4_t r1, - float32x4_t r2, - float32x4_t r3, - float32x4_t k0, - float32x4_t k1, - float32x4_t k2, - float32x4_t k3) { - float32x4_t tmp; - tmp = vmulq_f32(r0, k0); - tmp = vmlaq_f32(tmp, r1, k1); - tmp = vmlaq_f32(tmp, r2, k2); - tmp = vmlaq_f32(tmp, r3, k3); - return vaddvq_f32(tmp); -} - -/** - * Each step calculates four elements of the output. - * First step: - * R0[0, 1, 2, 3...] * K[0][0] - * R0[1, 2, 3, 4...] * K[0][1] - * R0[2, 3, 4, 5...] * K[0][2] - * R1[0, 1, 2, 3...] * K[1][0] - * R1[1, 2, 3, 4...] * K[1][1] - * R1[2, 3, 4, 5...] * K[1][2] - * R2[0, 1, 2, 3...] * K[2][0] - * R2[1, 2, 3, 4...] * K[2][1] - * + R2[2, 3, 4, 5...] 
* K[2][2] - * ------------------------------ - * Output[0, 1, 2, 3] - */ -template <> -struct DepthwiseConvKernel<3, 1> { - static void run(const float* inputData, - const float* filterData, - int inputHeight, - int inputWidth, - int outputChannels, - int outputHeight, - int outputWidth, - int filterMultiplier, - float* outputData) { - const int steps = outputWidth >> 2; - const int remain = outputWidth & 3; - for (int c = 0; c < outputChannels; c++, filterData += 9) { - // Load the filters - float32x4_t k[3]; - k[0] = vld1q_f32(filterData); - k[1] = vld1q_f32(filterData + 3); - k[2] = vld1q_f32(filterData + 6); - k[0] = vsetq_lane_f32(0.f, k[0], 3); - k[1] = vsetq_lane_f32(0.f, k[1], 3); - k[2] = vsetq_lane_f32(0.f, k[2], 3); - - const float* r0 = - inputData + (c / filterMultiplier) * (inputHeight * inputWidth); - const float* r1 = r0 + inputWidth; - const float* r2 = r0 + inputWidth * 2; - float32x4_t input[3][3]; - for (int h = 0; h < outputHeight; h++) { - for (int s = 0; s < steps; s++) { - // Load the inputs - float32x4_t tmp; - input[0][0] = vld1q_f32(r0); - tmp = vld1q_f32(r0 + 4); - input[0][1] = vextq_f32(input[0][0], tmp, 1); - input[0][2] = vextq_f32(input[0][0], tmp, 2); - input[1][0] = vld1q_f32(r1); - tmp = vld1q_f32(r1 + 4); - input[1][1] = vextq_f32(input[1][0], tmp, 1); - input[1][2] = vextq_f32(input[1][0], tmp, 2); - input[2][0] = vld1q_f32(r2); - tmp = vld1q_f32(r2 + 4); - input[2][1] = vextq_f32(input[2][0], tmp, 1); - input[2][2] = vextq_f32(input[2][0], tmp, 2); - - float32x4_t tmp1 = vdupq_n_f32(0.f); - float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = 
vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp1 = vaddq_f32(tmp1, tmp2); - - vst1q_f32(outputData, tmp1); - r0 += 4; - r1 += 4; - r2 += 4; - outputData += 4; - } - - for (int r = 0; r < remain; r++) { - *outputData = conv3x3(r0, r1, r2, k[0], k[1], k[2]); - r0++; - r1++; - r2++; - outputData++; - } - - r0 += 2; - r1 += 2; - r2 += 2; - } - } - } -}; - -/** - * Each step calculates four elements of the output. - * First step: - * R0[0, 2, 4, 6...] * K[0][0] - * R0[1, 3, 5, 7...] * K[0][1] - * R0[2, 4, 6, 8...] * K[0][2] - * R1[0, 2, 4, 6...] * K[1][0] - * R1[1, 3, 5, 7...] * K[1][1] - * R1[2, 4, 6, 8...] * K[1][2] - * R2[0, 2, 4, 6...] * K[2][0] - * R2[1, 3, 5, 7...] * K[2][1] - * R2[2, 4, 6, 8...] * K[2][2] - * ------------------------------ - * Output[0, 1, 2, 3] - */ -template <> -struct DepthwiseConvKernel<3, 2> { - static void run(const float* inputData, - const float* filterData, - int inputHeight, - int inputWidth, - int outputChannels, - int outputHeight, - int outputWidth, - int filterMultiplier, - float* outputData) { - const int steps = outputWidth >> 2; - const int remain = outputWidth & 3; - for (int c = 0; c < outputChannels; c++, filterData += 9) { - // Load the filters - float32x4_t k[3]; - k[0] = vld1q_f32(filterData); - k[1] = vld1q_f32(filterData + 3); - k[2] = vld1q_f32(filterData + 6); - k[0] = vsetq_lane_f32(0.f, k[0], 3); - k[1] = vsetq_lane_f32(0.f, k[1], 3); - k[2] = vsetq_lane_f32(0.f, k[2], 3); - - const float* start = - inputData + (c / filterMultiplier) * (inputHeight * inputWidth); - float32x4_t input[3][3]; - for (int h = 0; h < outputHeight; h++) { - const float* r0 = start + 2 * h * inputWidth; - const float* r1 = start + (2 * h + 1) * inputWidth; - const float* r2 = start + (2 * h + 2) * inputWidth; - for (int s = 0; s < steps; s++) { - // Load the inputs - float32x4_t data1; - float32x4x2_t data2; - - data2 = vld2q_f32(r0); - input[0][0] = data2.val[0]; - input[0][1] = 
data2.val[1]; - data1 = vld1q_f32(r0 + 8); - input[0][2] = vextq_f32(data2.val[0], data1, 1); - - data2 = vld2q_f32(r1); - input[1][0] = data2.val[0]; - input[1][1] = data2.val[1]; - data1 = vld1q_f32(r1 + 8); - input[1][2] = vextq_f32(data2.val[0], data1, 1); - - data2 = vld2q_f32(r2); - input[2][0] = data2.val[0]; - input[2][1] = data2.val[1]; - data1 = vld1q_f32(r2 + 8); - input[2][2] = vextq_f32(data2.val[0], data1, 1); - - float32x4_t tmp1 = vdupq_n_f32(0.f); - float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp1 = vaddq_f32(tmp1, tmp2); - - vst1q_f32(outputData, tmp1); - r0 += 8; - r1 += 8; - r2 += 8; - outputData += 4; - } - - for (int r = 0; r < remain; r++) { - *outputData = conv3x3(r0, r1, r2, k[0], k[1], k[2]); - r0 += 2; - r1 += 2; - r2 += 2; - outputData++; - } - } - } - } -}; - -/** - * Each step calculates four elements of the output. 
- */ -template <> -struct DepthwiseConvKernel<4, 1> { - static void run(const float* inputData, - const float* filterData, - int inputHeight, - int inputWidth, - int outputChannels, - int outputHeight, - int outputWidth, - int filterMultiplier, - float* outputData) { - const int steps = outputWidth >> 2; - const int remain = outputWidth & 3; - for (int c = 0; c < outputChannels; c++, filterData += 16) { - // Load the filters - float32x4_t k[4]; - k[0] = vld1q_f32(filterData); - k[1] = vld1q_f32(filterData + 4); - k[2] = vld1q_f32(filterData + 8); - k[3] = vld1q_f32(filterData + 12); - - const float* r0 = - inputData + (c / filterMultiplier) * (inputHeight * inputWidth); - const float* r1 = r0 + inputWidth; - const float* r2 = r0 + inputWidth * 2; - const float* r3 = r0 + inputWidth * 3; - float32x4_t input[4][4]; - for (int h = 0; h < outputHeight; h++) { - for (int s = 0; s < steps; s++) { - // Load the inputs - float32x4_t tmp; - input[0][0] = vld1q_f32(r0); - tmp = vld1q_f32(r0 + 4); - input[0][1] = vextq_f32(input[0][0], tmp, 1); - input[0][2] = vextq_f32(input[0][0], tmp, 2); - input[0][3] = vextq_f32(input[0][0], tmp, 3); - - input[1][0] = vld1q_f32(r1); - tmp = vld1q_f32(r1 + 4); - input[1][1] = vextq_f32(input[1][0], tmp, 1); - input[1][2] = vextq_f32(input[1][0], tmp, 2); - input[1][3] = vextq_f32(input[1][0], tmp, 3); - - input[2][0] = vld1q_f32(r2); - tmp = vld1q_f32(r2 + 4); - input[2][1] = vextq_f32(input[2][0], tmp, 1); - input[2][2] = vextq_f32(input[2][0], tmp, 2); - input[2][3] = vextq_f32(input[2][0], tmp, 3); - - input[3][0] = vld1q_f32(r3); - tmp = vld1q_f32(r3 + 4); - input[3][1] = vextq_f32(input[3][0], tmp, 1); - input[3][2] = vextq_f32(input[3][0], tmp, 2); - input[3][3] = vextq_f32(input[3][0], tmp, 3); - - float32x4_t tmp1 = vdupq_n_f32(0.f); - float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); 
- tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); - tmp1 = vaddq_f32(tmp1, tmp2); - - vst1q_f32(outputData, tmp1); - r0 += 4; - r1 += 4; - r2 += 4; - r3 += 4; - outputData += 4; - } - - for (int r = 0; r < remain; r++) { - float32x4_t i0 = vld1q_f32(r0); - float32x4_t i1 = vld1q_f32(r1); - float32x4_t i2 = vld1q_f32(r2); - float32x4_t i3 = vld1q_f32(r3); - *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); - r0++; - r1++; - r2++; - r3++; - outputData++; - } - - r0 += 3; - r1 += 3; - r2 += 3; - r3 += 3; - } - } - } -}; - -/** - * Each step calculates four elements of the output. 
- */ -template <> -struct DepthwiseConvKernel<4, 2> { - static void run(const float* inputData, - const float* filterData, - int inputHeight, - int inputWidth, - int outputChannels, - int outputHeight, - int outputWidth, - int filterMultiplier, - float* outputData) { - const int steps = outputWidth >> 2; - const int remain = outputWidth & 3; - for (int c = 0; c < outputChannels; c++, filterData += 16) { - // Load the filters - float32x4_t k[4]; - k[0] = vld1q_f32(filterData); - k[1] = vld1q_f32(filterData + 4); - k[2] = vld1q_f32(filterData + 8); - k[3] = vld1q_f32(filterData + 12); - - const float* start = - inputData + (c / filterMultiplier) * (inputHeight * inputWidth); - float32x4_t input[4][4]; - for (int h = 0; h < outputHeight; h++) { - const float* r0 = start + 2 * h * inputWidth; - const float* r1 = start + (2 * h + 1) * inputWidth; - const float* r2 = start + (2 * h + 2) * inputWidth; - const float* r3 = start + (2 * h + 3) * inputWidth; - for (int s = 0; s < steps; s++) { - // Load the inputs - float32x4x2_t data1; - float32x4x2_t data2; - - data1 = vld2q_f32(r0); - data2 = vld2q_f32(r0 + 8); - input[0][0] = data1.val[0]; - input[0][1] = data1.val[1]; - input[0][2] = vextq_f32(data1.val[0], data2.val[0], 1); - input[0][3] = vextq_f32(data1.val[1], data2.val[1], 1); - - data1 = vld2q_f32(r1); - data2 = vld2q_f32(r1 + 8); - input[1][0] = data1.val[0]; - input[1][1] = data1.val[1]; - input[1][2] = vextq_f32(data1.val[0], data2.val[0], 1); - input[1][3] = vextq_f32(data1.val[1], data2.val[1], 1); - - data1 = vld2q_f32(r2); - data2 = vld2q_f32(r2 + 8); - input[2][0] = data1.val[0]; - input[2][1] = data1.val[1]; - input[2][2] = vextq_f32(data1.val[0], data2.val[0], 1); - input[2][3] = vextq_f32(data1.val[1], data2.val[1], 1); - - data1 = vld2q_f32(r3); - data2 = vld2q_f32(r3 + 8); - input[3][0] = data1.val[0]; - input[3][1] = data1.val[1]; - input[3][2] = vextq_f32(data1.val[0], data2.val[0], 1); - input[3][3] = vextq_f32(data1.val[1], data2.val[1], 1); - - 
float32x4_t tmp1 = vdupq_n_f32(0.f); - float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); - tmp1 = vaddq_f32(tmp1, tmp2); - - vst1q_f32(outputData, tmp1); - r0 += 8; - r1 += 8; - r2 += 8; - r3 += 8; - outputData += 4; - } - - for (int r = 0; r < remain; r++) { - float32x4_t i0 = vld1q_f32(r0); - float32x4_t i1 = vld1q_f32(r1); - float32x4_t i2 = vld1q_f32(r2); - float32x4_t i3 = vld1q_f32(r3); - *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); - r0 += 2; - r1 += 2; - r2 += 2; - r3 += 2; - outputData++; - } - } - } - } -}; - -template -struct Padding { - static void run(const T* input, - T* inputPadding, - int channels, - int inputHeight, - int inputWidth, - int padInputHeight, - int padInputWidth) { - const int paddingHeight = (padInputHeight - inputHeight) / 2; - const int paddingWidth = (padInputWidth - inputWidth) / 2; - for (int c = 0; c < channels; c++) { - if (paddingHeight > 0) { - memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(T)); - inputPadding += padInputWidth * paddingHeight; - } - - for (int i = 0; i < inputHeight; i++) { - // padding head - for (int j = 0; j < paddingWidth; j++) { - 
*inputPadding++ = T(0); - } - - memcpy(inputPadding, input, inputWidth * sizeof(T)); - inputPadding += inputWidth; - input += inputWidth; - - // padding tail - for (int j = 0; j < paddingWidth; j++) { - *inputPadding++ = T(0); - } - } - - if (paddingHeight > 0) { - memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(T)); - inputPadding += padInputWidth * paddingHeight; - } - } - } -}; - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) -template <> -struct Padding { - static void run(const float* input, - float* inputPadding, - int channels, - int inputHeight, - int inputWidth, - int padInputHeight, - int padInputWidth) { - const int paddingHeight = (padInputHeight - inputHeight) / 2; - const int paddingWidth = (padInputWidth - inputWidth) / 2; - for (int c = 0; c < channels; c++) { - if (paddingHeight > 0) { - memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(float)); - inputPadding += padInputWidth * paddingHeight; - } - - for (int i = 0; i < inputHeight; i++) { - // padding head - for (int j = 0; j < paddingWidth; j++) { - *inputPadding++ = float(0); - } - - int step = inputWidth >> 2; - int remain = inputWidth & 3; - for (int s = 0; s < step; s++) { - float32x4_t s0 = vld1q_f32(input); - vst1q_f32(inputPadding, s0); - input += 4; - inputPadding += 4; - } - for (int r = 0; r < remain; r++) { - *inputPadding++ = *input++; - } - - // padding tail - for (int j = 0; j < paddingWidth; j++) { - *inputPadding++ = float(0); - } - } - - if (paddingHeight > 0) { - memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(float)); - inputPadding += padInputWidth * paddingHeight; - } - } - } -}; - -// for stride is 2 -struct StridePadding { - static void run(const float* input, - float* inputPadding, - int channels, - int inputHeight, - int inputWidth, - int padInputHeight, - int padInputWidth) { - const int paddingHeight = (padInputHeight - (inputHeight * 2 - 1)) / 2; - const int paddingWidth = (padInputWidth - (inputWidth * 2 - 1)) / 2; - for 
(int c = 0; c < channels; c++) { - if (paddingHeight > 0) { - memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(float)); - inputPadding += padInputWidth * paddingHeight; - } - - for (int i = 0; i < inputHeight; i++) { - // padding head - for (int j = 0; j < paddingWidth; j++) { - *inputPadding++ = float(0); - } - - int step = inputWidth >> 2; - int remain = inputWidth & 3; - float32x4_t s1 = vdupq_n_f32(0.f); - for (int s = 0; s < step; s++) { - float32x4_t s0 = vld1q_f32(input); - float32x4x2_t v = {{s0, s1}}; - vst2q_f32(inputPadding, v); - input += 4; - inputPadding += 8; - } - for (int r = 0; r < remain; r++) { - *inputPadding++ = *input++; - *inputPadding++ = float(0); - } - inputPadding--; - - // padding tail - for (int j = 0; j < paddingWidth; j++) { - *inputPadding++ = float(0); - } - if (i != inputHeight - 1) { - memset(inputPadding, 0, padInputWidth * sizeof(float)); - inputPadding += padInputWidth; - } - } - - if (paddingHeight > 0) { - memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(float)); - inputPadding += padInputWidth * paddingHeight; - } - } - } -}; - -#endif - -#endif - -} // namespace neon -} // namespace paddle diff --git a/paddle/legacy/function/neon/NeonDepthwiseConvTranspose.cpp b/paddle/legacy/function/neon/NeonDepthwiseConvTranspose.cpp deleted file mode 100644 index feb77e1ff9f591d63dbf86a05313d65025f7c65d..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/neon/NeonDepthwiseConvTranspose.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "NeonDepthwiseConv.h" -#include "paddle/legacy/function/ConvOp.h" - -namespace paddle { - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) - -template -class NeonDepthwiseConvTransposeFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - check(inputs, outputs); - - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - - int batchSize = input[0]; - int inputChannels = input[1]; - int inputHeight = input[2]; - int inputWidth = input[3]; - int filterHeight = getFilterHeight(filter); - int filterWidth = getFilterWidth(filter); - int outputChannels = output[1]; - int outputHeight = output[2]; - int outputWidth = output[3]; - int filterMultiplier = outputChannels / groups_; - CHECK_EQ(inputChannels, groups_); - - // only support strideH() == strideW() and filterHeight == filterWidth. 
- CHECK_EQ(strideH(), strideW()); - CHECK_EQ(paddingH(), paddingW()); - CHECK_EQ(filterHeight, filterWidth); - - float* inputData = inputs[0].data(); - float* filterData = inputs[1].data(); - float* outputData = outputs[0].data(); - - // padding the input, input -> inputPadding - float* inputPadding = inputData; - int padInputHeight = - (inputHeight - 1) * strideH() + 2 * filterHeight - 1 - 2 * paddingH(); - int padInputWidth = - (inputWidth - 1) * strideW() + 2 * filterWidth - 1 - 2 * paddingW(); - - if (padInputHeight > inputHeight || padInputWidth > inputWidth) { - int newSize = batchSize * inputChannels * padInputHeight * padInputWidth; - resizeBuffer(newSize); - inputPadding = reinterpret_cast(memory_->getBuf()); - if (strideH() == 1) { - neon::Padding::run(inputData, - inputPadding, - batchSize * inputChannels, - inputHeight, - inputWidth, - padInputHeight, - padInputWidth); - } else if (strideH() == 2) { - neon::StridePadding::run(inputData, - inputPadding, - batchSize * inputChannels, - inputHeight, - inputWidth, - padInputHeight, - padInputWidth); - } else { - LOG(FATAL) << "Not supported"; - } - } - - std::function - DepthWiseConv; - - if (filterWidth == 3) { - DepthWiseConv = neon::DepthwiseConvKernel<3, 1>::run; - } else if (filterWidth == 4) { - DepthWiseConv = neon::DepthwiseConvKernel<4, 1>::run; - } else { - LOG(FATAL) << "Not supported"; - } - - for (int i = 0; i < batchSize; i++) { - DepthWiseConv(inputPadding, - filterData, - padInputHeight, - padInputWidth, - outputChannels, - outputHeight, - outputWidth, - filterMultiplier, - outputData); - inputPadding += inputChannels * padInputHeight * padInputWidth; - outputData += outputChannels * outputHeight * outputWidth; - } - } -}; - -#ifndef PADDLE_TYPE_DOUBLE - -REGISTER_TYPED_FUNC(NeonDepthwiseConvTranspose, - CPU, - NeonDepthwiseConvTransposeFunction); - -#endif - -#endif - -} // namespace paddle diff --git a/paddle/legacy/function/neon/neon_util.h b/paddle/legacy/function/neon/neon_util.h deleted 
file mode 100644 index 95076b1387a77f84efa9c8f46e72bd84ed5b65a2..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/neon/neon_util.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) - -#include - -namespace paddle { - -namespace neon { - -inline float32x4_t vld1q_f32_aligned(const float* p) { - return vld1q_f32( - (const float*)__builtin_assume_aligned(p, sizeof(float32x4_t))); -} - -#ifndef __aarch64__ -inline float32_t vaddvq_f32(float32x4_t a) { - float32x2_t v = vadd_f32(vget_high_f32(a), vget_low_f32(a)); - return vget_lane_f32(vpadd_f32(v, v), 0); -} - -#define vmlaq_laneq_f32(a, b, v, lane) \ - vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane)) -#endif - -} // namespace neon -} // namespace paddle - -#endif diff --git a/paddle/legacy/function/nnpack/NNPACKConvOp.cpp b/paddle/legacy/function/nnpack/NNPACKConvOp.cpp deleted file mode 100644 index 81c832e7747f8e75d322891476e08dacc435f5d4..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/nnpack/NNPACKConvOp.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "nnpack.h" -#include "paddle/legacy/function/ConvOp.h" - -DEFINE_bool(nnpack_allocate_outside, - true, - "Allocate and free workspace memory outside the NNPACK interface."); -DEFINE_int32(nnpack_num_threads, - 0, - "The number of nnpack threads" - "default: 0; 0 to disable threadpool."); - -namespace paddle { - -nnp_convolution_algorithm get_nnp_convolution_algorithm( - const std::string& algorithm) { - if (algorithm == "auto") { - return nnp_convolution_algorithm_auto; - } else if (algorithm == "ft8x8") { - return nnp_convolution_algorithm_ft8x8; - } else if (algorithm == "ft16x16") { - return nnp_convolution_algorithm_ft16x16; - } else if (algorithm == "wt8x8") { - return nnp_convolution_algorithm_wt8x8; - } else if (algorithm == "implicit-gemm") { - return nnp_convolution_algorithm_implicit_gemm; - } else if (algorithm == "direct") { - return nnp_convolution_algorithm_direct; - } else { - return nnp_convolution_algorithm_auto; - } -} - -template -class NNPACKConvFunction : public ConvFunctionBase { - public: - void init(const FuncConfig& config) override { - ConvFunctionBase::init(config); - algorithm_ = get_nnp_convolution_algorithm(config.get("algo")); - transform_strategy_ = nnp_convolution_transform_strategy_compute; - nnp_status status = nnp_initialize(); - CHECK_EQ(status, nnp_status_success); - workspaceBuffer_ = nullptr; - workspaceSize_ = 0; - - create_nnpack_threadpool(); - } - - ~NNPACKConvFunction() { - if (workspaceBuffer_) { - free(workspaceBuffer_); - } - } - - void check(const BufferArgs& inputs, const BufferArgs& outputs) override { - 
const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - checkShape(input, filter, output); - } - - void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(numInputs_, inputs.size()); - CHECK_EQ(numOutputs_, outputs.size()); - CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); - check(inputs, outputs); - const TensorShape& input = inputs[0].shape(); - const TensorShape& filter = inputs[1].shape(); - const TensorShape& output = outputs[0].shape(); - - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - - nnp_size inputSize = {.width = inputWidth, .height = inputHeight}; - nnp_padding padding = {.top = (size_t)paddingH(), - .right = (size_t)paddingW(), - .bottom = (size_t)paddingH(), - .left = (size_t)paddingW()}; - nnp_size kernelSize = {.width = filterWidth, .height = filterHeight}; - nnp_size outputSubsampling = {.width = (size_t)strideW(), - .height = (size_t)strideH()}; - - float* inputData = inputs[0].data(); - float* filterData = inputs[1].data(); - float* outputData = outputs[0].data(); - - void* bufferPtr = nullptr; - size_t* sizePtr = nullptr; - size_t needSize; - if (FLAGS_nnpack_allocate_outside) { - if (batchSize == 1) { - nnp_status status = nnp_convolution_inference(algorithm_, - transform_strategy_, - inputChannels, - outputChannels, - inputSize, - padding, - kernelSize, - outputSubsampling, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - &needSize, - nnp_activation_identity, - nullptr, - nullptr, - nullptr); - CHECK_EQ(status, nnp_status_success); - } else { - // only supports stride = 1 - CHECK_EQ(strideH(), 1); - CHECK_EQ(strideW(), 1); - nnp_status 
status = nnp_convolution_output(algorithm_, - batchSize, - inputChannels, - outputChannels, - inputSize, - padding, - kernelSize, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - &needSize, - nnp_activation_identity, - nullptr, - nullptr, - nullptr); - CHECK_EQ(status, nnp_status_success); - } - - VLOG(3) << "workspace size is " << needSize; - if (needSize > workspaceSize_) { - workspaceSize_ = needSize; - if (workspaceBuffer_) { - free(workspaceBuffer_); - } else { - posix_memalign(&workspaceBuffer_, 64, needSize); - } - } - - if (needSize) { - bufferPtr = workspaceBuffer_; - sizePtr = &needSize; - } - } - - size_t inputOffset = inputChannels / groups_ * inputHeight * inputWidth; - size_t outputOffset = outputChannels / groups_ * outputHeight * outputWidth; - size_t filterOffset = filter.getElements() / groups_; - - if (batchSize == 1) { - for (size_t g = 0; g < groups_; g++) { - nnp_status status = - nnp_convolution_inference(algorithm_, - transform_strategy_, - inputChannels / groups_, - outputChannels / groups_, - inputSize, - padding, - kernelSize, - outputSubsampling, - inputData + inputOffset * g, - filterData + filterOffset * g, - nullptr, /* bias */ - outputData + outputOffset * g, - bufferPtr, - sizePtr, - nnp_activation_identity, - nullptr, - threadpool_, /* threadpool */ - nullptr); - CHECK_EQ(status, nnp_status_success); - } - } else { - // only supports stride = 1 - CHECK_EQ(strideH(), 1); - CHECK_EQ(strideW(), 1); - - // TODO(hedaoyuan): There has some bug when batchSize > 1 and groups_ > 1. 
- CHECK_EQ(groups_, static_cast(1)); - nnp_status status = nnp_convolution_output(algorithm_, - batchSize, - inputChannels, - outputChannels, - inputSize, - padding, - kernelSize, - inputData, - filterData, - nullptr, /* bias */ - outputData, - bufferPtr, - sizePtr, - nnp_activation_identity, - nullptr, - threadpool_, /* threadpool */ - nullptr); - CHECK_EQ(status, nnp_status_success); - } - } - - static void create_nnpack_threadpool() { - if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) { - threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads); - VLOG(3) << "Number of threads " - << pthreadpool_get_threads_count(threadpool_); - } - } - - private: - nnp_convolution_algorithm algorithm_; - nnp_convolution_transform_strategy transform_strategy_; - void* workspaceBuffer_; - size_t workspaceSize_; - static pthreadpool_t threadpool_; -}; - -template -pthreadpool_t NNPACKConvFunction::threadpool_ = nullptr; - -REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction); - -} // namespace paddle diff --git a/paddle/legacy/function/nnpack/NNPACKConvOpTest.cpp b/paddle/legacy/function/nnpack/NNPACKConvOpTest.cpp deleted file mode 100644 index a2db83f5a36310ca6f173d6e6501118b34060761..0000000000000000000000000000000000000000 --- a/paddle/legacy/function/nnpack/NNPACKConvOpTest.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "paddle/legacy/function/ConvOpTest.h" - -namespace paddle { - -TEST(NNPACK, Forward) { - Convolution( - "GemmConv-CPU", "NNPACKConv-CPU", forward); -} - -TEST(NNPACK, Depthwise) { - DepthwiseConvolution( - "GemmConv-CPU", "NNPACKConv-CPU", forward); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/CMakeLists.txt b/paddle/legacy/gserver/CMakeLists.txt deleted file mode 100644 index 6dc877dd90ee2ae3d99406299a9244eb3e3d7b53..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/CMakeLists.txt +++ /dev/null @@ -1,152 +0,0 @@ -# Gserver package contains: -# * Layers -# * Activations -# * DataProviders -# * Evaluators -# * GradientMachines(NeuralNetwork) -file(GLOB_RECURSE GSERVER_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.h") -file(GLOB_RECURSE GSERVER_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cpp") -set(GSERVER_SOURCES - layers/LstmCompute.cu - layers/GruCompute.cu - ${GSERVER_SOURCES}) - -macro(filter_test VAR_NAME) - set(tmp) - foreach(p IN LISTS ${VAR_NAME}) - if(NOT ${p} MATCHES ".*tests/.*") - set(tmp ${p} ${tmp}) - endif() - endforeach() - set(${VAR_NAME} ${tmp}) -endmacro() - -filter_test(GSERVER_HEADER) -filter_test(GSERVER_SOURCES) - -if(NOT WITH_MKLDNN) - file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") - file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") - list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER}) - list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES}) - message(STATUS "Skip compiling with MKLDNNLayers and MKLDNNActivations") -else() - message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations") -endif() - -if(NOT WITH_MKLML) - file(GLOB_RECURSE MKL_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLPacked*.h") - file(GLOB_RECURSE MKL_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLPacked*.cpp") - list(REMOVE_ITEM GSERVER_HEADER ${MKL_HEADER}) - list(REMOVE_ITEM GSERVER_SOURCES ${MKL_SOURCES}) - message(STATUS "Skip 
compiling with MKLPackedLayers") -else() - message(STATUS "Compile with MKLPackedLayers") -endif() - -if(NOT WITH_GPU) - list(REMOVE_ITEM GSERVER_HEADER - layers/CudnnConvBaseLayer.h - layers/CudnnConvLayer.h - layers/CudnnConvTransLayer.h - layers/CudnnPoolLayer.h - layers/CudnnBatchNormLayer.h) - - list(REMOVE_ITEM GSERVER_SOURCES - layers/CudnnConvBaseLayer.cpp - layers/CudnnConvLayer.cpp - layers/CudnnConvTransLayer.cpp - layers/CudnnPoolLayer.cpp - layers/CudnnBatchNormLayer.cpp) - compile_cu_as_cpp(layers/LstmCompute.cu) - compile_cu_as_cpp(layers/GruCompute.cu) -endif() - -if(NOT WITH_PYTHON) - list(REMOVE_ITEM GSERVER_SOURCES - dataproviders/PyDataProvider.cpp) - - list(REMOVE_ITEM GSERVER_HEADER - dataproviders/PyDataProvider.h) -endif() - -if(MOBILE_INFERENCE) - # Remove evaluators - list(REMOVE_ITEM GSERVER_SOURCES - layers/ValidationLayer.cpp - evaluators/Evaluator.cpp - evaluators/DetectionMAPEvaluator.cpp - evaluators/CTCErrorEvaluator.cpp - evaluators/ChunkEvaluator.cpp) - - # Remove dataproviders - list(REMOVE_ITEM GSERVER_SOURCES - dataproviders/DataProvider.cpp - dataproviders/MultiDataProvider.cpp - dataproviders/PyDataProvider2.cpp - dataproviders/PyDataProvider.cpp) - - # Remove useless gradientmachines - list(REMOVE_ITEM GSERVER_SOURCES - gradientmachines/MultiNetwork.cpp - gradientmachines/RecurrentGradientMachine.cpp - gradientmachines/ParallelNeuralNetwork.cpp - gradientmachines/GradientMachineMode.cpp - gradientmachines/MultiGradientMachine.cpp) - - # Remove layers that used in training - list(REMOVE_ITEM GSERVER_SOURCES - layers/RecurrentLayerGroup.cpp - layers/CostLayer.cpp - layers/MultiBoxLossLayer.cpp - layers/WarpCTCLayer.cpp - layers/CTCLayer.cpp - layers/LinearChainCTC.cpp - layers/PrintLayer.cpp) - list(REMOVE_ITEM GSERVER_SOURCES - layers/OuterProdLayer.cpp - layers/SumToOneNormLayer.cpp - layers/ConvShiftLayer.cpp - layers/InterpolationLayer.cpp - layers/AgentLayer.cpp - layers/DotMulOperator.cpp - layers/GruStepLayer.cpp - 
layers/LstmStepLayer.cpp - layers/ConvexCombinationLayer.cpp - layers/Conv3DLayer.cpp - layers/DeConv3DLayer.cpp - layers/CropLayer.cpp - layers/CrossEntropyOverBeam.cpp - layers/DataNormLayer.cpp - layers/FeatureMapExpandLayer.cpp - layers/HierarchicalSigmoidLayer.cpp - layers/MultinomialSampler.cpp - layers/NCELayer.cpp - layers/KmaxSeqScoreLayer.cpp - layers/MDLstmLayer.cpp - layers/MultiplexLayer.cpp - layers/PadLayer.cpp - layers/Pool3DLayer.cpp - layers/ResizeLayer.cpp - layers/RotateLayer.cpp - layers/RowConvLayer.cpp - layers/RowL2NormLayer.cpp - layers/SamplingIdLayer.cpp - layers/ScaleShiftLayer.cpp - layers/SelectiveFullyConnectedLayer.cpp - layers/SpatialPyramidPoolLayer.cpp - layers/BilinearInterpLayer.cpp - layers/ClipLayer.cpp) -endif() - -if(WITH_GPU) - cuda_add_library(paddle_gserver ${GSERVER_SOURCES}) -else() - add_library(paddle_gserver STATIC - ${GSERVER_SOURCES}) -endif() - -add_dependencies(paddle_gserver paddle_proto ${external_project_dependencies}) -if(WITH_TESTING) - add_subdirectory(tests) -endif() diff --git a/paddle/legacy/gserver/activations/ActivationFunction.cpp b/paddle/legacy/gserver/activations/ActivationFunction.cpp deleted file mode 100644 index ae07c7e6d7fd9fe28a00dd209ae834cd28a327f7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/activations/ActivationFunction.cpp +++ /dev/null @@ -1,509 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ActivationFunction.h" - -#include -#include -#include -#include -#include -#include -#include "paddle/legacy/parameter/Argument.h" -#include "paddle/legacy/utils/ClassRegistrar.h" -#include "paddle/legacy/utils/Logging.h" - -#ifdef PADDLE_WITH_MKLDNN -#include "MKLDNNActivation.h" -#endif - -namespace paddle { - -static ClassRegistrar gActivationRegistrar; -/** - * @def ACTIVATION_CLASS_NAME - * @brief Macro for getting derived activation class name - * @note ACTIVATION_CLASS_NAME(softmax) softmax_; - * means softmaxActivation softmax_; - */ -#define ACTIVATION_CLASS_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Activation -/** - * @def BEGIN_DEFINE_ACTIVATION - * @brief Macro for defining a devried activation class - */ -#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME) \ - class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \ - private: \ - static const std::string name; \ - \ - public: \ - const std::string& getName() const { return name; } -/** - * @def END_DEFINE_ACTIVATION - * @brief Macro for registering a derived activation class - */ -#define END_DEFINE_ACTIVATION(ACTIVATION_NAME) \ - } \ - ; \ - const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \ - #ACTIVATION_NAME; \ - static InitFunction __reg_activation__##ACTIVATION_NAME([] { \ - gActivationRegistrar \ - .registerClass( \ - #ACTIVATION_NAME); \ - }); - -/** - * @brief The IdentityActivation class - * - * Do nothing when forward/backward. 
- */ -class IdentityActivation : public ActivationFunction { - public: - static const std::string name; - Error __must_check forward(Argument& act) { - (void)act; - return Error(); - } - Error __must_check backward(Argument& act) { - (void)act; - return Error(); - } - const std::string& getName() const { return name; } -}; -const std::string IdentityActivation::name = ""; -static InitFunction __reg_activation__identity([] { - gActivationRegistrar.registerClass(""); - gActivationRegistrar.registerClass("linear"); -}); - -/** - * @brief Sigmoid Activation - * \f[ - * f(z) = \frac{1}{1+exp(-z)} - * \f] - */ -BEGIN_DEFINE_ACTIVATION(sigmoid) -Error __must_check forward(Argument& act) { - act.value->sigmoid(*act.value); - return Error(); -} -Error __must_check backward(Argument& act) { - act.grad->sigmoidDerivative(*act.value); - return Error(); -} -END_DEFINE_ACTIVATION(sigmoid) - -/** - * @brief Softmax Activation - * \f[ - * P(y=j|x) = \frac{e^{x^Tw_j}}{\sum^K_{k=1}e^{x^Tw_k}} - * \f] - */ -BEGIN_DEFINE_ACTIVATION(softmax) -private: -MatrixPtr sftMaxSum_; -MatrixPtr sftMaxDot_; - -public: -Error __must_check forward(Argument& act) { - act.value->softmax(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - MatrixPtr outputV = act.value; - MatrixPtr outputG = act.grad; - - if (outputG->useGpu()) { - outputG->softmaxBackward(*outputV); - } else { - SetDevice device(act.deviceId); - Matrix::resizeOrCreate(sftMaxDot_, - outputG->getHeight(), - outputG->getWidth(), - /* trans */ false, - useGpu(act.deviceId)); - Matrix::resizeOrCreate(sftMaxSum_, - outputG->getHeight(), - 1, - /* trans */ false, - useGpu(act.deviceId)); - - sftMaxDot_->dotMul(*outputG, *outputV); - sftMaxSum_->colMerge(*sftMaxDot_); - - act.grad->softmaxDerivative(*act.value, *sftMaxSum_); - } - return Error(); -} -END_DEFINE_ACTIVATION(softmax) - -/** - * @brief Sequence_softmax Activation - * @note Softmax on all frames of one sequence. - * Width of frame must be one. 
- */ -BEGIN_DEFINE_ACTIVATION(sequence_softmax) -private: -ACTIVATION_CLASS_NAME(softmax) softmax_; -Argument argument_; - -public: -Error __must_check forward(Argument& act) { - if (act.value->getWidth() != 1UL) { - return Error( - "Input width for each timestep of sequence softmax should be 1"); - } - - if (!argument_.value) { - argument_.value = Matrix::create(nullptr, - /* height= */ 1, - 1, - /* trans= */ false, - useGpu(act.deviceId)); - argument_.grad = Matrix::create(nullptr, - /* height= */ 1, - 1, - /* trans= */ false, - useGpu(act.deviceId)); - } - - auto starts = - act.hasSubseq() - ? act.subSequenceStartPositions->getVector(useGpu(act.deviceId)) - : act.sequenceStartPositions->getVector(useGpu(act.deviceId)); - act.value->sequenceSoftmax(*act.value, *starts); - return Error(); -} - -Error __must_check backward(Argument& act) { - if (act.value->getWidth() != 1UL) { - return Error( - "Input width for each timestep of sequence softmax should be 1"); - } - - size_t numSequences = - act.hasSubseq() ? act.getNumSubSequences() : act.getNumSequences(); - const int* starts = act.getCpuStartPositions(); - - for (size_t i = 0; i < numSequences; ++i) { - // TODO(Dangqingqing) optimization for GPU - size_t offset = starts[i]; - size_t size = starts[i + 1] - starts[i]; - argument_.value->setData(act.value->getData() + offset, 1UL, size); - argument_.grad->setData(act.grad->getData() + offset, 1UL, size); - - Error err = softmax_.backward(argument_); - if (!err.isOK()) return err; - } - return Error(); -} -END_DEFINE_ACTIVATION(sequence_softmax) - -/* - * @brief SoftSign Activation. 
- * \f[ - * f(z) = \frac{z}{1 + |z|} - * \f] - */ -BEGIN_DEFINE_ACTIVATION(softsign) -private: -MatrixPtr denominator_; - -Error __must_check forward(Argument& act) { - size_t height = act.value->getHeight(); - size_t width = act.value->getWidth(); - Matrix::resizeOrCreate( - denominator_, height, width, false, useGpu(act.deviceId)); - denominator_->assign(*act.value); - denominator_->abs2(); - denominator_->add(1.); - - act.value->dotDiv(*act.value, *denominator_); - return Error(); -} - -Error __must_check backward(Argument& act) { - denominator_->square2(); - denominator_->scalarDiv(*denominator_, 1.); - act.grad->dotMul(*act.grad, *denominator_); - return Error(); -} -END_DEFINE_ACTIVATION(softsign) - -/** - * @brief Relu Activation. - * forward. y = max(0, z) - * - * derivative of relu is: - * - * 1 if z > 0 - * - * 0 otherwise. - */ -BEGIN_DEFINE_ACTIVATION(relu) -Error __must_check forward(Argument& act) { - act.value->relu(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->reluDerivative(*act.value); - return Error(); -} -END_DEFINE_ACTIVATION(relu) - -/** - * @brief BRelu Activation. - * - * forward. y = min(24, max(0, z)) - * - * derivative of brelu is: - * - * 1 if 0 < z < 24 - * - * 0 otherwise. - * - * TODO(yuyang18): Remove magic number 24 or make it configuable. - */ -BEGIN_DEFINE_ACTIVATION(brelu) -Error __must_check forward(Argument& act) { - act.value->brelu(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->breluDerivative(*act.value); - return Error(); -} -END_DEFINE_ACTIVATION(brelu) - -/** - * @brief Tanh Activation. 
- * \f[ - * f(z) = tanh(z)=\frac{e^z-e^{-z}}{e^z+e^{-z}} - * \f] - */ -BEGIN_DEFINE_ACTIVATION(tanh) -Error __must_check forward(Argument& act) { - act.value->tanh(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->tanhDerivative(*act.value); - return Error(); -} -END_DEFINE_ACTIVATION(tanh) - -/** - * @brief Scaled Tanh Activation - * \f[ - * f(z) = 1.7159 * tanh(2/3*z) - * \f] - */ -BEGIN_DEFINE_ACTIVATION(stanh) -private: -real a, b; - -public: -ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {} -Error __must_check forward(Argument& act) { - act.value->scaledTanh(*act.value, a, b); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->scaledTanhDerivative(*act.value, a, b); - return Error(); -} -END_DEFINE_ACTIVATION(stanh) - -/** - * @brief Soft Relu Activation. - * \f[ - * f(z) = ln(1+e^z) - * \f] - */ -BEGIN_DEFINE_ACTIVATION(softrelu) -Error __must_check forward(Argument& act) { - act.value->softrelu(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->softreluDerivative(*act.value); - return Error(); -} -END_DEFINE_ACTIVATION(softrelu) - -/** - * @brief Abs Activation. - * Forward: f(z) = abs(z) - * - * Derivative: - * - * 1 if z>0 - * - * -1 if z<0 - * - * 0 if z=0 - */ -BEGIN_DEFINE_ACTIVATION(abs) -Error __must_check forward(Argument& act) { - SetDevice device(act.deviceId); - Matrix::resizeOrCreate(act.in, - act.value->getHeight(), - act.value->getWidth(), - /* trans */ false, - useGpu(act.deviceId)); - - act.in->copyFrom(*act.value); - act.value->abs2(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->absDerivative(*act.in); - return Error(); -} -END_DEFINE_ACTIVATION(abs) - -/** - * @brief Square Activation. - * \f[ - * f(z) = z^2. 
- * \f] - */ -BEGIN_DEFINE_ACTIVATION(square) -Error __must_check forward(Argument& act) { - SetDevice device(act.deviceId); - Matrix::resizeOrCreate(act.in, - act.value->getHeight(), - act.value->getWidth(), - /* trans */ false, - useGpu(act.deviceId)); - - act.in->copyFrom(*act.value); - act.value->square2(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->squareDerivative(*act.in); - return Error(); -} -END_DEFINE_ACTIVATION(square) - -/** - * @brief Exponential Activation. - * \f[ - * f(z) = e^z - * \f] - */ -BEGIN_DEFINE_ACTIVATION(exponential) -Error __must_check forward(Argument& act) { - act.value->exp2(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->expDerivative(*act.value); - return Error(); -} -END_DEFINE_ACTIVATION(exponential) - -/** - * @brief Reciprocal Activation. - * \f[ - * f(z) = 1/z - * \f] - */ -BEGIN_DEFINE_ACTIVATION(reciprocal) -Error __must_check forward(Argument& act) { - act.value->reciprocal2(); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->dotMulSquare(*act.value); - act.grad->neg(); - return Error(); -} -END_DEFINE_ACTIVATION(reciprocal) - -/** - * @brief Square Root Activation. - * \f[ - * f(z) = sqrt(z) - * \f] - */ -BEGIN_DEFINE_ACTIVATION(sqrt) -Error __must_check forward(Argument& act) { - act.value->sqrt2(); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->dotDiv(*act.grad, *act.value); - act.grad->mulScalar(0.5); - return Error(); -} -END_DEFINE_ACTIVATION(sqrt) - -/** - * @brief Logarithm Activation. 
- * \f[ - * f(z) = log(z) - * \f] - */ -BEGIN_DEFINE_ACTIVATION(log) -Error __must_check forward(Argument& act) { - SetDevice device(act.deviceId); - Matrix::resizeOrCreate(act.in, - act.value->getHeight(), - act.value->getWidth(), - /* trans */ false, - useGpu(act.deviceId)); - - act.in->copyFrom(*act.value); - act.value->log2(*act.value); - return Error(); -} - -Error __must_check backward(Argument& act) { - act.grad->dotDiv(*act.grad, *act.in); - return Error(); -} -END_DEFINE_ACTIVATION(log) - -ActivationFunction* ActivationFunction::create(const std::string& type) { -#ifdef PADDLE_WITH_MKLDNN - if (!type.empty() && type.compare(0, 7, "mkldnn_") == 0) { - return MKLDNNActivation::create(type); - } -#endif - - return gActivationRegistrar.createByType(type); -} - -std::vector ActivationFunction::getAllRegisteredTypes() { - std::vector types; - gActivationRegistrar.forEachType( - [&](const std::string& type) { types.push_back(type); }); - return types; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/activations/ActivationFunction.h b/paddle/legacy/gserver/activations/ActivationFunction.h deleted file mode 100644 index 8bc5b0f529a6358fba8b6c9d1e1f6ee2358dbbf9..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/activations/ActivationFunction.h +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include -#include -#include "paddle/legacy/utils/Error.h" - -namespace paddle { - -struct Argument; -/** - * @brief Activation function is a function that transforms a set of input - * signals into an output signals. The purpose of the activation function - * is to introduce non-liearilty into the network. - * - * @note Common activation function are provieded, including linear, - * sigmoid, softmax, sequence_max, relu, brelu, tanh, stanh, - * softrelu, abs, square, exponential. - * - */ -class ActivationFunction { - public: - static ActivationFunction* create(const std::string& type); - static std::vector getAllRegisteredTypes(); - - ActivationFunction() {} - - virtual ~ActivationFunction() {} - - /** - * @brief Foward propagation - * - * act.value <- f(act.value), - * where f is the activation function. - * Suppose that before calling forward(), act.value is x and - * after forward() is called, act.value is y, then y = f(x). - * - * Usually, act is Layer::output_ - */ - virtual Error __must_check forward(Argument& act) = 0; - - /** - * @brief Backward propagaion - * - * x and y are defined in the above comment for forward(). - * - Before calling backward(), act.grad = dE / dy, where E is the error/cost - * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx) - */ - virtual Error __must_check backward(Argument& act) = 0; - - virtual const std::string& getName() const = 0; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/activations/MKLDNNActivation.cpp b/paddle/legacy/gserver/activations/MKLDNNActivation.cpp deleted file mode 100644 index 2eed7af70a8a3cc305a79bbe23177ea71d15d252..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/activations/MKLDNNActivation.cpp +++ /dev/null @@ -1,249 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MKLDNNActivation.h" -#include "mkldnn.hpp" -#include "paddle/legacy/utils/ClassRegistrar.h" - -namespace paddle { - -static ClassRegistrar gMKLDNNActivationRegistrar; -/** - * @def MKLDNN_ACTIVATION_CLASS_NAME - * @note MKLDNN_ACTIVATION_CLASS_NAME(relu) relu_; - * means mkldnn_reluActivation relu_; - */ -#define MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) mkldnn_##ACT_TYPE##Activation - -/** - * @def BEGIN_MKLDNN_ACTIVATION - */ -#define BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ - class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) : public BASE_CLASS { -/** - * @def END_MKLDNN_ACTIVATION - */ -#define END_MKLDNN_ACTIVATION(ACT_TYPE) \ - private: \ - static const std::string name; \ - \ - public: \ - const std::string& getName() const { return name; } \ - } \ - ; \ - const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \ - "mkldnn_" #ACT_TYPE; \ - static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \ - gMKLDNNActivationRegistrar \ - .registerClass( \ - "mkldnn_" #ACT_TYPE); \ - }); - -/** - * @def DEFINE_MKLDNN_ACTIVATION - */ -#define DEFINE_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ - BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ - END_MKLDNN_ACTIVATION(ACT_TYPE) - -/** - * @def DEFINE_MKLDNN_ELTWISE_ACTIVATION - */ -#define DEFINE_MKLDNN_ELTWISE_ACTIVATION( \ - ACT_TYPE, BASE_CLASS, ALPHA, BWD_ALPHA) \ - BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ - private: \ - static const float alpha; \ - static const float bwdAlpha; \ - \ - public: \ - float getAlpha() const { return alpha; } \ - float getBwdAlpha() const { return bwdAlpha; } \ 
- END_MKLDNN_ACTIVATION(ACT_TYPE) \ - const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \ - const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; - -/** - * @brief MKLDNN Relu Activation. - * Actually mkldnn_relu is Leaky Relu. - * f(x) = x (x >= 0) - * f(x) = negative_slope * x (x < 0) - * @note the negative_slope should be -0.f in forward - */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, MKLDNNEltwiseActivation, -0.f, 0.f) - -/** - * @brief MKLDNN Tanh Activation. - */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, MKLDNNEltwiseActivation, 0.f, 0.f) - -/** - * @brief MKLDNN ELU(Exponential Linear Unit) Activation. - * f(x) = x (x >= 0) - * f(x) = negative_slope * (exp(x) - 1) (x < 0) - */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, MKLDNNEltwiseActivation, 0.f, 0.f) - -mkldnn::algorithm MKLDNNEltwiseActivation::getAlgo(std::string type) const { - const std::map algoMap = { - {"relu", algorithm::eltwise_relu}, - {"tanh", algorithm::eltwise_tanh}, - {"elu", algorithm::eltwise_elu}}; - type.erase(0, 7); // remove mkldnn_ - algorithm algo = (algorithm)0; - mapGet(type, algoMap, &algo); - return algo; -} - -void MKLDNNEltwiseActivation::resetFwd(Argument& act) { - if (cnt_ == act.value->getElementCnt()) { - return; - } - MKLDNNActivation::resetFwd(act); - // note: alpha represents the NegativeSlope when used in relu. 
- float alpha = getAlpha(); - float beta = getBeta(); - algorithm algo = getAlgo(this->getName()); - auto fwdDesc = eltwise_fwd::desc(mkldnn::prop_kind::forward_training, - algo, - val_->getMemoryDesc(), - alpha, - beta); - fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, *engine_)); - // use inplace for forward but save input value before submit - inVal_ = val_; - copyInVal_ = nullptr; - if (act.grad && algo == algorithm::eltwise_tanh) { - // tanh need save src input for backward - inVal_ = MKLDNNMatrix::create(val_->getPrimitiveDesc()); - copyInVal_ = std::make_shared(*val_, *inVal_); - CHECK(copyInVal_) << "should not be emptry"; - pipelineFwd_.push_back(*copyInVal_); - } - fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_)); - pipelineFwd_.push_back(*fwd_); - needResetBwd_ = true; -} - -void MKLDNNEltwiseActivation::resetBwd(Argument& act) { - if (!needResetBwd_) { - return; - } - VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; - needResetBwd_ = false; - algorithm algo = getAlgo(this->getName()); - float alpha = getBwdAlpha(); - float beta = getBeta(); - grad_ = MKLDNNMatrix::create(val_->getPrimitiveDesc(), act.grad); - auto eng = CPUEngine::Instance().getEngine(); - auto bwdDesc = eltwise_bwd::desc( - algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta); - auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_); - CHECK(inVal_); - bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_)); - pipelineBwd_.clear(); - pipelineBwd_.push_back(*bwd_); -} - -/** - * @brief MKLDNN Softmax Activation - */ -DEFINE_MKLDNN_ACTIVATION(softmax, MKLDNNSoftmaxActivation) - -void MKLDNNSoftmaxActivation::resetFwd(Argument& act) { - if (cnt_ == act.value->getElementCnt()) { - return; - } - MKLDNNActivation::resetFwd(act); - int axis = 1; - auto fwdDesc = softmax_fwd::desc( - mkldnn::prop_kind::forward_scoring, val_->getMemoryDesc(), axis); - auto fwdPD = softmax_fwd::primitive_desc(fwdDesc, *engine_); - fwd_.reset(new softmax_fwd(fwdPD, 
*val_, *val_)); - pipelineFwd_.push_back(*fwd_); -} - -Error __must_check MKLDNNSoftmaxActivation::forward(Argument& act) { - resetFwd(act); - stream_->submit(pipelineFwd_); - real* v = act.value->getData(); - real threshold = exp(-64); -#pragma omp parallel for - for (size_t i = 0; i < act.value->getElementCnt(); ++i) { - v[i] = v[i] < threshold ? threshold : v[i]; - } - return Error(); -} - -Error __must_check MKLDNNSoftmaxActivation::backward(Argument& act) { - MatrixPtr outputV = act.value; - MatrixPtr outputG = act.grad; - Matrix::resizeOrCreate(sftMaxDot_, - outputG->getHeight(), - outputG->getWidth(), - /* trans */ false, - /* useGpu */ false); - Matrix::resizeOrCreate(sftMaxSum_, - outputG->getHeight(), - 1, - /* trans */ false, - /* useGpu */ false); - sftMaxDot_->dotMul(*outputG, *outputV); - sftMaxSum_->colMerge(*sftMaxDot_); - act.grad->softmaxDerivative(*act.value, *sftMaxSum_); - return Error(); -} - -ActivationFunction* MKLDNNActivation::create(const std::string& type) { - return gMKLDNNActivationRegistrar.createByType(type); -} - -std::vector MKLDNNActivation::getAllRegisteredTypes() { - std::vector types; - gMKLDNNActivationRegistrar.forEachType( - [&](const std::string& type) { types.push_back(type); }); - return types; -} - -void MKLDNNActivation::resetFwd(Argument& act) { - VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; - cnt_ = act.value->getElementCnt(); - pipelineFwd_.clear(); - stream_.reset(new MKLDNNStream()); - engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0)); - val_ = std::dynamic_pointer_cast(act.value); - if (val_ == nullptr) { - int bs = act.getBatchSize(); - int ih = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1; - int iw = act.getFrameWidth() > 0 ? 
act.getFrameWidth() : 1; - int ic = cnt_ / bs / ih / iw; - CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw); - val_ = MKLDNNMatrix::create( - {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_, act.value); - CHECK(val_); - val_->downSpatial(); - } -} - -Error __must_check MKLDNNActivation::forward(Argument& act) { - resetFwd(act); - stream_->submit(pipelineFwd_); - return Error(); -} -Error __must_check MKLDNNActivation::backward(Argument& act) { - resetBwd(act); - stream_->submit(pipelineBwd_); - return Error(); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/activations/MKLDNNActivation.h b/paddle/legacy/gserver/activations/MKLDNNActivation.h deleted file mode 100644 index 59c447ad07398c0b6ca7d78766dd533963744d1b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/activations/MKLDNNActivation.h +++ /dev/null @@ -1,119 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "ActivationFunction.h" -#include "mkldnn.hpp" -#include "paddle/legacy/gserver/layers/MKLDNNBase.h" -#include "paddle/legacy/math/MKLDNNMatrix.h" -#include "paddle/legacy/parameter/Argument.h" - -namespace paddle { - -/** - * @brief Base class of MKLDNN Activation. 
- * Common activation function are provieded, - * including mkldnn_relu, mkldnn_elu, mkldnn_tanh, mkldnn_softmax - */ -class MKLDNNActivation : public ActivationFunction { - protected: - // input value element count - size_t cnt_; - // should not merge the resetBwd into resetFwd, - // because the grad data would be changing before backward. - bool needResetBwd_; - // mkldnn matrix, primitive, stream and pipeline - MKLDNNMatrixPtr val_; - MKLDNNMatrixPtr grad_; - std::shared_ptr engine_; - std::shared_ptr stream_; - std::shared_ptr fwd_; - std::shared_ptr bwd_; - std::vector pipelineFwd_; - std::vector pipelineBwd_; - - public: - MKLDNNActivation() : cnt_(0), needResetBwd_(true) {} - ~MKLDNNActivation() {} - static ActivationFunction* create(const std::string& type); - static std::vector getAllRegisteredTypes(); - virtual const std::string& getName() const = 0; - /** - * reset the forward primitives - */ - virtual void resetFwd(Argument& act); - /** - * reset the backward primitives, - * can not merge this functions into resetFwd as the grad data - * would be changing before backward. - */ - virtual void resetBwd(Argument& act) {} - virtual Error __must_check forward(Argument& act); - virtual Error __must_check backward(Argument& act); -}; - -/** - * @brief Base class of MKLDNN Eltwise Activation, - * includes mkldnn_relu, mkldnn_elu and mkldnn_tanh. - */ -class MKLDNNEltwiseActivation : public MKLDNNActivation { - typedef mkldnn::eltwise_forward eltwise_fwd; - typedef mkldnn::eltwise_backward eltwise_bwd; - typedef mkldnn::algorithm algorithm; - - protected: - // save the forward primitive desc, which can be used backward - std::shared_ptr fwdPD_; - // eltwise_bwd need src input value - MKLDNNMatrixPtr inVal_; - // use for copy data - std::shared_ptr copyInVal_; - - public: - MKLDNNEltwiseActivation() {} - ~MKLDNNEltwiseActivation() {} - virtual const std::string& getName() const = 0; - - // in common, the alpha of forward and backward should be equal. 
- // but for relu, to avoid negative value, they should be opposite - virtual float getAlpha() const = 0; - virtual float getBwdAlpha() const = 0; - virtual float getBeta() const { return 0.f; } - virtual algorithm getAlgo(std::string type) const; - void resetFwd(Argument& act) override; - void resetBwd(Argument& act) override; -}; - -/** - * @brief Base class of MKLDNN softmax Activation, - * only have mkldnn forward, use cpu implement for backward. - */ -class MKLDNNSoftmaxActivation : public MKLDNNActivation { - typedef mkldnn::softmax_forward softmax_fwd; - - private: - // for backward - MatrixPtr sftMaxSum_; - MatrixPtr sftMaxDot_; - - public: - MKLDNNSoftmaxActivation() {} - ~MKLDNNSoftmaxActivation() {} - virtual const std::string& getName() const = 0; - void resetFwd(Argument& act) override; - Error __must_check forward(Argument& act) override; - Error __must_check backward(Argument& act) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/DataProvider.cpp b/paddle/legacy/gserver/dataproviders/DataProvider.cpp deleted file mode 100644 index b67af8a326bdfd211ee5720bf67828040b19e5c1..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/DataProvider.cpp +++ /dev/null @@ -1,410 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "DataProvider.h" - -#include -#include -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/StringUtil.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -void BufferBatch::swap(BufferBatch* bufBatch) { - DataBatch* batchData = bufBatch->getDataBatch(); - hl_event_t hlEvent = bufBatch->getCuEvent(); - hl_stream_t hlStream = bufBatch->getCuStream(); - bufBatch->setDataBatch(batchData_); - bufBatch->setCuStream(hlStream_); - bufBatch->setCuEvent(hlEvent_); - - batchData_ = batchData; - hlEvent_ = hlEvent; - hlStream_ = hlStream; -} - -void BufferBatch::clone(DataBatch* srcBatch, bool useGpu) { - if (batchData_ == NULL) { - batchData_ = new DataBatch(); - } - std::vector& destData = batchData_->getStreams(); - int numStreams = srcBatch->getNumStreams(); - destData.resize(numStreams); - batchData_->setSize(srcBatch->getSize()); - if (useGpu) { - createCuEvent(); - } - - for (int i = 0; i < numStreams; i++) { - destData[i].resizeAndCopyFrom(srcBatch->getStream(i), useGpu, hlStream_); - } - if (useGpu) { - hl_stream_record_event(hlStream_, hlEvent_); - } -} - -DoubleBuffer::DoubleBuffer(DataProvider* dataPool, - bool useGpu, - int64_t batchSize) { - batchSize_ = batchSize; - dataPool_ = dataPool; - useGpu_ = useGpu; - dataQueue_ = new BufferBatchQueue(); - bufferQueue_ = new BufferBatchQueue(); - - // insert a empty buffer - bufferQueue_->enqueue(new BufferBatch()); - stopping_ = false; - pending_ = true; -} - -DoubleBuffer::~DoubleBuffer() { - finishAsyncLoad(); - while (dataQueue_->size()) { - BufferBatch* dataBtch = dataQueue_->dequeue(); - delete dataBtch; - dataBtch = NULL; - } - while (bufferQueue_->size()) { - BufferBatch* bufBtch = bufferQueue_->dequeue(); - delete bufBtch; - bufBtch = NULL; - } - delete dataQueue_; - dataQueue_ = NULL; - delete bufferQueue_; - bufferQueue_ = NULL; -} - -void DoubleBuffer::removeOneBatch(DataBatch* dataBatch) { - // get data - BufferBatch* 
batch = dataQueue_->dequeue(); - batch->syncEvent(); // when use GPU, need synchronized with the cuEvent - *dataBatch = *(batch->getDataBatch()); - - // push anothor buffer - if (*usingBatch_ == nullptr) { - *usingBatch_ = std::make_shared(); - } - - // Mark the using-batch - batch->swap((*usingBatch_).get()); - bufferQueue_->enqueue(batch); - - if (0 == dataBatch->getSize()) { - setPending(true); - } -} - -void DoubleBuffer::insertOneBatch(DataBatch* batch) { - while (!bufferQueue_->waitNotEmptyFor(2 /* seconds */)) { // time out - if (stopping_) return; - } - BufferBatch* bufBatch = bufferQueue_->dequeue(); - // clone and copy the data from an Threadlocal Variable - bufBatch->clone(batch, useGpu_); - dataQueue_->enqueue(bufBatch); -} - -void DoubleBuffer::asyncLoadBatch() { - int64_t actualSize = 0; - if (useGpu_) { - hl_set_device(FLAGS_gpu_id); - } - setPending(false); - - while (true) { - taskReadySem_.wait(); - if (stopping_) break; - - while (batchSize_ == 0 && !stopping_) { - usleep(5); - } - if (stopping_) break; - - do { - DataBatch newBatch; - { - REGISTER_TIMER("getNextBatchInternal"); - actualSize = dataPool_->getNextBatchInternal(batchSize_, &newBatch); - } - insertOneBatch(&newBatch); - } while (actualSize > 0 && !stopping_); - } -} - -void DoubleBuffer::startAsyncLoad() { - if (asyncLoader_ == nullptr) { - asyncLoader_.reset(new std::thread([this]() { this->asyncLoadBatch(); })); - } - taskReadySem_.post(); -} - -ClassRegistrar - DataProvider::registrar_; - -DataProvider* DataProvider::create(const DataConfig& config, - const ModelConfig& modelConfig, - bool useGpu) { - return registrar_.createByType(config.type(), config, modelConfig, useGpu); -} - -REGISTER_DATA_PROVIDER(simple, SimpleDataProvider); -REGISTER_DATA_PROVIDER(dummy, DummyDataProvider); - -int64_t DataProvider::getNextBatch(int64_t size, DataBatch* batch) { - int64_t batchSize = doubleBuffer_ ? 
getNextBatchFromBuffer(size, batch) - : getNextBatchInternal(size, batch); - - if (!batchSize) return 0; - - if (!config_.constant_slots_size()) return batchSize; - - auto& constantSlots = *constantSlots_; - constantSlots.resize(config_.constant_slots_size()); - - for (int i = 0; i < config_.constant_slots_size(); ++i) { - MemoryHandlePtr handle = - constantSlots[i] ? constantSlots[i]->getMemoryHandle() : nullptr; - Matrix::resizeOrCreate(constantSlots[i], - batchSize, - 1, // = width - false, // = trans - useGpu_); // = useGpu - if (handle != constantSlots[i]->getMemoryHandle()) { - // memory buf was reallocated. We need to initialize the value - constantSlots[i]->assign(config_.constant_slots(i)); - } - batch->appendData(constantSlots[i], - batch->getStream(0).sequenceStartPositions); - } - - return batchSize; -} - -int64_t DataProvider::getNextBatchFromBuffer(int64_t size, DataBatch* batch) { - CHECK(doubleBuffer_ != nullptr); - - if (doubleBuffer_->getBatchSize() != size) { - doubleBuffer_->setBatchSize(size); - } - - doubleBuffer_->removeOneBatch(batch); - return batch->getSize(); -} - -void DataProvider::initAsyncLoader() { - if (doubleBuffer_ == nullptr) { - doubleBuffer_.reset(new DoubleBuffer(this, useGpu_)); - } - useGpu_ = false; // Avoid D2D copy, it will delay the computing performance -} - -SimpleDataProviderBase::SimpleDataProviderBase(const DataConfig& config, - bool useGpu, - bool withInfo) - : DataProvider(config, useGpu) { - /* initialize the size of a sample, and the buffer */ - sampleDim_ = config_.feat_dim() * (2 * config_.context_len() + 1); - bufferCapacity_ = config_.buffer_capacity(); - withInfo_ = withInfo; - sampleNumInBuf_ = 0; - nextItemIndex_ = 0; - - /* malloc buffer in cpu */ - hInputDataBuf_ = std::make_shared(bufferCapacity_, sampleDim_); - hInputLabelBuf_ = std::make_shared(bufferCapacity_); - hInputInfoBuf_ = std::make_shared(bufferCapacity_); -} - -void SimpleDataProviderBase::shuffle() { - int i, t; - int len = 
sampleNumInBuf_; - std::vector temp(sampleDim_); - real* data = hInputDataBuf_->getData(); - int* label = hInputLabelBuf_->getData(); - int* info = hInputInfoBuf_->getData(); - int sampleSz = sizeof(real) * sampleDim_; - for (i = 0; i < len; i++) { - int randNum = rand(); // NOLINT TODO(yuyang18): Use rand_r instead? - t = randNum % (len - i) + i; - // swap - if (i != t) { - // swap data - memcpy(&temp[0], &data[i * sampleDim_], sampleSz); - memcpy(&data[i * sampleDim_], &data[t * sampleDim_], sampleSz); - memcpy(&data[t * sampleDim_], &temp[0], sampleSz); - std::swap(label[i], label[t]); - if (withInfo_) { - std::swap(info[i], info[t]); - } - } - } -} - -int64_t SimpleDataProviderBase::getNextBatchInternal(int64_t size, - DataBatch* batch) { - CHECK(batch != NULL); - batch->clear(); - - int64_t startIndex; - int64_t cpySize; - - std::lock_guard guard(lock_); - if (sampleNumInBuf_ - nextItemIndex_ < size) { - int64_t n = fillBuffer(); - VLOG(1) << "fillBuffer return " << n << " samples.\n"; - } - - startIndex = nextItemIndex_; - cpySize = std::min(size, sampleNumInBuf_ - nextItemIndex_); - nextItemIndex_ += cpySize; - - if (cpySize > 0) { - real* data = hInputDataBuf_->getData() + startIndex * sampleDim_; - int* label = hInputLabelBuf_->getData() + startIndex; - int* info = hInputInfoBuf_->getData() + startIndex; - - MatrixPtr& dataBatch = *dataBatch_; // get the thread local object - IVectorPtr& labelBatch = *labelBatch_; // get the thread local object - IVectorPtr& infoBatch = *infoBatch_; // get the thread local object - if (!dataBatch) { - dataBatch = Matrix::create(cpySize, sampleDim_, false, useGpu_); - labelBatch = IVector::create(cpySize, useGpu_); - if (withInfo_) { - infoBatch = IVector::create(cpySize, 0); - } - } else { - dataBatch->resize(cpySize, sampleDim_); - labelBatch->resize(cpySize); - if (withInfo_) { - infoBatch->resize(cpySize); - } - } - dataBatch->copyFrom(data, cpySize * sampleDim_); - labelBatch->copyFrom(label, cpySize); - 
batch->appendData(dataBatch); - batch->appendLabel(labelBatch); - if (withInfo_) { - infoBatch->copyFrom(info, cpySize); - batch->appendLabel(infoBatch); - } - } - - batch->setSize(cpySize); - return cpySize; -} - -void SimpleDataProviderBase::reset() { - sampleNumInBuf_ = 0; - nextItemIndex_ = 0; - DataProvider::reset(); -} - -int64_t SimpleDataProviderBase::getSize() { - LOG(FATAL) << "Currently, not implemented"; - return 0; -} - -int64_t SimpleDataProviderBase::fillBuffer() { - int64_t n = sampleNumInBuf_ - nextItemIndex_; - - /* flash the remaining data to the beginning of the buffer */ - if (n > 0) { - hInputDataBuf_->copyFrom( - hInputDataBuf_->getData() + nextItemIndex_ * sampleDim_, - n * sampleDim_); - hInputLabelBuf_->copyFrom(hInputLabelBuf_->getData() + nextItemIndex_, n); - if (withInfo_) { - hInputInfoBuf_->copyFrom(hInputInfoBuf_->getData() + nextItemIndex_, n); - } - } - - sampleNumInBuf_ = - n + fillBufferImp(hInputDataBuf_->getData() + n * sampleDim_, - hInputLabelBuf_->getData() + n, - hInputInfoBuf_->getData() + n, - bufferCapacity_ - n); - - /* for stachastic gradient training */ - if (!skipShuffle_) { - shuffle(); - } - - nextItemIndex_ = 0; - - return sampleNumInBuf_; -} - -SimpleDataProvider::SimpleDataProvider(const DataConfig& config, bool useGpu) - : SimpleDataProviderBase(config, useGpu, /* withInfo= */ false), - currentSampleIndex_(0) { - loadData(config_.files()); -} - -SimpleDataProvider::~SimpleDataProvider() {} - -int64_t SimpleDataProvider::fillBufferImp(real* data, - int* label, - int* info, - int64_t size) { - (void)info; - int64_t n = std::min(labels_.size() - currentSampleIndex_, size); - memcpy(data, - &data_[currentSampleIndex_ * sampleDim_], - n * sampleDim_ * sizeof(real)); - memcpy(label, &labels_[currentSampleIndex_], sizeof(int) * n); - currentSampleIndex_ += n; - - return n; -} - -void SimpleDataProvider::reset() { - currentSampleIndex_ = 0; - SimpleDataProviderBase::reset(); -} - -void 
SimpleDataProvider::loadData(const std::string& fileName) { - std::ifstream is(fileName); - CHECK(is) << "Fail to open " << fileName; - std::string line; - while (is) { - if (!getline(is, line)) break; - LOG(INFO) << "load data file " << line; - loadDataFile(line); - } - LOG(INFO) << "read done, num of instance=" << labels_.size() - << " data size=" << data_.size(); -} - -void SimpleDataProvider::loadDataFile(const std::string& fileName) { - std::ifstream is(fileName); - std::string line; - std::vector pieces; - while (is) { - if (!getline(is, line)) break; - str::split(line, ' ', &pieces); - CHECK_EQ((uint64_t)(sampleDim_ + 1), pieces.size()) - << " Dimension mismatch, " << pieces.size() - 1 << " in " << fileName - << " " << sampleDim_ << " from config"; - labels_.push_back(atoi(pieces[0].c_str())); - for (int i = 0; i < sampleDim_; ++i) { - data_.push_back(atof(pieces[i + 1].c_str())); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/DataProvider.h b/paddle/legacy/gserver/dataproviders/DataProvider.h deleted file mode 100644 index c2e1c5fdd6d504b77873aaeeba3611dff6d8f738..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/DataProvider.h +++ /dev/null @@ -1,480 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "DataConfig.pb.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/parameter/Argument.h" -#include "paddle/legacy/utils/ClassRegistrar.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Queue.h" -#include "paddle/legacy/utils/ThreadLocal.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { -/** - * @def REGISTER_DATA_PROVIDER - * @brief Macro for registering a data provider. The class type should contain - * a consturctor with parameter (DataConfig, bool). - */ -#define REGISTER_DATA_PROVIDER(__type_name, __class_name) \ - static InitFunction __reg_type_##__type_name([]() { \ - DataProvider::registrar_.registerClass( \ - #__type_name, \ - [](DataConfig conf, ModelConfig, bool useGpu) -> DataProvider* { \ - DataProvider* dp = new __class_name(conf, useGpu); \ - return dp; \ - }); \ - }) - -/** - * @def REGISTER_DATA_PROVIDER_EX - * @brief Macro for registering a data provider, which contains a constructor - * with parameter (DataConfig, ModelConfig, bool). 
- */ -#define REGISTER_DATA_PROVIDER_EX(__type_name, __class_name) \ - static InitFunction __reg_type_##__type_name([] { \ - DataProvider::registrar_.registerClass<__class_name>(#__type_name); \ - }) - -class DataBatch; -class BufferBatch; -typedef std::shared_ptr DataBatchPtr; -typedef std::shared_ptr BufferBatchPtr; -/** - * @brief Data for batch training a neural network - */ -class DataBatch { - public: - DataBatch() : size_(0) { data_.clear(); } - /** - * @brief Get batch size - * @return batch size - */ - int64_t getSize() const { return size_; } - /** - * @brief Get num of sequences of sequence data - * @return num of sequences - */ - int64_t getNumSequences() const { - if (data_.empty()) return size_; - return data_[0].sequenceStartPositions - ? data_[0].sequenceStartPositions->getSize() - 1 - : size_; - } - /** - * @brief Set batch size - * @param[in] size size - */ - void setSize(int64_t size) { size_ = size; } - /** - * @brief Get size of argument vector - * @return size of argument vector - * @note For usual supervised learning, input data and label is needed, - * then there will be two argument. - */ - int64_t getNumStreams() const { return data_.size(); } - - /** - * @brief Get a argument with index i - * @param[in] i index in argument vector - * @return a argument with index i - */ - const Argument& getStream(int i) const { return data_[i]; } - /** - * @brief Get all argument - * @return an argument vector - */ - std::vector& getStreams() { return data_; } - /** - * @brief Get all argument const - * @return an argument vector - */ - std::vector getStreams() const { return data_; } - /** - * @brief Clear DataBatch - */ - void clear() { - data_.clear(); - size_ = 0; - } - - /** - * @brief Append data to DataBatch - * @param[in] data matrix data - * @note The order in which each data stream is appended must match the order - * specified in stream_names of DataConfig. The stream_names can be obtained - * using DataProvider::getStreamNames(). 
- */ - void appendData(MatrixPtr data) { - Argument argu; - argu.value = data; - data_.push_back(argu); - } - - /** - * @brief Append sequence data to DataBatch - * @param[in] data matrix data - * @param[in] sequenceStartPositions sequence data - * @note The order in which each data stream is appended must match the order - * specified in stream_names of DataConfig. The stream_names can be obtained - * using DataProvider::getStreamNames(). - */ - void appendData(const MatrixPtr& data, - const ICpuGpuVectorPtr& sequenceStartPositions) { - Argument argu; - argu.value = data; - argu.sequenceStartPositions = sequenceStartPositions; - data_.push_back(argu); - } - /** - * @brief Append label data - * @param[in] label label data - * @param[in] value matrix data, default null - */ - void appendLabel(IVectorPtr label, MatrixPtr value = nullptr) { - Argument argu; - argu.ids = label; - argu.value = value; - data_.push_back(argu); - } - - /* - * @brief Append argument - * @param[in] argus DataBatch.getStreams() - * @param[in] size DataBatch.getSize() - * @param[in] dataId sub dataprovider id (in MultiDataProvider) - */ - void appendArguments(const std::vector& argus, - int size, - int dataId) { - size_ += size; - for (const auto& argu : argus) { - data_.push_back(argu); - data_.back().dataId = dataId; - } - } - - protected: - /** - * @brief batch size - */ - int64_t size_; - /** - * @brief A batch data consist of a Argument vector, - * An argument corresponds to a type of input data. 
- */ - std::vector data_; -}; - -class BufferBatch { - public: - BufferBatch() { - hlStream_ = HPPL_STREAM_DEFAULT; - hlEvent_ = NULL; - batchData_ = NULL; - } - ~BufferBatch() { - if (hlEvent_) { - hl_destroy_event(hlEvent_); - hlEvent_ = NULL; - } - delete batchData_; - batchData_ = NULL; - } - - void setDataBatch(DataBatch* batchData) { batchData_ = batchData; } - DataBatch* getDataBatch() { return batchData_; } - - void setCuStream(hl_stream_t stream) { hlStream_ = stream; } - hl_stream_t getCuStream() const { return hlStream_; } - - void setCuEvent(hl_event_t event) { hlEvent_ = event; } - - hl_event_t getCuEvent() const { return hlEvent_; } - - void createCuEvent() { - if (!hlEvent_) { - hlStream_ = HPPL_STREAM_1; - hl_create_event(&hlEvent_); - } - } - - void syncEvent() { - if (hlEvent_) { - hl_stream_wait_event(hlStream_, hlEvent_); - } - } - - void swap(BufferBatch* bufBatch); - void clone(DataBatch* srcBatch, bool useGpu); - - protected: - DataBatch* batchData_; - hl_stream_t hlStream_; - hl_event_t hlEvent_; -}; - -class DataProvider; -typedef std::shared_ptr DataProviderPtr; - -typedef Queue BufferBatchQueue; - -class DoubleBuffer { - public: - DoubleBuffer(DataProvider* dataPool, bool useGpu, int64_t batchSize = 0); - virtual ~DoubleBuffer(); - void removeOneBatch(DataBatch* dataBatch); - - void setBatchSize(int64_t newBatchSize) { batchSize_ = newBatchSize; } - - int64_t getBatchSize() { return batchSize_; } - - void startAsyncLoad(); - void finishAsyncLoad() { - stopping_ = true; - taskReadySem_.post(); - if (asyncLoader_) { - asyncLoader_->join(); - } - } - - void setPending(bool pending) { pending_ = pending; } - - protected: - virtual void asyncLoadBatch(); - void insertOneBatch(DataBatch* batch); - - DataProvider* dataPool_; - bool useGpu_; - int32_t batchSize_; - ThreadLocal usingBatch_; - BufferBatchQueue* dataQueue_; - BufferBatchQueue* bufferQueue_; - std::unique_ptr asyncLoader_; - Semaphore taskReadySem_; - bool stopping_; - bool pending_; 
-}; - -/** - * @brief Base class for DataProvider, which supplies data for training - * @note It can supplies multiple streams of data. - * For typical supervised training, there are two streams: - * one is for input, one is for label. - */ -class DataProvider { - public: - static ClassRegistrar registrar_; - static DataProvider* create(const DataConfig& config, - const ModelConfig& modelConfig, - bool useGpu = FLAGS_use_gpu); - - /** - * @brief create only used for unittest. - */ - inline static DataProvider* create(const DataConfig& config, - bool useGpu = FLAGS_use_gpu) { - return create(config, ModelConfig(), useGpu); - } - - DataProvider(const DataConfig& config, bool useGpu) - : config_(config), - skipShuffle_(false), - usageRatio_(config.usage_ratio()), - useGpu_(useGpu) { - if (config_.async_load_data()) { - initAsyncLoader(); - } - } - virtual ~DataProvider() {} - - const DataConfig& getConfig() const { return config_; } - - void setSkipShuffle() { skipShuffle_ = true; } - - /** - * @brief Get next batch of training samples - * @param[in] size size of training samples to get - * @param[out] batch a batch of training samples - * @return actual size of obtained training samples - */ - int64_t getNextBatch(int64_t size, DataBatch* batch); - - /** - * @brief Shuffle the data set - */ - virtual void shuffle() = 0; - - /** - * @brief reset all the value of index - * @note reset() must be called before any calls to getNextBatch() - * IMPORTANT: subclass reset() should always call the base class reset() - * at the end of the function - */ - virtual void reset() { - if (doubleBuffer_ != nullptr) { - doubleBuffer_->startAsyncLoad(); - } - } - - /** - * @brief Get the size of training samples - * @return the number of training samples in the data set. - * @note return -1 to indicate unlimited number of samples. 
- */ - virtual int64_t getSize() = 0; - - /** - * @brief Get next batch training samples internally - * @param[in] size size of training samples to get - * @param[out] batch a batch of training samples - * @return actual size of obtained training samples - */ - virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0; - - protected: - DataConfig config_; - bool skipShuffle_; - float usageRatio_; - bool useGpu_; - std::unique_ptr doubleBuffer_; - ThreadLocal> constantSlots_; - /** - * @@brief Get next batch training samples from buffer - * @param[in] size size of training samples to get - * @param[out] batch a batch of training samples - * @return actual size of obtained training samples - */ - int64_t getNextBatchFromBuffer(int64_t size, DataBatch* batch); - - void initAsyncLoader(); -}; - -/** - * A data provider which does nothing. It only serves as providing - * necessary configurations such as stream_names - */ -class DummyDataProvider : public DataProvider { - public: - DummyDataProvider(const DataConfig& config, bool useGpu) - : DataProvider(config, useGpu) {} - virtual void shuffle() {} - virtual void reset() { DataProvider::reset(); } - virtual int64_t getSize() { return 0; } - virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) { - (void)size; - (void)batch; - return 0; - } -}; - -/** - * Data provider for one input and one integer label. 
- */ -class SimpleDataProviderBase : public DataProvider { - protected: - /// sample feature dimension - int64_t sampleDim_; - /// the number of samples - int64_t bufferCapacity_; - int64_t sampleNumInBuf_; - /// next item to read in buffer - int64_t nextItemIndex_; - /// some user defined info for validation - bool withInfo_; - - /// data buffer: bufferCapacity_ * nDataDim_ - CpuMatrixPtr hInputDataBuf_; - - /// label buffer:bufferCapacity_ * 1 - CpuIVectorPtr hInputLabelBuf_; - - /// info buffer:bufferCapacity_ * 1 - CpuIVectorPtr hInputInfoBuf_; - - ThreadLocal dataBatch_; - ThreadLocal labelBatch_; - ThreadLocal infoBatch_; - - RWLock lock_; - - public: - SimpleDataProviderBase(const DataConfig& config, bool useGpu, bool withInfo); - ~SimpleDataProviderBase() {} - - void shuffle(); - - virtual void reset(); - - virtual int64_t getSize(); - - virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch); - - /// return the number of samples in the buffer - int64_t fillBuffer(); - - protected: - /** - * @brief Fill at most size samples into data and label. - * - * Each input is stored in contiguous memory locations in data. - * - * data[n * sampleDim_] .. data[n * sampleDim_ + sampleDim_ - 1] is for - * the input of the n-th sample. - * - * label[n] is the label for the n-th sample. 
- */ - virtual int64_t fillBufferImp(real* data, - int* label, - int* info, - int64_t size) = 0; -}; - -class SimpleDataProvider : public SimpleDataProviderBase { - public: - SimpleDataProvider(const DataConfig& config, bool useGpu); - ~SimpleDataProvider(); - virtual void reset(); - - protected: - void loadData(const std::string& fileName); - void loadDataFile(const std::string& fileName); - virtual int64_t fillBufferImp(real* data, - int* label, - int* info, - int64_t size); - - protected: - size_t currentSampleIndex_; - std::vector labels_; - std::vector data_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/DataProviderGroup.h b/paddle/legacy/gserver/dataproviders/DataProviderGroup.h deleted file mode 100644 index 91c94dc986c7aeb70df25511ce14a5f9c312a159..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/DataProviderGroup.h +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "DataProvider.h" - -namespace paddle { - -template -class DataProviderGroup : public DataProvider { - protected: - typedef T ProviderType; - typedef std::shared_ptr ProviderPtrType; - ProviderPtrType provider_; - - std::vector fileList_; - std::mutex lock_; - std::unique_ptr> loader_; - - public: - DataProviderGroup(const DataConfig& config, bool useGpu); - ~DataProviderGroup() {} - - virtual void reset(); - virtual void shuffle() {} - virtual int64_t getSize() { return -1; } - virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch); - - private: - void startLoader(); - void stopLoader(); - void forceStopLoader(); - ProviderPtrType loadFile(const std::vector& fileList); -}; - -template -DataProviderGroup::DataProviderGroup(const DataConfig& config, bool useGpu) - : DataProvider(config, useGpu) { - // load file list - loadFileList(config_.files(), fileList_); - CHECK_GT(fileList_.size(), 0LU); - LOG(INFO) << "load file list, numfiles=" << fileList_.size() - << ", max_num_of_data_providers_in_memory=" - << (1 + config_.file_group_conf().queue_capacity() + - config_.file_group_conf().load_thread_num()); -} - -template -void DataProviderGroup::reset() { - forceStopLoader(); - CHECK(!loader_); - provider_ = nullptr; - - // shuffle file list - std::shuffle( - fileList_.begin(), fileList_.end(), ThreadLocalRandomEngine::get()); - - startLoader(); - DataProvider::reset(); -} - -template -int64_t DataProviderGroup::getNextBatchInternal(int64_t size, - DataBatch* batch) { - std::lock_guard guard(lock_); - - if (!loader_) { - return 0; - } - if (provider_) { - int64_t ret = provider_->getNextBatchInternal(size, batch); - if (ret > 0) { - return ret; - } - } - - // else get data from next data provider - if (loader_->testResult()) { - LOG(INFO) << "WAIT provider"; - } - provider_ = loader_->waitResult(); - if (!provider_) { - stopLoader(); // All the data providers have been returned - return 0; - } - int64_t ret = 
provider_->getNextBatchInternal(size, batch); - CHECK(ret > 0) << "new data provider does not contain any valid samples!"; - return ret; -} - -template -void DataProviderGroup::startLoader() { - loader_.reset(new MultiThreadWorker( - config_.file_group_conf().load_thread_num(), - config_.file_group_conf().queue_capacity())); - - int loadFileCount = config_.file_group_conf().load_file_count(); - for (size_t startPos = 0; startPos < fileList_.size(); - startPos += loadFileCount) { - size_t endPos = std::min(fileList_.size(), startPos + loadFileCount); - std::vector fileVec(fileList_.begin() + startPos, - fileList_.begin() + endPos); - loader_->addJob([this, fileVec]() -> ProviderPtrType { - return this->loadFile(fileVec); - }); - } - loader_->stopAddJob(); -} - -template -void DataProviderGroup::stopLoader() { - if (loader_) { - loader_->stop(); - loader_ = nullptr; - } -} - -template -void DataProviderGroup::forceStopLoader() { - if (loader_) { - loader_->forceStop(); - loader_ = nullptr; - } -} - -template -std::shared_ptr DataProviderGroup::loadFile( - const std::vector& fileList) { - // disable async_load_data in sub dataprovider - DataConfig subConfig = config_; - subConfig.set_async_load_data(false); - - CHECK(!fileList.empty()) << "fileList is empty"; - ProviderPtrType provider = - std::make_shared(subConfig, useGpu_, false); - provider->loadData(fileList); - provider->reset(); - return provider; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/MultiDataProvider.cpp b/paddle/legacy/gserver/dataproviders/MultiDataProvider.cpp deleted file mode 100644 index e5fc6d8a88fe2c03cc74b4a38e999d11d676dfdf..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/MultiDataProvider.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MultiDataProvider.h" -#include -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -using namespace std; - -MultiDataProvider::MultiDataProvider(const DataConfig& config, - const ModelConfig& modelConfig, - bool useGpu) - : DataProvider(config, useGpu) { - bool atLeastOneMainDataFlag = false; - totalDataRatio_ = 0; - LOG(INFO) << "MultiDataProvider: sub data provider size: " - << config.sub_data_configs_size(); - LOG(INFO) << "MultiDataProvider: for_test: " << config.for_test(); - isTestMode_ = config.for_test(); - for (int i = 0; i < config.sub_data_configs_size(); i++) { - LOG(INFO) << "dataRatio of sub(" << i - << ") is: " << config.sub_data_configs(i).data_ratio(); - totalDataRatio_ += config.sub_data_configs(i).data_ratio(); - if (config.sub_data_configs(i).is_main_data()) { - LOG(INFO) << "main data is [" << i << "]"; - atLeastOneMainDataFlag = true; - } - } - CHECK(atLeastOneMainDataFlag) << "all sub dataproviders in MultiData do not" - << " have is_main_data flag"; - LOG(INFO) << "totalDataRatio_=" << totalDataRatio_; - DataConfig subConfig; - int subDataProviderCount = config.sub_data_configs_size(); - if (isTestMode()) { - LOG(INFO) << "construct MultiDataProvider in test mode"; - } else { - LOG(INFO) << "construct MultiDataProvider in train mode"; - } - subDataProviders_.resize(subDataProviderCount); - for (int i = 0; i < subDataProviderCount; i++) { - subConfig = config.sub_data_configs(i); - if (subConfig.async_load_data()) { - LOG(INFO) << "can not use async_load_data in sub dataprovider of 
" - "MultiDataProvider"; - subConfig.set_async_load_data(false); - } - subDataProviders_[i] = std::unique_ptr( - DataProvider::create(subConfig, modelConfig, useGpu_)); - } -} - -void MultiDataProvider::reset() { - for (auto& elem : subDataProviders_) { - elem->reset(); - } - DataProvider::reset(); -} - -void MultiDataProvider::shuffle() { - for (auto& elem : subDataProviders_) { - elem->shuffle(); - } -} - -int64_t MultiDataProvider::getNextBatchInternal(int64_t size, - DataBatch* batch) { - batch->clear(); - for (size_t i = 0; i < subDataProviders_.size(); ++i) { - // calc size according to data ratio - int64_t subSize = - (int64_t)(1.0 * size * config_.sub_data_configs(i).data_ratio() / - totalDataRatio_); - DataBatch subBatch; - int64_t realSize = - subDataProviders_[i]->getNextBatchInternal(subSize, &subBatch); - if (realSize == 0) { - // current subDataProvider has no data - if (!isTestMode()) { - // in train mode - if (config_.sub_data_configs(i).is_main_data()) { - // is main data provider. 
then return 0 - batch->clear(); - return 0; - } else { - // not main data provider, reset current subDataProvider and try again - subDataProviders_[i]->reset(); - subBatch.clear(); - realSize = - subDataProviders_[i]->getNextBatchInternal(subSize, &subBatch); - CHECK_GT(realSize, 0); - } - } else { - // in test mode, make an empty argument - Argument emptyArgu; - std::vector argus; - argus.push_back(emptyArgu); - batch->appendArguments(argus, 0, -1); - continue; - } - } - batch->appendArguments(subBatch.getStreams(), subBatch.getSize(), i); - } - return batch->getSize(); -} - -REGISTER_DATA_PROVIDER_EX(multi, MultiDataProvider); - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/MultiDataProvider.h b/paddle/legacy/gserver/dataproviders/MultiDataProvider.h deleted file mode 100644 index baa1fc019002f86414c9c45734ad65cda916d457..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/MultiDataProvider.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "DataProvider.h" - -namespace paddle { - -class MultiDataProvider : public DataProvider { - protected: - std::vector> subDataProviders_; - - public: - MultiDataProvider(const DataConfig& config, - const ModelConfig& modelConfig, - bool useGpu); - ~MultiDataProvider() {} - virtual void reset(); - virtual void shuffle(); - virtual int64_t getSize() { return -1; } - virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch); - bool isTestMode() const { return isTestMode_; } - - private: - int totalDataRatio_; - bool isTestMode_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/ProtoReader.h b/paddle/legacy/gserver/dataproviders/ProtoReader.h deleted file mode 100644 index 08d045226e1ebb014bdd91ebf0e8f0353179b0c8..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/ProtoReader.h +++ /dev/null @@ -1,177 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include -#include -#include -#include - -namespace paddle { - -/** - * ProtoReader/ProtoWriter are used to read/write a sequence of protobuf - * messages from/to i/ostream. 
- */ -class ProtoReader { - public: - explicit ProtoReader(std::istream* s, bool dataCompression = false) { - CHECK(s) << "istream pointer is nullptr"; - istreamInput_.reset(new google::protobuf::io::IstreamInputStream(s)); - if (dataCompression) { - gzipInput_.reset( - new google::protobuf::io::GzipInputStream(istreamInput_.get())); - codedInput_.reset( - new google::protobuf::io::CodedInputStream(gzipInput_.get())); - } else { - codedInput_.reset( - new google::protobuf::io::CodedInputStream(istreamInput_.get())); - } - dataCompression_ = dataCompression; - approximateReadedBytes_ = 0; - codedInput_->SetTotalBytesLimit(kDefaultTotalBytesLimit, - kDefaultTotalBytesLimit); - } - - /** - * read one message - */ - bool read(google::protobuf::MessageLite* msg) { - if (approximateReadedBytes_ >= kMaxLimitBytes) { - // Once bytes we read get close to 64MB(larger than 55MB), - // we re-intialize the codedInputStream object. - approximateReadedBytes_ = 0; - - /** - * Explicitly destroys the object owned by unique_ptr at first and then - * construct an new object. - * - * 1.reset() - * - * 2.reset(new ...) <-- such sequence is EXTREAMLY important! - * - * Reason: (!!!Read me before you modify the following 2 lines of - * codes!!!) - * - * Otherwise, reset() method will ask the CodedInputStream constructor - * to construct the new object at first forcing the IstreamInputStream - * object to move its underlying pointer to the next 8192 bytes. - * - * Then the old object will be destroied calling - * IstreamInputStream::BackUp() to move the underlying pointer back. - * This means that the InstreamInputStream object is referenced by - * 2 different CodedInputStream object at the same time which "confuses" - * the position of istreamInput_'s underlying pointer. Such fatal - * confusion will lead to undefined behaviour when 'codedInput_' is - * used to read new data. 
- * - */ - codedInput_.reset(); - if (dataCompression_) { - codedInput_.reset( - new google::protobuf::io::CodedInputStream(gzipInput_.get())); - } else { - codedInput_.reset( - new google::protobuf::io::CodedInputStream(istreamInput_.get())); - } - codedInput_->SetTotalBytesLimit(kDefaultTotalBytesLimit, - kDefaultTotalBytesLimit); - } - - uint32_t size; - if (!codedInput_->ReadVarint32(&size)) { - return false; - } - google::protobuf::io::CodedInputStream::Limit limit = - codedInput_->PushLimit(size); - CHECK(msg->ParseFromCodedStream(codedInput_.get())); - codedInput_->PopLimit(limit); - - /** - * size is varint in the data file, we don't know the length. - * We assume every size takes 4 bytes in the data file. - */ - approximateReadedBytes_ += 4 + size; - return true; - } - - protected: - std::unique_ptr istreamInput_; - std::unique_ptr gzipInput_; - std::unique_ptr codedInput_; - bool dataCompression_; - - /** - * This is the maximum number of bytes that this CodedInputStream will read - * before refusing to continue. - */ - static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB - - /** - * If data readed by the reader is more than 55MB( << 64MB), - * we reset the CodedInputStream object. - * This can help avoid 64MB warning which will cause the ParseFromCodedStream - * to fail. - */ - static const int kMaxLimitBytes = 55 << 20; - - /** - * This variable dosen't store the exact bytes readed by CodedInputStream - * object since which is constructed. Instead, it store the approximate bytes - * because we can't tell how many bytes are readed by the object with the - * help of API. - * - * @note this code depends on protobuf 2.4.0. There is nothing like - * CodedInputStream::CurrentPosition() in protobuf 2.5.0 to tell us how many - * bytes has the object readed so far. Therefore, we calculated bytes - * ourselves. 
- */ - int approximateReadedBytes_; -}; - -class ProtoWriter { - public: - explicit ProtoWriter(std::ostream* s, bool dataCompression = false) { - CHECK(s) << "ostream pointer is nullptr"; - ostreamOutput_.reset(new google::protobuf::io::OstreamOutputStream(s)); - if (dataCompression) { - gzipOutput_.reset( - new google::protobuf::io::GzipOutputStream(ostreamOutput_.get())); - codedOutput_.reset( - new google::protobuf::io::CodedOutputStream(gzipOutput_.get())); - } else { - codedOutput_.reset( - new google::protobuf::io::CodedOutputStream(ostreamOutput_.get())); - } - } - - /** - * write one message. - */ - bool write(const google::protobuf::MessageLite& msg) { - codedOutput_->WriteVarint32(msg.ByteSize()); - bool ret = msg.SerializeToCodedStream(codedOutput_.get()); - return ret; - } - - protected: - std::unique_ptr ostreamOutput_; - std::unique_ptr gzipOutput_; - std::unique_ptr codedOutput_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/PyDataProvider.cpp b/paddle/legacy/gserver/dataproviders/PyDataProvider.cpp deleted file mode 100644 index 0827bd39d4cc78ef5658d437b6502f2e60e90b4c..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/PyDataProvider.cpp +++ /dev/null @@ -1,498 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "PyDataProvider.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/PythonUtil.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -#ifndef PADDLE_NO_PYTHON -REGISTER_DATA_PROVIDER(py, PyDataProvider); -#endif - -PyDataProvider::PyDataProvider(const DataConfig& config, - bool useGpu, - bool loadDataAll) - : DataProvider(config, useGpu), batchSize_(0) { - PyGuard guard; - pyModuleName_ = config_.load_data_module(); - pyClassName_ = config_.load_data_object(); - if (config_.load_data_args() != "") { - pyUserArgs_["load_data_args"] = config_.load_data_args(); - } - - if (loadDataAll) { - std::vector fileList; - if (!config_.files().empty()) { - loadFileList(config_.files(), fileList); - } - loadData(fileList); - } -} - -void PyDataProvider::loadData(const std::vector& fileList) { - VLOG(1) << "module:" << pyModuleName_ << " class:" << pyClassName_; - classInstance_ = - createPythonClass(pyModuleName_, pyClassName_, fileList, pyUserArgs_); - CHECK(classInstance_) << "Create class instance failed."; - PyObjectPtr obj(PyObject_CallMethod( - classInstance_.get(), const_cast("getHeader"), NULL)); - CHECK_PY(obj) << "Call function getHeader failed."; - std::string headerInfo = - std::string(PyString_AsString(obj.get()), PyString_Size(obj.get())); - parseHeaderData(headerInfo); - feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW); -} - -void PyDataProvider::parseHeaderData(const std::string& headerData) { - char* pHeader = const_cast(headerData.c_str()); - char* pHeaderEnd = pHeader + headerData.size(); - slotNum_ = readT(pHeader, pHeaderEnd); - unsigned int useSequenceFlag = readT(pHeader, pHeaderEnd); - isIID_ = useSequenceFlag != 1; - slots_.clear(); - slots_.reserve(slotNum_); - for (size_t i = 0; i < slotNum_; ++i) { - unsigned int slotType = readT(pHeader, pHeaderEnd); - unsigned int slotDim = readT(pHeader, pHeaderEnd); - slots_.emplace_back(); - slots_.back().dim = slotDim; - slots_.back().type = 
static_cast(slotType); - } -} - -void PyDataProvider::resetSlots() { - for (auto& slot : slots_) { - slot.indexData.clear(); - slot.denseData.clear(); - slot.sparseNonValueData.clear(); - slot.sparseFloatValueData.clear(); - slot.indices.clear(); - slot.sequenceStartPositions.clear(); - slot.sampleSequenceIdVec.clear(); - slot.subSequenceStartPositions.clear(); - slot.strData.clear(); - } -} - -void PyDataProvider::fillDenseSlot(ProtoSlot& slot, - char*& data, - const char* dataEnd) { - unsigned int dim = slot.dim; - slot.sampleNum = readT(data, dataEnd); - slot.denseData.resize(slot.sampleNum * dim); -#ifdef PADDLE_TYPE_DOUBLE - CHECK_LE(data + sizeof(real) * dim * slot.sampleNum, dataEnd) - << "std::copy data is out of range"; - // PyDataProvider always provide data in float - float* dat = reinterpret_cast(data); - std::copy(dat, dat + slot.sampleNum * dim, slot.denseData.begin()); -#else - memcpyWithCheck(slot.denseData.data(), - data, - sizeof(real) * dim * slot.sampleNum, - dataEnd); -#endif - // PyDataProvider always provide data in float - data += sizeof(float) * dim * slot.sampleNum; -} - -void PyDataProvider::fillSparseNonValueSlot(ProtoSlot& slot, - char*& data, - const char* dataEnd) { - slot.sampleNum = readT(data, dataEnd); - unsigned int* indexPtr = (unsigned int*)data; - CHECK_LE(data + sizeof(unsigned int) * slot.sampleNum, dataEnd) - << "Vector assign value is out of range"; - slot.indices.assign(indexPtr, indexPtr + slot.sampleNum); - data += sizeof(unsigned int) * slot.sampleNum; - unsigned int length = 0; - length = readT(data, dataEnd); - slot.indices.push_back(length); - slot.sparseNonValueData.resize(length); - memcpyWithCheck(slot.sparseNonValueData.data(), - data, - sizeof(unsigned int) * length, - dataEnd); - data += sizeof(unsigned int) * length; -} - -void PyDataProvider::fillSparseValueSlot(ProtoSlot& slot, - char*& data, - const char* dataEnd) { - slot.sampleNum = readT(data, dataEnd); - unsigned int* indexPtr = (unsigned int*)data; - 
CHECK_LE(data + sizeof(unsigned int) * slot.sampleNum, dataEnd) - << "Vector assign value is out of range"; - slot.indices.assign(indexPtr, indexPtr + slot.sampleNum); - data += sizeof(unsigned int) * slot.sampleNum; - unsigned int length = 0; - length = readT(data, dataEnd); - unsigned int* colPtr = reinterpret_cast(data); - CHECK_LE(data + sizeof(unsigned int) * length, dataEnd) - << "Data is out of range"; - data += sizeof(unsigned int) * length; - size_t colLen = readT(data, dataEnd); - CHECK_EQ(colLen, length); - float* valuePtr = reinterpret_cast(data); - CHECK_LE(data + sizeof(real) * length, dataEnd) << "Data is out of range"; - data += sizeof(real) * length; - slot.indices.push_back(length); - slot.sparseFloatValueData.resize(length); - for (unsigned int ii = 0; ii < length; ++ii) { - slot.sparseFloatValueData[ii].col = colPtr[ii]; - slot.sparseFloatValueData[ii].value = valuePtr[ii]; - } -} - -void PyDataProvider::fillIndexSlot(ProtoSlot& slot, - char*& data, - const char* dataEnd) { - slot.sampleNum = readT(data, dataEnd); - CHECK_LE(data + sizeof(unsigned int) * slot.sampleNum, dataEnd) - << "Vector assign is out of range"; - slot.indexData.assign(reinterpret_cast(data), - reinterpret_cast(data) + slot.sampleNum); - data += sizeof(unsigned int) * slot.sampleNum; -} - -void PyDataProvider::fillStringSlot(ProtoSlot& slot, - char*& data, - const char* dataEnd) { - slot.sampleNum = readT(data, dataEnd); - for (unsigned int i = 0; i < slot.sampleNum; ++i) { - size_t len = readT(data, dataEnd); - auto str_begin = data; - data += len; - CHECK_LE(data, dataEnd) << "Data is out of range"; - slot.strData.emplace_back(str_begin, len); - } -} - -void PyDataProvider::fillSlotsByStr(const std::string& samples) { - char* data = const_cast(samples.c_str()); - char* dataEnd = data + samples.size(); - batchSize_ = readT(data, dataEnd); - if (0 == batchSize_) { - return; - } - - for (size_t j = 0; j < slotNum_; ++j) { - auto& slot = slots_[j]; - CHECK(SlotDef::INDEX >= 
slot.type || SlotDef::STRING == slot.type) - << " Slot type:" << slot.type << " is out of range."; - CHECK_GE(slot.type, SlotDef::VECTOR_DENSE) << " Slot type:" << slot.type - << " is out of range."; - switch (slot.type) { - case SlotDef::VECTOR_DENSE: - fillDenseSlot(slot, data, dataEnd); - break; - case SlotDef::VECTOR_SPARSE_NON_VALUE: - fillSparseNonValueSlot(slot, data, dataEnd); - break; - case SlotDef::VECTOR_SPARSE_VALUE: - fillSparseValueSlot(slot, data, dataEnd); - break; - case SlotDef::INDEX: - fillIndexSlot(slot, data, dataEnd); - break; - case SlotDef::VAR_MDIM_DENSE: - LOG(FATAL) << "Not implemented"; - break; - case SlotDef::VAR_MDIM_INDEX: - LOG(FATAL) << "Not implemented"; - break; - case SlotDef::STRING: - fillStringSlot(slot, data, dataEnd); - break; - } - } - // read sequenceStartPositions - for (size_t j = 0; j < slotNum_; ++j) { - auto& slot = slots_[j]; - if (!iidData()) { - unsigned int sequenceNum = readT(data, dataEnd); - slot.sequenceNum = sequenceNum; - for (size_t i = 0; i < sequenceNum; ++i) { - slot.sequenceStartPositions.push_back( - readT(data, dataEnd)); - } - for (size_t i = 0; i < sequenceNum; ++i) { - size_t begin = slot.sequenceStartPositions[i]; - size_t end = (i < sequenceNum - 1) ? slot.sequenceStartPositions[i + 1] - : slot.sampleNum; - for (size_t ii = begin; ii < end; ++ii) { - slot.sampleSequenceIdVec.push_back(ii); - } - } - } else { - for (size_t i = 0; i < slot.sampleNum; ++i) { - slot.sampleSequenceIdVec.push_back(i); - } - } - } - // read subSequenceStartPositions, not all slots have this infomation. 
- for (size_t j = 0; j < slotNum_; ++j) { - auto& slot = slots_[j]; - if (!iidData() && data != dataEnd) { - unsigned int subSequenceNum = readT(data, dataEnd); - slot.subSequenceNum = subSequenceNum; - for (size_t i = 0; i < subSequenceNum; ++i) { - slot.subSequenceStartPositions.push_back( - readT(data, dataEnd)); - } - } - } -} - -void PyDataProvider::reset() { - { // Invoke PyDataProvider Reset - PyGuard guard; - PyObjectPtr obj(PyObject_CallMethod( - classInstance_.get(), const_cast("reset"), NULL)); - CHECK_PY(obj) << "Call function reset failed."; - } - - if (!skipShuffle_) { - // Invoke PyDataProvider Shuffle - shuffle(); - } - DataProvider::reset(); -} - -void PyDataProvider::shuffle() { - // py shuffle - PyGuard guard; - PyObjectPtr obj(PyObject_CallMethod( - classInstance_.get(), const_cast("shuffle"), NULL)); - CHECK_PY(obj) << "Call function shuffle failed."; -} - -void PyDataProvider::handleDenseSlot(ProtoSlot& slot, - size_t slotIndex, - std::vector& cpuArguments) { - unsigned int dim = slot.dim; - Matrix::resizeOrCreate(cpuArguments[slotIndex].value, - slot.sampleNum, - dim, - false, // trans = false - false); // useGpu = false - real* buf = cpuArguments[slotIndex].value->getData(); - for (size_t i = 0; i < slot.sampleNum; ++i) { - memcpyWithCheck(buf + i * dim, - slot.denseData.data() + slot.sampleSequenceIdVec[i] * dim, - sizeof(real) * dim, - slot.denseData.data() + slot.denseData.size()); - } -} - -void PyDataProvider::handleSparseNonValueSlot( - ProtoSlot& slot, size_t slotIndex, std::vector& cpuArguments) { - unsigned int dim = slot.dim; - if (!(cpuArguments[slotIndex].value)) { - cpuArguments[slotIndex].value = - Matrix::createSparseMatrix(slot.sampleNum, - dim, - slot.sampleNum /*DEFAULT_AVG_WIDTH = 1*/, - NO_VALUE, - SPARSE_CSR, - false, - useGpu_); - } - auto mat = cpuArguments[slotIndex].value; - mat->resize(slot.sampleNum, dim, slot.sampleNum, NO_VALUE, SPARSE_CSR); - if (std::dynamic_pointer_cast(mat)) { - 
std::dynamic_pointer_cast(mat)->copyFrom( - slot.sampleSequenceIdVec.data(), - slot.indices.data(), - slot.sparseNonValueData.data(), - HPPL_STREAM_1); - } else if (std::dynamic_pointer_cast(mat)) { - std::dynamic_pointer_cast(mat)->copyFrom( - slot.sampleSequenceIdVec.data(), - slot.indices.data(), - slot.sparseNonValueData.data()); - } else { - LOG(FATAL) << "Not Supported"; - } -} - -void PyDataProvider::handleSparseValueSlot( - ProtoSlot& slot, size_t slotIndex, std::vector& cpuArguments) { - unsigned int dim = slot.dim; - if (!(cpuArguments[slotIndex].value)) { - cpuArguments[slotIndex].value = - Matrix::createSparseMatrix(slot.sampleNum, - dim, - slot.sampleNum /*DEFAULT_AVG_WIDTH = 1*/, - FLOAT_VALUE, - SPARSE_CSR, - false, - useGpu_); - } - auto mat = cpuArguments[slotIndex].value; - mat->resize(slot.sampleNum, dim, slot.sampleNum, FLOAT_VALUE, SPARSE_CSR); - if (std::dynamic_pointer_cast(mat)) { - std::dynamic_pointer_cast(mat)->copyFrom( - slot.sampleSequenceIdVec.data(), - slot.indices.data(), - slot.sparseFloatValueData.data(), - HPPL_STREAM_DEFAULT); - } else if (std::dynamic_pointer_cast(mat)) { - std::dynamic_pointer_cast(mat)->copyFrom( - slot.sampleSequenceIdVec.data(), - slot.indices.data(), - slot.sparseFloatValueData.data()); - } else { - LOG(FATAL) << "Not Supported"; - } -} - -void PyDataProvider::handleIndexSlot(ProtoSlot& slot, - size_t slotIndex, - std::vector& cpuArguments) { - IVector::resizeOrCreate(cpuArguments[slotIndex].ids, - slot.sampleNum, - /*useGpu_*/ false); - int* buf = cpuArguments[slotIndex].ids->getData(); - for (size_t i = 0; i < slot.sampleNum; ++i) { - buf[i] = slot.indexData[slot.sampleSequenceIdVec[i]]; - } -} - -void PyDataProvider::handleStringSlot(ProtoSlot& slot, - size_t slotIndex, - std::vector& cpuArguments) { - if (cpuArguments[slotIndex].strs) { - cpuArguments[slotIndex].strs->resize(slot.sampleNum); - } else { - cpuArguments[slotIndex].strs = - std::make_shared>(slot.sampleNum); - } - for (size_t i = 0; i < 
slot.sampleNum; ++i) { - (*cpuArguments[slotIndex].strs)[i] = - slot.strData[slot.sampleSequenceIdVec[i]]; - } -} - -int64_t PyDataProvider::getNextBatchInternal(int64_t size, DataBatch* batch) { - PyGuard guard; - PyObjectPtr obj(PyObject_CallMethod(classInstance_.get(), - const_cast("getNextBatch"), - const_cast("i"), - size)); - CHECK_PY(obj) << "Call function getNextBatch failed."; - const std::string& samples = - std::string(PyString_AsString(obj.get()), PyString_Size(obj.get())); - resetSlots(); - fillSlotsByStr(samples); - size = batchSize_; - if (size <= 0) return 0; - - DataBatch& cpuBatch = *cpuBatch_; - std::vector& cpuArguments = cpuBatch.getStreams(); - cpuBatch.setSize(size); - cpuArguments.resize(slotNum_); - - if (!iidData()) { - for (size_t j = 0; j < slotNum_; ++j) { - auto& slot = slots_[j]; - ICpuGpuVector::resizeOrCreate(cpuArguments[j].sequenceStartPositions, - slot.sequenceNum + 1, - /* useGpu= */ false); - int* buf = cpuArguments[j].sequenceStartPositions->getMutableData(false); - std::copy(slot.sequenceStartPositions.begin(), - slot.sequenceStartPositions.end(), - buf); - buf[slot.sequenceStartPositions.size()] = slot.sampleNum; - - if (slot.subSequenceStartPositions.size()) { - ICpuGpuVector::resizeOrCreate(cpuArguments[j].subSequenceStartPositions, - slot.subSequenceNum + 1, - /* useGpu= */ false); - int* buf = - cpuArguments[j].subSequenceStartPositions->getMutableData(false); - std::copy(slot.subSequenceStartPositions.begin(), - slot.subSequenceStartPositions.end(), - buf); - buf[slot.subSequenceNum] = slot.sampleNum; - // check subSequenceStartPositions and sequenceStartPositions - cpuArguments[j].checkSubset(); - } - } - } - - for (size_t slotIndex = 0; slotIndex < slotNum_; ++slotIndex) { - auto& slot = slots_[slotIndex]; - SlotDef::SlotType slotType = slot.type; - switch (slotType) { - case SlotDef::VECTOR_DENSE: - handleDenseSlot(slot, slotIndex, cpuArguments); - break; - case SlotDef::VECTOR_SPARSE_NON_VALUE: - 
handleSparseNonValueSlot(slot, slotIndex, cpuArguments); - break; - case SlotDef::VECTOR_SPARSE_VALUE: - handleSparseValueSlot(slot, slotIndex, cpuArguments); - break; - case SlotDef::INDEX: - handleIndexSlot(slot, slotIndex, cpuArguments); - break; - case SlotDef::VAR_MDIM_DENSE: - LOG(FATAL) << "Not implemented"; - break; - case SlotDef::VAR_MDIM_INDEX: - LOG(FATAL) << "Not implemented"; - break; - case SlotDef::STRING: - handleStringSlot(slot, slotIndex, cpuArguments); - break; - } - } - - if (useGpu_) { - std::vector& cpuArguments = cpuBatch.getStreams(); - DataBatch& gpuBatch = *gpuBatch_; - std::vector& gpuArguments = gpuBatch.getStreams(); - gpuArguments.resize(cpuArguments.size()); - gpuBatch.setSize(size); - for (size_t i = 0; i < slotNum_; ++i) { - SlotDef::SlotType slotType = slots_[i].type; - if (SlotDef::VECTOR_SPARSE_VALUE == slotType || - SlotDef::VECTOR_SPARSE_NON_VALUE == slotType) { - gpuArguments[i] = cpuArguments[i]; - gpuArguments[i].sequenceStartPositions = - cpuArguments[i].sequenceStartPositions; - - if (slots_[i].subSequenceStartPositions.size()) { - gpuArguments[i].subSequenceStartPositions = - cpuArguments[i].subSequenceStartPositions; - } - } else { - gpuArguments[i].resizeAndCopyFrom( - cpuArguments[i], useGpu_, HPPL_STREAM_1); - } - } - hl_stream_synchronize(HPPL_STREAM_1); - *batch = gpuBatch; - } else { - *batch = cpuBatch; - } - - return batch->getSize(); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/PyDataProvider.h b/paddle/legacy/gserver/dataproviders/PyDataProvider.h deleted file mode 100644 index 4b8bea04a1670c60d5a801ca950f59116ba50195..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/PyDataProvider.h +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "DataFormat.pb.h" -#include "DataProvider.h" - -#include - -namespace paddle { - -class PyDataProvider : public DataProvider { - public: - PyDataProvider(const DataConfig& config, - bool useGpu, - bool loadDataAll = true); - - virtual void reset(); - - // Note this size includes the sequences which are skipped because they - // are longer than the batch size - virtual int64_t getSize() { - LOG(FATAL) << "Not implement yet"; - return -1; - } - virtual void shuffle(); - - virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch); - - protected: - struct ProtoSlot; - // return false if each each sample is one sequence, i.e., independent - // of other samples. 
- inline bool iidData() const { return isIID_; } - - void parseHeaderData(const std::string& headerData); - void fillDenseSlot(ProtoSlot& slot, char*& data, const char* dataEnd); - void fillSparseNonValueSlot(ProtoSlot& slot, - char*& data, - const char* dataEnd); - void fillSparseValueSlot(ProtoSlot& slot, char*& data, const char* dataEnd); - void fillIndexSlot(ProtoSlot& slot, char*& data, const char* dataEnd); - void fillStringSlot(ProtoSlot& slot, char*& data, const char* dataEnd); - void fillSlotsByStr(const std::string& samples); - void handleDenseSlot(ProtoSlot& slot, - size_t slotIndex, - std::vector& cpuArguments); - void handleSparseNonValueSlot(ProtoSlot& slot, - size_t slotIndex, - std::vector& cpuArguments); - void handleSparseValueSlot(ProtoSlot& slot, - size_t slotIndex, - std::vector& cpuArguments); - void handleIndexSlot(ProtoSlot& slot, - size_t slotIndex, - std::vector& cpuArguments); - void handleStringSlot(ProtoSlot& slot, - size_t slotIndex, - std::vector& cpuArguments); - void resetSlots(); - void loadData(const std::vector& fileList); - - protected: - struct ProtoSlot { - SlotDef::SlotType type; - int dim; - unsigned int sampleNum; - unsigned int sequenceNum; - unsigned int subSequenceNum; - // Store the data of index type slot - std::vector indexData; - // Store the data of dense type slot - std::vector denseData; - // Store the data of sparseNonValue type slot - std::vector sparseNonValueData; - // Store the data of sparseValue type slot - std::vector sparseFloatValueData; - // Used to store the index of each sample in slot values - std::vector indices; - // The starting position of each sequence in samples - // The last element should be the number of samples - // If empty, each sample is one sequence. 
- std::vector sequenceStartPositions; - // The index id of sequences in slot - std::vector sampleSequenceIdVec; - // The starting position of each subsequence in samples - // The last element should be the number of subsequence - // If empty, each sequence of sample has no subsequence. - std::vector subSequenceStartPositions; - // Store the data of string type slot - std::vector strData; - }; - std::vector slots_; - - PyObjectPtr classInstance_; - unsigned int batchSize_; - unsigned int slotNum_; - // if use sequence, isIID_ equals false, otherwise it is true. - bool isIID_; - // The name of python module name - std::string pyModuleName_; - // The name of python class name - std::string pyClassName_; - // User args set in config - std::map pyUserArgs_; - - ThreadLocalD cpuBatch_; - ThreadLocalD gpuBatch_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/dataproviders/PyDataProvider2.cpp b/paddle/legacy/gserver/dataproviders/PyDataProvider2.cpp deleted file mode 100644 index 8e931e40611e27caa43675c3567972384a4d9026..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/dataproviders/PyDataProvider2.cpp +++ /dev/null @@ -1,1031 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef PADDLE_NO_PYTHON - -#include -#include -#include -#include -#include -#include -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include - -#include "DataProvider.h" - -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/PythonUtil.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -namespace unittest { - -static std::unique_ptr> - OnPoolFilled; - -namespace pydp2 { - -void setOnPoolFilledHook(const std::function& callback) { - OnPoolFilled.reset(new std::function()); - *OnPoolFilled = callback; -} - -void clearOnPoolFilledHook() { OnPoolFilled.reset(); } - -} // namespace pydp2 -} // namespace unittest - -/** - * Slot type - */ -enum SlotType { - ST_DENSE = 0, - ST_NON_SPARSE_VALUE = 1, - ST_SPARSE_VALUE = 2, - ST_INDEX = 3 -}; - -/** - * Sequence type - */ -enum SeqType { SQT_NONE = 0, SQT_SEQ, SQT_SUBSEQ }; - -/** - * Cache Type. - */ -enum CacheType { - NO_CACHE = 0, // Each pass will load data from PyDataProvider2. - CACHE_PASS_IN_MEM = 1, // First pass will load data from PyDataProvider2, - // then cache all data in memory. Load data from - // memory in rest passes. -}; - -struct SlotHeader { // Slot Header will parse from python object's slots field. - size_t dim; - SlotType slotType; - SeqType seqType; -}; - -inline std::ostream& operator<<(std::ostream& os, const SlotHeader& header) { - os << "Dim = " << header.dim << " Type = " << header.slotType - << " SeqType = " << header.seqType; - return os; -} - -/** - * FieldScanner Interface. - * - * It will read python object, and fill to argument's each slot. - * There are two steps, prepare and fill. Scanner will alloc memory during - * prepare step, fill data into argument during fill step. - */ -class IFieldScanner { - public: - DISABLE_COPY(IFieldScanner); - /** - * Ctor. - * @param headerPtr slot header that scanner belong to. 
- */ - explicit IFieldScanner(SlotHeader* headerPtr) : headerPtr_(headerPtr) {} - virtual ~IFieldScanner() {} - - /** - * Start prepare step. - */ - virtual void startPrepare(Argument& argument) {} - - /** - * Prepare step. - * - * @note the obj could be a timestep of sample or whole sample. It depends - * what scanner it is. - */ - virtual void prepare(Argument& argument, PyObject* obj) {} - - /** - * Finish Prepare step. - */ - virtual void finishPrepare(Argument& argument) {} - - /** - * Start fill step. - */ - virtual void startFill(Argument& argument) {} - - /** - * Fill step. - * - * @note the obj could be a timestep of sample or whole sample. It depends - * what scanner it is. - */ - virtual void fill(Argument& argument, PyObject* obj) {} - - /** - * Finish fill step. - */ - virtual void finishFill(Argument& argument) {} - - /** - * Factory method. Create a scanner by header. The final scanner may be - * combine many scanners. - * - * @note Fatal if header is not support. - */ - static IFieldScanner* create(SlotHeader* header); - - protected: - SlotHeader* headerPtr_; -}; - -/** - * Py Data Provider Cache Interface. - */ -class IPyDataProviderCache { - public: - virtual ~IPyDataProviderCache() {} - - /** - * invoke when DataProvider::reset() - * @return true if read data from python. - */ - virtual bool reset() = 0; - - /** - * invoke when these data are used by DataProvider, and need to clear. - * @param [inout] data used data. - * - * @note The implemented class must clear these data array. Or if you want to - * delete the PyObjectPtr later, you should make sure the paddle process only - * have one active thread calling python code (use PyGuard otherwise). - */ - virtual void drop(std::deque* data) = 0; - - /** - * Return whole data in cache. - */ - virtual std::deque* load() = 0; - - /** - * Factory method. Convert CacheType to IPyDataProviderCache* - */ - static IPyDataProviderCache* create(CacheType ct); -}; - -/** - * PyDataProvider2. 
- * - * For usage, please refer python module 'paddle.trainer.PyDataProvider2' - * - * Here, we start a thread to read data. It is totally asynchronous for reading - * data. And it support cache strategies. - */ -class PyDataProvider2 : public DataProvider { - public: - /** - * Ctor - */ - PyDataProvider2(const DataConfig& config, - const ModelConfig& modelConfig, - bool useGpu) - : DataProvider(config, useGpu), callingContextCreated_(2) { - if (PyArray_API == NULL) import_array(); - auto& args = config.load_data_args(); - PyObjectPtr kwargs = PyObjectPtr(PyDict_New()); - if (!args.empty()) { - kwargs = callPythonFuncRetPyObj( - "paddle.trainer.PyDataProvider2", "deserialize_args", {args}); - } - - py::DictHelper kwargsDict(kwargs); - kwargsDict.setBool("is_train", !config.for_test()); - std::vector inputs; - inputs.reserve(modelConfig.input_layer_names().size()); - std::copy(modelConfig.input_layer_names().begin(), - modelConfig.input_layer_names().end(), - std::back_inserter(inputs)); - kwargsDict.setStringList("input_order", inputs); - - // kwargs is keyword arguemts to create object. 
- this->createPyDataObj(config.load_data_module(), - config.load_data_object(), - config.files(), - std::move(kwargs)); - DBG << "Instance " << instance_.get() << " loaded."; - this->readPyFields(config.for_test()); - DBG << "Py Field Done"; - } - - /** - * Dtor - * @note will stop loading thread when destructing - */ - virtual ~PyDataProvider2() { resetImpl(false); } - - private: - void createPyDataObj(const std::string& model, - const std::string& className, - const std::string& fileListName, - PyObjectPtr&& kwargs // NOLINT - ) { - LOG(INFO) << "loading dataprovider " << model << "::" << className; - - PyObjectPtr module = py::import(model); - PyObjectPtr moduleDict(PyModule_GetDict(module.get())); - CHECK_PY(moduleDict) << "Invoke module.__dict__ error"; - PyObjectPtr cls(PyDict_GetItemString(moduleDict.get(), className.c_str())); - CHECK_PY(cls) << "load class " << className.c_str() << "error"; - - // If there are multiple python instance share same module, the PyObjectPtr - // only for instance will make python reference-count error. - // - // So here, we increase reference count manually. - Py_XINCREF(module.get()); - Py_XINCREF(moduleDict.get()); - Py_XINCREF(cls.get()); - - PyObjectPtr fileListInPy = loadPyFileLists(fileListName); - PyDict_SetItemString(kwargs.get(), "file_list", fileListInPy.get()); - { - PyGuard guard; - instance_.reset(PyObject_Call(cls.get(), zeroTuple_.get(), kwargs.get())); - } - CHECK_PY(instance_) << "Cannot Create instance"; - } - - void readPyFields(bool testing) { - py::ObjectHelper self(this->instance_); - bool ok; - - this->skipShuffle_ = - !self.getBoolAttr("should_shuffle", &ok /*isBoolType*/); - if (!ok) { - this->skipShuffle_ = testing; // shuffle when is training, skip shuffle - // when is testing. 
- } - DBG << "Provider Skip Shuffle " << this->skipShuffle_; - - this->poolSize_ = self.getIntAttr("pool_size", &ok); - if (!ok) { - this->poolSize_ = -1UL; - } - this->minPoolSize_ = self.getIntAttr("min_pool_size", &ok); - if (!ok) { - this->minPoolSize_ = -1UL; - } - this->minPoolSize_ = std::min(this->poolSize_, this->minPoolSize_); - - this->canOverBatchSize_ = self.getBoolAttr("can_over_batch_size"); - - calcBatchSize_.reset(self.getAttr("calc_batch_size")); - if (this->calcBatchSize_ && !py::isCallable(this->calcBatchSize_)) { - this->calcBatchSize_.reset(); - } - - generator_.reset(self.getAttr("generator")); - CHECK(py::isCallable(generator_)); - - // Reading slots. - PyObjectPtr slotsPtr(self.getAttr("slots")); - py::SequenceHelper slots(slotsPtr); - headers_.reserve(slots.size()); - for (size_t i = 0; i < slots.size(); ++i) { - headers_.emplace_back(); - auto& header = headers_.back(); - PyObject* hdPtr = slots[i]; - CHECK(hdPtr != nullptr); - Py_XINCREF(hdPtr); - PyObjectPtr headerPtrWrap(hdPtr); - py::ObjectHelper hd(headerPtrWrap); - header.dim = hd.getIntAttrWithError("dim"); - header.seqType = (SeqType)hd.getIntAttrWithError("seq_type"); - header.slotType = (SlotType)hd.getIntAttrWithError("type"); - } - - DBG << "Data header size " << headers_.size(); - for (auto& header : headers_) { - DBG << header; - } - cache_.reset(IPyDataProviderCache::create( - (CacheType)self.getIntAttrWithError("cache"))); - } - - PyObjectPtr loadPyFileLists(const std::string& fileListName) { - loadFileList(fileListName, fileLists_); - PyObject* lst = PyList_New(fileLists_.size()); - for (size_t i = 0; i < fileLists_.size(); ++i) { - PyList_SET_ITEM(lst, i, PyString_FromString(fileLists_[i].c_str())); - } - return PyObjectPtr(lst); - } - - void loadThread() { - DBG << "Creating context"; - for (auto& filename : fileLists_) { - PyGuard g; - py::CallableHelper generator(this->generator_); - generator.setArgsSize(2); - generator.getArgs().set(0, instance_); - 
generator.getArgs().set(1, PyString_FromString(filename.c_str()), true); - callingContexts_.emplace_back(generator()); - CHECK_PY(callingContexts_.back()) << "Generator error."; - CHECK(PyIter_Check(callingContexts_.back())); - } - DBG << "Create context done"; - callingContextCreated_.wait(); - - PositionRandom p(skipShuffle_); - - while (!exit_ && !callingContexts_.empty()) { - PyObject* data = nullptr; - - { // Read data. - size_t cid = p(callingContexts_.size()); - bool atEnd; - data = py::iterNext(callingContexts_[cid], &atEnd); - if (atEnd || data == nullptr) { - if (cid != 0) { - std::swap(callingContexts_[cid], callingContexts_[0]); - cid = 0; - } - - PyObjectPtr front; - { - std::unique_lock l(mtx_); - front = pop_get_front(callingContexts_); - } - { - PyGuard g; - front.reset(); - } - this->pullCV_.notify_all(); - continue; - } - } - - size_t additionalBatchSize = 1; - if (calcBatchSize_) { - PyGuard guard; - py::CallableHelper calcBatchSize(this->calcBatchSize_); - calcBatchSize.setArgsSize(1); - calcBatchSize.getArgs().set(0, data); - PyObjectPtr bs(calcBatchSize()); - CHECK_PY(bs); - bool ok; - additionalBatchSize = py::castInt(bs.get(), &ok); - CHECK(ok) << "CalcBatchSize must return int or long"; - } - - if (this->loadThread_) { // wait poolActualSize < poolSize; - std::unique_lock l(mtx_); - pushCV_.wait(l, [this] { return this->poolActualSize_ < poolSize_; }); - } - - { - std::lock_guard guard(mtx_); - poolActualSize_ += additionalBatchSize; - dataPool_.emplace_back(data); - } - pullCV_.notify_all(); - } - DBG << "load thread end"; - } - - inline void resetImpl(bool startNewThread) { - DBG << "Reseting " << startNewThread; - exit_.store(true); - if (loadThread_) { // is loading. 
- loadThread_->join(); - loadThread_.reset(); - } - { - PyGuard g; - callingContexts_.clear(); - this->pullCV_.notify_one(); - } - - std::lock_guard guard(mutexForReset_); - { - PyGuard g; - dataPool_.clear(); - } - poolActualSize_ = 0; - - if (startNewThread && cache_->reset()) { - DBG << "Start new thread."; - loadThread_.reset(new std::thread([this] { - exit_ = false; - loadThread(); - })); - callingContextCreated_.wait(); - } - DBG << "Reset done"; - exit_ = false; - } - - private: - std::unique_ptr loadThread_; - std::atomic exit_; - std::deque callingContexts_; - std::deque dataPool_; - size_t poolActualSize_; - std::condition_variable pushCV_; - std::condition_variable pullCV_; - std::mutex mtx_; - - std::mutex mutexForReset_; - - ThreadBarrier callingContextCreated_; - std::unique_ptr cache_; - - PyObjectPtr instance_; - size_t poolSize_; - size_t minPoolSize_; - bool canOverBatchSize_; - PyObjectPtr calcBatchSize_; - PyObjectPtr generator_; - std::vector fileLists_; - std::vector headers_; - static PyObjectPtr zeroTuple_; - - class PositionRandom { - public: - inline explicit PositionRandom(bool skipRand) - : eng_(ThreadLocalRandomEngine::get()), skipRand_(skipRand) {} - - inline size_t operator()(size_t len) { - if (!skipRand_) { - if (!dist_ || dist_->b() != len - 1) { - dist_.reset(new std::uniform_int_distribution(0, len - 1)); - } - return (*dist_)(eng_); - } else { - return 0; - } - } - - private: - std::default_random_engine& eng_; - std::unique_ptr> dist_; - bool skipRand_; - }; - - // DataProvider interface - public: - /** - * Resetting the PyDataProvider. May start reading thread here. - */ - virtual void reset() { - resetImpl(true); - DataProvider::reset(); - } - - /** - * Shuffle. Do nothing because PyDataProvider do shuffle implicitly by random - * select data from datapool. - */ - void shuffle() {} - - /** - * Not limited size. - */ - int64_t getSize() { return -1; } - - /** - * Loading a batch of data. 
- */ - int64_t getNextBatchInternal(int64_t size_, DataBatch* batch) { - std::lock_guard guard(mutexForReset_); - REGISTER_TIMER("PyDP2.getNextBatchInternal") - CHECK_GE(size_, 0); - size_t size = (size_t)size_; - if (loadThread_) { // loading from thread should wait for data pool ready. - // but, loading from cache, cache object should ensure - // data pool ready. - std::unique_lock l(mtx_); - pullCV_.wait(l, [this, &size] { - return this->poolActualSize_ >= std::max(size, this->minPoolSize_) || - callingContexts_.empty(); - }); - - if (unittest::OnPoolFilled) { - (*unittest::OnPoolFilled)(this->poolActualSize_); - } - } - std::deque data; - size_t bsize = 0; - std::deque* poolPtr = nullptr; - - if (this->loadThread_) { // loading from thread. - poolPtr = &this->dataPool_; - } else { // loading from cache. - poolPtr = this->cache_->load(); - } - if (exit_) { - // PyDataProvider is destructing. - return 0; - } - CHECK(poolPtr != nullptr); - - std::deque& pool = *poolPtr; - - while (bsize < size && !pool.empty()) { - { - // move data from pool to data - std::lock_guard guard(mtx_); - if (skipShuffle_) { - size_t i = 0; - CHECK(pool[i] != nullptr); - data.emplace_back(std::move(pool[i])); - pool.pop_front(); - } else { // when shuffle, use swap to drop only last pool element. - size_t i = ThreadLocalRand::rand() % pool.size(); - CHECK(pool[i] != nullptr); - if (i != 0) { - std::swap(pool[i], pool.front()); - } - data.emplace_back(std::move(pool.front())); - pool.pop_front(); - } - - if (calcBatchSize_) { // custom calc batch size. - PyGuard guard; - Py_INCREF(data.back().get()); - py::CallableHelper calcBatchSize(calcBatchSize_); - calcBatchSize.setArgsSize(1); - calcBatchSize.getArgs().set(0, data.back()); - PyObjectPtr customBatchSize(calcBatchSize()); - bool ok; - size_t tmp = py::castInt(customBatchSize.get(), &ok); - CHECK(ok) << "calc_batch_size must return int"; - - if (bsize + tmp > size && !canOverBatchSize_) { - // Put data back. 
- pool.push_front(std::move(data.back())); - data.pop_back(); - break; - } else { - bsize += tmp; - } - } else { - bsize += 1; - } - } - } - - if (this->loadThread_) { - { - std::lock_guard g(mtx_); - poolActualSize_ -= bsize; - } - this->pushCV_.notify_all(); - } - - if (bsize == 0) { // end of pass. In data pool, cannot get any data. - return 0; - } - - DataBatch cpuBatch; - cpuBatch.setSize(bsize); - auto& inArgs = cpuBatch.getStreams(); - inArgs.resize(headers_.size()); - std::vector> scanners; - scanners.reserve(headers_.size()); - for (auto& header : headers_) { - scanners.emplace_back(IFieldScanner::create(&header)); - } - DBG << "Scanner created."; - for (size_t i = 0; i < headers_.size(); ++i) { - scanners[i]->startPrepare(inArgs[i]); - } - for (auto& d : data) { - py::SequenceHelper s(d); - for (size_t i = 0; i < headers_.size(); ++i) { - scanners[i]->prepare(inArgs[i], s[i]); - } - } - for (size_t i = 0; i < headers_.size(); ++i) { - scanners[i]->finishPrepare(inArgs[i]); - } - for (size_t i = 0; i < headers_.size(); ++i) { - scanners[i]->startFill(inArgs[i]); - } - for (auto& d : data) { - py::SequenceHelper s(d); - for (size_t i = 0; i < headers_.size(); ++i) { - scanners[i]->fill(inArgs[i], s[i]); - } - } - - for (size_t i = 0; i < headers_.size(); ++i) { - scanners[i]->finishFill(inArgs[i]); - } - - { - PyGuard g; - cache_->drop(&data); - } - - DBG << "Reading CPU Batch Done."; - - if (useGpu_) { - std::vector& cpuArguments = cpuBatch.getStreams(); - DataBatch& gpuBatch = *batch; - std::vector& gpuArguments = gpuBatch.getStreams(); - gpuArguments.resize(cpuArguments.size()); - gpuBatch.setSize(bsize); - for (size_t i = 0; i < headers_.size(); ++i) { - gpuArguments[i].resizeAndCopyFrom( - cpuArguments[i], useGpu_, HPPL_STREAM_1); - } - hl_stream_synchronize(HPPL_STREAM_1); - } else { - *batch = cpuBatch; - } - return bsize; - } -}; - -PyObjectPtr PyDataProvider2::zeroTuple_(PyTuple_New(0)); - -REGISTER_DATA_PROVIDER_EX(py2, PyDataProvider2); - -/** - 
* Scanner for dense slot. - */ -class DenseScanner : public IFieldScanner { - public: - explicit DenseScanner(SlotHeader* ptr) : IFieldScanner(ptr), height_(0) {} - - /** - * Prepare. - * @param argument target argument - * @param obj each timestep of a sample. - */ - virtual void prepare(Argument& argument, PyObject* obj) { ++height_; } - - virtual void finishPrepare(Argument& argument) { - Matrix::resizeOrCreate( - argument.value, height_, headerPtr_->dim, false, false); - height_ = 0; - } - - /** - * Fill argument from obj. - * @param argument - * @param obj - */ - virtual void fill(Argument& argument, PyObject* obj) { - real* dat = argument.value->getData() + height_ * headerPtr_->dim; - if (PyArray_Check(obj)) { - auto dtype = PyArray_DTYPE((PyArrayObject*)obj); - if (dtype->type == 'f' && dtype->elsize == sizeof(real)) { - real* data = (real*)PyArray_DATA((PyArrayObject*)obj); - auto sz = PyArray_SIZE((PyArrayObject*)obj); - std::copy(data, data + sz, dat); - } else { - LOG(FATAL) << "You should yield float" << sizeof(real) * 8 << " array"; - } - } else { - py::SequenceHelper s(obj); - // TODO(yuyang18): Here we can use AVX or SSE to accelerate memory copy. - for (size_t i = 0; i < headerPtr_->dim; ++i) { - dat[i] = (real)s.getDouble(i); - } - } - ++height_; - } - - private: - size_t height_; -}; - -/** - * Scanner for index slot - */ -class IndexScanner : public IFieldScanner { - public: - explicit IndexScanner(SlotHeader* ptr) : IFieldScanner(ptr), cnt_(0) {} - - /** - * Prepare memory space. - * - * @note obj is a single timestep of sample - */ - virtual void prepare(Argument& argument, PyObject* obj) { ++cnt_; } - - virtual void finishPrepare(Argument& argument) { - IVector::resizeOrCreate(argument.ids, cnt_, false); - cnt_ = 0; - } - - /** - * Fill one index to argument. 
- */ - virtual void fill(Argument& argument, PyObject* obj) { - bool ok; - argument.ids->getData()[cnt_++] = py::castInt(obj, &ok); - CHECK(ok) << "Cannot cast int " << py::repr(obj); - } - - private: - size_t cnt_; -}; - -class SparseNonValueScanner : public IFieldScanner { - public: - explicit SparseNonValueScanner(SlotHeader* ptr) - : IFieldScanner(ptr), nnz_(0), height_(0) {} - - /** - * Prepare memory space - * @note obj is a timestep of one sample. - */ - virtual void prepare(Argument& argument, PyObject* obj) { - ++height_; - nnz_ += py::SequenceHelper(obj).size(); - } - - virtual void finishPrepare(Argument& argument) { - Matrix::resizeOrCreateSparseMatrix( - argument.value, height_, headerPtr_->dim, nnz_, NO_VALUE); - } - - virtual void startFill(Argument& argument) { - auto smat = (CpuSparseMatrix*)(argument.value.get()); - smat->getRows()[0] = 0; - nnz_ = 0; - height_ = 1; - } - - /** - * Fill one sparse vector to argument. - * @note obj is a timestep of one sample. - */ - virtual void fill(Argument& argument, PyObject* obj) { - py::SequenceHelper s(obj); - auto sz = s.size(); - auto smat = (CpuSparseMatrix*)(argument.value.get()); - int* row = smat->getRows(); - int* col = smat->getCols(); - real* dat = smat->getData(); - row[height_] = row[height_ - 1] + (int)sz; - - for (decltype(sz) i = 0; i < sz; ++i) { - setData(col + nnz_, dat + nnz_, s[i]); - ++nnz_; - } - ++height_; - } - - protected: - /** - * Set a single sparse index and value. - * @param [out] col sparse index - * @param [out] dat sparse value - * @param [in] obj Python Object. For sparse_non_value is a PyInt or PyLong. - * For sparse_value is a Tuple (int, float). 
- */ - virtual void setData(int* col, real* dat, PyObject* obj) { - bool ok; - *col = py::castInt(obj, &ok); - CHECK(ok); - } - - size_t nnz_; - size_t height_; -}; - -class SparseValueScanner : public SparseNonValueScanner { - public: - explicit SparseValueScanner(SlotHeader* ptr) : SparseNonValueScanner(ptr) {} - - virtual void finishPrepare(Argument& argument) { - Matrix::resizeOrCreateSparseMatrix( - argument.value, height_, headerPtr_->dim, nnz_, FLOAT_VALUE); - } - - protected: - virtual void setData(int* col, real* dat, PyObject* obj) { - py::SequenceHelper s(obj); - SparseNonValueScanner::setData(col, dat, s[0]); - *dat = (real)s.getDouble(1); - } -}; - -/** - * Sequence Scanner. Scanner for sequence or sub-sequence. - */ -class SequenceScanner : public IFieldScanner { - public: - /** - * Ctor - * @param innerScanner inner scanner for each timestep or sub-sequence. - * @param getSeqStartPos A callback, (Argument) => ICpuGpuVectorPtr. - * return a sequence start position or a sub-sequence - * start position. - */ - SequenceScanner( - std::unique_ptr&& innerScanner, - const std::function& getSeqStartPos) - : IFieldScanner(nullptr), - inner_(std::move(innerScanner)), - cnt_(0), - getSeqStartPos_(getSeqStartPos) {} - - /** - * Start prepare. Invoke inner->startPrepare too. - */ - virtual void startPrepare(Argument& argument) { - inner_->startPrepare(argument); - } - - /** - * Prepare. obj is a list or tuple. it will invoke inner_->prepare for each - * element of sequence obj. - */ - virtual void prepare(Argument& argument, PyObject* obj) { - py::SequenceHelper s(obj); - ++cnt_; - for (size_t i = 0; i < s.size(); ++i) { - inner_->prepare(argument, s[i]); - } - } - - /** - * Finish prepare. invoke inner_->finishPrepare too. - */ - virtual void finishPrepare(Argument& argument) { - ICpuGpuVector::resizeOrCreate(getSeqStartPos_(argument), cnt_ + 1, false); - inner_->finishPrepare(argument); - } - - /** - * Start fill. invoke inner->startFill too. 
- */ - virtual void startFill(Argument& argument) { - getSeqStartPos_(argument)->getMutableData(false)[0] = 0; - cnt_ = 1; - inner_->startFill(argument); - } - - /** - * Fill. Obj is a tuple or list. invoke inner->fill for each element of - * sequence obj. And set seqStartPos at same time. The seqStartPos will be - * calculated by getSeqStartPos callback passed in ctor. - */ - virtual void fill(Argument& argument, PyObject* obj) { - getSeqStartPos_(argument)->getMutableData(false)[cnt_] = - getSeqStartPos_(argument)->getMutableData(false)[cnt_ - 1] + - (int)getSize(obj); - py::SequenceHelper s(obj); - ++cnt_; - for (size_t i = 0; i < s.size(); ++i) { - inner_->fill(argument, s[i]); - } - } - - /** - * Finish fill. will invoke inner->finishFill too. - */ - virtual void finishFill(Argument& argument) { inner_->finishFill(argument); } - - protected: - size_t getSize(PyObject* obj) { - py::SequenceHelper s(obj); - auto sc = dynamic_cast(inner_.get()); - if (sc) { - size_t sum = 0; - for (size_t i = 0; i < s.size(); ++i) { - sum += sc->getSize(s[i]); - } - return sum; - } else { - return s.size(); - } - } - - private: - std::unique_ptr inner_; - size_t cnt_; - std::function getSeqStartPos_; -}; - -IFieldScanner* IFieldScanner::create(SlotHeader* header) { - IFieldScanner* retv = nullptr; - switch (header->slotType) { - case ST_DENSE: - retv = new DenseScanner(header); - break; - case ST_INDEX: - retv = new IndexScanner(header); - break; - case ST_NON_SPARSE_VALUE: - retv = new SparseNonValueScanner(header); - break; - case ST_SPARSE_VALUE: - retv = new SparseValueScanner(header); - break; - default: - LOG(FATAL) << "Not implemented " << header->slotType; - } - - switch (header->seqType) { - case SQT_NONE: - break; - case SQT_SUBSEQ: - retv = new SequenceScanner(std::unique_ptr(retv), - [](Argument& arg) -> ICpuGpuVectorPtr& { - return arg.subSequenceStartPositions; - }); - // fall through, not break; - case SQT_SEQ: - retv = new SequenceScanner(std::unique_ptr(retv), - 
[](Argument& arg) -> ICpuGpuVectorPtr& { - return arg.sequenceStartPositions; - }); - break; - default: - LOG(FATAL) << "Not implemented"; - } - - return retv; -} - -/** - * No Cache Strategy. Will destruct old data immediately and load data from - * python every pass. - */ -class NoCacheStrategy : public IPyDataProviderCache { - public: - virtual bool reset() { return true; } - - virtual void drop(std::deque* data) { data->clear(); } - - virtual std::deque* load() { return nullptr; } -}; - -/** - * Cache One Pass In Memory strategy. - * - * In first pass, will load data from python and store them in memory. - * The rest passes, will load data from memory. - */ -class CacheOnePassInMemory : public IPyDataProviderCache { - public: - CacheOnePassInMemory() - : objPool_(new std::deque()), - droppedPool_(new std::deque()) {} - - virtual bool reset() { - if (objPool_->empty() && droppedPool_->empty()) { - return true; - } else if (objPool_->empty()) { - std::swap(objPool_, droppedPool_); - return false; - } else { - LOG(FATAL) << "Unexpected branch"; - } - } - - virtual void drop(std::deque* data) { - size_t orgSize = droppedPool_->size(); - droppedPool_->resize(orgSize + data->size()); - for (size_t i = 0; i < data->size(); ++i) { - std::swap((*droppedPool_)[orgSize + i], (*data)[i]); - } - data->clear(); - } - - virtual std::deque* load() { return objPool_.get(); } - - private: - std::unique_ptr> objPool_; - std::unique_ptr> droppedPool_; -}; - -IPyDataProviderCache* IPyDataProviderCache::create(CacheType ct) { - switch (ct) { - case NO_CACHE: - return new NoCacheStrategy(); - case CACHE_PASS_IN_MEM: - return new CacheOnePassInMemory(); - default: - LOG(FATAL) << "Not implemented"; - } -} -} // namespace paddle - -#endif diff --git a/paddle/legacy/gserver/evaluators/CTCErrorEvaluator.cpp b/paddle/legacy/gserver/evaluators/CTCErrorEvaluator.cpp deleted file mode 100644 index c145adda5e04fb4a35df480fd3d0cf93ad453e0d..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/gserver/evaluators/CTCErrorEvaluator.cpp +++ /dev/null @@ -1,320 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Evaluator.h" -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/legacy/utils/StringUtil.h" - -namespace paddle { - -/** - * calculate sequence-to-sequence edit distance - */ -class CTCErrorEvaluator : public Evaluator { - private: - MatrixPtr outActivations_; - int numTimes_, numClasses_, numSequences_, blank_; - real deletions_, insertions_, substitutions_; - int seqClassficationError_; - mutable std::unordered_map evalResults_; - - std::vector path2String(const std::vector& path) { - std::vector str; - str.clear(); - int prevLabel = -1; - for (std::vector::const_iterator label = path.begin(); - label != path.end(); - label++) { - if (*label != blank_ && - (str.empty() || *label != str.back() || prevLabel == blank_)) { - str.push_back(*label); - } - prevLabel = *label; - } - return str; - } - - std::vector bestLabelSeq() { - std::vector path; - path.clear(); - real* acts = outActivations_->getData(); - for (int i = 0; i < numTimes_; ++i) { - path.push_back(std::max_element(acts + i * numClasses_, - acts + (i + 1) * numClasses_) - - (acts + i * numClasses_)); - } - return path2String(path); - } - - /* "sp, dp, ip" is the weighting parameter of "substitution, deletion, - * insertion" - * in edit-distance error */ - real 
stringAlignment(std::vector& gtStr, - std::vector& recogStr, - bool backtrace = true, - real sp = 1.0, - real dp = 1.0, - real ip = 1.0) { - std::vector> matrix; - int substitutions, deletions, insertions; - real distance; - int n = gtStr.size(); - int m = recogStr.size(); - - if (n == 0) { - substitutions = 0; - deletions = 0; - insertions = m; - distance = m; - } else if (m == 0) { - substitutions = 0; - deletions = n; - insertions = 0; - distance = n; - } else { - substitutions = 0; - deletions = 0; - insertions = 0; - distance = 0; - // initialize the matrix - matrix.resize(n + 1); - for (int i = 0; i < n + 1; ++i) { - matrix[i].resize(m + 1); - for (int j = 0; j < m + 1; ++j) { - matrix[i][j] = 0; - } - } - for (int i = 0; i < n + 1; ++i) { - matrix[i][0] = i; - } - for (int j = 0; j < m + 1; ++j) { - matrix[0][j] = j; - } - - // calculate the insertions, substitutions and deletions - for (int i = 1; i < n + 1; ++i) { - int s_i = gtStr[i - 1]; - for (int j = 1; j < m + 1; ++j) { - int t_j = recogStr[j - 1]; - int cost = (s_i == t_j) ? 
0 : 1; - const int above = matrix[i - 1][j]; - const int left = matrix[i][j - 1]; - const int diag = matrix[i - 1][j - 1]; - const int cell = std::min(above + 1, std::min(left + 1, diag + cost)); - matrix[i][j] = cell; - } - } - - if (backtrace) { - size_t i = n; - size_t j = m; - substitutions = 0; - deletions = 0; - insertions = 0; - - while (i != 0 && j != 0) { - if (matrix[i][j] == matrix[i - 1][j - 1]) { - --i; - --j; - } else if (matrix[i][j] == matrix[i - 1][j - 1] + 1) { - ++substitutions; - --i; - --j; - } else if (matrix[i][j] == matrix[i - 1][j] + 1) { - ++deletions; - --i; - } else { - ++insertions; - --j; - } - } - while (i != 0) { - ++deletions; - --i; - } - while (j != 0) { - ++insertions; - --j; - } - int diff = substitutions + deletions + insertions; - if (diff != matrix[n][m]) { - LOG(ERROR) << "Found path with distance " << diff - << " but Levenshtein distance is " << matrix[n][m]; - } - - distance = (sp * substitutions) + (dp * deletions) + (ip * insertions); - } else { - distance = (real)matrix[n][m]; - } - } - real maxLen = std::max(m, n); - deletions_ += deletions / maxLen; - insertions_ += insertions / maxLen; - substitutions_ += substitutions / maxLen; - - if (distance != 0) { - seqClassficationError_ += 1; - } - - return distance / maxLen; - } - - real editDistance( - real* output, int numTimes, int numClasses, int* labels, int labelsLen) { - numTimes_ = numTimes; - numClasses_ = numClasses; - blank_ = numClasses_ - 1; - outActivations_ = Matrix::create(output, numTimes, numClasses); - std::vector recogStr, gtStr; - recogStr = bestLabelSeq(); - for (int i = 0; i < labelsLen; ++i) { - gtStr.push_back(labels[i]); - } - - return stringAlignment(gtStr, recogStr); - } - - void storeLocalValues() const { - evalResults_["error"] = numSequences_ ? totalScore_ / numSequences_ : 0; - evalResults_["deletion_error"] = - numSequences_ ? deletions_ / numSequences_ : 0; - evalResults_["insertion_error"] = - numSequences_ ? 
insertions_ / numSequences_ : 0; - evalResults_["substitution_error"] = - numSequences_ ? substitutions_ / numSequences_ : 0; - evalResults_["sequence_error"] = - (real)seqClassficationError_ / numSequences_; - } - - public: - CTCErrorEvaluator() - : numTimes_(0), - numClasses_(0), - numSequences_(0), - blank_(0), - deletions_(0), - insertions_(0), - substitutions_(0), - seqClassficationError_(0) {} - - virtual real evalImp(std::vector& arguments) { - CHECK_EQ(arguments.size(), (size_t)2); - Argument output, label; - output.resizeAndCopyFrom(arguments[0], false, HPPL_STREAM_DEFAULT); - label.resizeAndCopyFrom(arguments[1], false, HPPL_STREAM_DEFAULT); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - CHECK(label.sequenceStartPositions); - CHECK(label.ids); - size_t numSequences = label.sequenceStartPositions->getSize() - 1; - const int* labelStarts = label.sequenceStartPositions->getData(false); - const int* outputStarts = output.sequenceStartPositions->getData(false); - real totalErr = 0; - for (size_t i = 0; i < numSequences; ++i) { - real err = 0; - err = editDistance( - output.value->getData() + output.value->getWidth() * outputStarts[i], - outputStarts[i + 1] - outputStarts[i], - output.value->getWidth(), - label.ids->getData() + labelStarts[i], - labelStarts[i + 1] - labelStarts[i]); - - totalErr += err; - } - - return totalErr; - } - - virtual void eval(const NeuralNetwork& nn) { - Evaluator::eval(nn); - std::vector arguments; - arguments.reserve(config_.input_layers_size()); - for (const std::string& name : config_.input_layers()) { - arguments.push_back(nn.getLayer(name)->getOutput()); - } - } - - virtual void updateSamplesNum(const std::vector& arguments) { - numSequences_ += arguments[1].getNumSequences(); - } - - virtual void start() { - Evaluator::start(); - numSequences_ = 0; - blank_ = 0; - deletions_ = 0; - insertions_ = 0; - substitutions_ = 0; - seqClassficationError_ = 0; - } - - virtual void printStats(std::ostream& os) const { - 
storeLocalValues(); - os << config_.name() << " error = " << evalResults_["error"]; - os << " deletions error = " << evalResults_["deletion_error"]; - os << " insertions error = " << evalResults_["insertion_error"]; - os << " substitution error = " << evalResults_["substitution_error"]; - os << " sequence error = " << evalResults_["sequence_error"]; - } - - virtual void distributeEval(ParameterClient2* client) { - double buf[6] = {totalScore_, - (double)deletions_, - (double)insertions_, - (double)substitutions_, - (double)seqClassficationError_, - (double)numSequences_}; - client->reduce(buf, buf, 6, FLAGS_trainer_id, 0); - totalScore_ = buf[0]; - deletions_ = (real)buf[1]; - insertions_ = (real)buf[2]; - substitutions_ = (real)buf[3]; - seqClassficationError_ = (int)buf[4]; - numSequences_ = (int)buf[5]; - } - - void getNames(std::vector* names) { - storeLocalValues(); - names->reserve(names->size() + evalResults_.size()); - for (auto it = evalResults_.begin(); it != evalResults_.end(); ++it) { - names->push_back(config_.name() + "." 
+ it->first); - } - } - - real getValue(const std::string& name, Error* err) const { - storeLocalValues(); - - std::vector buffers; - paddle::str::split(name, '.', &buffers); - auto it = evalResults_.find(buffers[buffers.size() - 1]); - - if (it == evalResults_.end()) { - *err = Error("Evaluator does not have the key %s", name.c_str()); - return 0.0f; - } - - return it->second; - } - - std::string getType(const std::string& name, Error* err) const { - this->getValue(name, err); - if (!err->isOK()) { - return ""; - } - return "ctc_edit_distance"; - } -}; - -REGISTER_EVALUATOR(ctc_edit_distance, CTCErrorEvaluator); - -} // namespace paddle diff --git a/paddle/legacy/gserver/evaluators/ChunkEvaluator.cpp b/paddle/legacy/gserver/evaluators/ChunkEvaluator.cpp deleted file mode 100644 index 0ff3f2fa8cf06c13ef327aa7ae2511bfc0d028be..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/evaluators/ChunkEvaluator.cpp +++ /dev/null @@ -1,296 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/StringUtil.h" - -#include "Evaluator.h" - -namespace paddle { - -/** - * Chunk evaluator is used to evaluate segment labelling accuracy for a - * sequence. It calculates the chunk detection F1 score. - * - * A chunk is correctly detected if its beginning, end and type are correct. - * Other chunk type is ignored. 
- * For each label in the label sequence, we have - * - * @code - * tagType = label % numTagType - * chunkType = label / numTagType - * otherChunkType = numChunkTypes - * @endcode - * - * The total number of different labels is numTagType*numChunkTypes+1 - * We support 4 labelling scheme - * The tag type for each of the scheme is shown as follows: - * - * @code - * Scheme Begin Inside End Single - * plain 0 - - - - * IOB 0 1 - - - * IOE - 0 1 - - * IOBES 0 1 2 3 - * @endcode - * - * 'plain' means the whole chunk must contain exactly the same chunk label. - */ -class ChunkEvaluator : public Evaluator { - int otherChunkType_; - int numChunkTypes_; // number of chunk types besides other chunk type - int numTagTypes_; - int tagBegin_; - int tagInside_; - int tagEnd_; - int tagSingle_; - - int64_t numLabelSegments_; - int64_t numOutputSegments_; - int64_t numCorrect_; - - struct Segment { - int begin; - int end; - int type; - bool operator==(const Segment& y) const { - return begin == y.begin && end == y.end && type == y.type; - } - }; - - std::vector labelSegments_; - std::vector outputSegments_; - std::set excludedChunkTypes_; - mutable std::unordered_map values_; - - public: - virtual void init(const EvaluatorConfig& config) { - Evaluator::init(config); - if (config.chunk_scheme() == "IOB") { - numTagTypes_ = 2; - tagBegin_ = 0; - tagInside_ = 1; - tagEnd_ = -1; - tagSingle_ = -1; - } else if (config.chunk_scheme() == "IOE") { - numTagTypes_ = 2; - tagBegin_ = -1; - tagInside_ = 0; - tagEnd_ = 1; - tagSingle_ = -1; - } else if (config.chunk_scheme() == "IOBES") { - numTagTypes_ = 4; - tagBegin_ = 0; - tagInside_ = 1; - tagEnd_ = 2; - tagSingle_ = 3; - } else if (config.chunk_scheme() == "plain") { - numTagTypes_ = 1; - tagBegin_ = -1; - tagInside_ = -1; - tagEnd_ = -1; - tagSingle_ = -1; - } else { - LOG(FATAL) << "Unknown chunk scheme: " << config.chunk_scheme(); - } - CHECK(config.has_num_chunk_types()) << "Missing num_chunk_types in config"; - otherChunkType_ = 
numChunkTypes_ = config.num_chunk_types(); - - // the chunks of types in excludedChunkTypes_ will not be counted - auto& tmp = config.excluded_chunk_types(); - excludedChunkTypes_.insert(tmp.begin(), tmp.end()); - } - - virtual void start() { - Evaluator::start(); - numLabelSegments_ = 0; - numOutputSegments_ = 0; - numCorrect_ = 0; - } - - virtual void printStats(std::ostream& os) const { - storeLocalValues(); - os << config_.name() << "=" << values_["F1-score"] - << " true_chunks=" << numLabelSegments_ - << " result_chunks=" << numOutputSegments_ - << " correct_chunks=" << numCorrect_; - } - - virtual void distributeEval(ParameterClient2* client) { - int64_t buf[3] = {numLabelSegments_, numOutputSegments_, numCorrect_}; - client->reduce(buf, buf, 3, FLAGS_trainer_id, 0); - numLabelSegments_ = buf[0]; - numOutputSegments_ = buf[1]; - numCorrect_ = buf[2]; - } - - virtual real evalImp(std::vector& arguments) { - CHECK_EQ(arguments.size(), (size_t)2); - IVectorPtr& output = arguments[0].ids; - IVectorPtr& label = arguments[1].ids; - CHECK(!output->useGpu() && !label->useGpu()) << "Not supported"; - auto sequenceStartPositions = - arguments[1].sequenceStartPositions->getVector(false); - CHECK_EQ(output->getSize(), label->getSize()); - CHECK(sequenceStartPositions); - size_t numSequences = sequenceStartPositions->getSize() - 1; - const int* starts = sequenceStartPositions->getData(); - for (size_t i = 0; i < numSequences; ++i) { - eval1(output->getData() + starts[i], - label->getData() + starts[i], - starts[i + 1] - starts[i]); - } - return 0; - } - - void eval1(int* output, int* label, int length) { - getSegments(output, length, outputSegments_); - getSegments(label, length, labelSegments_); - size_t i = 0, j = 0; - while (i < outputSegments_.size() && j < labelSegments_.size()) { - if (outputSegments_[i] == labelSegments_[j] && - excludedChunkTypes_.count(outputSegments_[i].type) != 1) { - ++numCorrect_; - } - if (outputSegments_[i].end < labelSegments_[j].end) { - 
++i; - } else if (outputSegments_[i].end > labelSegments_[j].end) { - ++j; - } else { - ++i; - ++j; - } - } - for (auto& segment : labelSegments_) { - if (excludedChunkTypes_.count(segment.type) != 1) ++numLabelSegments_; - } - for (auto& segment : outputSegments_) { - if (excludedChunkTypes_.count(segment.type) != 1) ++numOutputSegments_; - } - } - - void getSegments(int* label, int length, std::vector& segments) { - segments.clear(); - segments.reserve(length); - int chunkStart = 0; - bool inChunk = false; - int tag = -1; - int type = otherChunkType_; - for (int i = 0; i < length; ++i) { - int prevTag = tag; - int prevType = type; - CHECK_LE(label[i], numChunkTypes_ * numTagTypes_); - tag = label[i] % numTagTypes_; - type = label[i] / numTagTypes_; - if (inChunk && isChunkEnd(prevTag, prevType, tag, type)) { - Segment segment{ - chunkStart, // begin - i - 1, // end - prevType, - }; - segments.push_back(segment); - inChunk = false; - } - if (isChunkBegin(prevTag, prevType, tag, type)) { - chunkStart = i; - inChunk = true; - } - } - if (inChunk) { - Segment segment{ - chunkStart, // begin - length - 1, // end - type, - }; - segments.push_back(segment); - } - } - - // whether (prevTag, prevType) is the end of a chunk - bool isChunkEnd(int prevTag, int prevType, int tag, int type) { - if (prevType == otherChunkType_) return false; - if (type == otherChunkType_) return true; - if (type != prevType) return true; - if (prevTag == tagBegin_) return tag == tagBegin_ || tag == tagSingle_; - if (prevTag == tagInside_) return tag == tagBegin_ || tag == tagSingle_; - if (prevTag == tagEnd_) return true; - if (prevTag == tagSingle_) return true; - return false; - } - - // whether (tag, type) is the beginning of a chunk - bool isChunkBegin(int prevTag, int prevType, int tag, int type) { - if (prevType == otherChunkType_) return type != otherChunkType_; - if (type == otherChunkType_) return false; - if (type != prevType) return true; - if (tag == tagBegin_) return true; - if 
(tag == tagInside_) return prevTag == tagEnd_ || prevTag == tagSingle_; - if (tag == tagEnd_) return prevTag == tagEnd_ || prevTag == tagSingle_; - if (tag == tagSingle_) return true; - return false; - } - - // three metrics: precision, recall and F1-score - void getNames(std::vector* names) { - storeLocalValues(); - names->reserve(names->size() + values_.size()); - for (auto it = values_.begin(); it != values_.end(); ++it) { - names->push_back(config_.name() + "." + it->first); - } - } - - // get value by field name - real getValue(const std::string& name, Error* err) const { - storeLocalValues(); - std::vector buffers; - paddle::str::split(name, '.', &buffers); - auto it = values_.find(buffers.back()); - if (it == values_.end()) { // not found - *err = Error("No such key %s", name.c_str()); - return 0.0f; - } - - return it->second; - } - - // get type of evaluator - std::string getType(const std::string& name, Error* err) const { - this->getValue(name, err); - if (!err->isOK()) { - return ""; - } - return "chunk"; - } - - private: - void storeLocalValues() const { - CHECK_GE(numOutputSegments_, 0); - CHECK_GE(numLabelSegments_, 0); - double precision = - !numOutputSegments_ ? 0 : (double)numCorrect_ / numOutputSegments_; - double recall = - !numLabelSegments_ ? 0 : (double)numCorrect_ / numLabelSegments_; - values_["precision"] = precision; - values_["recall"] = recall; - values_["F1-score"] = - !numCorrect_ ? 0 : 2 * precision * recall / (precision + recall); - } -}; - -REGISTER_EVALUATOR(chunk, ChunkEvaluator); - -} // namespace paddle diff --git a/paddle/legacy/gserver/evaluators/DetectionMAPEvaluator.cpp b/paddle/legacy/gserver/evaluators/DetectionMAPEvaluator.cpp deleted file mode 100644 index 57657241f8c1517f674670d34cb984b85996bfc7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/evaluators/DetectionMAPEvaluator.cpp +++ /dev/null @@ -1,308 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Evaluator.h" -#include "paddle/legacy/gserver/layers/DetectionUtil.h" - -using std::map; -using std::vector; -using std::pair; -using std::make_pair; - -namespace paddle { - -/** - * @brief detection map Evaluator - * - * The config file api is detection_map_evaluator. - */ -class DetectionMAPEvaluator : public Evaluator { - public: - DetectionMAPEvaluator() - : evaluateDifficult_(false), cpuOutput_(nullptr), cpuLabel_(nullptr) {} - - virtual void start() { - Evaluator::start(); - allTruePos_.clear(); - allFalsePos_.clear(); - numPos_.clear(); - } - - virtual real evalImp(std::vector& arguments) { - overlapThreshold_ = config_.overlap_threshold(); - backgroundId_ = config_.background_id(); - evaluateDifficult_ = config_.evaluate_difficult(); - apType_ = config_.ap_type(); - - MatrixPtr detectTmpValue = arguments[0].value; - Matrix::resizeOrCreate(cpuOutput_, - detectTmpValue->getHeight(), - detectTmpValue->getWidth(), - false, - false); - - MatrixPtr labelTmpValue = arguments[1].value; - Matrix::resizeOrCreate(cpuLabel_, - labelTmpValue->getHeight(), - labelTmpValue->getWidth(), - false, - false); - - cpuOutput_->copyFrom(*detectTmpValue); - cpuLabel_->copyFrom(*labelTmpValue); - - Argument label = arguments[1]; - const int* labelIndex = label.sequenceStartPositions->getData(false); - size_t batchSize = label.getNumSequences(); - - vector>> allGTBBoxes; - vector>>> allDetectBBoxes; - - for (size_t n = 0; n < batchSize; ++n) { 
- map> bboxes; - for (int i = labelIndex[n]; i < labelIndex[n + 1]; ++i) { - vector bbox; - getBBoxFromLabelData(cpuLabel_->getData() + i * 6, 1, bbox); - int c = cpuLabel_->getData()[i * 6]; - bboxes[c].push_back(bbox[0]); - } - allGTBBoxes.push_back(bboxes); - } - - size_t n = 0; - const real* cpuOutputData = cpuOutput_->getData(); - for (size_t imgId = 0; imgId < batchSize; ++imgId) { - map>> bboxes; - size_t curImgId = static_cast((cpuOutputData + n * 7)[0]); - while (curImgId == imgId && n < cpuOutput_->getHeight()) { - vector label; - vector score; - vector bbox; - getBBoxFromDetectData(cpuOutputData + n * 7, 1, label, score, bbox); - bboxes[label[0]].push_back(make_pair(score[0], bbox[0])); - ++n; - curImgId = static_cast((cpuOutputData + n * 7)[0]); - } - allDetectBBoxes.push_back(bboxes); - } - - for (size_t n = 0; n < batchSize; ++n) { - for (map>::iterator it = - allGTBBoxes[n].begin(); - it != allGTBBoxes[n].end(); - ++it) { - size_t count = 0; - if (evaluateDifficult_) { - count = it->second.size(); - } else { - for (size_t i = 0; i < it->second.size(); ++i) - if (!(it->second[i].isDifficult)) ++count; - } - if (numPos_.find(it->first) == numPos_.end() && count != 0) { - numPos_[it->first] = count; - } else { - numPos_[it->first] += count; - } - } - } - - // calcTFPos - calcTFPos(batchSize, allGTBBoxes, allDetectBBoxes); - - return 0; - } - - virtual void printStats(std::ostream& os) const { - real mAP = calcMAP(); - os << "Detection mAP=" << mAP; - } - - virtual void distributeEval(ParameterClient2* client) { - LOG(FATAL) << "Distribute detection evaluation not implemented."; - } - - protected: - void calcTFPos(const size_t batchSize, - const vector>>& allGTBBoxes, - const vector>>>& - allDetectBBoxes) { - for (size_t n = 0; n < allDetectBBoxes.size(); ++n) { - if (allGTBBoxes[n].size() == 0) { - for (map>>::const_iterator - it = allDetectBBoxes[n].begin(); - it != allDetectBBoxes[n].end(); - ++it) { - size_t label = it->first; - for (size_t i = 0; i 
< it->second.size(); ++i) { - allTruePos_[label].push_back(make_pair(it->second[i].first, 0)); - allFalsePos_[label].push_back(make_pair(it->second[i].first, 1)); - } - } - } else { - for (map>>::const_iterator - it = allDetectBBoxes[n].begin(); - it != allDetectBBoxes[n].end(); - ++it) { - size_t label = it->first; - vector> predBBoxes = it->second; - if (allGTBBoxes[n].find(label) == allGTBBoxes[n].end()) { - for (size_t i = 0; i < predBBoxes.size(); ++i) { - allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0)); - allFalsePos_[label].push_back(make_pair(predBBoxes[i].first, 1)); - } - } else { - vector gtBBoxes = - allGTBBoxes[n].find(label)->second; - vector visited(gtBBoxes.size(), false); - // Sort detections in descend order based on scores - std::sort(predBBoxes.begin(), - predBBoxes.end(), - sortScorePairDescend); - for (size_t i = 0; i < predBBoxes.size(); ++i) { - real maxOverlap = -1.0; - size_t maxIdx = 0; - for (size_t j = 0; j < gtBBoxes.size(); ++j) { - real overlap = - jaccardOverlap(predBBoxes[i].second, gtBBoxes[j]); - if (overlap > maxOverlap) { - maxOverlap = overlap; - maxIdx = j; - } - } - if (maxOverlap > overlapThreshold_) { - if (evaluateDifficult_ || - (!evaluateDifficult_ && !gtBBoxes[maxIdx].isDifficult)) { - if (!visited[maxIdx]) { - allTruePos_[label].push_back( - make_pair(predBBoxes[i].first, 1)); - allFalsePos_[label].push_back( - make_pair(predBBoxes[i].first, 0)); - visited[maxIdx] = true; - } else { - allTruePos_[label].push_back( - make_pair(predBBoxes[i].first, 0)); - allFalsePos_[label].push_back( - make_pair(predBBoxes[i].first, 1)); - } - } - } else { - allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0)); - allFalsePos_[label].push_back( - make_pair(predBBoxes[i].first, 1)); - } - } - } - } - } - } - } - - real calcMAP() const { - real mAP = 0.0; - size_t count = 0; - for (map::const_iterator it = numPos_.begin(); - it != numPos_.end(); - ++it) { - size_t label = it->first; - size_t labelNumPos = 
it->second; - if (labelNumPos == 0 || allTruePos_.find(label) == allTruePos_.end()) - continue; - vector> labelTruePos = allTruePos_.find(label)->second; - vector> labelFalsePos = - allFalsePos_.find(label)->second; - // Compute average precision. - vector tpCumSum; - getAccumulation(labelTruePos, &tpCumSum); - vector fpCumSum; - getAccumulation(labelFalsePos, &fpCumSum); - std::vector precision, recall; - size_t num = tpCumSum.size(); - // Compute Precision. - for (size_t i = 0; i < num; ++i) { - CHECK_LE(tpCumSum[i], labelNumPos); - precision.push_back(static_cast(tpCumSum[i]) / - static_cast(tpCumSum[i] + fpCumSum[i])); - recall.push_back(static_cast(tpCumSum[i]) / labelNumPos); - } - // VOC2007 style - if (apType_ == "11point") { - vector maxPrecisions(11, 0.0); - int startIdx = num - 1; - for (int j = 10; j >= 0; --j) - for (int i = startIdx; i >= 0; --i) { - if (recall[i] < j / 10.) { - startIdx = i; - if (j > 0) maxPrecisions[j - 1] = maxPrecisions[j]; - break; - } else { - if (maxPrecisions[j] < precision[i]) - maxPrecisions[j] = precision[i]; - } - } - for (int j = 10; j >= 0; --j) mAP += maxPrecisions[j] / 11; - ++count; - } else if (apType_ == "Integral") { - // Nature integral - real averagePrecisions = 0.; - real prevRecall = 0.; - for (size_t i = 0; i < num; ++i) { - if (fabs(recall[i] - prevRecall) > 1e-6) - averagePrecisions += precision[i] * fabs(recall[i] - prevRecall); - prevRecall = recall[i]; - } - mAP += averagePrecisions; - ++count; - } else { - LOG(FATAL) << "Unkown ap version: " << apType_; - } - } - if (count != 0) mAP /= count; - return mAP * 100; - } - - void getAccumulation(vector> inPairs, - vector* accuVec) const { - std::stable_sort( - inPairs.begin(), inPairs.end(), sortScorePairDescend); - accuVec->clear(); - size_t sum = 0; - for (size_t i = 0; i < inPairs.size(); ++i) { - sum += inPairs[i].second; - accuVec->push_back(sum); - } - } - - std::string getTypeImpl() const { return "detection_map"; } - - real getValueImpl() const { 
return calcMAP(); } - - private: - real overlapThreshold_; // overlap threshold when determining whether matched - bool evaluateDifficult_; // whether evaluate difficult ground truth - size_t backgroundId_; // class index of background - std::string apType_; // how to calculate mAP (Integral or 11point) - - MatrixPtr cpuOutput_; - MatrixPtr cpuLabel_; - - map numPos_; // counts of true objects each classification - map>> - allTruePos_; // true positive prediction - map>> - allFalsePos_; // false positive prediction -}; - -REGISTER_EVALUATOR(detection_map, DetectionMAPEvaluator); - -} // namespace paddle diff --git a/paddle/legacy/gserver/evaluators/Evaluator.cpp b/paddle/legacy/gserver/evaluators/Evaluator.cpp deleted file mode 100644 index a956f40d02e39ac57ca745988491c2b54741dca3..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/evaluators/Evaluator.cpp +++ /dev/null @@ -1,1361 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/gserver/evaluators/Evaluator.h" -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/StringUtil.h" - -DECLARE_int32(trainer_id); - -namespace paddle { - -void Evaluator::eval(const NeuralNetwork& nn) { - std::vector arguments; - arguments.reserve(config_.input_layers_size()); - for (const std::string& name : config_.input_layers()) { - arguments.push_back(nn.getLayer(name)->getOutput()); - } - SetDevice device(arguments[0].deviceId); - real score = evalImp(arguments); - totalScore_ += score; - updateSamplesNum(arguments); -} -/** - * @brief classification error Evaluator - * - * The config file api is classification_error_evaluator. - */ -class ClassificationErrorEvaluator : public Evaluator { - public: - /* - ClassificationErrorEvaluator() : totalScore2_(0) {} - - virtual void start() { - Evaluator::start(); - totalScore2_ = 0; - } */ - - virtual void updateSamplesNum(const std::vector& arguments) { - if (3 == arguments.size()) { - numSamples_ += arguments[2].value->getSum(); - } else { - numSamples_ += arguments[0].getBatchSize(); - } - } - - MatrixPtr calcError(std::vector& arguments) { - CHECK_GE(arguments.size(), (size_t)2); - CHECK_LE(arguments.size(), (size_t)3); - MatrixPtr& output = arguments[0].value; - IVectorPtr& label = arguments[1].ids; - MatrixPtr& multiBinaryLabel = arguments[1].value; // For multi binary label - bool supportWeight = (3 == arguments.size()) ? true : false; - MatrixPtr weight = supportWeight ? 
arguments[2].value : nullptr; - if (nullptr == output || - (nullptr == label && nullptr == multiBinaryLabel) || - (supportWeight && nullptr == weight)) { - return 0; - } - - if (label != nullptr) { - CHECK_EQ(label->getSize(), output->getHeight()); - } else { - CHECK_EQ(multiBinaryLabel->getHeight(), output->getHeight()); - CHECK_EQ(multiBinaryLabel->getWidth(), output->getWidth()); - } - if (supportWeight) { - CHECK_EQ(output->getHeight(), weight->getHeight()); - CHECK_EQ((size_t)1, weight->getWidth()); - } - - const MatrixPtr errorMat = Matrix::create(output->getHeight(), - 1, - /* trans= */ false, - useGpu(arguments[0].deviceId)); - - errorMat->zeroMem(); - - if (label != nullptr) { - errorMat->classificationError(*output, *label, config_.top_k()); - } else if (dynamic_cast(multiBinaryLabel.get()) || - dynamic_cast(multiBinaryLabel.get())) { - errorMat->classificationErrorMulti( - *output, *multiBinaryLabel, config_.classification_threshold()); - } else { - errorMat->binaryClassificationError( - 0, *output, *multiBinaryLabel, config_.classification_threshold()); - } - - if (supportWeight) { - errorMat->dotMul(*errorMat, *weight); - } - return errorMat; - } - - void printStats(std::ostream& os) const { - if (config_.top_k() == 1) { - os << config_.name() << "=" - << (numSamples_ ? totalScore_ / numSamples_ : 0); - } else { - os << " top_" << config_.top_k() - << "_error=" << (numSamples_ ? 
totalScore_ / numSamples_ : 0); - } - } - - virtual real evalImp(std::vector& arguments) { - MatrixPtr errorMat = calcError(arguments); - return errorMat->getSum(); - } - - virtual void distributeEval(ParameterClient2* client) { - mergeResultsOfAllClients(client); - } - - // Evaluator interface - protected: - std::string getTypeImpl() const { return "classification_error"; } -}; - -/** - * @brief sequence classification error Evaluator - * @note sequence level classification error stats, - * if any frame in one sequence has error, the sequence is error - */ -class SequenceClassificationErrorEvaluator - : public ClassificationErrorEvaluator { - public: - virtual void updateSamplesNum(const std::vector& arguments) { - numSamples_ += arguments[0].getNumSequences(); - } - - virtual real evalImp(std::vector& arguments) { - auto sequenceStartPositions = - arguments[0].sequenceStartPositions->getVector(false); - CHECK(sequenceStartPositions != nullptr); - const int* starts = sequenceStartPositions->getData(); - - MatrixPtr errorMat = calcError(arguments); - - int errCounter = 0; - CpuVector errorVec(0, nullptr); - for (size_t i = 0; i < sequenceStartPositions->getSize() - 1; ++i) { - errorVec.subVecFrom( - errorMat->getData(), starts[i], starts[i + 1] - starts[i]); - if (errorVec.getSum() > 0) { - errCounter += 1; - } - } - - return static_cast(errCounter); - } - - virtual void distributeEval(ParameterClient2* client) { - mergeResultsOfAllClients(client); - } - - // Evaluator interface - protected: - std::string getTypeImpl() const { return "seq_classification_error"; } -}; -REGISTER_EVALUATOR(seq_classification_error, - SequenceClassificationErrorEvaluator); -/** - * @brief sum Evaluator - * Calculate the sum of output or label - * - * The config file api is sum_evaluator. 
- */ -class SumEvaluator : public Evaluator { - public: - SumEvaluator() : cpuLabel_(nullptr), cpuWeight_(nullptr) {} - - virtual void updateSamplesNum(const std::vector& arguments) { - if (2 == arguments.size()) { - numSamples_ += arguments[1].value->getSum(); - } else { - numSamples_ += arguments[0].getBatchSize(); - } - } - - virtual real evalImp(std::vector& arguments) { - REGISTER_TIMER("SumEvaluator"); - CHECK_GE(arguments.size(), (size_t)1); - CHECK_LE(arguments.size(), (size_t)2); - bool supportWeight = (2 == arguments.size()) ? true : false; - if (supportWeight) { - if (nullptr == arguments[1].value) { - return 0; - } - CHECK_EQ(arguments[1].value->getWidth(), (size_t)1); - } - - // The sum of output - if (arguments[0].value) { - if (supportWeight) { - CHECK_EQ(arguments[0].value->getHeight(), - arguments[1].value->getHeight()); - MatrixPtr tmpMat = Matrix::create(arguments[0].value->getHeight(), - arguments[0].value->getWidth(), - /* trans= */ false, - arguments[0].value->useGpu()); - tmpMat->copyFrom(*arguments[0].value); - tmpMat->rowScale(0, *tmpMat, *arguments[1].value); - return tmpMat->getSum(); - } else { - return arguments[0].value->getSum(); - } - // The sum of label - } else if (arguments[0].ids) { - size_t insNum = arguments[0].ids->getSize(); - IVectorPtr label = arguments[0].ids; - MatrixPtr weight = supportWeight ? 
arguments[1].value : nullptr; - if (dynamic_cast(label.get())) { - IVector::resizeOrCreate(cpuLabel_, insNum, false); - cpuLabel_->copyFrom(*arguments[0].ids); - - if (supportWeight) { - CHECK_EQ(insNum, arguments[1].value->getHeight()); - Matrix::resizeOrCreate(cpuWeight_, insNum, (size_t)1, false, false); - cpuWeight_->copyFrom(*arguments[1].value); - } - - label = cpuLabel_; - weight = cpuWeight_; - } - - if (supportWeight) { - real score = 0.0; - int* labelD = label->getData(); - real* weightD = weight->getData(); - for (size_t i = 0; i < insNum; ++i) { - score += (labelD[i] * weightD[i]); - } - return score; - } else { - return label->getSum(); - } - } else { - return 0; - } - } - - virtual void distributeEval(ParameterClient2* client) { - mergeResultsOfAllClients(client); - } - - private: - IVectorPtr cpuLabel_; - MatrixPtr cpuWeight_; - - // Evaluator interface - protected: - std::string getTypeImpl() const { return "sum"; } -}; -/** - * @brief column sum Evaluator - * @note column sum for the colIdx-th column * - * - colIdx = 0: the 0-th column. - * - colIdx > 0: the colIdx-th column. - * - colIdx < 0: the last colIdx-th column. - * - * The config file api is column_sum_evaluator. - * - */ -class ColumnSumEvaluator : public Evaluator { - public: - explicit ColumnSumEvaluator(int32_t colIdx) - : colIdx_(colIdx), colNum_(0), sum_(nullptr) {} - - virtual void start() { - Evaluator::start(); - if (nullptr != sum_) { - sum_->zeroMem(); - } - } - - virtual void updateSamplesNum(const std::vector& arguments) { - if (2 == arguments.size()) { - numSamples_ += arguments[1].value->getSum(); - } else { - numSamples_ += arguments[0].getBatchSize(); - } - } - - virtual real evalImp(std::vector& arguments) { - REGISTER_TIMER("ColumnSumEvaluator"); - CHECK_GE(arguments.size(), (size_t)1); - CHECK_LE(arguments.size(), (size_t)2); - bool supportWeight = (2 == arguments.size()) ? 
true : false; - if (nullptr == arguments[0].value || - (supportWeight && nullptr == arguments[1].value)) { - return 0; - } - - size_t insNum = arguments[0].value->getHeight(); - size_t colNum = arguments[0].value->getWidth(); - if (nullptr == sum_) { - sum_ = Matrix::create((size_t)1, colNum, false, /* useGpu */ false); - colNum_ = colNum; - sum_->zeroMem(); - } else { - CHECK_EQ(colNum, sum_->getWidth()); - } - - if (supportWeight) { - CHECK_EQ(insNum, arguments[1].value->getHeight()); - CHECK_EQ((size_t)1, arguments[1].value->getWidth()); - MatrixPtr tmpMat = Matrix::create(insNum, colNum); - if (arguments[0].value->useGpu()) { - tmpMat->copyFrom(*arguments[0].value); - } - if (!arguments[1].value->useGpu()) { - if (!arguments[0].value->useGpu()) { - tmpMat->rowScale(0, *arguments[0].value, *arguments[1].value); - } else { - tmpMat->rowScale(0, *tmpMat, *arguments[1].value); - } - } else { - MatrixPtr tmp2 = Matrix::create(insNum, 1); - tmp2->copyFrom(*arguments[1].value); - if (!arguments[0].value->useGpu()) { - tmpMat->rowScale(0, *arguments[0].value, *tmp2); - } else { - tmpMat->rowScale(0, *tmpMat, *tmp2); - } - } - sum_->accumulateColSum(*tmpMat); - } else { - if (!arguments[0].value->useGpu()) { - sum_->accumulateColSum(*arguments[0].value); - } else { - MatrixPtr tmpMat = Matrix::create(insNum, colNum); - tmpMat->copyFrom(*arguments[0].value); - sum_->accumulateColSum(*tmpMat); - } - } - return 0; - } - - virtual void printStats(std::ostream& os) const { - CHECK(colIdx_ + (int32_t)colNum_ >= 0 && colIdx_ - (int32_t)colNum_ < 0) - << "column index [" << colIdx_ << "] out of range [-" << colNum_ << ", " - << colNum_ << ")"; - size_t colIdx = 0; - if (colIdx_ >= 0) { - colIdx = colIdx_; - } else { - colIdx = colNum_ + colIdx_; - } - os << config_.name() << "=" - << (numSamples_ ? 
sum_->getElement(0, colIdx) / numSamples_ : 0); - } - - void distributeEval(ParameterClient2* client) { - client->reduce( - sum_->getData(), sum_->getData(), colNum_, FLAGS_trainer_id, 0); - client->reduce(&numSamples_, &numSamples_, 1, FLAGS_trainer_id, 0); - } - - private: - int32_t colIdx_; - size_t colNum_; - MatrixPtr sum_; /* cpu matrix */ - - // Evaluator interface - protected: - std::string getTypeImpl() const { - if (colIdx_ == -1) - return "last-column-sum"; - else - return "column-sum"; - } -}; - -void AucEvaluator::start() { - Evaluator::start(); - memset(statPos_, 0, sizeof(statPos_)); - memset(statNeg_, 0, sizeof(statNeg_)); -} - -real AucEvaluator::evalImp(std::vector& arguments) { - REGISTER_TIMER("AucEvaluator"); - CHECK_GE(arguments.size(), (size_t)2); - CHECK_LE(arguments.size(), (size_t)3); - MatrixPtr output = arguments[0].value; - IVectorPtr label = arguments[1].ids; - MatrixPtr labelval = arguments[1].value; - bool supportWeight = (3 == arguments.size()) ? true : false; - MatrixPtr weight = supportWeight ? arguments[2].value : nullptr; - - if (nullptr == output || (supportWeight && nullptr == weight)) { - return 0; - } - size_t insNum = output->getHeight(); - size_t outputDim = output->getWidth(); - // Copy label from value to a vector. 
- if (nullptr == label && nullptr != labelval) { - // label width is 1 - CHECK_EQ(1U, labelval->getWidth()); - VectorPtr vec = - Vector::create(labelval->getData(), insNum, output->useGpu()); - label = vec->castToInt(); - } - - CHECK_EQ(insNum, label->getSize()); - if (supportWeight) { - CHECK_EQ(insNum, weight->getHeight()); - CHECK_EQ((size_t)1, weight->getWidth()); - } - - CHECK(colIdx_ + (int32_t)outputDim >= 0 && colIdx_ - (int32_t)outputDim < 0) - << "column index [" << colIdx_ << "] out of range [-" << outputDim << ", " - << outputDim << ")"; - realColumnIdx_ = 0; - if (colIdx_ >= 0) { - realColumnIdx_ = colIdx_; - } else { - realColumnIdx_ = outputDim + colIdx_; - } - - if (dynamic_cast(output.get())) { - Matrix::resizeOrCreate(cpuOutput_, - insNum, - outputDim, - /* trans=*/false, - /* useGpu=*/false); - cpuOutput_->copyFrom(*output); - IVector::resizeOrCreate(cpuLabel_, insNum, false); - cpuLabel_->copyFrom(*label); - - if (supportWeight) { - Matrix::resizeOrCreate(cpuWeight_, insNum, (size_t)1, false, false); - cpuWeight_->copyFrom(*weight); - } - - output = cpuOutput_; - label = cpuLabel_; - weight = cpuWeight_; - } - - real* outputD = output->getData(); - int* labelD = label->getData(); - real* weightD = supportWeight ? weight->getData() : nullptr; - size_t pos = realColumnIdx_; - - for (size_t i = 0; i < insNum; ++i) { - real value = outputD[pos]; - uint32_t binIdx = static_cast(value * kBinNum_); - CHECK(binIdx <= kBinNum_) << "bin index [" << binIdx - << "] out of range, predict value[" << value - << "]"; - real w = supportWeight ? 
weightD[i] : 1.0; - if (labelD[i] == kNegativeLabel_) { - statNeg_[binIdx] += w; - } else { - statPos_[binIdx] += w; - } - pos += outputDim; - } - return 0; -} - -void AucEvaluator::distributeEval(ParameterClient2* client) { - client->reduce(statPos_, statPos_, kBinNum_ + 1, FLAGS_trainer_id, 0); - client->reduce(statNeg_, statNeg_, kBinNum_ + 1, FLAGS_trainer_id, 0); -} - -double AucEvaluator::calcAuc() const { - double totPos = 0.0; - double totNeg = 0.0; - double totPosPrev = 0.0; - double totNegPrev = 0.0; - double auc = 0.0; - - int64_t idx = kBinNum_; - while (idx >= 0) { - totPosPrev = totPos; - totNegPrev = totNeg; - totPos += statPos_[idx]; - totNeg += statNeg_[idx]; - auc += trapezoidArea(totNeg, totNegPrev, totPos, totPosPrev); - --idx; - } - - if (totPos > 0.0 && totNeg > 0.0) { - return auc / totPos / totNeg; - } else { - return 0.0; - } -} - -real AucEvaluator::getValueImpl() const { return calcAuc(); } - -std::string AucEvaluator::getTypeImpl() const { - if (colIdx_ == -1) { - return "last-column-auc"; - } else { - return "auc"; - } -} - -// class RankAucEvaluator -REGISTER_EVALUATOR(rankauc, RankAucEvaluator); - -void RankAucEvaluator::start() { Evaluator::start(); } -void RankAucEvaluator::updateSamplesNum( - const std::vector& arguments) { - numSamples_ += arguments[0].getNumSequences(); -} -real RankAucEvaluator::evalImp(std::vector& arguments) { - CHECK_GE(arguments.size(), 2U); - CHECK_LE(arguments.size(), 3U); - double batchAuc = 0.0; - output_ = arguments[0].value; - click_ = arguments[1].value; - size_t batchSize = output_->getHeight(); - CHECK(!output_->useGpu()) << "RankAUC evaluator does not support GPU!"; - - if (arguments.size() == 3U) { - pv_ = arguments[2].value; - } else { - Matrix::resizeOrCreate(pv_, batchSize, 1, false, false); - std::fill(pv_->getData(), pv_->getData() + batchSize, 1.0); - } - - real* outputData = output_->getData(); - real* clickData = click_->getData(); - real* pvData = pv_->getData(); - - auto startPos = 
arguments[0].sequenceStartPositions->getVector(false); - const int* startPosData = startPos->getData(); - size_t batchNum = startPos->getSize() - 1; - for (size_t i = 0; i < batchNum; ++i) { - int beginPos = startPosData[i]; - int endPos = startPosData[i + 1]; - batchAuc += calcRankAuc(outputData + beginPos, - clickData + beginPos, - pvData + beginPos, - endPos - beginPos); - } - return batchAuc; -} - -double RankAucEvaluator::calcRankAuc(real* outputData, - real* clickData, - real* pvData, - size_t size) { - outputPair_.clear(); - for (size_t i = 0; i < size; ++i) { - outputPair_.push_back(std::make_pair(outputData[i], i)); - } - std::sort(outputPair_.begin(), - outputPair_.end(), - [](const std::pair& a, const std::pair& b) { - return a.first > b.first; - }); - double aucTmp = 0.0; - double clickSum = 0.0; - double oldClickSum = 0.0; - double noClick = 0.0; - double noClickSum = 0.0; - - double lastScore = outputPair_[0].first + 1.0; - for (size_t i = 0; i < size; ++i) { - if (lastScore != outputPair_[i].first) { - aucTmp += (clickSum + oldClickSum) * noClick / 2.0; - oldClickSum = clickSum; - noClick = 0.0; - lastScore = outputPair_[i].first; - } - size_t id = outputPair_[i].second; - noClick += pvData[id] - clickData[id]; - noClickSum += noClick; - clickSum += clickData[id]; - } - aucTmp += (clickSum + oldClickSum) * noClick / 2.0; - return (clickSum * noClickSum) == 0.0 ? 
0.0 - : aucTmp / (clickSum * noClickSum); -} - -std::string RankAucEvaluator::getTypeImpl() const { return "rankauc"; } - -// class PrecisionRecallEvaluator -REGISTER_EVALUATOR(precision_recall, PrecisionRecallEvaluator); - -void PrecisionRecallEvaluator::start() { - Evaluator::start(); - statsInfo_.clear(); - values_.clear(); -} - -real PrecisionRecallEvaluator::evalImp(std::vector& arguments) { - REGISTER_TIMER("PrecisionRecallEvaluator"); - CHECK_GE(arguments.size(), (size_t)2); - CHECK_LE(arguments.size(), (size_t)3); - MatrixPtr output = arguments[0].value; - IVectorPtr label = arguments[1].ids; - MatrixPtr multiBinaryLabel = arguments[1].value; - bool supportWeight = (3 == arguments.size()) ? true : false; - MatrixPtr weight = supportWeight ? arguments[2].value : nullptr; - if (nullptr == output || (nullptr == label && nullptr == multiBinaryLabel) || - (supportWeight && nullptr == weight)) { - return 0; - } - - size_t insNum = output->getHeight(); - size_t outputDim = output->getWidth(); - if (label != nullptr) { - CHECK_EQ(insNum, label->getSize()); - } else { - CHECK_EQ(insNum, multiBinaryLabel->getHeight()); - CHECK_EQ(outputDim, multiBinaryLabel->getWidth()); - } - if (supportWeight) { - CHECK_EQ(insNum, weight->getHeight()); - CHECK_EQ((size_t)1, weight->getWidth()); - } - - if (statsInfo_.size() != outputDim) { - statsInfo_.clear(); - statsInfo_.resize(outputDim); - } - - isMultiBinaryLabel_ = (nullptr == label) ? 
true : false; - if (label != nullptr) { - if (dynamic_cast(output.get())) { - Matrix::resizeOrCreate(cpuOutput_, insNum, outputDim, false, false); - cpuOutput_->copyFrom(*output); - IVector::resizeOrCreate(cpuLabel_, insNum, false); - cpuLabel_->copyFrom(*label); - if (supportWeight) { - Matrix::resizeOrCreate(cpuWeight_, insNum, (size_t)1, false, false); - cpuWeight_->copyFrom(*weight); - } - - output = cpuOutput_; - label = cpuLabel_; - weight = cpuWeight_; - } - calcStatsInfo(output, label, weight); - } else { - // Not support GPU for multi binary labels - CHECK(dynamic_cast(multiBinaryLabel.get())); - calcStatsInfoMulti(output, multiBinaryLabel, weight); - } - return 0; -} - -void PrecisionRecallEvaluator::printStats(std::ostream& os) const { - PrintStatsInfo info; - bool containMacroMicroInfo = getStatsInfo(&info); - os << "positive_label=" << config_.positive_label() - << " precision=" << info.precision << " recall=" << info.recall - << " F1-score=" << info.f1; - if (containMacroMicroInfo) { - os << "macro-average-precision=" << info.macroAvgPrecision - << " macro-average-recall=" << info.macroAvgRecall - << " macro-average-F1-score=" << info.macroAvgF1Score; - if (!isMultiBinaryLabel_) { - // precision and recall are equal in this case - os << " micro-average-precision=" << info.microAvgPrecision; - } else { - os << " micro-average-precision=" << info.microAvgPrecision - << " micro-average-recall=" << info.microAvgRecall - << " micro-average-F1-score=" << info.microAvgF1Score; - } - } -} - -void PrecisionRecallEvaluator::calcStatsInfo(const MatrixPtr& output, - const IVectorPtr& label, - const MatrixPtr& weight) { - size_t insNum = output->getHeight(); - size_t dim = output->getWidth(); - real* outputD = output->getData(); - int* labelD = label->getData(); - real* weightD = (weight != nullptr) ? 
weight->getData() : nullptr; - for (size_t i = 0; i < insNum; ++i) { - CHECK_GE(labelD[i], 0); - CHECK_LT((size_t)labelD[i], dim); - size_t maxIdx = 0; - real maxValue = outputD[i * dim]; - for (size_t j = 1; j < dim; ++j) { - size_t idx = i * dim + j; - if (maxValue < outputD[idx]) { - maxIdx = j; - maxValue = outputD[idx]; - } - } - - real w = (weightD != nullptr) ? weightD[i] : 1.0; - if (maxIdx == (size_t)labelD[i]) { - statsInfo_[maxIdx].TP += w; // true positive for labelD[i] - // true negative for all labels except for labelD[i] - for (size_t j = 0; j < dim; ++j) { - statsInfo_[j].TN += w; - } - statsInfo_[maxIdx].TN -= w; - } else { - statsInfo_[labelD[i]].FN += w; // false negative for labelD[i] - statsInfo_[maxIdx].FP += w; // false positive for maxIdx - // true negatives for all labels except for maxIdx and labelD[i] - for (size_t j = 0; j < dim; ++j) { - statsInfo_[j].TN += w; - } - statsInfo_[maxIdx].TN -= w; - statsInfo_[labelD[i]].TN -= w; - } - } -} - -void PrecisionRecallEvaluator::calcStatsInfoMulti(const MatrixPtr& output, - const MatrixPtr& label, - const MatrixPtr& weight) { - size_t insNum = output->getHeight(); - size_t dim = output->getWidth(); - real* outputD = output->getData(); - auto labelD = dynamic_cast(label.get()); - real* weightD = (weight != nullptr) ? weight->getData() : nullptr; - real threshold = config_.classification_threshold(); - for (size_t i = 0; i < insNum; ++i) { - for (size_t j = 0; j < dim; ++j) { - real w = (weightD != nullptr) ? weightD[i] : 1.0; - size_t idx = i * dim + j; - if (outputD[idx] < threshold) { - statsInfo_[j].TN += w; // true negative - } else { - statsInfo_[j].FP += w; // false positive - } - } - - const int* cols = labelD->getRowCols(i); - for (size_t j = 0; j < labelD->getColNum(i); ++j) { - CHECK_LT(size_t(cols[j]), dim); - real w = (weightD != nullptr) ? 
weightD[i] : 1.0; - size_t idx = i * dim + cols[j]; - if (outputD[idx] < threshold) { - statsInfo_[cols[j]].FN += w; // false negative - statsInfo_[cols[j]].TN -= w; // true negative - } else { - statsInfo_[cols[j]].TP += w; // true positive - statsInfo_[cols[j]].FP -= w; // false positive - } - } - } -} - -void PrecisionRecallEvaluator::storeLocalValues() const { - if (this->values_.size() == 0) { - PrintStatsInfo info; - bool containMacroMicroInfo = getStatsInfo(&info); - values_["precision"] = info.precision; - values_["recal"] = info.recall; - values_["F1-score"] = info.f1; - if (containMacroMicroInfo) { - values_["macro-average-precision"] = info.macroAvgPrecision; - values_["macro-average-recall"] = info.macroAvgRecall; - values_["macro-average-F1-score"] = info.macroAvgF1Score; - if (!isMultiBinaryLabel_) { - // precision and recall are equal in this case - values_["micro-average-precision"] = info.microAvgPrecision; - } else { - values_["micro-average-precision"] = info.microAvgPrecision; - values_["micro-average-recall"] = info.microAvgRecall; - values_["micro-average-F1-score"] = info.microAvgF1Score; - } - } - } -} - -void PrecisionRecallEvaluator::getNames(std::vector* names) { - this->storeLocalValues(); - names->reserve(this->values_.size()); - for (auto it = this->values_.begin(); it != this->values_.end(); ++it) { - names->push_back(this->config_.name() + "." 
+ it->first); - } -} - -real PrecisionRecallEvaluator::getValue(const std::string& name, - Error* err) const { - this->storeLocalValues(); - std::vector buffers; - paddle::str::split(name, '.', &buffers); - auto it = this->values_.find(buffers[buffers.size() - 1]); - if (it == this->values_.end()) { // not found - *err = Error("No such key %s", name.c_str()); - return .0f; - } - - return it->second; -} - -std::string PrecisionRecallEvaluator::getType(const std::string& name, - Error* err) const { - this->getValue(name, err); - if (!err->isOK()) { - return ""; - } - return "precision_recall"; -} - -void PrecisionRecallEvaluator::distributeEval(ParameterClient2* client) { - size_t size = 4 * statsInfo_.size(); - double* buf = new double[size]; - for (size_t i = 0; i < statsInfo_.size(); ++i) { - buf[4 * i + 0] = statsInfo_[i].TP; - buf[4 * i + 1] = statsInfo_[i].TN; - buf[4 * i + 2] = statsInfo_[i].FP; - buf[4 * i + 3] = statsInfo_[i].FN; - } - client->reduce(buf, buf, size, FLAGS_trainer_id, 0); - for (size_t i = 0; i < statsInfo_.size(); ++i) { - statsInfo_[i].TP = buf[4 * i + 0]; - statsInfo_[i].TN = buf[4 * i + 1]; - statsInfo_[i].FP = buf[4 * i + 2]; - statsInfo_[i].FN = buf[4 * i + 3]; - } - delete[] buf; -} - -bool PrecisionRecallEvaluator::getStatsInfo( - PrecisionRecallEvaluator::PrintStatsInfo* info) const { - int label = config_.positive_label(); - if (label != -1) { - CHECK(label >= 0 && label < (int)statsInfo_.size()) - << "positive_label [" << label << "] should be in range [0, " - << statsInfo_.size() << ")"; - info->precision = calcPrecision(statsInfo_[label].TP, statsInfo_[label].FP); - info->recall = calcRecall(statsInfo_[label].TP, statsInfo_[label].FN); - info->f1 = calcF1Score(info->precision, info->recall); - return false; - } - - // micro average method: precision = (TP1+TP2)/(TP1+FP1+TP2+FP2) - // macro average method: precision = (precision1+precision2)/2 - double microTotalTP = 0; - double microTotalFP = 0; - double microTotalFN = 0; - 
info->macroAvgPrecision = 0; - info->macroAvgRecall = 0; - size_t numLabels = statsInfo_.size(); - for (size_t i = 0; i < numLabels; ++i) { - microTotalTP += statsInfo_[i].TP; - microTotalFP += statsInfo_[i].FP; - microTotalFN += statsInfo_[i].FN; - info->macroAvgPrecision += - calcPrecision(statsInfo_[i].TP, statsInfo_[i].FP); - info->macroAvgRecall += calcRecall(statsInfo_[i].TP, statsInfo_[i].FN); - } - info->macroAvgPrecision /= numLabels; - info->macroAvgRecall /= numLabels; - info->macroAvgF1Score = - calcF1Score(info->macroAvgPrecision, info->macroAvgRecall); - - info->microAvgPrecision = calcPrecision(microTotalTP, microTotalFP); - info->microAvgRecall = calcPrecision(microTotalTP, microTotalFN); - info->microAvgF1Score = - calcF1Score(info->microAvgPrecision, info->microAvgRecall); - return true; -} - -REGISTER_EVALUATOR(pnpair, PnpairEvaluator); -void PnpairEvaluator::start() { - Evaluator::start(); - memset(pairArray_, 0, sizeof(pairArray_)); - predictArray_.clear(); -} - -real PnpairEvaluator::evalImp(std::vector& arguments) { - CHECK_GE(arguments.size(), 3UL); - CHECK_LE(arguments.size(), 4UL); - MatrixPtr output = arguments[0].value; - IVectorPtr label = arguments[1].ids; - IVectorPtr info = arguments[2].ids; - bool supportWeight = (4 == arguments.size()) ? true : false; - MatrixPtr weight = supportWeight ? 
arguments[3].value : nullptr; - if (nullptr == output || nullptr == label || - (supportWeight && nullptr == weight)) { - return 0; - } - size_t height = output->getHeight(); - size_t width = output->getWidth(); - CHECK_EQ(height, label->getSize()); - CHECK_EQ(height, info->getSize()); - if (supportWeight) { - CHECK_EQ(height, weight->getHeight()); - CHECK_EQ((size_t)1, weight->getWidth()); - } - - if (dynamic_cast(output.get())) { - Matrix::resizeOrCreate(cpuOutput_, height, width, false, false); - IVector::resizeOrCreate(cpuLabel_, height, false); - IVector::resizeOrCreate(cpuInfo_, height, false); - cpuOutput_->copyFrom(*output); - cpuLabel_->copyFrom(*label); - cpuInfo_->copyFrom(*info); - - output = cpuOutput_; - label = cpuLabel_; - info = cpuInfo_; - - if (supportWeight) { - Matrix::resizeOrCreate(cpuWeight_, height, (size_t)1, false, false); - cpuWeight_->copyFrom(*weight); - weight = cpuWeight_; - } - } - - real* outputs = output->getData(); - int* labels = label->getData(); - int* infos = info->getData(); - real* weights = supportWeight ? weight->getData() : nullptr; - for (size_t i = 0; i < output->getHeight(); i++) { - real y1 = outputs[i * width + (width - 1)]; - real w = supportWeight ? 
weights[i] : 1.0; - predictArray_.push_back(PredictionResult(y1, labels[i], infos[i], w)); - } - return 0; -} - -void PnpairEvaluator::stat(size_t start, - size_t end, - PredictionResult* answers, - double& pos, - double& neg, - double& spe) { - for (size_t i = start; i < end; i++) { - for (size_t j = i + 1; j < end; j++) { - CHECK_EQ(answers[i].queryid, answers[j].queryid); - // The pair weight is the mean of the two samples' weight - double weight = (answers[i].weight + answers[j].weight) / 2.0; - if (answers[i].label != answers[j].label) { - if ((answers[i].out > answers[j].out && - answers[i].label > answers[j].label) || - (answers[i].out < answers[j].out && - answers[i].label < answers[j].label)) { - pos += weight; - } else if ((answers[i].out > answers[j].out && - answers[i].label < answers[j].label) || - (answers[i].out < answers[j].out && - answers[i].label > answers[j].label)) { - neg += weight; - } else { - spe += weight; - } - } - } - } -} - -void PnpairEvaluator::calc(std::vector& predictArray) { - std::sort(predictArray.begin(), - predictArray.end(), - [](const PredictionResult& x, const PredictionResult& y) { - return x.queryid < y.queryid; - }); - - double pos = 0; - double neg = 0; - double special = 0; - auto start = predictArray.begin(); - while (start != predictArray.end()) { - auto end = std::find_if( - start + 1, predictArray.end(), [=](const PredictionResult& x) { - return x.queryid != start->queryid; - }); - CHECK(end != start); - stat(start - predictArray.begin(), - end - predictArray.begin(), - predictArray.data(), - pos, - neg, - special); - - start = end; - } - - pairArray_[0] += pos; - pairArray_[1] += neg; - - LOG(INFO) << " calc total pos pair: " << pos - << " calc total neg pair: " << neg - << " calc total special pair: " << special; -} - -std::string PnpairEvaluator::getTypeImpl() const { return "pnpair"; } - -ClassRegistrar Evaluator::registrar_; -Evaluator* Evaluator::create(const EvaluatorConfig& config) { - Evaluator* evaluator = 
registrar_.createByType(config.type()); - evaluator->init(config); - return evaluator; -} - -REGISTER_EVALUATOR(classification_error, ClassificationErrorEvaluator); -REGISTER_EVALUATOR(sum, SumEvaluator); -static InitFunction __reg_type_auc_sum__([]() { - Evaluator::registrar_.registerClass( - "last-column-sum", [] { return new ColumnSumEvaluator(-1); }); - Evaluator::registrar_.registerClass("last-column-auc", - [] { return new AucEvaluator(-1); }); -}); - -/** - * @brief print value of each layer. - * - * The config file api is value_printer_evaluator. - */ -class ValuePrinter : public NotGetableEvaluator { - public: - virtual void eval(const NeuralNetwork& nn) { - for (const std::string& name : config_.input_layers()) { - nn.getLayer(name)->getOutput().printValueString(LOG(INFO), - "layer=" + name + " "); - } - } - - virtual void updateSamplesNum(const std::vector& arguments) {} - - virtual real evalImp(std::vector& arguments) { return 0; } -}; -REGISTER_EVALUATOR(value_printer, ValuePrinter); - -/** - * @brief print gradient of each layer. - * - * The config file api is gradient_printer_evaluator. - */ -class GradientPrinter : public NotGetableEvaluator { - public: - virtual void eval(const NeuralNetwork& nn) { - for (const std::string& name : config_.input_layers()) { - const Argument& argu = nn.getLayer(name)->getOutput(); - if (argu.grad) { - std::ostringstream os; - argu.grad->print(os); - LOG(INFO) << "layer=" << name << " grad matrix:\n" << os.str(); - } - } - } - - virtual void updateSamplesNum(const std::vector& arguments) {} - - virtual real evalImp(std::vector& arguments) { return 0; } -}; -REGISTER_EVALUATOR(gradient_printer, GradientPrinter); -/** - * @brief print row max id vctor of each layer - * - * The config file api is maxid_printer_evaluator. 
- */ -class MaxIdPrinter : public NotGetableEvaluator { - private: - IVectorPtr maxIds_; - MatrixPtr maxValues_; - - public: - MaxIdPrinter() {} - - virtual void eval(const NeuralNetwork& nn) { - for (const std::string& name : config_.input_layers()) { - const Argument& argu = nn.getLayer(name)->getOutput(); - if (argu.value) { - size_t height = argu.value->getHeight(); - size_t width = config_.num_results(); - IVector::resizeOrCreate(maxIds_, height * width, false); - Matrix::resizeOrCreate(maxValues_, height, width, false); - argu.value->rowMax(*maxIds_, *maxValues_); - std::ostringstream os; - int* ids = maxIds_->getData(); - real* values = maxValues_->getData(); - for (size_t i = 0; i < height; ++i) { - for (size_t j = 0; j < width; ++j) { - size_t pos = i * width + j; - os << ids[pos] << " : " << values[pos] << ", "; - } - os << std::endl; - } - LOG(INFO) << "layer=" << name << " row max id vector:\n" << os.str(); - } - } - } - - virtual void updateSamplesNum(const std::vector& arguments) {} - - virtual real evalImp(std::vector& arguments) { return 0; } -}; -REGISTER_EVALUATOR(max_id_printer, MaxIdPrinter); -/** - * @brief print sequence max frames of each layer - * - * The config file api is maxframe_printer_evaluator. 
- */ -class MaxFramePrinter : public NotGetableEvaluator { - private: - IVectorPtr maxIds_; - MatrixPtr maxValues_; - MatrixPtr value_; - - public: - MaxFramePrinter() { - value_ = - Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, false); - } - - virtual void eval(const NeuralNetwork& nn) { - for (const std::string& name : config_.input_layers()) { - const Argument& argu = nn.getLayer(name)->getOutput(); - - CHECK_EQ(argu.value->getWidth(), 1LU); - size_t numSequences = argu.getNumSequences(); - const int* starts = argu.sequenceStartPositions->getData(false); - - std::ostringstream os; - for (size_t i = 0; i < numSequences; ++i) { - size_t offset = starts[i]; - size_t size = starts[i + 1] - starts[i]; - value_->setData(argu.value->getData() + offset, 1LU, size); - - size_t height = 1LU; - size_t width = std::min((size_t)config_.num_results(), size); - IVector::resizeOrCreate(maxIds_, height * width, false); - Matrix::resizeOrCreate(maxValues_, height, width, false); - - value_->rowMax(*maxIds_, *maxValues_); - - int* ids = maxIds_->getData(); - real* values = maxValues_->getData(); - for (size_t j = 0; j < width; ++j) { - os << ids[j] << " : " << values[j] << ", "; - } - os << "total " << size << " frames" << std::endl; - } - LOG(INFO) << "layer=" << name << " sequence max frames:\n" << os.str(); - } - } - - virtual void updateSamplesNum(const std::vector& arguments) {} - - virtual real evalImp(std::vector& arguments) { return 0; } -}; -REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter); - -/** - * @brief print text according to index matrix and a dictionary. - * - * There can be multiple input to this layer: - * - If there is only one input, the input must be a matrix containing - * the sequence of indices; - * - If there are more than one input, the first input should be ids, - * and are interpreted as sample ids. - * - * The output format will be: - * - * - sequence without sub-sequence, and there is probability. 
- * - * @code - * id \t prob space_seperated_tokens_from_dictionary_according_to_seq - * @endcode - * - * - sequence without sub-sequence, and there is not probability. - * - * @code - * id \t space_seperated_tokens_from_dictionary_according_to_seq - * @endcode - * - * - sequence with sub-sequence, and there is not probability. - * - * @code - * id \t space_seperated_tokens_from_dictionary_according_to_sub_seq - * \t \t space_seperated_tokens_from_dictionary_according_to_sub_seq - * ... - * @endcode - * - * Typically SequenceTextPrinter layer takes output of maxid or RecurrentGroup - * with maxid (when generating) as an input. - * - * The config file api is seqtext_printer_evaluator. - * - */ -class SequenceTextPrinter : public NotGetableEvaluator { - private: - /// dict_file, which contains a list of tokens - std::vector dict_; - /// result_file, which is the output file - std::ofstream os_; - /// True/False, to indicate whether to use space to separate output tokens. - /// Default is True. No space is added if set to False. - bool delimited_; - /// store the cpu version of argument.ids - std::vector cpuIds_; - /// store the probability associated with each sequence - std::vector cpuIn_; - - public: - SequenceTextPrinter() {} - - virtual void init(const EvaluatorConfig& config) { - Evaluator::init(config); - if (!config.dict_file().empty()) { - loadFileList(config.dict_file(), dict_); - } - - os_.open(config.result_file(), std::ofstream::trunc); - CHECK(os_.is_open()) << "Failed to open file " << config.result_file(); - delimited_ = config.delimited(); - } - - virtual void updateSamplesNum(const std::vector& arguments) {} - - virtual real evalImp(std::vector& arguments) { - CHECK_GE(arguments.size(), 1LU); - bool hasId = arguments.size() > 1; - size_t numSequences = arguments[0].getNumSequences(); - if (hasId) { - CHECK_EQ(arguments[0].ids->getSize(), numSequences) - << "first input must be sample id."; - } - for (size_t i = hasId ? 
1 : 0; i < arguments.size(); ++i) { - CHECK_EQ((size_t)arguments[i].getNumSequences(), numSequences); - } - - auto resizeVector = [](IVectorPtr& dest, const IVectorPtr& src) { - if (src && src->useGpu()) { - IVector::resizeOrCreate(dest, src->getSize(), false); - dest->copyFrom(*src); - } else { - dest = src; - } - }; - - auto resizeMatrix = [](MatrixPtr& dest, const MatrixPtr& src) { - if (src && src->useGpu()) { - Matrix::resizeOrCreate( - dest, src->getHeight(), src->getWidth(), false, false); - dest->copyFrom(*src); - } else { - dest = src; - } - }; - - cpuIds_.resize(arguments.size()); - cpuIn_.resize(arguments.size()); - for (size_t i = 0; i < arguments.size(); ++i) { - resizeVector(cpuIds_[i], arguments[i].ids); - resizeMatrix(cpuIn_[i], arguments[i].in); - } - - int* sampleIds = nullptr; - if (hasId) { - sampleIds = cpuIds_[0]->getData(); - } - - for (size_t i = 0; i < numSequences; ++i) { - os_ << (hasId ? sampleIds[i] : i); - for (size_t j = hasId ? 1 : 0; j < arguments.size(); ++j) { - int* output = cpuIds_[j]->getData(); - const int* starts = arguments[j].sequenceStartPositions->getData(false); - - auto seqPrint = [&](int start, int end) { - os_ << "\t"; - for (int k = start; k < end; k++) { - int id = output[k]; - os_ << (delimited_ ? 
" " : ""); - if (!dict_.empty()) { - CHECK_LT((size_t)id, dict_.size()); - os_ << dict_[id]; - } else { - os_ << id; - } - } - }; - - if (arguments[j].hasSubseq()) { - // print sequence with sub-sequence - const int* subStarts = - arguments[j].subSequenceStartPositions->getData(false); - int subSeqId_start = 0; - int subSeqId_end = 0; - for (size_t k = 0; k < (size_t)arguments[j].getNumSubSequences() + 1; - ++k) { - if (starts[i] == subStarts[k]) subSeqId_start = k; - if (starts[i + 1] == subStarts[k]) subSeqId_end = k; - } - for (int k = subSeqId_start; k < subSeqId_end; k++) { - seqPrint(subStarts[k], subStarts[k + 1]); - os_ << std::endl; - } - - } else { - // print sequence without sub-sequence - if (arguments[j].in) { // beam print - real* probs = cpuIn_[j]->rowBuf(i); - os_ << std::endl; - int start = starts[i]; - int seqEnd = starts[i + 1]; - for (size_t k = 0; k < arguments[j].in->getWidth(); ++k) { - if (start == seqEnd) { - break; - } - int end = start + output[start] + 2; - CHECK_LE(end, seqEnd); - CHECK_EQ(output[end - 1], -1); - os_ << k << "\t" << probs[k]; - seqPrint(start + 1, end - 1); - os_ << std::endl; - start = end; - } - } else { - seqPrint(starts[i], starts[i + 1]); - } - } - } - os_ << std::endl; - } - return 0; - } -}; -REGISTER_EVALUATOR(seq_text_printer, SequenceTextPrinter); -/** - * @brief print classification error. - * - * The config file api is classification_error_printer_evaluator. 
- */ -class ClassificationErrorPrinter : public ClassificationErrorEvaluator { - public: - virtual void updateSamplesNum(const std::vector& arguments) {} - - virtual real evalImp(std::vector& arguments) { - MatrixPtr errorMat = calcError(arguments); - - std::ostringstream os; - errorMat->print(os); - LOG(INFO) << "Printer=" << config_.name() << " Classification Error:\n" - << os.str(); - - if (auto startPos = arguments[0].sequenceStartPositions) { - std::ostringstream os; - startPos->getVector(false)->print(os, startPos->getSize()); - LOG(INFO) << "Printer=" << config_.name() << " sequence pos vector:\n" - << os.str(); - } - return 0; - } -}; -REGISTER_EVALUATOR(classification_error_printer, ClassificationErrorPrinter); - -std::string DummyEvaluator::getTypeImpl() const { return "dummy"; } - -} // namespace paddle diff --git a/paddle/legacy/gserver/evaluators/Evaluator.h b/paddle/legacy/gserver/evaluators/Evaluator.h deleted file mode 100644 index b3462819b1244e9f2d1a463cb44e7c550406c000..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/evaluators/Evaluator.h +++ /dev/null @@ -1,510 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/parameter/Argument.h" -#include "paddle/legacy/pserver/ParameterClient2.h" -#include "paddle/legacy/utils/ClassRegistrar.h" -#include "paddle/legacy/utils/Error.h" - -namespace paddle { - -class NeuralNetwork; -/** - * @def REGISTER_EVALUATOR - * @brief Macro for registering evaluator class - */ - -#define REGISTER_EVALUATOR(__type_name, __class_name) \ - static InitFunction __reg_type_##__type_name([]() { \ - Evaluator::registrar_.registerClass<__class_name>(#__type_name); \ - }) -/** - * @brief Base class for Evaluator - * Evaluating the performance of a model is very important. - * It indicates how successful the scores(predictions) of a datasets - * has been by a trained model. - */ -class Evaluator { - public: - static Evaluator* create(const EvaluatorConfig& config); - - Evaluator() : numSamples_(0), totalScore_(0) {} - - virtual ~Evaluator() {} - - virtual void init(const EvaluatorConfig& config) { config_ = config; } - - /** - * @brief start to evaluate some data - */ - virtual void start() { - numSamples_ = 0; - totalScore_ = 0; - } - - /** - * @brief Process a batch of data. - */ - virtual void eval(const NeuralNetwork& nn); - - /** - * @brief Process a batch of data. - * @return the score for the batch if it make sense to sum the score across - * batches. - * @note Otherwise evaluator should return 0 and override finish() and - * printStats() to do the right calculation. 
- */ - virtual real evalImp(std::vector& arguments) = 0; - - /** - * @brief Update the number of processed samples - */ - virtual void updateSamplesNum(const std::vector& arguments) { - numSamples_ += arguments[0].getBatchSize(); - } - - /// finish() should be called before distributeEval - virtual void distributeEval(ParameterClient2* client) { - LOG(FATAL) << "Not implemeted"; - } - - void mergeResultsOfAllClients(ParameterClient2* client) { - double data[2] = {totalScore_, numSamples_}; - client->reduce(data, data, 2, FLAGS_trainer_id, 0); - totalScore_ = data[0]; - numSamples_ = data[1]; - } - - /** - * @brief finish the evaluation. - */ - virtual void finish() {} - - /** - * @brief print the statistics of evaluate result - * @note finish() should be called before printStats - */ - virtual void printStats(std::ostream& os) const { - os << config_.name() << "=" - << (numSamples_ ? totalScore_ / numSamples_ : 0); - } - - friend std::ostream& operator<<(std::ostream& os, - const Evaluator& evaluator) { - evaluator.printStats(os); - return os; - } - - friend std::ostream&& operator<<(std::ostream&& os, // NOLINT - const Evaluator& evaluator) { - evaluator.printStats(os); - return std::move(os); - } - - static ClassRegistrar registrar_; - - /** - * @brief getNames will return all field names of current evaluator. - * - * The format of name is `evaluator_name.evaluator_fields`. If the evaluator - * has multiple field, the name could be `evaluator_name.field1`. For example - * the PrecisionRecallEvaluator contains `precision`, `recall` fields. The get - * names will return `precision_recall_evaluator.precision`, - * `precision_recall_evaluator.recal`, etc. - * - * Also, if current Evaluator is a combined evaluator. getNames will return - * all names of all evaluators inside the combined evaluator. - * - * @param names [out]: the field names of current evaluator. - * @note Never clear the names parameter inside getNames. 
- */ - virtual void getNames(std::vector* names) { - names->push_back(config_.name()); - } - - /** - * @brief getValue will return the current evaluate value of one field. - * - * @param name: The field name of current evaluator. - * @param err [out]: The error state. - * - * @return The evaluate value(metric). - */ - virtual real getValue(const std::string& name, Error* err) const { - if (name != config_.name()) { - *err = Error("no such name of evaluator %s", name.c_str()); - return .0f; - } - return this->getValueImpl(); - } - - /** - * @brief getType will return the evaluator type by field name. - * - * Evaluate Type is the current type of evaluator in string. Such as 'auc', - * 'precision_recall'. In combined evaluator, different name may get different - * evaluate type because it could be evaluated by different evaluator inside. - * - * @param name: The field name of current Evaluator. - * @param err: The error state. nullptr means don't care. - * @return the evaluator type string. - */ - virtual std::string getType(const std::string& name, Error* err) const { - if (name != config_.name()) { - *err = Error("no such name of evaluator %s", name.c_str()); - return std::string(); - } - return this->getTypeImpl(); - } - - protected: - /** - * @brief getValueImpl The simplest way to define getValue result. If this - * evaluator doesn't contain multiple fields, and do not throw any error, just - * implemented this method to get the evaluate result(metric). - * @return Evaluate result(metric). - */ - virtual real getValueImpl() const { - return numSamples_ != .0 ? totalScore_ / numSamples_ : .0; - } - - /** - * @brief getTypeImpl The simplest way to define getType result. If this - * evaluator doesn't combine many evaluators, the get type should only return - * itself type. - * @return Evaluator type. 
- */ - virtual std::string getTypeImpl() const { return "base"; } - - protected: - EvaluatorConfig config_; - double numSamples_; - double totalScore_; -}; - -/** - * @brief The NotGetableEvaluator class is the base class of evaluator that - * cannot get value in runtime. The most NotGetableEvaluator is Printer - * Evaluator, which is only used to debug network configuration. - */ -class NotGetableEvaluator : public Evaluator { - // Evaluator interface - public: - void getNames(std::vector* names) {} - - real getValue(const std::string& name, Error* err) const { - *err = Error("Not implemented"); - return .0f; - } - - std::string getType(const std::string& name, Error* err) const { - *err = Error("Not implemented"); - return ""; - } -}; - -class DummyEvaluator : public Evaluator { - public: - DummyEvaluator() {} - virtual void init(const EvaluatorConfig&) {} - virtual void start() {} - virtual void eval(const NeuralNetwork&) {} - virtual real evalImp(std::vector& arguments) { - (void)arguments; - return -1; - } - virtual void finish() {} - virtual void printStats(std::ostream&) const {} - - // Evaluator interface - protected: - std::string getTypeImpl() const; -}; -/** - * @brief evaluate AUC using colIdx-th column as prediction. - * The AUC(Area Under the Curve) is a common evaluation metric - * for binary classification problems. It computes the area under - * the receiver operating characteristic(ROC) curve. - * - * @note colIdx-th column - * - * - colIdx = 0: the 0-th column. - * - colIdx > 0: the colIdx-th column. - * - colIdx < 0: the last colIdx-th column. - * - * The config file api is auc_evaluator. 
- * - */ -class AucEvaluator : public Evaluator { - public: - AucEvaluator(int32_t colIdx) - : colIdx_(colIdx), - realColumnIdx_(0), - cpuOutput_(nullptr), - cpuLabel_(nullptr), - cpuWeight_(nullptr) {} - - virtual void start(); - - virtual real evalImp(std::vector& arguments); - - virtual void printStats(std::ostream& os) const { - os << config_.name() << "=" << calcAuc(); - } - - virtual void distributeEval(ParameterClient2* client); - - private: - static const uint32_t kBinNum_ = (1 << 24) - 1; - static const int kNegativeLabel_ = 0; - double statPos_[kBinNum_ + 1]; - double statNeg_[kBinNum_ + 1]; - int32_t colIdx_; - uint32_t realColumnIdx_; - MatrixPtr cpuOutput_; - IVectorPtr cpuLabel_; - MatrixPtr cpuWeight_; - - AucEvaluator() {} - - inline static double trapezoidArea(double X1, - double X2, - double Y1, - double Y2) { - return (X1 > X2 ? (X1 - X2) : (X2 - X1)) * (Y1 + Y2) / 2.0; - } - - double calcAuc() const; - - // Evaluator interface - protected: - real getValueImpl() const; - std::string getTypeImpl() const; -}; - -/** - * @brief RankAucEvaluator calculates the AUC of each list (i.e., titles - * under the same query), and averages them. Each list should be organized - * as a sequence. The inputs of this evaluator is [output, click, pv]. If pv - * is not provided, it will be set to 1. The types of click and pv are - * dense value. 
- */ -class RankAucEvaluator : public Evaluator { - public: - // evaluate ranking AUC - virtual void start(); - - virtual void updateSamplesNum(const std::vector& arguments); - - virtual real evalImp(std::vector& arguments); - - virtual void distributeEval(ParameterClient2* client) { - mergeResultsOfAllClients(client); - } - - private: - MatrixPtr output_; - MatrixPtr click_; - MatrixPtr pv_; - std::vector> outputPair_; - - double calcRankAuc(real* outputData, - real* clickData, - real* pvData, - size_t size); - - // Evaluator interface - protected: - std::string getTypeImpl() const; -}; - -/** - * @brief precision, recall and f1 score Evaluator - * \f[ - * precision = \frac{tp}{tp+tn} \\ - * recall=\frac{tp}{tp+fn} \\ - * f1=2*\frac{precsion*recall}{precision+recall} - * \f] - * - * The config file api is precision_recall_evaluator. - */ -class PrecisionRecallEvaluator : public Evaluator { - public: - // Evaluate precision, recall and F1 score - PrecisionRecallEvaluator() - : isMultiBinaryLabel_(false), - cpuOutput_(nullptr), - cpuLabel_(nullptr), - cpuWeight_(nullptr) {} - - virtual void start(); - - virtual real evalImp(std::vector& arguments); - - virtual void printStats(std::ostream& os) const; - - virtual void distributeEval(ParameterClient2* client); - - void getNames(std::vector* names); - - real getValue(const std::string& name, Error* err) const; - - std::string getType(const std::string& name, Error* err) const; - - struct StatsInfo { - /// numbers of true positives - double TP; - /// numbers of true negatives - double TN; - /// numbers of false positives - double FP; - /// numbers of false negatives - double FN; - - StatsInfo() : TP(0.0), TN(0.0), FP(0.0), FN(0.0) {} - }; - - private: - bool isMultiBinaryLabel_; - std::vector statsInfo_; - - MatrixPtr cpuOutput_; - IVectorPtr cpuLabel_; - MatrixPtr cpuWeight_; - - struct PrintStatsInfo { - double precision; - double recall; - double f1; - double macroAvgPrecision; - double macroAvgRecall; - double 
macroAvgF1Score; - double microAvgPrecision; - double microAvgRecall; - double microAvgF1Score; - }; - - bool getStatsInfo(PrintStatsInfo* info) const; - - void calcStatsInfo(const MatrixPtr& output, - const IVectorPtr& label, - const MatrixPtr& weight); - - void calcStatsInfoMulti(const MatrixPtr& output, - const MatrixPtr& label, - const MatrixPtr& weight); - - inline static double calcPrecision(double TP, double FP) { - if (TP > 0.0 || FP > 0.0) { - return TP / (TP + FP); - } else { - return 1.0; - } - } - - inline static double calcRecall(double TP, double FN) { - if (TP > 0.0 || FN > 0.0) { - return TP / (TP + FN); - } else { - return 1.0; - } - } - - inline static double calcF1Score(double precision, double recall) { - if (precision > 0.0 || recall > 0.0) { - return 2 * precision * recall / (precision + recall); - } else { - return 0; - } - } - - mutable std::unordered_map values_; - - void storeLocalValues() const; -}; - -/* - * @brief positive-negative pair rate Evaluator - * - * The config file api is pnpair_evaluator. 
- */ -class PnpairEvaluator : public Evaluator { - public: - PnpairEvaluator() - : cpuOutput_(nullptr), - cpuLabel_(nullptr), - cpuInfo_(nullptr), - cpuWeight_(nullptr) {} - - virtual void start(); - virtual real evalImp(std::vector& arguments); - - struct PredictionResult { - PredictionResult(real __out, int __label, int __queryid, real __weight) - : out(__out), label(__label), queryid(__queryid), weight(__weight) {} - real out; - int label; - int queryid; - real weight; - }; - std::vector predictArray_; - void printPredictResults() { - std::ofstream fs(FLAGS_predict_file); - CHECK(fs) << "Fail to open " << FLAGS_predict_file; - for (auto& res : predictArray_) { - fs << res.out << " " << res.label << " " << res.queryid << std::endl; - } - } - - void stat(size_t start, - size_t end, - PredictionResult* answers, - double& pos, - double& neg, - double& spe); - void calc(std::vector& predictArray); - - virtual void finish() { calc(predictArray_); } - - virtual void printStats(std::ostream& os) const { - os << " pos/neg=" << this->getValueImpl(); - } - - virtual void distributeEval(ParameterClient2* client) { - client->reduce(pairArray_, pairArray_, kPairArrayNum_, FLAGS_trainer_id, 0); - LOG(INFO) << " distribute eval calc total pos pair: " << pairArray_[0] - << " calc total neg pair: " << pairArray_[1]; - } - - private: - static const uint32_t kPairArrayNum_ = 2; - double pairArray_[kPairArrayNum_]; - MatrixPtr cpuOutput_; - IVectorPtr cpuLabel_; - IVectorPtr cpuInfo_; - MatrixPtr cpuWeight_; - - // Evaluator interface - protected: - real getValueImpl() const { - return pairArray_[0] / ((pairArray_[1] <= 0) ? 
1.0 : pairArray_[1]); - } - std::string getTypeImpl() const; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/GradientMachine.cpp b/paddle/legacy/gserver/gradientmachines/GradientMachine.cpp deleted file mode 100644 index 1c4034d8bba59dbae0a1059b96ac2b6f18c5971b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/GradientMachine.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "GradientMachine.h" - -#include -#include "paddle/legacy/utils/Logging.h" - -#include "NeuralNetwork.h" -#include "hl_gpu.h" - -#ifndef PADDLE_MOBILE_INFERENCE -#include "GradientMachineMode.h" -#include "MultiGradientMachine.h" -#include "MultiNetwork.h" -#include "ParallelNeuralNetwork.h" -#endif - -namespace paddle { - -GradientMachine* GradientMachine::create( - const ModelConfig& config, - int mode, - const std::vector& parameterTypes) { -#ifndef PADDLE_MOBILE_INFERENCE - if (auto gm = IGradientMachineMode::tryCreateGradientMachine(mode, config)) { - return gm; - } - if (FLAGS_trainer_count > 1) { - return new MultiGradientMachine(config, FLAGS_use_gpu); - } -#endif - if (FLAGS_trainer_count == 1) { // single -#ifndef PADDLE_MOBILE_INFERENCE - NeuralNetwork* nn; - if (config.type() == "multi_nn") { - /* multi submodel calculate, thread(s) will be initialized inside */ - nn = new MultiNetwork("root"); - } else if (FLAGS_parallel_nn) { - /* multi threads calculate */ - nn = new ParallelNeuralNetwork(); - } else { - /* single thread calculate */ - nn = NeuralNetwork::create(config); - } -#else - NeuralNetwork* nn = NeuralNetwork::create(config); -#endif - ParamInitCallback testParamInitCb = [](int paramId, Parameter* para) { - para->enableType(PARAMETER_VALUE); - }; - nn->init( - config, mode == kTesting ? 
testParamInitCb : nullptr, parameterTypes); - return nn; - } - LOG(FATAL) << "Unknown model type: " << config.type(); - return nullptr; -} - -void GradientMachine::saveParameters(const std::string& dir) const { - LOG(INFO) << "Saving parameters to " << dir; - - for (auto& para : parameters_) { - std::string filename = dir + "/" + para->getName(); - if (para->isFullSize()) { - para->save(filename); - } - } -} - -void GradientMachine::loadParameters(const std::string& dir) { - LOG(INFO) << "Loading parameters from " << dir; - - for (auto& para : parameters_) { - std::string filename = dir + "/" + para->getName(); - if (para->isFullSize()) { - para->load(filename); - } - } -} - -void GradientMachine::randParameters() { - LOG(INFO) << "Initing parameters.."; - - for (auto& para : parameters_) { - if (para->isFullSize()) { - para->randomize(); - } - } - LOG(INFO) << "Init parameters done."; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/GradientMachine.h b/paddle/legacy/gserver/gradientmachines/GradientMachine.h deleted file mode 100644 index d4f754a9f4dc3175f5000774c77a0e7334df7d85..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/GradientMachine.h +++ /dev/null @@ -1,250 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include - -#include "ModelConfig.pb.h" -#include "TrainerConfig.pb.h" -#include "paddle/legacy/gserver/dataproviders/DataProvider.h" -#include "paddle/legacy/gserver/layers/Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/parameter/ParameterUpdaterBase.h" -#include "paddle/legacy/utils/Thread.h" - -#ifndef PADDLE_MOBILE_INFERENCE -#include "paddle/legacy/gserver/evaluators/Evaluator.h" -#endif - -namespace paddle { -/** - * @brief A gradient machine is capable of calculating some outputs given - * some inputs and performing gradient calculation based on the - * derivative from the outputs. - * - * A gradient machine can be either a full neural network or part of a neural - * network. - * - * Usage for training: - * - * 1. Prepare inArgs. Put your input data into inArgs[i].value. - * - * 2. Call forward(inArgs, &outArgs) - * - * 3. Calculate gradient with respect to outArgs[i]->value - * and fill them into outArgs[i]->grad. - * This step can be skipped if your the outputs are from cost layers. - * - * 4. Call backward(). After backward, gradient of each parameter is - * accumulated to getParameters()[i]->getBuf(PARAMETER_GRADIENT) - * - * 5. Update parameter value getParameters()[i]->getBuf(PARAMETER_VALUE) using - * gradients. - * - * 6. Clear gradients to zero. - * - * Usage for prediction: - * - * 1. Prepare inArgs. Put your input data into inArgs[i].value. - * - * 2. Call forward(inArgs, &outArgs) - * - * 3. 
Obtain the prediction result from outArgs[i] - */ - -typedef std::vector MachineState; - -class GradientMachine; - -typedef std::shared_ptr GradientMachinePtr; - -class GradientMachine { - public: - enum CreateMode { - kNormal = 0, - kSgdSparseCpuTraining = 3, - kTesting = 4, - kCustom = 10 - }; - - /** - * Create a gradient machine from ModelConfig - * Parameter will have parameterTypes - */ - static GradientMachine* create( - const ModelConfig& config, - int mode = kNormal, - const std::vector& parameterTypes = - std::vector{ - PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM}); - - virtual ~GradientMachine() {} - - /** - * Prefetch row ids of sparse parameter. - */ - virtual void prefetch(const std::vector& inArgs) { (void)inArgs; } - - /** - * @brief Forward propagation. - * - * Calculate outputs (outArgs) based the inputs (inArgs) - * - * @note: if passType==PASS_TEST, then backward() should not be called - */ - virtual void forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType) = 0; - - /** - * @brief Backward propagation. - * - * Calculate the gradient of inArgs and parameter. - * - * This function should only be called after a corresponding forward() call. - * The caller is responsible for filling the correct grad for the outArgs - * obtained using forward(). - * - * It may also change the grad field for the inArgs supplied at forward() - */ - virtual void backward(const UpdateCallback& callback = nullptr) = 0; - - /** - * Combine forward() and backward(). For multithread training, this - * may be faster. - * - * @note: passType PASS_TEST is not allowed for forwardBackward(). 
- */ - virtual void forwardBackward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - const UpdateCallback& callback = nullptr) { - forward(inArgs, outArgs, passType); - backward(callback); - } - - virtual Argument getLayerOutput(const std::string& layerName) = 0; - - // see comment in Layer.h for the function with the same name - virtual void resetState() {} - - // set machine state - virtual void setState(const MachineState& machineState) {} - - // save machine state - virtual void getState(MachineState& machineState) {} - - virtual void onPassEnd() = 0; - -#ifndef PADDLE_MOBILE_INFERENCE - /** - * Create an evaluator which can be used for eval() - */ - virtual Evaluator* makeEvaluator() const = 0; - - /** - * evaluate using the given evaluator - */ - virtual void eval(Evaluator* evaluator) const = 0; -#endif - - std::vector& getParameters() { return parameters_; } - - std::vector& getNonStaticParameters() { - if (nonStaticParameters_.empty()) { - for (auto para : parameters_) { - if (!para->isStatic()) { - nonStaticParameters_.push_back(para); - } - } - } - return nonStaticParameters_; - } - - inline bool hasStaticParameters() { - return parameters_.size() != getNonStaticParameters().size(); - } - - /** - * @brief Used before formal training, start work-threads and set - * trainer Parameters; - * - * @note This function will only been implemented and used in a - * multithreaded environment. - */ - virtual void start() {} - - /** - * @brief check each work-thread whether is failed/error/finish, - * if not, return ture, and yes return false. - * - * @note This function will only been implemented and used in a - * multithreaded environment. - */ - virtual void finish() {} - - /** - * @brief set the training status a "finished" value, the sub_work_threads - * will option the change, and then exit. - * - * @note This function will only been implemented and used in a - * multithreaded environment. 
- */ - virtual bool trainIsOn() { return true; } - - /** - * @brief when all or some of the sub-workThreads are suspended to waiting - * controller's instructions, and after some processing done in the - * controller, it will call this function to wake up all the pending - * thread. - * - * @note This function will only been implemented and used in a - * multithreaded environment. - */ - virtual void restart() {} - - /// Set the gradient of the output from outside. - virtual void setOutputGrad(const std::vector& args) { - LOG(FATAL) << "Not implemented!"; - } - - void saveParameters(const std::string& dir) const; - - void loadParameters(const std::string& dir); - - void randParameters(); - - virtual void getStats(real& cost, int64_t& numProcessed) { - (void)cost; - (void)numProcessed; - } - - /** - * @brief Release the middle layer's output memory. - * - * @note This function is used for memory optimization in inference. - */ - virtual void releaseOutput() {} - - protected: - virtual void onLoadParameter() {} - - std::vector parameters_; - std::vector nonStaticParameters_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/GradientMachineMode.cpp b/paddle/legacy/gserver/gradientmachines/GradientMachineMode.cpp deleted file mode 100644 index 9a0b2643e03f9a1a978f9bd2fcd583d6dde948c8..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/GradientMachineMode.cpp +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "GradientMachineMode.h" - -namespace paddle { -std::unordered_map> - IGradientMachineMode::modes_; -} diff --git a/paddle/legacy/gserver/gradientmachines/GradientMachineMode.h b/paddle/legacy/gserver/gradientmachines/GradientMachineMode.h deleted file mode 100644 index dd944a35f8952e354f8e4f3eb5c67b136c5f080e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/GradientMachineMode.h +++ /dev/null @@ -1,149 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "GradientMachine.h" -#include "unordered_map" - -namespace paddle { - -class IGradientMachineMode { - public: - virtual ~IGradientMachineMode() {} - - public: // interfaces - /** - * @brief create current mode's gradient machine by model config. - * @param config model config - */ - virtual GradientMachine* create(const ModelConfig& config) = 0; - - /** - * @brief shouldBeMe the current mode of GradientMachine should be this mode. - * @param algo training algorithm name. - * @param trainerCount trainer count. - * @param isLocal is local mode (without pserver) - * @param isGpu is using gpu. - * @return true if mode should be this mode. 
- */ - virtual bool shouldBeMe(const std::string& algo, - size_t trainerCount, - bool isLocal, - bool isGpu) const = 0; - - /** - * @brief Is data must be in cpu even if using gpu mode. - * @param trainerCount trainer count - * @return true if data must be gpu. - */ - virtual bool isDataMustInCpu(size_t trainerCount) const = 0; - - /** - * @brief Need not to use mini-batch method, and should train all data in one - * batch in one pass. - */ - virtual bool needTrainWholeDataInOneBatch() const = 0; - - public: // static methods. - /** - * @brief register a custom gradient machine mode. - * @note For user to register a custom gradient machine mode, id should >= - * kCustom. - * @param mode mode id. - * @param ptr mode description object. - */ - static void regGradientMachineMode( - int32_t mode, std::unique_ptr&& ptr) { - modes_.insert(std::make_pair(mode, std::move(ptr))); - } - - /** - * @brief get custom mode from mode id. - * @param mode mode id - * @return mode description object. - */ - static IGradientMachineMode* mode(int32_t mode) { - if (modes_.find(mode) != modes_.end()) { - return modes_[mode].get(); - } else { - return nullptr; - } - } - - /** - * @brief helper function to test trainWholeDataInOneBatch or not for mode - */ - static bool trainWholeDataInOneBatch(int32_t mode) { - if (modes_.find(mode) != modes_.end()) { - return modes_[mode]->needTrainWholeDataInOneBatch(); - } else { - return false; - } - } - - /** - * @brief Try to get custom mode if we can. - * @param [out] mode the custom mode id. - * @param [in] algo algorithm name - * @param [in] trainerCount trainer count. - * @param [in] isLocal is local or not - * @param [in] isGpu using gpu or not. - * @return true if there is a custom mode fit these conditions. 
- */ - static bool tryGetMode(int* mode, - const std::string& algo, - int32_t trainerCount, - bool isLocal, - bool isGpu) { - for (auto it = modes_.begin(); it != modes_.end(); ++it) { - if (it->second->shouldBeMe(algo, trainerCount, isLocal, isGpu)) { - *mode = it->first; - return true; - } - } - return false; - } - - /** - * @brief helper function for data must in cpu - */ - static bool dataMustInCpu(int32_t mode, size_t trainerCount) { - if (modes_.find(mode) != modes_.end()) { - return modes_[mode]->isDataMustInCpu(trainerCount); - } else { - // provide data to cpu if using synchronized multi-gpu gradient machine. - return trainerCount > 1; - } - } - - /** - * @brief try to create gradient machine by mode & config. - * @return nullptr if we cannot create a gradient machine by such mode. - */ - static GradientMachine* tryCreateGradientMachine(int32_t mode, - const ModelConfig& config) { - auto m = IGradientMachineMode::mode(mode); - if (m) { - return m->create(config); - } else { - return nullptr; - } - } - - private: - static std::unordered_map> - modes_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp deleted file mode 100644 index 3ef0dfbfe2e5842918500a3b0706c1a55024ce46..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp +++ /dev/null @@ -1,898 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MultiGradientMachine.h" - -#include "paddle/legacy/utils/Logging.h" - -#include "paddle/legacy/utils/Stat.h" - -#include "NeuralNetwork.h" -#include "ParallelNeuralNetwork.h" - -DEFINE_bool(allow_only_one_model_on_one_gpu, - true, - "If true, do not allow multiple models on one GPU device"); - -namespace paddle { - -// get types of the parameters which need to be merged after backward() -static void fillMergeTypes(PassType passType, - std::vector* mergeTypes) { - mergeTypes->clear(); - if (passType != PASS_TEST) { - mergeTypes->push_back(PARAMETER_GRADIENT); - } -} - -MultiGradientMachine::MultiGradientMachine(const ModelConfig& config, - bool useGpu) - : useGpu_(useGpu), - trainerBarrier_(FLAGS_trainer_count), - allBarrier_(FLAGS_trainer_count + 1), - inArgsCopied_(false) { - isPassGrad_ = false; - numThreads_ = FLAGS_trainer_count; - if (useGpu) { - //! TODO(yuyang18): When useGpu=false && paddle is not compiled with gpu, - //! the hl_get_device_count will get an error result. It seems should return - //! 0 when hppl is not compiled as gpu version. - numDevices_ = hl_get_device_count(); - } else { - numDevices_ = 0; - } - ParamInitCallback mainParamInitCb = [](int paramId, Parameter* para) { - // only create buf for CPU parameters - // GPU parameters will be created in each thread - if (para->useGpu()) return; - - if (para->isSparseRemoteUpdate()) { - para->enableType(PARAMETER_VALUE, - FLAGS_loadsave_parameters_in_pserver - ? 
Parameter::MAT_SPARSE_ROW_PREFETCH - : Parameter::MAT_SPARSE_ROW_PREFETCH_FULL_SIZE); - para->enableType(PARAMETER_GRADIENT, Parameter::MAT_SPARSE_ROW); - } else if (para->isGradSparseUpdate()) { - para->enableType(PARAMETER_VALUE); - para->enableType(PARAMETER_GRADIENT, Parameter::MAT_SPARSE_ROW_IDS); - SparseRowIdsCpuMatrix* mat = dynamic_cast( - para->getMat(PARAMETER_GRADIENT).get()); - mat->setNumOfThreads(FLAGS_trainer_count); - } else if (para->isValueShared()) { - para->enableType(PARAMETER_VALUE, Parameter::MAT_VALUE_SHARED); - if (!para->isStatic()) { - para->enableType(PARAMETER_GRADIENT); - } - } else { - para->enableType(PARAMETER_VALUE); - if (!para->isStatic()) { - para->enableType(PARAMETER_GRADIENT); - } - } - }; - - NeuralNetwork* nn = NeuralNetwork::create(config); - nn->init(config, mainParamInitCb); - gradientMachine_.reset(nn); - parameters_ = gradientMachine_->getParameters(); - - numLogicalDevices_ = 0; - if (useGpu_) { - numLogicalDevices_ = 1; - - for (size_t pid = 0; pid < parameters_.size(); pid++) { - if (parameters_[pid]->getConfig().device() + 1 > numLogicalDevices_) { - numLogicalDevices_ = parameters_[pid]->getConfig().device() + 1; - } - } - LOG(INFO) << "numLogicalDevices=" << numLogicalDevices_ - << " numThreads=" << numThreads_ << " numDevices=" << numDevices_; - - if (numLogicalDevices_ * numThreads_ > numDevices_ && - FLAGS_allow_only_one_model_on_one_gpu) { - LOG(FATAL) << "trainer_count * num_devices_in_model " - << "(" << numThreads_ << "*" << numLogicalDevices_ << ")" - << "=" << numThreads_ * numLogicalDevices_ - << " exceeds number of GPU devices(" << numDevices_ << ")"; - } - numLogicalDevices_ = std::min(numLogicalDevices_, numDevices_); - - /* Enables direct access to memory allocations on a peer device */ - for (int i = 0; i < numThreads_; i++) { - for (int d = 0; d < numLogicalDevices_; ++d) { - enablePeerAccess(logicalDeviceId2RealDeviceId(d, i), - logicalDeviceId2RealDeviceId(d, i + 1)); - 
enablePeerAccess(logicalDeviceId2RealDeviceId(d, i), - logicalDeviceId2RealDeviceId(d, i - 1)); - } - } - } - - for (int i = 0; i < numThreads_; ++i) { - threads_.emplace_back(new TrainerThread(config, i, this)); - } - - bufferSizes_.resize(numLogicalDevices_, 0); - paraMainThread_.reserve(parameters_.size()); - int pid = 0; - for (auto& para : parameters_) { - if (para->isStatic() || !para->useGpu()) { - paraMainThread_.push_back(0); - } else { - int end = pid++ % numThreads_; - paraMainThread_.push_back(end); - int paraDeviceId = para->getDeviceId(); - if (paraDeviceId == -1) paraDeviceId = 0; - paraDeviceId = paraDeviceId % numLogicalDevices_; - if (para->getSize() > bufferSizes_[paraDeviceId]) { - bufferSizes_[paraDeviceId] = para->getSize(); - VLOG(1) << "bufferSize[" << paraDeviceId << "]" << para->getSize(); - } - } - } - - // TODO(xuwei06) Instead of using maximal buffer size, we may use a smaller - // fixed buffer size and use pipeline to dispatch parameter value and merge - // parameter gradient, which may be faster. 
- - // combination of all trainers mainPara into GradientMachine parameters - hasNonstaticCpuParamters_ = false; - for (size_t pid = 0; pid < parameters_.size(); pid++) { - if (parameters_[pid]->useGpu()) { - parameters_[pid] = threads_[paraMainThread_[pid]]->getParameters()[pid]; - } else if (!parameters_[pid]->isStatic()) { - hasNonstaticCpuParamters_ = true; - } - } - - gradBufs_.resize(numThreads_); - for (int i = 0; i < numThreads_; ++i) { - gradBufs_[i].resize(numLogicalDevices_); - for (int d = 0; d < numLogicalDevices_; ++d) { - gradBufs_[i][d].sem.post(); - } - } - - outArgStream_ = HPPL_STREAM_1; - - start(); -} - -void MultiGradientMachine::start() { - for (auto& thread : threads_) { - thread->start(); - } -} - -void MultiGradientMachine::finish() { - for (auto& thread : threads_) { - thread->stop(); - } -} - -std::vector*> -MultiGradientMachine::getSlaveParameters() { - std::vector*> vec; - vec.reserve(threads_.size()); - for (auto& thread : threads_) { - vec.push_back(&thread->getParameters()); - } - return vec; -} - -void MultiGradientMachine::notifyGradientTransfer(int paramId) { - gradQueue_.enqueue(paramId); -} - -void MultiGradientMachine::allocGradBufs() { - if (numLogicalDevices_ == 0) return; - if (gradBufs_[0][0].bufs.size() >= mergeTypes_.size()) return; - - for (int i = 0; i < numThreads_; i++) { - for (int d = 0; d < numLogicalDevices_; ++d) { - if (bufferSizes_[d] == 0) continue; - SetDevice device(logicalDeviceId2RealDeviceId(d, i)); - for (size_t j = 0; j < mergeTypes_.size(); j++) { - gradBufs_[i][d].bufs.push_back( - Vector::create(bufferSizes_[d], /* useGpu= */ true)); - } - } - } -} - -void MultiGradientMachine::prefetch(const std::vector& inArgs) { - // Each gradient machine in threads needs to do prefetch on its own - // part of inArgs. 
So we need to first divide inArgs to each thread - inArgs_ = inArgs; - startTask(TASK_COPY_IN_ARGS); - - for (auto& para : parameters_) { - if (para->isSparseRemoteUpdate()) { - auto mat = dynamic_cast( - para->getMat(PARAMETER_VALUE).get()); - mat->clearIndices(); - } - } - - waitForCopyInArgs(); - - // Because SparsePrefetchRowCpuMatrix can only be changed by ONE thread - // at one time, we need to do prefetch sequentially - for (auto& thread : threads_) { - thread->prefetch(); - } - - for (auto& para : parameters_) { - if (para->isSparseRemoteUpdate()) { - auto mat = dynamic_cast( - para->getMat(PARAMETER_VALUE).get()); - mat->setupIndices(); - auto matGrad = dynamic_cast( - para->getMat(PARAMETER_GRADIENT).get()); - matGrad->reserveStore(); - } - } -} - -void MultiGradientMachine::forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType) { - forwardImp(inArgs, outArgs, passType, TASK_FORWARD); -} - -void MultiGradientMachine::forwardImp(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - TaskType taskType) { - updateThreadParameters(); - passType_ = passType; - - if (!inArgsCopied_) { - inArgs_ = inArgs; - inArgsCopied_ = false; - } - - fillMergeTypes(passType, &mergeTypes_); - allocGradBufs(); - startTask(taskType); - - getOutArgs(outArgs, passType); -} - -void MultiGradientMachine::backward(const UpdateCallback& callback) { - backwardCallback_ = callback; - startTask(TASK_BACKWARD); - backwardImp(callback); -} - -void MultiGradientMachine::forwardBackward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - const UpdateCallback& callback) { - backwardCallback_ = callback; - forwardImp(inArgs, outArgs, passType, TASK_FORWARD_BACKWARD); - backwardImp(callback); -} - -Argument MultiGradientMachine::getLayerOutput(const std::string& layerName) { - std::vector args; - args.reserve(threads_.size()); - - for (auto& thread : threads_) { - 
args.push_back(thread->getGradientMachine()->getLayerOutput(layerName)); - } - outLayerArgs_.concat(args, false /* use_gpu */, outArgStream_, passType_); - - return outLayerArgs_; -} - -void MultiGradientMachine::backwardImp(const UpdateCallback& callback) { - for (size_t i = 0; i < parameters_.size(); i++) { - if (!parameters_[i]->useGpu() || parameters_[i]->isStatic()) continue; - REGISTER_TIMER("controller_dequeue"); - gradQueue_.dequeue(); - } - if (hasNonstaticCpuParamters()) { - waitAfterMerge(); - if (backwardCallback_) { - for (auto& para : parameters_) { - if (!para->useGpu() && !para->isStatic()) { - backwardCallback_(para.get()); - } - } - } - } -} - -void MultiGradientMachine::updateThreadParameters() { - for (size_t pid = 0; pid < parameters_.size(); ++pid) { - if (!parameters_[pid]->useGpu()) continue; - if (!parameters_[pid]->isValueUpdated()) continue; - parameters_[pid]->clearValueUpdated(); - for (int i = 0; i < (int)threads_.size(); i++) { - threads_[i]->incUpdateCounter(); - } - // NotifyValueReady should happen after that all threads' incUpdateCounter() - // are called so that the counters are correct when notifyValueReady() - // is called. 
- threads_[paraMainThread_[pid]]->notifyValueReady(pid); - } -} - -void MultiGradientMachine::onPassEnd() { - for (auto& thread : threads_) { - thread->onPassEnd(); - } -} - -Evaluator* MultiGradientMachine::makeEvaluator() const { - return threads_[0]->getGradientMachine()->makeEvaluator(); -} - -void MultiGradientMachine::eval(Evaluator* evaluator) const { - for (auto& thread : threads_) { - SetDevice device(thread->getDeviceId()); - if (thread->hasInputData()) { - thread->getGradientMachine()->eval(evaluator); - } - } -} - -void MultiGradientMachine::getOutArgs(std::vector* outArgs, - PassType passType) { - for (auto& thread : threads_) { - REGISTER_TIMER("waitOutArgs"); - thread->waitOutArgsReady(); - } - - outArgs_.resize(threads_[threads_.size() - 1]->getOutArgs().size()); - - REGISTER_TIMER("copyOutArgs"); - for (size_t i = 0; i < outArgs_.size(); ++i) { - std::vector args; - args.reserve(threads_.size()); - for (auto& thread : threads_) { - // If the thread input is empty, then the output is empty. 
- auto tmp = thread->getOutArgs(); - if (tmp.size() > 0) { - args.push_back(tmp[i]); - } - } - outArgs_[i].concat(args, useGpu_, outArgStream_, passType); - } - - if (useGpu_) { - hl_stream_synchronize(outArgStream_); - } - - *outArgs = outArgs_; -} - -void MultiGradientMachine::setOutputGrad(const std::vector& args) { - CHECK_EQ(args.size(), outArgs_.size()); - for (size_t i = 0; i < args.size(); i++) { - outArgs_[i].grad = args[i].grad; - } -} - -void MultiGradientMachine::startTask(TaskType taskType) { - taskType_ = taskType; - for (auto& thread : threads_) { - thread->notifyTaskReady(); - } -} - -TrainerThread::TrainerThread(const ModelConfig& config, - int threadId, - MultiGradientMachine* multiMachine) - : multiMachine_(multiMachine), - config_(config), - threadId_(threadId), - inArgsCopied_(false) { - int numThreads = multiMachine->getNumThreads(); - - auto& mainParas = multiMachine->getParameters(); - - using std::placeholders::_1; - using std::placeholders::_2; - - partnerId_ = mod(threadId_ - 1, numThreads); - - deviceId_ = !multiMachine_->useGpu() - ? -1 - : multiMachine_->logicalDeviceId2RealDeviceId(0, threadId_); - SetDevice gpuDevice(deviceId_); - - NeuralNetwork* nn = nullptr; - if (!multiMachine->useGpu() || !FLAGS_parallel_nn) { - nn = NeuralNetwork::create(config); - } else { - nn = new ParallelNeuralNetwork(); - for (auto& paraConfig : *config_.mutable_parameters()) { - if (paraConfig.device() != -1) { - paraConfig.set_device(multiMachine_->logicalDeviceId2RealDeviceId( - paraConfig.device(), threadId_)); - } - } - for (auto& layerConfig : *config_.mutable_layers()) { - if (layerConfig.device() != -1) { - layerConfig.set_device(multiMachine_->logicalDeviceId2RealDeviceId( - layerConfig.device(), threadId_)); - } - } - } - // Only GPU do not share parameter values with main paramters. 
- ParamInitCallback slaveParamInitCb = - std::bind(parameterInitNN, _1, _2, &mainParas); - nn->init(config_, slaveParamInitCb); - gradientMachine_.reset(nn); - parameters_ = gradientMachine_->getParameters(); - if (!FLAGS_parallel_nn) { - for (auto& para : parameters_) { - para->setDevice(deviceId_); - } - } - - backwardCallback_ = - std::bind(&TrainerThread::backwardCallback, this, std::placeholders::_1); - - gradStream_ = HPPL_STREAM_2; - valueStream_ = HPPL_STREAM_3; - stopping_ = true; - updateCounter_ = 0; - parameterUpdated_ = false; -} - -TrainerThread::~TrainerThread() { stop(); } - -void TrainerThread::start() { - if (!stopping_) return; - - stopping_ = false; - - gradientMachine_->start(); - - computeThread_.reset(new std::thread([this]() { computeThread(); })); - - if (multiMachine_->useGpu()) { - gradCollectThread_.reset( - new std::thread([this]() { gradCollectThread(); })); - - valueDispatchThread_.reset( - new std::thread([this]() { valueDispatchThread(); })); - - copyThread_.reset(new std::thread([this]() { copyGradToBufferThread(); })); - } -} - -void TrainerThread::stop() { - if (stopping_) return; - - stopping_ = true; - - if (computeThread_) { - taskReadySem_.post(); - computeThread_->join(); - } - if (gradCollectThread_) { - gradQueue_.enqueue(0); - gradCollectThread_->join(); - } - if (copyThread_) { - gradBufQueue_.enqueue(0); - copyThread_->join(); - } - if (valueDispatchThread_) { - valueReadyQueue_.enqueue(0); - valueDispatchThread_->join(); - } -} - -void TrainerThread::computeThread() { - VLOG(1) << "gradComputeThread " << threadId_; - - if (deviceId_ >= 0) { - hl_init(deviceId_); - } - - while (true) { - { - REGISTER_TIMER("taskSem_wait"); - taskReadySem_.wait(); - } - - if (stopping_) break; - - switch (multiMachine_->getTaskType()) { - case MultiGradientMachine::TASK_FORWARD_BACKWARD: - forward(); - backward(); - break; - case MultiGradientMachine::TASK_FORWARD: - forward(); - break; - case MultiGradientMachine::TASK_BACKWARD: - 
backward(); - break; - case MultiGradientMachine::TASK_COPY_IN_ARGS: - batchSize_ = copyInArgs(); - inArgsCopied_ = true; - multiMachine_->waitForCopyInArgs(); - break; - } - } - hl_fini(); -} - -void TrainerThread::prefetch() { - SetDevice setDevice(deviceId_); - gradientMachine_->prefetch(inArgs_); -} - -void TrainerThread::forward() { - if (!inArgsCopied_) { - REGISTER_TIMER("copyInArgs"); - batchSize_ = copyInArgs(); - } else { - inArgsCopied_ = false; - } - - if (multiMachine_->getPassType() != PASS_TEST) { - REGISTER_TIMER("clearGradient"); - // For main parameter, the user of MultiGpuSyncMachine is responsible - // for setting the gradient to zero - for (size_t i = 0; i < parameters_.size(); i++) { - if (parameters_[i]->useGpu()) { - if (multiMachine_->paraMainThread(i) != threadId_) { - SetDevice device(parameters_[i]->getDeviceId()); - parameters_[i]->clearGradient(); - } - } else { - parameters_[i]->clearGradient(); - } - } - } - - { - REGISTER_TIMER("wait_value"); - valueReadyCond_.wait([this]() { return !parameterUpdated_; }); - } - - { fillMergeTypes(multiMachine_->getPassType(), &mergeTypes_); } - - { - REGISTER_TIMER("thread_forward"); - if (batchSize_ > 0) { - gradientMachine_->forward( - inArgs_, &outArgs_, multiMachine_->getPassType()); - } else { - outArgs_.clear(); - } - } - outArgsReadySem_.post(); -} - -void TrainerThread::backward() { - REGISTER_TIMER("thread_backward"); - if (multiMachine_->isPassGrad()) { - copyOutputGrad(); - } - if (batchSize_ > 0) { - gradientMachine_->backward(backwardCallback_); - } else { - for (size_t i = parameters_.size(); i > 0; i--) { - backwardCallback(parameters_[i - 1].get()); - } - } - if (multiMachine_->hasNonstaticCpuParamters()) { - mergeCpuGradients(); - } -} - -void TrainerThread::backwardCallback(Parameter* para) { - // CPU parameters are merged in the end - if (!para->useGpu() || para->isStatic()) return; - - int paramId = para->getID(); - if (multiMachine_->getNumThreads() == 1) { - // no need to do 
merge if there is only one thread - doCallback(paramId); - } else if (threadId_ == mod(multiMachine_->paraMainThread(paramId) - 1, - multiMachine_->getNumThreads())) { - notifyCopyGradToBuffer(paramId); - } else { - notifyGradientCollect(paramId); - } -} - -void TrainerThread::copyGradToBufferThread() { - VLOG(1) << "copyGradToBufferThread " << threadId_; - - if (deviceId_ >= 0) { - hl_init(deviceId_); - } - auto& partnerThread = multiMachine_->getThread(partnerId_); - auto& gradBufs = multiMachine_->getGradBuf(partnerId_); - - while (true) { - int pid = gradBufQueue_.dequeue(); - if (stopping_) break; - - int pdeviceId = multiMachine_->realDeviceId2LogicalDeviceId( - parameters_[pid]->getDeviceId(), threadId_); - - auto& gradBuf = gradBufs[pdeviceId]; - - { - REGISTER_TIMER("waitBufferReady"); - gradBuf.sem.wait(); - } - - { - REGISTER_TIMER("copyGradToBuffer"); - SetDevice setDevice(parameters_[pid]->getDeviceId()); - for (size_t i = 0; i < mergeTypes_.size(); ++i) { - gradBuf.bufs[i]->resize( - parameters_[pid]->getBuf(mergeTypes_[i])->getSize()); - gradBuf.bufs[i]->copyFrom(*parameters_[pid]->getBuf(mergeTypes_[i]), - gradStream_); - } - hl_stream_synchronize(gradStream_); - } - partnerThread->notifyGradientCollect(pid); - } - hl_fini(); -} - -void TrainerThread::gradCollectThread() { - VLOG(1) << "gradCollectThread " << threadId_; - - if (deviceId_ >= 0) { - hl_init(deviceId_); - } - - std::vector gradReadyCount(parameters_.size(), 0); - - auto& gradBufs = multiMachine_->getGradBuf(threadId_); - - while (true) { - int pid = gradQueue_.dequeue(); - if (stopping_) break; - - if (++gradReadyCount[pid] < 2) continue; - gradReadyCount[pid] = 0; - int pdeviceId = multiMachine_->realDeviceId2LogicalDeviceId( - parameters_[pid]->getDeviceId(), threadId_); - - auto& gradBuf = gradBufs[pdeviceId]; - - { - REGISTER_TIMER("mergeGrad"); - for (size_t i = 0; i < mergeTypes_.size(); ++i) { - ParameterType type = mergeTypes_[i]; - const VectorPtr& localGrad = 
parameters_[pid]->getBuf(type); - SetDevice setDevice(parameters_[pid]->getDeviceId()); - localGrad->add(*gradBuf.bufs[i]); - } - } - - gradBuf.sem.post(); - - if (multiMachine_->paraMainThread(pid) == threadId_) { - doCallback(pid); - } else { - notifyCopyGradToBuffer(pid); - } - } - hl_fini(); -} - -void TrainerThread::doCallback(int pid) { - REGISTER_TIMER("callback"); - auto& gpuThreads = multiMachine_->getAllThreads(); - if (multiMachine_->getBackwardCallback()) { - // The callback supplied by the user of MultiGradientMachine may handle - // the parameter update using the gradient. - multiMachine_->getBackwardCallback()(parameters_[pid].get()); - if (parameters_[pid]->isValueUpdated()) { - parameters_[pid]->clearValueUpdated(); - for (auto& thread : gpuThreads) { - thread->incUpdateCounter(); - } - notifyValueReady(pid); - } - } - multiMachine_->notifyGradientTransfer(pid); -} - -void TrainerThread::valueDispatchThread() { - VLOG(1) << "valueDispatchThread " << threadId_; - - if (deviceId_ >= 0) { - hl_init(deviceId_); - } - - auto& thread = multiMachine_->getThread(partnerId_); - - while (true) { - int pid; - { - REGISTER_TIMER("value_dequeue"); - pid = valueReadyQueue_.dequeue(); - } - if (stopping_) break; - - if (multiMachine_->paraMainThread(pid) == partnerId_) continue; - - { - REGISTER_TIMER("copyValue"); - SetDevice setDevice(parameters_[pid]->getDeviceId()); - thread->getValueBuf(pid)->copyFrom(*getValueBuf(pid), valueStream_); - hl_stream_synchronize(valueStream_); - } - - thread->notifyValueReady(pid); - } - hl_fini(); -} - -void TrainerThread::notifyValueReady(int paramId) { - if (--updateCounter_ == 0) { - valueReadyCond_.notify_all([this] { parameterUpdated_ = false; }); - } - - notifyValueDispatch(paramId); -} - -int TrainerThread::copyInArgs() { - const std::vector& fullInArgs = multiMachine_->getInArgs(); - int numThreads = multiMachine_->getAllThreads().size(); - int32_t numSequences = fullInArgs[0].getNumSequences(); - int32_t startSeq = 
numSequences * threadId_ / numThreads; - int32_t endSeq = numSequences * (threadId_ + 1) / numThreads; - int32_t copySize = endSeq - startSeq; - - /** - * For the first copy, need to allocate space here - */ - if (inArgs_.size() == 0) { - inArgs_.resize(fullInArgs.size()); - } - - if (copySize == 0) { - return 0; - } - - for (size_t i = 0; i < fullInArgs.size(); i++) { - inArgs_[i].resizeAndCopyFrom( - fullInArgs[i], - startSeq, - copySize, - FLAGS_parallel_nn ? false : multiMachine_->useGpu()); - } - return copySize; -} - -void TrainerThread::mergeCpuGradients() { - CHECK_EQ(mergeTypes_.size(), 1UL); - CHECK_EQ(mergeTypes_[0], PARAMETER_GRADIENT); - - { - REGISTER_TIMER("waitbeforeMerge"); - multiMachine_->waitBeforeMerge(); - } - std::vector*> slaveParameters = - multiMachine_->getSlaveParameters(); - - CHECK(slaveParameters.size()); - for (auto& para : multiMachine_->getNonStaticParameters()) { - if (para->useGpu()) continue; - if (para->isSparseRemoteUpdate()) { - REGISTER_TIMER("mergeRemoteGradSparse"); - mergeGradSparseRemote(para.get(), slaveParameters); - } else if (para->isGradSparseUpdate()) { - REGISTER_TIMER("mergeGradSparse"); - mergeGradSparse(para.get(), slaveParameters); - } else { - REGISTER_TIMER("mergeGradDense"); - mergeGradDense(para.get(), slaveParameters); - } - } - { - REGISTER_TIMER("waitbeforeMerge"); - multiMachine_->waitAfterMerge(); - } -} - -void TrainerThread::mergeGradSparse( - Parameter* para, - std::vector*>& slaveParameters) { - size_t pid = para->getID(); - SparseRowIdsCpuMatrix* mainMat = dynamic_cast( - para->getMat(PARAMETER_GRADIENT).get()); - std::vector& ids = mainMat->getIds(threadId_); - - for (auto slaveParams : slaveParameters) { - SparseRowCpuMatrix* mat = dynamic_cast( - (*slaveParams)[pid]->getMat(PARAMETER_GRADIENT).get()); - mat->addTo(*mainMat, ids, threadId_, multiMachine_->getNumThreads()); - // we use a sample hash method(%) instead of range partition, - // because range partition has balance issue sometimes, - 
// when feature ids are not generated from hashcode. - } - uniqueIds(ids); -} - -void TrainerThread::mergeGradSparseRemote( - Parameter* para, - std::vector*>& slaveParameters) { - size_t pid = para->getID(); - SparseRowCpuMatrix* mainMat = - dynamic_cast(para->getMat(PARAMETER_GRADIENT).get()); - - mainMat->checkIndices(); - mainMat->zeroMemThread(threadId_, multiMachine_->getNumThreads()); - - for (auto slaveParams : slaveParameters) { - SparseRowCpuMatrix* mat = dynamic_cast( - (*slaveParams)[pid]->getMat(PARAMETER_GRADIENT).get()); - mat->addTo(*mainMat, threadId_, multiMachine_->getNumThreads()); - } -} - -void TrainerThread::mergeGradDense( - Parameter* para, - std::vector*>& slaveParameters) { - size_t pid = para->getID(); - auto interval = calcSplitArrayInterval(para->getSize(), - (size_t)threadId_, - multiMachine_->getNumThreads(), - 8LU /*for avx*/); - size_t startSeq = interval.first; - size_t copySize = interval.second - interval.first; - - // setup sub bufs - CpuVector destGrad(0, nullptr); - destGrad.subVecFrom(*para->getBuf(PARAMETER_GRADIENT), startSeq, copySize); - - // merge - CpuVector slaveGradSub(0, nullptr); - for (auto slaveParams : slaveParameters) { - slaveGradSub.subVecFrom( - *(*slaveParams)[pid]->getBuf(PARAMETER_GRADIENT), startSeq, copySize); - destGrad.add(slaveGradSub); - } -} - -void TrainerThread::copyOutputGrad() { - const std::vector& outputGradArgs = multiMachine_->outArgs_; - int numThreads = multiMachine_->getAllThreads().size(); - int32_t numSequences = outputGradArgs[0].getNumSequences(); - int32_t startSeq = numSequences * threadId_ / numThreads; - int32_t endSeq = numSequences * (threadId_ + 1) / numThreads; - int32_t copySize = endSeq - startSeq; - outArgs_.resize(outputGradArgs.size()); - for (size_t i = 0; i < outputGradArgs.size(); i++) { - outArgs_[i].resizeAndCopyFrom(outputGradArgs[i], - startSeq, - copySize, - multiMachine_->useGpu(), - HPPL_STREAM_DEFAULT); - } - if (multiMachine_->useGpu()) { - 
hl_stream_synchronize(HPPL_STREAM_DEFAULT); - } - gradientMachine_->setOutputGrad(outArgs_); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.h b/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.h deleted file mode 100644 index 674acd4124981face13b21aee02f031ea775ffec..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.h +++ /dev/null @@ -1,478 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "GradientMachine.h" - -#include "hl_gpu.h" -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/Queue.h" - -namespace paddle { - -class TrainerThread; - -typedef Queue PidQueue; -typedef std::unique_ptr TrainerThreadPtr; - -struct GradBuffer { - /// GradBuffer is used for gathering gradient for GPU parameters - int paramId; - - /// sem is used to notify that the local gradient merge of the current thread - /// finished for the current thread. - Semaphore sem; - - // bufs[mergeIndex] - std::vector bufs; -}; - -/** - * A MultiGradientMachine is a synchronous GradientMachine which devides - * one data batch into several smaller batches and assign each one small batch - * to one computint thread for computation. After each thread finishes - * computation, it merges result (including output Argument and gradient during - * backward()). 
It basically is the same as single thread gradient machine, - * except that it uses multi-thread to do the computation. - * - * It handles GPU and Cpu parameters differently. In GPU, one computing thread - * generally corresponds to one GPU device. Thus, each thread keeps a separate - * copy of the parameter in its own device's memory. In CPU, we only need to - keep - * one copy of the parameters in the main memory. After, each computing thread - * computes its own parameter gradient, the update process needs to accumulate - * the parameter gradients from all the computing threads, and update the - * accumulated parameter gradient to the corresponding parameter value. - * - * Each GPU parameter is assigned to a thread called its main thread. For each - * parameter, the accumulation of its gradients and the update of its value - * happens in its main thread. The main thread first gather the parameter - * gradients from all the computing thread. Then, it performs parameter update. - * After a gradient is updated by the main thread, it is scattered to all the - * computing thread so that the parameters in all the computing threads are - * synchronized. The scatter and gather process are implemented by ring-style - * communication. Assume we have N computing threads, its thread ids will be - * 0, 1, ..., N-1. For each parameter, the id of the main thread is specified - in - * paraMainThread_[pid], where pid is the id of the parameter. Each thread i - only - * sends data to its partner thread (i - 1) % N. For example, for a parameter - * gradient that is computed in thread 4, and its main thread is 2. Its - * traveling process would be 4, 5,..., N-1, 0, 1, 2. In each step, the - gradient - * buffer is added to the local gradient, and the local gradient is then copied - * to the gradient buffer of the next thread. At last, its main thread 2 will - * get the accumulated parameter gradient. 
For the same parameter, after its - * value is updated, the value's traveling process would be 2, 1, 0, N-1, ... - 3. - * At the end, all the computing threads would have the updated parameter - value. - * - * A computing thread (TrainerThread) uses 4 threads to do different jobs: - * - * 1. computeThread(): performing forward(), backward(), prefetch(). - * - * 2. valueDispatchThread(): copying parameter values to partner thread. - * - * 3. copyGradToBufferThread(): copying parameter gradient to partner thread. - * - * 4. gradCollectThread(): merging the gradient from step 3 with local gradient - * and call the callback supplied by the user to update parameter value. - * - * CPU parameter value has only one copy. And their gradients are merged at the - * end of backward(). - * - * * Handling of sparse update - * Currently, sparse update is only supported for CPU parameters. - - * Sparse updates refers to gradient caculation where the gradient is sparse. - For - * example, if the input argument to a 'fc' layer is sparse, the gradient of - the - * weight matrix of this layer will be sparse. It is usually more efficient to - * treat the gradient explicitly as sparse vector during the parameter update. - - * There are two types of sparse updates called local sparse update and remote - * sparse update. - - * For both types of sparse updates, there is one copy of parameter value and - * gradient called main parameter value and gradient, and there is a copy of - * parameter value and gradient for each computing thread called slave - parameter - * value and gradient. The slave parameter values are always shared with the - * corresponding main parameter value. The slave parameter grad is a sparse row - * matrix. The sparse pattern for slave parameter grads are different, because - * the small batches for each computing thread might have different sparsity - * pattern. - - * 1. Local sparse update - * - * Main parameter value type is MAT_NORMAL. It is a dense matrix. 
- * - * Main parameter grad type is MAT_SPARSE_ROW_IDS (SparseRowIdsCpuMatrix) - * It is also a dense matrix, but the updated values are specified by IDS. - * - * Slave parameter value shares with main parameter value. - * - * Slave parameter grad type is MAT_SPARSE_ROW_AUTO_GROW - * (SparseAutoGrowRowCpuMatrix). It is a sparse row matrix. - * - * During backward() of each TrainerThread, SparseAutoGrowRowCpuMatrix will - * gather all the non-zero gradient. And After backward(), they will be - merged - * into main parameter grad (SparseRowIdsCpuMatrix), with indices indicating - * which rows have nonzero gradient. - * - * 2. Remote sparse update - * - * Main parameter value type is MAT_SPARSE_ROW_PREFETCH(_FULL_SIZE) - * (SparsePrefetchRowCpuMatrix). MAT_SPARSE_ROW_PREFETCH is a sparse matrix. - * MAT_SPARSE_ROW_PREFETCH_FULL_SIZE is a dense matrix. However, only the - * parameter values that are prefetched is up-to-date. - * - * Main parameter grad type is MAT_SPARSE_ROW (SparseRowCpuMatrix). - * And it shares sparse pattern with value by sharing indexDictHandle_, - which - * is an internal data structure used by SparseRowCpuMatrixto specify the - * sparsity pattern of Slave parameter value shares with main parameter - value. - * - * Slave parameter grad type is MAT_SPARSE_ROW_AUTO_GROW - * (SparsePrefetchRowCpuMatrix). It is a sparse row matrix - * - * During prefetch(), all the layers will indicates which rows of each - * parameter are needed. Then the framework will retrieve those rows from - * parameter server. - * - * During backward() of each TrainerThread, SparseAutoGrowRowCpuMatrix will - * gather all the non-zero gradient. And After backward(), they will be - merged - * into main parameter grad (SparseRowCpuMatrix). And the framework will - send - * the merged gradient to parameter server. 
- */ -class MultiGradientMachine : public GradientMachine { - public: - enum TaskType { - TASK_FORWARD_BACKWARD = 0, - TASK_FORWARD = 1, - TASK_BACKWARD = 2, - TASK_COPY_IN_ARGS = 3, - }; - - explicit MultiGradientMachine(const ModelConfig& config, bool useGpu); - - virtual void start(); - - virtual void finish(); - - virtual void prefetch(const std::vector& inArgs); - - virtual void forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType); - - virtual void backward(const UpdateCallback& callback = nullptr); - - void forwardBackward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - const UpdateCallback& callback); - - virtual Argument getLayerOutput(const std::string& layerName); - - virtual void onPassEnd(); - - virtual Evaluator* makeEvaluator() const; - - virtual void eval(Evaluator* evaluator) const; - - bool useGpu() const { return useGpu_; } - - /// @return whether to pass the gradients in outArgs_ to each threads. - bool isPassGrad() { return isPassGrad_; } - - /// @brief set whether to pass the gradient in outArgs_ to each threads. - void setPassGrad(bool isPass) { isPassGrad_ = isPass; } - - /// Set the gradients of the outputs. - /// The gradietns will be copied to each thread in the computing threads. - virtual void setOutputGrad(const std::vector& args); - - protected: - friend class TrainerThread; - - std::vector& getAllThreads() { return threads_; } - /// Calculate the real device id based on the logical device id and the - /// thread id. - int logicalDeviceId2RealDeviceId(int logicalId, int threadId = 0) const { - if (logicalId == -1) { - logicalId = 0; - } - return mod(logicalId + FLAGS_gpu_id + threadId * numLogicalDevices_, - numDevices_); - } - - /// Calculate the logical device id based on the real device id and the - /// thread id. 
- int realDeviceId2LogicalDeviceId(int realId, int threadId = 0) const { - if (realId == -1) { - return 0; - } else { - return mod(realId - FLAGS_gpu_id - threadId * numLogicalDevices_, - numDevices_); - } - } - - std::vector*> getSlaveParameters(); - - bool hasNonstaticCpuParamters() const { return hasNonstaticCpuParamters_; } - - /// Called TrainerThread to wait before merging CPU parameter gradients. - void waitBeforeMerge() { trainerBarrier_.wait(); } - - /// called by MultiGradientMachine and TrainerThread to wait after merging - /// CPU parameter graidents. - void waitAfterMerge() { allBarrier_.wait(); } - - /// called by MultiGradientMachine and TrainerThread to wait for copyInArgs() - /// finishing - void waitForCopyInArgs() { allBarrier_.wait(); } - - TrainerThreadPtr& getThread(int threadId) { return threads_[threadId]; } - - std::vector& getGradBuf(int threadId) { - return gradBufs_[threadId]; - } - - PassType getPassType() const { return passType_; } - - /// Called by TrainerThread to notify MultiGradientMachine that the gradient - /// for paramId is ready - void notifyGradientTransfer(int paramId); - - const std::vector& getInArgs() { return inArgs_; } - - TaskType getTaskType() const { return taskType_; } - - const UpdateCallback& getBackwardCallback() const { - return backwardCallback_; - } - - int getNumDevices() const { return numDevices_; } - - int getNumLogicalDevices() const { return numLogicalDevices_; } - - int getNumThreads() const { return numThreads_; } - - int paraMainThread(int pid) const { return paraMainThread_[pid]; } - - protected: - virtual void forwardImp(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - TaskType taskType); - - virtual void backwardImp(const UpdateCallback& callback = NULL); - - /// update all parameters - void updateThreadParameters(); - - void startTask(TaskType taskType); - - void getOutArgs(std::vector* outArgs, PassType passType); - - void allocGradBufs(); - - protected: - bool useGpu_; 
- - bool hasNonstaticCpuParamters_; - - /// store main parameter only - std::unique_ptr gradientMachine_; - - std::vector threads_; - std::vector paraMainThread_; - std::vector> gradBufs_; // [threadId][deviceId] - std::vector bufferSizes_; - - PassType passType_; - TaskType taskType_; - PidQueue gradQueue_; - std::vector inArgs_; - std::vector outArgs_; - hl_stream_t outArgStream_; - - Argument outLayerArgs_; - - /// ParameterType which needs to be merged from each GPU - std::vector mergeTypes_; - int numDevices_; /* number of gpu devices */ - int numLogicalDevices_; // number of GPU used by one NN - int numThreads_; /* number of train threads */ - - UpdateCallback backwardCallback_; - - /// barrrier for threads_ - ThreadBarrier trainerBarrier_; - - /// barrier for both MultiGradientMachine and threds_ - ThreadBarrier allBarrier_; - - /// indicate whether inArgs is copied before forward() - bool inArgsCopied_; - - /// Whether to copy the gradient back from an external input. - bool isPassGrad_; -}; - -class TrainerThread { - public: - TrainerThread(const ModelConfig& config, - int threadId, - MultiGradientMachine* multiMachine); - - ~TrainerThread(); - - void start(); - - void onPassEnd() { gradientMachine_->onPassEnd(); } - - void waitOutArgsReady() { outArgsReadySem_.wait(); } - - void notifyTaskReady() { taskReadySem_.post(); } - - int getDeviceId() const { return deviceId_; } - - GradientMachine* getGradientMachine() { return gradientMachine_.get(); } - - const std::vector& getParameters() { return parameters_; } - - void stop(); - - void notifyValueReady(int paramId); - - const VectorPtr& getValueBuf(int paramId) { - return parameters_[paramId]->getBuf(PARAMETER_VALUE); - } - - const std::vector& getOutArgs() { return outArgs_; } - - void incUpdateCounter(int n = 1) { - updateCounter_ += n; - parameterUpdated_ = true; - } - - void notifyGradientCollect(int paramId) { gradQueue_.enqueue(paramId); } - - void notifyCopyGradToBuffer(int paramId) { 
gradBufQueue_.enqueue(paramId); } - - void notifyValueDispatch(int paramId) { valueReadyQueue_.enqueue(paramId); } - - void prefetch(); - - /// copy the output gradient from the main GradientMachine. - void copyOutputGrad(); - - /// Whether the thread has input data. - bool hasInputData() { return batchSize_ != 0; } - - protected: - void mergeCpuGradients(); - - void mergeGradSparse( - Parameter* para, - std::vector*>& slaveParameters); - - void mergeGradSparseRemote( - Parameter* para, - std::vector*>& slaveParameters); - - void mergeGradDense( - Parameter* para, - std::vector*>& slaveParameters); - - void computeThread(); - void valueDispatchThread(); - void copyGradToBufferThread(); - void gradCollectThread(); - - int copyInArgs(); - void forward(); - void backward(); - void backwardCallback(Parameter* para); - - /// call the actuall callback supplied by the caller of - /// GradientMachine::backward - void doCallback(int pid); - - protected: - MultiGradientMachine* multiMachine_; - ModelConfig config_; - /// whether the thread should stop - bool stopping_; - /// the threads form which to collect gradient - int partnerId_; - /// from 0 to threads-1 - int threadId_; - int deviceId_; - std::unique_ptr gradientMachine_; - std::vector parameters_; - - /// ParameterType which needs to be merged from each GPU - std::vector mergeTypes_; - - /// compute thread - std::unique_ptr computeThread_; - std::vector inArgs_; - std::vector outArgs_; - Semaphore taskReadySem_; - Semaphore outArgsReadySem_; - - /// copy thread - std::unique_ptr copyThread_; - /// queue of gradient needs to be copied to partner - PidQueue gradBufQueue_; - hl_stream_t gradStream_; - - /// grad merge thread - std::unique_ptr gradCollectThread_; - /// queue of gradient needs to be merged with gradient coopied by - /// copyGradToBufferThread - PidQueue gradQueue_; - UpdateCallback backwardCallback_; - - /// value dispatch thread - std::unique_ptr valueDispatchThread_; - /// queue of the parameter whose 
the vale are ready for copy - PidQueue valueReadyQueue_; - - /// used to notify all the parameter values are ready - LockedCondition valueReadyCond_; - - hl_stream_t valueStream_; - /// how many parameters are updated - std::atomic updateCounter_; - bool parameterUpdated_; - - /// indicate whether inArgs is copied before forward() - bool inArgsCopied_; - int batchSize_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/MultiNetwork.cpp b/paddle/legacy/gserver/gradientmachines/MultiNetwork.cpp deleted file mode 100644 index 1245c441036a601025192ab23a6d2899b688a9dc..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/MultiNetwork.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -#include "MultiNetwork.h" - -#include "NeuralNetwork.h" -#include "ParallelNeuralNetwork.h" - -namespace paddle { - -void MultiNetwork::init(const ModelConfig& config, - ParamInitCallback callback, - const std::vector& parameterTypes, - bool useGpu) { - CHECK_GT(config.sub_models_size(), 1) << "sub_models_size should GT 1"; - // check submodel[0] is root - CHECK_EQ("root", config.sub_models(0).name()) - << "sub_models(0) should be root"; - // ignore root - subNetworks_.resize(config.sub_models_size() - 1); - // base class - NeuralNetwork::init(config, callback, parameterTypes, useGpu); - // sub networks - for (int i = 1; i < config.sub_models_size(); ++i) { - std::string subModelName = config.sub_models(i).name(); - if (FLAGS_parallel_nn) { - subNetworks_[i - 1] = std::unique_ptr( - new ParallelNeuralNetwork(subModelName, this)); - } else { - subNetworks_[i - 1] = std::unique_ptr( - NeuralNetwork::newNeuralNetwork(subModelName, this)); - } - subNetworks_[i - 1]->init(config); - } -} - -void MultiNetwork::prefetch(const std::vector& inArgs) { - std::vector> argumentGroups; - Argument::splitByDataId(inArgs, &argumentGroups); - // check group size is equal to sub network size - CHECK_EQ(argumentGroups.size(), subNetworks_.size()); - for (size_t i = 0; i < subNetworks_.size(); i++) { - if (argumentGroups[i].size() == 1 && argumentGroups[i][0].dataId == -1) { - // check input args: if dataId is -1, then skip this sub network - continue; - } - subNetworks_[i]->prefetch(argumentGroups[i]); - } -} - -void MultiNetwork::forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType) { - // split inArgs to several vectors - std::vector> argumentGroups; - Argument::splitByDataId(inArgs, &argumentGroups); - - // check group size is equal to sub network size - CHECK_EQ(argumentGroups.size(), subNetworks_.size()); - std::vector tempOutArgs; - outArgs->clear(); - - for 
(size_t i = 0; i < subNetworks_.size(); i++) { - tempOutArgs.clear(); - if (argumentGroups[i].size() == 1 && argumentGroups[i][0].dataId == -1) { - // check input args: if dataId is -1, then skip this sub network - continue; - } - subNetworks_[i]->forward(argumentGroups[i], &tempOutArgs, passType); - for (const auto& elem : tempOutArgs) { - outArgs->push_back(elem); - outArgs->back().dataId = i; - } - } -} - -void MultiNetwork::backward(const UpdateCallback& callback) { - for (size_t i = 0; i < subNetworks_.size(); i++) { - subNetworks_[i]->backward(callback); - } -} - -void MultiNetwork::forwardBackward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - const UpdateCallback& callback) { - forward(inArgs, outArgs, passType); - backward(callback); -} - -void MultiNetwork::onPassEnd() { - for (size_t i = 0; i < subNetworks_.size(); i++) { - subNetworks_[i]->onPassEnd(); - } -} - -void MultiNetwork::start() { - for (auto& subNetwork : subNetworks_) { - subNetwork->start(); - } -} - -void MultiNetwork::finish() { - for (size_t i = 0; i < subNetworks_.size(); i++) { - subNetworks_[i]->finish(); - } -} - -class MultiCombinedEvaluator : public Evaluator { - public: - MultiCombinedEvaluator() {} - void addEvaluator(std::unique_ptr&& evaluator) { - evaluators_.emplace_back(std::move(evaluator)); - } - virtual void start() { - for (auto& evaluator : evaluators_) { - evaluator->start(); - } - } - - virtual void finish() { - for (auto& evaluator : evaluators_) { - evaluator->finish(); - } - } - - virtual void eval(const NeuralNetwork& nn) { - const MultiNetwork& multiNetwork = dynamic_cast(nn); - CHECK_EQ(evaluators_.size(), multiNetwork.getSubNetworks().size()); - int size = evaluators_.size(); - for (int i = 0; i < size; i++) { - // one evaluator for one subNetwork - evaluators_[i]->eval(*multiNetwork.getSubNetworks()[i]); - } - } - - virtual real evalImp(std::vector& arguments) { - (void)arguments; - return -1; - } - - virtual void 
printStats(std::ostream& os) const { - for (auto& evaluator : evaluators_) { - evaluator->printStats(os); - os << ' '; - } - } - - virtual void distributeEval(ParameterClient2* client) { - for (auto& evaluator : evaluators_) { - evaluator->distributeEval(client); - } - } - - protected: - std::vector> evaluators_; -}; - -Evaluator* MultiNetwork::makeEvaluator() const { - MultiCombinedEvaluator* multiCombinedEvaluator = new MultiCombinedEvaluator(); - for (size_t i = 0; i < subNetworks_.size(); i++) { - std::unique_ptr evaluator(subNetworks_[i]->makeEvaluator()); - multiCombinedEvaluator->addEvaluator(std::move(evaluator)); - } - return multiCombinedEvaluator; -} - -void MultiNetwork::eval(Evaluator* evaluator) const { evaluator->eval(*this); } - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/MultiNetwork.h b/paddle/legacy/gserver/gradientmachines/MultiNetwork.h deleted file mode 100644 index afe15cb020ebe3bbe051800a72562c9543f3faa4..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/MultiNetwork.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "GradientMachine.h" -#include "NeuralNetwork.h" - -#include "paddle/legacy/utils/Locks.h" - -namespace paddle { - -class MultiNetwork : public NeuralNetwork { - public: - explicit MultiNetwork(std::string subModelName = "") - : NeuralNetwork(subModelName) {} - - virtual void init(const ModelConfig& config, - ParamInitCallback callback, - const std::vector& parameterTypes, - bool useGpu); - - virtual void prefetch(const std::vector& inArgs); - - virtual void forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType); - - virtual void backward(const UpdateCallback& callback = nullptr); - - void forwardBackward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - const UpdateCallback& callback); - - virtual void onPassEnd(); - - virtual Evaluator* makeEvaluator() const; - - virtual void eval(Evaluator* evaluator) const; - - const std::vector>& getSubNetworks() const { - return subNetworks_; - } - - virtual void start(); - - virtual void finish(); - - protected: - std::vector> subNetworks_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/legacy/gserver/gradientmachines/NeuralNetwork.cpp deleted file mode 100644 index 0f8048152ff317a1e445249fa7093158d2d4a5c5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/NeuralNetwork.cpp +++ /dev/null @@ -1,548 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/utils/Util.h" - -#include "NeuralNetwork.h" -#include "hl_gpu.h" -#include "paddle/legacy/utils/CustomStackTrace.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -#ifdef PADDLE_WITH_MKLDNN -#include "paddle/legacy/gserver/layers/MKLDNNLayer.h" -#endif - -#ifndef PADDLE_MOBILE_INFERENCE -#include "MultiNetwork.h" -#include "RecurrentGradientMachine.h" -#include "paddle/legacy/gserver/layers/AgentLayer.h" -#endif - -namespace paddle { -void parameterInitNN(int paramId, - Parameter* para, - std::vector* sharedParams) { - // Create parameters values. - if (!para->useGpu() && sharedParams) { - para->enableSharedType(PARAMETER_VALUE, - (*sharedParams)[paramId]->getBuf(PARAMETER_VALUE), - (*sharedParams)[paramId]->getMat(PARAMETER_VALUE)); - } else { - if (para->isSparseRemoteUpdate()) { - para->enableType(PARAMETER_VALUE, - FLAGS_loadsave_parameters_in_pserver - ? Parameter::MAT_SPARSE_ROW_PREFETCH - : Parameter::MAT_SPARSE_ROW_PREFETCH_FULL_SIZE); - } else { - para->enableType(PARAMETER_VALUE); - } - } - // Create parameter gradients. 
- if (para->isSparseRemoteUpdate() && !sharedParams) { - para->enableType(PARAMETER_GRADIENT, Parameter::MAT_SPARSE_ROW); - } else if (para->isGradSparseUpdate()) { - para->enableType(PARAMETER_GRADIENT, Parameter::MAT_SPARSE_ROW_AUTO_GROW); - } else if (!para->isStatic()) { - para->enableType(PARAMETER_GRADIENT); - } -} - -NeuralNetwork* NeuralNetwork::create(const ModelConfig& config) { -#ifndef PADDLE_MOBILE_INFERENCE - if (config.type() == "recurrent_nn") { - return newNeuralNetwork("root"); - } else if (config.type() == "multi_nn") { - return new MultiNetwork("root"); - } else { - return newNeuralNetwork(); - } -#else - return new NeuralNetwork(); -#endif -} - -std::map NeuralNetwork::dllInitMap; - -void NeuralNetwork::init(const ModelConfig& config, - ParamInitCallback callback, - const std::vector& parameterTypes, - bool useGpu) { - using std::placeholders::_1; - using std::placeholders::_2; - ParamInitCallback paramCallback = nullptr; - if (callback != nullptr) { - paramSelfInited_ = false; - paramCallback = callback; - } else { - paramSelfInited_ = true; - paramCallback = std::bind(parameterInitNN, _1, _2, nullptr); - } - config_ = config; - - if (rootNetwork_ != nullptr) { - // direct use parameters_ and parameterMap_ from base network - CHECK_EQ((size_t)config.parameters_size(), - rootNetwork_->getParameters().size()); - parameters_ = rootNetwork_->getParameters(); - parameterMap_ = *(rootNetwork_->getParameterMap()); - } else { - parameters_.reserve(config.parameters_size()); - for (const auto& para_config : config.parameters()) { - auto parameter = std::make_shared(para_config, - useGpu, - /*initialize=*/false); - paramCallback(parameters_.size(), parameter.get()); - if (!callback) { - for (ParameterType type : - (parameter->isStatic() - ? 
std::vector{PARAMETER_VALUE} - : parameterTypes)) { - if (type != PARAMETER_VALUE && type != PARAMETER_GRADIENT) { - parameter->enableType(type); - } - } - } - parameter->setID(parameters_.size()); - parameters_.push_back(parameter); - CHECK(!parameterMap_.count(parameter->getName())); - parameterMap_[parameter->getName()] = parameter; - } - } - - auto layerCreate = [&](const LayerConfig& layer_config) { - auto layer = Layer::create(layer_config); - CHECK(layer) << "Create layer failed. Layer name:" << layer->getName(); - layers_.push_back(layer); - CHECK(!layerMap_.count(layer->getName())); - layerMap_[layer->getName()] = layer; - }; - - auto subModelConfig = std::find_if(config.sub_models().begin(), - config.sub_models().end(), - [=](const SubModelConfig& sub_model) { - return sub_model.name() == subModelName_; - }); - bool useSubModel = (subModelConfig != config.sub_models().end()); - CHECK_EQ(useSubModel, !subModelName_.empty()); - if (useSubModel) { - layers_.reserve(subModelConfig->layer_names_size()); - for (const auto& layer_name : subModelConfig->layer_names()) { - auto layer_config = - std::find_if(config.layers().begin(), - config.layers().end(), - [=](const LayerConfig& layer_config) { - return layer_config.name() == layer_name; - }); - CHECK(layer_config != config.layers().end()); - layerCreate(*layer_config); - } - } else { - layers_.reserve(config.layers_size()); - for (const auto& layer_config : config.layers()) { - bool useLayer = true; - if (config.has_external_config()) { - useLayer = true; - for (const auto& name : config.external_config().layer_names()) { - if (layer_config.name() == name) { - useLayer = false; - break; - } - } - } - if (useLayer) { - layerCreate(layer_config); - } - } - } - - for (const auto& layer : layers_) { - layer->init(layerMap_, parameterMap_); - layer->initSubNetwork(this /*root*/, config_, parameterTypes, useGpu); - } - - for (const auto& layer_name : - (useSubModel ? 
subModelConfig->input_layer_names() - : config.input_layer_names())) { - auto it = layerMap_.find(layer_name); - CHECK(it != layerMap_.end()); - dataLayers_.push_back(std::dynamic_pointer_cast(it->second)); - } - - for (const auto& layer_name : - (useSubModel ? subModelConfig->output_layer_names() - : config.output_layer_names())) { - auto it = layerMap_.find(layer_name); - CHECK(it != layerMap_.end()); - outputLayers_.push_back(it->second); - } - - for (const auto& layer : layers_) { - const auto& name = layer->getName(); - bool isMiddleLayer = true; - - // if data layer - for (const auto& dataLayer : dataLayers_) { - if (name == dataLayer->getName()) { - isMiddleLayer = false; - break; - } - } - - // if output layer - for (const auto& dataLayer : outputLayers_) { - if (name == dataLayer->getName()) { - isMiddleLayer = false; - break; - } - } - - if (isMiddleLayer) { - middleLayers_.push_back(layer); - } - } -} - -void NeuralNetwork::connect(LayerPtr agentLayer, - LayerPtr realLayer, - int height) { -#ifndef PADDLE_MOBILE_INFERENCE - AgentLayer* agent = dynamic_cast(agentLayer.get()); - CHECK_NOTNULL(agent); - agent->setRealLayer(realLayer, height); -#endif -} - -void NeuralNetwork::connect(std::string agentLayerName, - NeuralNetwork* srcNN, - std::string realLayerName) { - connect(this->getLayer(agentLayerName), srcNN->getLayer(realLayerName)); -} - -void NeuralNetwork::prefetch(const std::vector& inArgs) { - CHECK_EQ(inArgs.size(), dataLayers_.size()); - - if (paramSelfInited_) { - for (auto& para : parameters_) { - if (para->isSparseRemoteUpdate()) { - auto mat = dynamic_cast( - para->getMat(PARAMETER_VALUE).get()); - para->clearGradient(); - if (mat) mat->clearIndices(); - } - } - } - - for (size_t i = 0; i != dataLayers_.size(); ++i) { - if (FLAGS_parallel_nn) { - const_cast(inArgs[i]).deviceId = -1; - } - dataLayers_[i]->setData(inArgs[i]); - } - - for (auto& layer : layers_) { - layer->prefetch(); - } - - if (paramSelfInited_) { - for (auto& para : 
parameters_) { - if (para->isSparseRemoteUpdate()) { - auto mat = dynamic_cast( - para->getMat(PARAMETER_VALUE).get()); - mat->setupIndices(); - auto matGrad = dynamic_cast( - para->getMat(PARAMETER_GRADIENT).get()); - matGrad->reserveStore(); - } - } - } -} - -void NeuralNetwork::forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType) { - CHECK_EQ(inArgs.size(), dataLayers_.size()); - outArgs->resize(outputLayers_.size()); - for (size_t i = 0; i != dataLayers_.size(); ++i) { - dataLayers_[i]->setData(inArgs[i]); - } - - gLayerStackTrace.set_stage(true); - - { - for (auto& layer : layers_) { - REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str()); - gLayerStackTrace.push(layer->getName()); - layer->forward(passType); - gLayerStackTrace.pop(layer->getName()); - } - } - - outArgs->clear(); - outArgs->reserve(outputLayers_.size()); - for (auto& layer : outputLayers_) { - outArgs->push_back(layer->getOutput()); - } -} - -void NeuralNetwork::resetState() { - for (auto& layer : layers_) { - layer->resetState(); - } -} - -void NeuralNetwork::setState(const MachineState& machineState) { - for (size_t i = 0; i < layers_.size(); i++) { - if (machineState[i] != nullptr) { - layers_[i]->setState(machineState[i]); - } - } -} - -void NeuralNetwork::getState(MachineState& machineState) { - machineState.clear(); - machineState.reserve(layers_.size()); - for (auto& layer : layers_) { - LayerStatePtr p = layer->getState(); - machineState.push_back(p); - } -} - -void NeuralNetwork::backward(const UpdateCallback& callback) { - gLayerStackTrace.set_stage(false); - FOR_EACH_R(layer, layers_) { - REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); - gLayerStackTrace.push((*layer)->getName()); - if ((*layer)->needGradient()) { - (*layer)->backward(callback); - } - gLayerStackTrace.pop((*layer)->getName()); - } -} - -void NeuralNetwork::finish() { -#ifdef PADDLE_WITH_MKLDNN - FOR_EACH_R(layer, layers_) { - MKLDNNLayerPtr dnnLayer = 
std::dynamic_pointer_cast(*layer); - if (dnnLayer) { - dnnLayer->convertWeightsToPaddle(); - } - } -#endif -} - -Argument NeuralNetwork::getLayerOutput(const std::string& layerName) { - return getLayer(layerName)->getOutput(); -} - -void NeuralNetwork::onPassEnd() { - for (auto& layer : layers_) { - layer->onPassEnd(); - } -} - -void NeuralNetwork::releaseOutput() { - for (auto& layer : middleLayers_) { - Argument& arg = layer->getOutput(); - arg.value.reset(); - } -} - -#ifndef PADDLE_MOBILE_INFERENCE - -class CombinedEvaluator : public Evaluator { - public: - void addEvaluator(std::unique_ptr&& evaluator) { - evaluators_.emplace_back(std::move(evaluator)); - } - void start() override { - for (auto& evaluator : evaluators_) { - evaluator->start(); - } - } - - void finish() override { - for (auto& evaluator : evaluators_) { - evaluator->finish(); - } - } - - void eval(const NeuralNetwork& nn) override { - for (auto& evaluator : evaluators_) { - evaluator->eval(nn); - } - } - real evalImp(std::vector& arguments) override { - (void)arguments; - return -1; - } - void printStats(std::ostream& os) const override { - for (auto& evaluator : evaluators_) { - evaluator->printStats(os); - os << ' '; - } - } - - void distributeEval(ParameterClient2* client) override { - for (auto& evaluator : evaluators_) { - evaluator->distributeEval(client); - } - } - - protected: - std::vector> evaluators_; - - // Evaluator interface - public: - /** - * @brief getNames will return all inside evaluators' names. - * @param names [out]: return names. - */ - void getNames(std::vector* names) override { - for (auto& eval : evaluators_) { - eval->getNames(names); - } - } - - /** - * @brief getValue could get all inside evaluators' value. 
- */ - real getValue(const std::string& name, Error* err) const override { - return this->getMethodHelper( - name, err, [&name, err](const std::unique_ptr& eval) { - return eval->getValue(name, err); - }); - } - - /** - * @brief getType could get all inside evaluators' type. - */ - std::string getType(const std::string& name, Error* err) const override { - return this->getMethodHelper( - name, err, [&name, err](const std::unique_ptr& eval) { - return eval->getType(name, err); - }); - } - - private: - template - T getMethodHelper(const std::string& name, - Error* err, - const std::function&)>& - callback) const { - for (auto& eval : evaluators_) { - std::vector names; - eval->getNames(&names); - if (std::find(names.begin(), names.end(), name) != names.end()) { - return callback(eval); - } - } - *err = Error("No such key %s", name.c_str()); - return T(); - } -}; - -class SubnetEvaluator : public CombinedEvaluator { - public: - SubnetEvaluator(const std::string& layerName, - std::unique_ptr&& evaluator) - : layerName_(layerName) { - addEvaluator(std::move(evaluator)); - } - void eval(const NeuralNetwork& nn) override { - const LayerPtr& layer = nn.getLayer(layerName_); - CHECK(layer) << "Nonexisted layer: " << layerName_ << " in submodel " - << nn.getName(); - bool accessed = false; - layer->accessSubNetwork([this, &accessed](NeuralNetwork& subnet) { - subnet.eval(evaluators_[0].get()); - accessed = true; - }); - CHECK(accessed) << "There is no subnetwork for layer " << layerName_ - << " in submodel " << nn.getName(); - } - - protected: - std::string layerName_; -}; - -Evaluator* NeuralNetwork::makeEvaluator() const { - CombinedEvaluator* combinedEvaluator = new CombinedEvaluator(); - auto subModelConfig = std::find_if(config_.sub_models().begin(), - config_.sub_models().end(), - [=](const SubModelConfig& sub_model) { - return sub_model.name() == subModelName_; - }); - bool useSubModel = (subModelConfig != config_.sub_models().end()); - CHECK_EQ(useSubModel, 
!subModelName_.empty()); - if (useSubModel) { - // create the evaluators that belong to CURRENT submodel - for (int i = 0; i < subModelConfig->evaluator_names_size(); ++i) { - // find evaluator by name - auto thisEvalConfig = std::find_if( - config_.evaluators().begin(), - config_.evaluators().end(), - [=](const EvaluatorConfig& ecfg) { - return ecfg.name() == subModelConfig->evaluator_names(i); - }); - bool validConfig = (thisEvalConfig != config_.evaluators().end()); - if (validConfig) { - std::unique_ptr evaluator( - Evaluator::create(*thisEvalConfig)); - combinedEvaluator->addEvaluator(std::move(evaluator)); - } - } - for (auto& layer : layers_) { - layer->accessSubNetwork( - [layer, combinedEvaluator](NeuralNetwork& subnet) { - std::unique_ptr subEvaluator(new SubnetEvaluator( - layer->getName(), - std::unique_ptr(subnet.makeEvaluator()))); - combinedEvaluator->addEvaluator(std::move(subEvaluator)); - }); - } - } else { - for (const EvaluatorConfig& evalConfig : config_.evaluators()) { - std::unique_ptr evaluator(Evaluator::create(evalConfig)); - combinedEvaluator->addEvaluator(std::move(evaluator)); - } - } - return combinedEvaluator; -} - -void NeuralNetwork::eval(Evaluator* evaluator) const { evaluator->eval(*this); } - -#endif - -void NeuralNetwork::setOutputGrad(const std::vector& args) { - CHECK_GE(outputLayers_.size(), args.size()); - for (size_t i = 0; i < args.size(); ++i) { - outputLayers_[i]->getOutput().grad = args[i].grad; - } -} - -extern NeuralNetwork* newCustomNerualNetwork(const std::string& name, - NeuralNetwork* network) - __attribute__((weak)); - -NeuralNetwork* NeuralNetwork::newNeuralNetwork(const std::string& name, - NeuralNetwork* rootNetwork) { - if (newCustomNerualNetwork) { - return newCustomNerualNetwork(name, rootNetwork); - } else { - return new NeuralNetwork(name, rootNetwork); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/NeuralNetwork.h 
b/paddle/legacy/gserver/gradientmachines/NeuralNetwork.h deleted file mode 100644 index 566157c8998a38aef4a3620a4dca7246c6e66391..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/NeuralNetwork.h +++ /dev/null @@ -1,179 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include - -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/dataproviders/DataProvider.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" -#include "paddle/legacy/gserver/layers/CostLayer.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/gserver/layers/Layer.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/utils/ClassRegistrar.h" - -namespace paddle { -/* - * @brief Init function for the parameters. - * @param paramId: the id of the parameter to init. - * @param para: the pointer to the parameter to init. - * @param sharedParams: the pointer to an array of the parameter to be shared. - * If it is null, no parameter sharing is used. - * Only CPU paramters can be shared. - * It handles CPU, CPU sparse, CPU sparse remote, - * and GPU parameters differently. If the type - * of a parameter is NORMAL. Basically nothing need to be done. - * CPU value: NORMAL. - * CPU param: NORMAL. - * - * CPU sparse value: NORMAL. - * CPU sparse gradient: MAT_SPARSE_ROW_AUTO_GROW. 
- * - * CPU sparse remote value: MAT_SPARSE_ROW_PREFETCH(_FULL_SIZE). - * CPU sparse remote gradient: MAT_SPARSE_ROW (!sharedParams) - * MAT_SPARSE_ROW_AUTO_GROW (sharedParams) - * - * GPU value: NORMAL - * GPU param: NORMAL - */ -void parameterInitNN(int paramId, - Parameter* para, - std::vector* sharedParams); - -class NeuralNetwork : public GradientMachine { - public: - virtual void init(const ModelConfig& config, - ParamInitCallback callback = nullptr, - const std::vector& parameterTypes = - std::vector{PARAMETER_VALUE, - PARAMETER_GRADIENT, - PARAMETER_MOMENTUM}, - bool useGpu = FLAGS_use_gpu); - - /** - * Connect two submodels and - * down-submodel's output become up-submodel's input. - * By default, connection is one by one, - * If the agent height is smaller than real layer, *height* has to be filled. - * - * @param realLayer The down-submodel's output layer. - * @param agentLayer The up-submodel's input agent layer. - */ - static void connect(LayerPtr agentLayer, LayerPtr realLayer, int height = 0); - void connect(std::string agentLayerName, - NeuralNetwork* srcNN, - std::string realLayerName); - - virtual void prefetch(const std::vector& inArgs); - - virtual void forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType); - - virtual void backward(const UpdateCallback& callback = nullptr); - - virtual Argument getLayerOutput(const std::string& layerName); - - const LayerPtr& getLayer(const std::string& layerName) const { - auto it = layerMap_.find(layerName); - CHECK(it != layerMap_.end()) << "Unknown layer " << layerName; - return it->second; - } - - virtual void onPassEnd(); - -#ifndef PADDLE_MOBILE_INFERENCE - virtual Evaluator* makeEvaluator() const; - - virtual void eval(Evaluator* evaluator) const; -#endif - - virtual void resetState(); - virtual void setOutputGrad(const std::vector& args); - - /// set machine state - virtual void setState(const MachineState& machineState); - - /// get machine state - virtual void 
getState(MachineState& machineState); - - static NeuralNetwork* create(const ModelConfig& config); - - ParameterMap* getParameterMap() { return ¶meterMap_; } - - /** - * @brief Access each layer as a for each loop. - * @param callback invoke with each layer. - */ - template - void forEachLayer(T callback) { - for (auto& l : layers_) { - if (callback(l)) { - break; - } - } - } - - static NeuralNetwork* newNeuralNetwork(const std::string& name = "", - NeuralNetwork* rootNetwork = nullptr); - - const std::string& getName() const { return subModelName_; } - - /// some finish work, like convert the weight format of MKLDNNLayers - void finish(); - - /** - * @brief Release the middle layer's output memory. - * - * @note This function is used for memory optimization in inference. - */ - void releaseOutput(); - - protected: - /** - * The constructor of NeuralNetwork. - * The sub networks can get parameters_ and parameterMap_ - * from base NeuralNetwork. - * - * @param subModelName The name of sub-model. - * @param rootNetwork It used in MultiNetwork. 
- */ - NeuralNetwork(std::string subModelName = "", - NeuralNetwork* rootNetwork = nullptr) - : subModelName_(subModelName), rootNetwork_(rootNetwork) {} - - std::string subModelName_; - ModelConfig config_; - std::vector layers_; - ParameterMap parameterMap_; - LayerMap layerMap_; - - std::vector dataLayers_; - std::vector outputLayers_; - std::vector middleLayers_; - - static std::map dllInitMap; - - NeuralNetwork* rootNetwork_; - - /// Whether parameter of this NN is initialized by its own - /// (i.e., not by callback supplied with the caller) - bool paramSelfInited_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp b/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp deleted file mode 100644 index 33d24b5b832fe9011591606860e0f50361367790..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -#include "ParallelNeuralNetwork.h" - -#include -#include - -namespace paddle { - -void ParallelNeuralNetwork::init( - const ModelConfig& config, - ParamInitCallback callback, - const std::vector& parameterTypes, - bool useGpu) { - NeuralNetwork::init(config, callback, parameterTypes, useGpu); - - if (config.type() == "recurrent_nn") { - LOG(FATAL) - << "You can not add `--parallel_nn=true` on the command line, " - << "parallel_nn training mode does not support the recurrent_nn model."; - } - - useGpu_ = useGpu; - numDevices_ = 0; - if (useGpu_) { - numDevices_ = hl_get_device_count(); - } - - for (auto& layer : layers_) { - int deviceId = layer->getDeviceId(); - CHECK_LT(deviceId, numDevices_); - addComputeThread(deviceId); - } -} - -void ParallelNeuralNetwork::addComputeThread(int deviceId) { - for (auto& thread : threads_) { - if (thread->getDeviceId() == deviceId) { - return; - } - } - - threads_.emplace_back(new ParallelThread( - threads_.size(), deviceId, deviceId >= 0 ? 
useGpu_ : false)); -} - -void ParallelNeuralNetwork::waitAllThread() { - for (auto& thread : threads_) { - thread->jobEnqueue(NULL, TASK_END_LAYER); - } - - for (size_t i = 0; i < threads_.size(); i++) { - threads_[i]->queue_.waitEmpty(); - } -} - -void ParallelNeuralNetwork::dispatchByDeviceId(int deviceId, - LayerPtr layer, - TaskType task) { - for (auto& thread : threads_) { - if (thread->getDeviceId() == deviceId) { - thread->jobEnqueue(layer, task); - return; - } - } - LOG(FATAL) << "No specific device thread "; -} - -void ParallelNeuralNetwork::forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType) { - for (auto& thread : threads_) { - thread->setForwardPassType(passType); - } - CHECK_EQ(inArgs.size(), dataLayers_.size()); - outArgs->resize(outputLayers_.size()); - for (size_t i = 0; i != dataLayers_.size(); ++i) { - const_cast(inArgs[i]).deviceId = -1; - dataLayers_[i]->setData(inArgs[i]); - } - - for (auto& layer : layers_) { - dispatchByDeviceId(layer->getDeviceId(), layer, TASK_FORWARD); - } - - { - REGISTER_TIMER("forwardTime"); - waitAllThread(); - } - outArgs->clear(); - outArgs->reserve(outputLayers_.size()); - for (auto& layer : outputLayers_) { - outArgs->push_back(layer->getOutput()); - } -} - -void ParallelNeuralNetwork::backward(const UpdateCallback& callback) { - for (auto& thread : threads_) { - thread->setBackwardCallback(callback); - } - - FOR_EACH_R(layer, layers_) { - dispatchByDeviceId((*layer)->getDeviceId(), *layer, TASK_BACKWARD); - } - { - REGISTER_TIMER("backwardTime"); - waitAllThread(); - } -} - -void ParallelNeuralNetwork::forwardBackward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - const UpdateCallback& callback) { - forward(inArgs, outArgs, passType); - backward(callback); -} - -void ParallelNeuralNetwork::start() { - for (auto& thread : threads_) { - thread->start(); - } -} - -ParallelThread::ParallelThread(int threadId, int deviceId, bool useGpu) - : threadId_(threadId), 
deviceId_(deviceId), useGpu_(useGpu) {} - -ParallelThread::~ParallelThread() { stop(); } - -void ParallelThread::stop() { - if (computeThread_) { - jobEnqueue(NULL, TASK_THREAD_FINISH); - computeThread_->join(); - computeThread_.reset(nullptr); - } -} - -void ParallelThread::computeThread() { - LOG(INFO) << "gradComputeThread " << threadId_; - - if (useGpu_) { - hl_init(deviceId_); - } - - while (true) { - struct Job job_work = queue_.dequeue(); - - if (job_work.task_ == TASK_END_LAYER) { - continue; - } else if (job_work.task_ == TASK_THREAD_FINISH) { - break; - } - - if (TASK_FORWARD == job_work.task_) { - { - REGISTER_TIMER_INFO("waitInputValue", - job_work.layer_->getName().c_str()); - job_work.layer_->waitInputValue(); - } - { - REGISTER_TIMER_INFO("threadForwardTimer", - job_work.layer_->getName().c_str()); - job_work.layer_->forward(passType_); - } - { - REGISTER_TIMER_INFO("copyOutputToOtherDevice", - job_work.layer_->getName().c_str()); - job_work.layer_->copyOutputToOtherDevice(); - } - } else { - { - REGISTER_TIMER_INFO("waitAndMergeOutputGrad", - job_work.layer_->getName().c_str()); - job_work.layer_->waitAndMergeOutputGrad(); - } - { - REGISTER_TIMER_INFO("threadBackwardTimer", - job_work.layer_->getName().c_str()); - job_work.layer_->backward(backwardCallback_); - } - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - job_work.layer_->markAllInputGrad(); - } - } - hl_fini(); -} - -void ParallelThread::start() { - computeThread_.reset(new std::thread([this]() { computeThread(); })); -} - -void ParallelThread::jobEnqueue(LayerPtr layer, TaskType task) { - struct Job job_work; - job_work.layer_ = layer; - job_work.task_ = task; - queue_.enqueue(job_work); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.h b/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.h deleted file mode 100644 index c091459506ad477bed3f429a22071eccedd664bb..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.h +++ /dev/null @@ -1,113 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "NeuralNetwork.h" - -namespace paddle { - -class ParallelThread; - -enum TaskType { - TASK_FORWARD = 0, - TASK_BACKWARD = 1, - TASK_END_LAYER = 2, - TASK_THREAD_FINISH = 3, -}; - -/** - * A ParallelNeuralNetwork is capable of calculating a neural network through - * multiple threads in parallel. 
- */ -class ParallelNeuralNetwork : public NeuralNetwork { - public: - ParallelNeuralNetwork(std::string subModelName = "", - NeuralNetwork *rootNetwork = nullptr) - : NeuralNetwork(subModelName, rootNetwork) {} - - virtual void init(const ModelConfig &config, - ParamInitCallback callback = nullptr, - const std::vector ¶meterTypes = - std::vector{PARAMETER_VALUE, - PARAMETER_GRADIENT, - PARAMETER_MOMENTUM}, - bool useGpu = FLAGS_use_gpu); - - virtual void forward(const std::vector &inArgs, - std::vector *outArgs, - PassType passType); - - virtual void backward(const UpdateCallback &callback = nullptr); - - void forwardBackward(const std::vector &inArgs, - std::vector *outArgs, - PassType passType, - const UpdateCallback &callback = NULL); - - virtual void start(); - - void addComputeThread(int deviceId); - - void dispatchByDeviceId(int deviceId, LayerPtr layer, TaskType task); - - void waitAllThread(); - - // virtual void eval(Evaluator* evaluator); - - protected: - bool useGpu_; - /// number of gpu devices - int numDevices_; - std::vector> threads_; -}; - -class ParallelThread { - public: - ParallelThread(int threadId, int deviceId, bool useGpu); - ~ParallelThread(); - void jobEnqueue(LayerPtr layer, TaskType task); - void start(); - void stop(); - int getDeviceId() const { return deviceId_; } - - void setBackwardCallback(const UpdateCallback &callback) { - backwardCallback_ = callback; - } - void setForwardPassType(PassType passType) { passType_ = passType; } - - protected: - void computeThread(); - - public: - struct Job { - LayerPtr layer_; - TaskType task_; - }; - typedef Queue JobQueue; - JobQueue queue_; - - protected: - /// from 0 to threads-1 - int threadId_; - /// the GPU device Id which the computeThread_ used - int deviceId_; - bool useGpu_; - std::unique_ptr computeThread_; - /// whether the thread should stop - bool stopping_; - UpdateCallback backwardCallback_; - PassType passType_; -}; -} // namespace paddle diff --git 
a/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.cpp deleted file mode 100644 index e49f042404f80a21293545023efa3e68417c1edb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ /dev/null @@ -1,1501 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "RecurrentGradientMachine.h" -#include -#include -#include -#include -#include -#include "NeuralNetwork.h" -#include "paddle/legacy/gserver/layers/AgentLayer.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -DEFINE_string(diy_beam_search_prob_so, "", "the diy beam search cost so"); - -static const char* DIY_CALC_PROB_SYMBOL_NAME = "calc_prob"; -static const char* DIY_START_CALC_PROB_SYMBOL_NAME = "start_calc_prob"; -static const char* DIY_FINISH_CALC_PROB_SYMBOL_NAME = "finish_calc_prob"; - -namespace paddle { - -/** - * Start Custom Calculate Probability callback type. - * - * @param nNode, nodes: the path will be explored. nNodes is array size. - * nodes is array elements. - * - * @return: A custom handler id that will passed to another callback. - */ -typedef int (*DiyStartCalcProbCallback)(size_t nNodes, int* nodes); - -/** - * Doing Custom Calculation of Probability callback type. - * - * @param handler: User custom handler. 
The return value from start calc prob. - * @param nNode, nodes: Array. The current path. - * @param curProb: The current log probability that neural network returns. - * - * @return: Log probability which user calculated, it will be updated to this - * path. - * @NOTE: Return -INFINITY will DROP this path IMMEDIATELY!! - */ -typedef real (*DiyCalcProbCallback)( - int handler, size_t nNodes, int* nodes, real curProb, bool atEos); - -/** - * Finish Custom Calculation of Probability callback type. - * - * @param handler: User custom handler. The return value from start calc prob. - */ -typedef void (*DiyStopCalcProbCallback)(int handler); - -static DiyCalcProbCallback gDiyProbMethod = nullptr; -static DiyStartCalcProbCallback gDiyProbStart = nullptr; -static DiyStopCalcProbCallback gDiyProbStop = nullptr; -static void* gDiyProbHandle = nullptr; - -static void exit_diy_prob() { dlclose(gDiyProbHandle); } - -template -static inline SymbolType loadDiySymbol(const char* symbolName) { - void* sym = dlsym(gDiyProbHandle, symbolName); - CHECK(sym) << "Cannot load symbol " << symbolName << " from " - << FLAGS_diy_beam_search_prob_so; - return reinterpret_cast(sym); -} - -static InitFunction __init__diy_prob_method( - [] { - std::string soName = FLAGS_diy_beam_search_prob_so; - if (!soName.empty()) { - gDiyProbHandle = dlopen(soName.c_str(), RTLD_LAZY); - CHECK(gDiyProbHandle) << "Cannot Open DIY Prob So " << soName; - atexit(exit_diy_prob); - gDiyProbMethod = - loadDiySymbol(DIY_CALC_PROB_SYMBOL_NAME); - gDiyProbStart = loadDiySymbol( - DIY_START_CALC_PROB_SYMBOL_NAME); - gDiyProbStop = loadDiySymbol( - DIY_FINISH_CALC_PROB_SYMBOL_NAME); - } - }, - std::numeric_limits::max()); - -class BeamSearchControlCallbacks { - public: - RecurrentGradientMachine::BeamSearchCandidatesAdjustCallback - beamSearchCandidateAdjust; - RecurrentGradientMachine::NormOrDropNodeCallback normOrDropNode; - RecurrentGradientMachine::DropCallback stopDetermineCandidates; - - //! 
for gcc46 aggregate initialization is not very well, so we need to - //! explicit - BeamSearchControlCallbacks( - const RecurrentGradientMachine::BeamSearchCandidatesAdjustCallback& - candidateAdjust, - const RecurrentGradientMachine::NormOrDropNodeCallback& norm, - const RecurrentGradientMachine::DropCallback& stop) - : beamSearchCandidateAdjust(candidateAdjust), - normOrDropNode(norm), - stopDetermineCandidates(stop) {} -}; - -class BeamSearchStatisticsCallbacks { - public: - RecurrentGradientMachine::EachStepCallback onEachStepStarted; - RecurrentGradientMachine::EachStepCallback onEachStepStoped; - - BeamSearchStatisticsCallbacks( - const RecurrentGradientMachine::EachStepCallback& start, - const RecurrentGradientMachine::EachStepCallback& stop) - : onEachStepStarted(start), onEachStepStoped(stop) {} -}; - -RecurrentGradientMachine::RecurrentGradientMachine( - const std::string& subModelName, NeuralNetwork* rootNetwork) - : NeuralNetwork(subModelName), - rootNetwork_(rootNetwork), - beamSearchCtrlCallbacks_(nullptr), - beamSearchStatistics_(nullptr) { - CHECK(!subModelName_.empty()); -} - -/** - * bias layer, as input of memory frame 0 will give vector of zeros - * if bias parameter is not set. - * - * boot bias layer create directly in recurrent gradient machine, because: - * - * 1. It is only one frame, so it should not be placed in layer group, - * which is one instance for every one frame. - * - * 2. It is no input layer, so it need resetHeight() before forward(), - * and resetHeight() must be called in recurrent gradient machine, - * so it's should not be placed in root network. 
- */ -class BootBiasLayer : public Layer { - protected: - std::unique_ptr biases_; - IVectorPtr cpuIds_; - - public: - explicit BootBiasLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - if (!Layer::init(layerMap, parameterMap)) return false; - - if (biasParameter_) { - biases_ = - std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - return true; - } - - void resetHeight(int height) { - if (config_.has_bos_id()) { // used as a constant id layerConfig - IVector::resizeOrCreate(output_.ids, height, useGpu_); - output_.ids->reset((int)config_.bos_id()); - } else { - resetOutput(height, getSize()); - } - } - - void forward(PassType passType) override { - if (biases_) { - MatrixPtr outV = getOutputValue(); - outV->addBias(*(biases_->getW()), 1); - forwardActivation(); - } - } - - void backward(const UpdateCallback& callback) override { - if (biases_ && biases_->getWGrad()) { - backwardActivation(); - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - biases_->getParameterPtr()->incUpdate(callback); - } - } -}; - -void RecurrentGradientMachine::init( - const ModelConfig& config, - ParamInitCallback callback, - const std::vector& parameterTypes, - bool useGpu) { - NeuralNetwork::init(config, callback, parameterTypes, useGpu); - useGpu_ = useGpu; - - auto subModelConfig = - std::find_if(config.sub_models().begin(), - config.sub_models().end(), - [this](const SubModelConfig& sub_model) { - return sub_model.name() == this->subModelName_; - }); - CHECK(subModelConfig != config.sub_models().end()); - reversed_ = subModelConfig->reversed(); - generating_ = subModelConfig->has_generator(); - - inFrameLines_.resize(subModelConfig->in_links_size()); - for (size_t i = 0; i < inFrameLines_.size(); ++i) { - inFrameLines_[i].linkName = subModelConfig->in_links(i).link_name(); - inFrameLines_[i].inLayer = - rootNetwork_->getLayer(subModelConfig->in_links(i).layer_name()); - } - - 
outFrameLines_.resize(subModelConfig->out_links_size()); - for (size_t i = 0; i < outFrameLines_.size(); ++i) { - auto& linkPair = subModelConfig->out_links(i); - outFrameLines_[i].layerName = linkPair.layer_name(); - outFrameLines_[i].agentLayer = rootNetwork_->getLayer(linkPair.link_name()); - } - - memoryFrameLines_.resize(subModelConfig->memories_size()); - for (size_t i = 0; i < memoryFrameLines_.size(); ++i) { - auto& memoryConfig = subModelConfig->memories(i); - memoryFrameLines_[i].layerName = memoryConfig.layer_name(); - memoryFrameLines_[i].linkName = memoryConfig.link_name(); - auto agentConfig = - std::find_if(config.layers().begin(), - config.layers().end(), - [&memoryConfig](const LayerConfig& layerConfig) { - return layerConfig.name() == memoryConfig.link_name(); - }); - CHECK(agentConfig != config.layers().end()); - if (memoryConfig.has_boot_layer_name()) { - memoryFrameLines_[i].rootLayer = - rootNetwork_->getLayer(memoryConfig.boot_layer_name()); - - LayerConfig scatterConfig = *agentConfig; - memoryFrameLines_[i].rootAgent.reset( - new ScatterAgentLayer(scatterConfig)); - memoryFrameLines_[i].rootAgent->init(LayerMap(), parameterMap_); - - memoryFrameLines_[i].bootLayer = memoryFrameLines_[i].rootAgent; - } else { - LayerConfig biasConfig = *agentConfig; - if (memoryConfig.has_boot_bias_parameter_name()) { - biasConfig.set_bias_parameter_name( - memoryConfig.boot_bias_parameter_name()); - biasConfig.set_active_type(memoryConfig.boot_bias_active_type()); - } else if (memoryConfig.has_boot_with_const_id()) { - biasConfig.set_bos_id(memoryConfig.boot_with_const_id()); - } - memoryFrameLines_[i].biasLayer.reset(new BootBiasLayer(biasConfig)); - memoryFrameLines_[i].biasLayer->init(LayerMap(), parameterMap_); - - memoryFrameLines_[i].bootLayer = memoryFrameLines_[i].biasLayer; - } - - if (subModelConfig->has_generator()) { - memoryFrameLines_[i].scatterAgents.resize(2); - for (auto& agent : memoryFrameLines_[i].scatterAgents) { - agent.reset(new 
ScatterAgentLayer(*agentConfig)); - agent->init(LayerMap(), parameterMap_); - } - } - } - - if (subModelConfig->has_generator()) { - generator_.config = subModelConfig->generator(); - eosFrameLine_.reset(new EosFrameLine); - maxSequenceLength_ = generator_.config.max_num_frames(); - } - - // get parameters actually used by this Layer Group - resizeOrCreateFrames(1); - for (auto& para : frames_[0]->getParameters()) { - if (para->getSharedCount() > 0) { - parameterIds_.push_back(para->getID()); - } - } - for (auto& para : parameters_) { // bias layer parameters - if (para->getSharedCount() > 0) { - parameterIds_.push_back(para->getID()); - } - } -} - -void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) { - if ((size_t)numFrames <= frames_.size()) { - return; - } - - frames_.reserve(numFrames); - for (auto& inFrameLine : inFrameLines_) { - inFrameLine.agents.reserve(numFrames); - } - for (auto& outFrameLine : outFrameLines_) { - outFrameLine.frames.reserve(numFrames); - } - for (auto& memoryFrameLine : memoryFrameLines_) { - memoryFrameLine.frames.reserve(numFrames); - memoryFrameLine.agents.reserve(numFrames); - } - if (eosFrameLine_) { - eosFrameLine_->layers.reserve(numFrames); - } - - ParamInitCallback subParamInitCb = [this](int paramId, Parameter* para) { - para->enableSharedType(PARAMETER_VALUE, - this->parameters_[paramId]->getBuf(PARAMETER_VALUE), - this->parameters_[paramId]->getMat(PARAMETER_VALUE)); - para->enableSharedType( - PARAMETER_GRADIENT, - this->parameters_[paramId]->getBuf(PARAMETER_GRADIENT), - this->parameters_[paramId]->getMat(PARAMETER_GRADIENT)); - }; - - for (int i = frames_.size(); i < numFrames; ++i) { - std::unique_ptr frame( - NeuralNetwork::newNeuralNetwork(subModelName_)); - frame->init(config_, subParamInitCb); - - for (auto& inFrameLine : inFrameLines_) { - inFrameLine.agents.push_back(frame->getLayer(inFrameLine.linkName)); - } - - for (auto& outFrameLine : outFrameLines_) { - 
outFrameLine.frames.push_back(frame->getLayer(outFrameLine.layerName)); - } - for (auto& memoryFrameLine : memoryFrameLines_) { - memoryFrameLine.frames.push_back( - frame->getLayer(memoryFrameLine.layerName)); - memoryFrameLine.agents.push_back( - frame->getLayer(memoryFrameLine.linkName)); - } - if (eosFrameLine_) { - eosFrameLine_->layers.push_back( - frame->getLayer(generator_.config.eos_layer_name())); - } - - frames_.emplace_back(std::move(frame)); - } -} - -void RecurrentGradientMachine::resizeBootFrame(int numSequences) { - for (auto& memoryFrameLine : memoryFrameLines_) { - if (memoryFrameLine.biasLayer) { - auto biasLayer = - dynamic_cast(memoryFrameLine.biasLayer.get()); - CHECK_NOTNULL(biasLayer); - biasLayer->resetHeight(numSequences); - } else { // check input root layer height - CHECK_EQ(numSequences, - memoryFrameLine.rootLayer->getOutput().getNumSequences()); - } - } -} - -void RecurrentGradientMachine::prefetch(const std::vector& inArgs) { - LOG(FATAL) << "should not use this function"; -} - -void RecurrentGradientMachine::checkInputConsistency( - int inlinkId, const std::vector& seqInfo) { - if (commonSeqInfo_.empty()) { - commonSeqInfo_.resize(seqInfo.size()); - for (size_t i = 0; i < seqInfo.size(); ++i) { - commonSeqInfo_[i].topLevelLength = seqInfo[i].topLevelLength; - commonSeqInfo_[i].seqId = seqInfo[i].seqId; - } - } else { - CHECK_EQ(commonSeqInfo_.size(), seqInfo.size()) - << " RecurrentGroup " << subModelName_ << " input " << inlinkId - << " has mismatched number of sequences"; - for (size_t i = 0; i < seqInfo.size(); ++i) { - CHECK_EQ(commonSeqInfo_[i].topLevelLength, seqInfo[i].topLevelLength) - << " RecurrentGroup " << subModelName_ << " input " << inlinkId - << " has mismatched sequence length"; - CHECK_EQ(commonSeqInfo_[i].seqId, seqInfo[i].seqId) - << " RecurrentGroup " << subModelName_ << " input " << inlinkId - << " has mismatched sequence length"; - } - } -} - -void RecurrentGradientMachine::calcNumSequencesAtEachStep() { - int 
numSequences = commonSeqInfo_.size(); - numSeqs_.resize(maxSequenceLength_); - for (int i = 0; i < numSequences; ++i) { - for (int j = 0; j < commonSeqInfo_[i].topLevelLength; ++j) { - numSeqs_[j] = i + 1; - } - } -} - -void RecurrentGradientMachine::reorganizeInput(PassType passType) { - info_.clear(); - info_.resize(inFrameLines_.size()); - - commonSeqInfo_.clear(); - seqInfos_.clear(); - seqInfos_.resize(inFrameLines_.size()); - - for (size_t i = 0; i < inFrameLines_.size(); i++) { - const Argument& input = inFrameLines_[i].inLayer->getOutput(); - if (!input.hasSeq()) { - continue; - } - input.getSeqInfo(&seqInfos_[i]); - checkInputConsistency(i, seqInfos_[i]); - } - CHECK(!commonSeqInfo_.empty()) - << "At least one input needs to be sequence or subsequence"; - maxSequenceLength_ = commonSeqInfo_[0].topLevelLength; - - calcNumSequencesAtEachStep(); - - for (size_t i = 0; i < inFrameLines_.size(); ++i) { - const Argument& input = inFrameLines_[i].inLayer->getOutput(); - if (!input.hasSeq()) { - seqInfos_[i] = commonSeqInfo_; - } - createInFrameInfo(i, input, passType); - } - - { - AsyncGpuBlock asyncGpuBlock; - - // inFrameLine select rows in real layer one time - for (size_t i = 0; i < inFrameLines_.size(); i++) { - selectRowsOneTime(inFrameLines_[i].inLayer, - info_[i].allIds, - &(inFrameLines_[i].outArg), - passType); - } - } -} - -void RecurrentGradientMachine::reorganizeOutput(PassType passType) { - calcSequenceStartPositions(); - for (size_t i = 0; i < outFrameLines_.size(); ++i) { - Info info; - auto& outFrameLine = outFrameLines_[i]; - ICpuGpuVectorPtr sequenceStartPositions; - ICpuGpuVectorPtr subSequenceStartPositions; - createOutFrameInfo( - outFrameLine, info, sequenceStartPositions, subSequenceStartPositions); - auto gatherAgent = - dynamic_cast(outFrameLine.agentLayer.get()); - CHECK_NOTNULL(gatherAgent); - gatherAgent->copyIdAndSequenceInfo(sequenceStartPositions, - subSequenceStartPositions, - info.allIds, - info.idIndex); - } -} - -void 
RecurrentGradientMachine::connectFrames(PassType passType) { - for (auto& memoryFrameLine : memoryFrameLines_) { - if (memoryFrameLine.rootAgent) { - auto scatterAgent = - dynamic_cast(memoryFrameLine.rootAgent.get()); - createMemoryFrameInfo(&memoryFrameLine, passType); - scatterAgent->setRealLayerAndOutput(memoryFrameLine.rootLayer, - memoryFrameLine.outArg, - memoryFrameLine.allIds, - /* idIndex */ 0, - memoryFrameLine.allIds->getSize(), - /* handleBackward */ true); - if (memoryFrameLine.sequenceStartPositions) { - int size = memoryFrameLine.sequenceStartPositions->getSize(); - scatterAgent->setSequenceStartPositions( - memoryFrameLine.sequenceStartPositions, - /* seqStartPosIndex */ 0, - size); - } - } - } - - for (auto& outFrameLine : outFrameLines_) { - auto gatherAgent = - dynamic_cast(outFrameLine.agentLayer.get()); - gatherAgent->clearRealLayers(); - } - for (int i = 0; i < maxSequenceLength_; ++i) { - // connect in_links - for (size_t j = 0; j < inFrameLines_.size(); ++j) { - Info& info = info_[j]; - // idSize denotes the sum number of tokens in each length i - int idIndex = info.idIndex.empty() ? 0 : info.idIndex[i]; - int idSize = info.idIndex.empty() ? 
numSeqs_[i] - : info.idIndex[i + 1] - info.idIndex[i]; - InFrameLine inFrameLine = inFrameLines_[j]; - auto scatterAgent = - dynamic_cast(inFrameLine.agents[i].get()); - scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer, - inFrameLine.outArg, - info.allIds, - idIndex, - idSize, - i == 0); - if (info.sequenceStartPositions) { - // size: the length of subsequence - int size = info.seqStartPosIndex[i + 1] - info.seqStartPosIndex[i]; - scatterAgent->setSequenceStartPositions( - info.sequenceStartPositions, info.seqStartPosIndex[i], size); - } - } - - // connect out_links - for (auto& outFrameLine : outFrameLines_) { - auto gatherAgent = - dynamic_cast(outFrameLine.agentLayer.get()); - gatherAgent->addRealLayer(outFrameLine.frames[i]); - } - for (auto& memoryFrameLine : memoryFrameLines_) { - NeuralNetwork::connect( - memoryFrameLine.agents[i], - i == 0 ? memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1], - numSeqs_[i] /*height of agent*/); - } - } -} - -void RecurrentGradientMachine::forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType) { - /* inArgs and outArgs are not used. - The inputs are inFrameLines_[i].inLayer. - The outputs are outFramesLines_[i].agentLayer - */ - - if (generating_) { - generateSequence(); - return; - } // else forward.. 
- - reorganizeInput(passType); - int numSequences = commonSeqInfo_.size(); - - resizeOrCreateFrames(maxSequenceLength_); - resizeBootFrame(numSequences); - - connectFrames(passType); - - REGISTER_TIMER_INFO("RecurrentFwTime", "RecurrentFwTime"); - // forward - for (auto& memoryFrameLine : memoryFrameLines_) { - memoryFrameLine.bootLayer->forward(passType); - } - for (int i = 0; i < maxSequenceLength_; ++i) { - const std::vector inArgs; - std::vector outArgs; - frames_[i]->forward(inArgs, &outArgs, passType); - } - - reorganizeOutput(passType); -} - -void RecurrentGradientMachine::backward(const UpdateCallback& callback) { - if (generating_) { - return; - } - REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime"); - AsyncGpuBlock asyncGpuBlock; - for (int i = maxSequenceLength_ - 1; i >= 0; --i) { - frames_[i]->backward(nullptr); - } - for (auto& memoryFrameLine : memoryFrameLines_) { - memoryFrameLine.bootLayer->backward(nullptr); - } -} - -void RecurrentGradientMachine::forwardBackward( - const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - const UpdateCallback& callback) { - LOG(FATAL) << "should not use this function"; -} - -void RecurrentGradientMachine::eval(Evaluator* evaluator) const { - // call printers frame by frame - for (int i = 0; i < maxSequenceLength_; ++i) { - VLOG(2) << "Recurrent Layer Group eval frame " << i << " begin"; - evaluator->eval(*(frames_[i].get())); - VLOG(2) << "Recurrent Layer Group eval frame " << i << " end"; - } -} - -void RecurrentGradientMachine::registerBeamSearchControlCallbacks( - const BeamSearchCandidatesAdjustCallback& adjustBeamSearch, - const NormOrDropNodeCallback& normOrDropNode, - const DropCallback& stopBeamSearch) { - this->removeBeamSearchControlCallbacks(); - //! for gcc 46, aggregate initialization is not supported. 
TAT - this->beamSearchCtrlCallbacks_ = new BeamSearchControlCallbacks( - adjustBeamSearch, normOrDropNode, stopBeamSearch); -} - -void RecurrentGradientMachine::removeBeamSearchControlCallbacks() { - if (this->beamSearchCtrlCallbacks_) { - delete this->beamSearchCtrlCallbacks_; - this->beamSearchCtrlCallbacks_ = nullptr; - } -} - -void RecurrentGradientMachine::registerBeamSearchStatisticsCallbacks( - const EachStepCallback& onEachStepStarted, - const EachStepCallback& onEachStepStoped) { - this->removeBeamSearchStatisticsCallbacks(); - this->beamSearchStatistics_ = - new BeamSearchStatisticsCallbacks(onEachStepStarted, onEachStepStoped); -} - -void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() { - if (this->beamSearchStatistics_) { - delete this->beamSearchStatistics_; - this->beamSearchStatistics_ = nullptr; - } -} - -namespace { -void lenToStarts(std::vector& starts) { - int pos = 0; - starts.back() = 0; - for (auto& start : starts) { - int tmp = start; - start = pos; - pos += tmp; - } - starts.back() = pos; -} -} // namespace - -void RecurrentGradientMachine::calcSequenceStartPositions() { - std::vector starts(commonSeqInfo_.size() + 1); - for (auto& seqInfo : commonSeqInfo_) { - starts[seqInfo.seqId] = seqInfo.topLevelLength; - } - lenToStarts(starts); - ICpuGpuVector::resizeOrCreate(sequenceStartPositions_, starts.size(), false); - std::copy(starts.begin(), - starts.end(), - sequenceStartPositions_->getMutableData(false)); -} - -void RecurrentGradientMachine::checkOutputConsistency( - OutFrameLine& outFrameLine) { - bool hasSeq = outFrameLine.frames[0]->getOutput().hasSeq(); - for (int i = 0; i < maxSequenceLength_; ++i) { - LayerPtr frame = outFrameLine.frames[i]; - CHECK_EQ(hasSeq, frame->getOutput().hasSeq()); - int numSequences = frame->getOutput().getNumSequences(); - CHECK_EQ(numSeqs_[i], numSequences); - } -} - -void RecurrentGradientMachine::createOutFrameInfo( - OutFrameLine& outFrameLine, - Info& info, - ICpuGpuVectorPtr& 
sequenceStartPositions, - ICpuGpuVectorPtr& subSequenceStartPositions) { - checkOutputConsistency(outFrameLine); - - if (!outFrameLine.frames[0]->getOutput().hasSeq()) { - createOutFrameInfo_seq( - outFrameLine, info, sequenceStartPositions, subSequenceStartPositions); - } else { - createOutFrameInfo_subseq( - outFrameLine, info, sequenceStartPositions, subSequenceStartPositions); - } -} - -void RecurrentGradientMachine::createOutFrameInfo_seq( - OutFrameLine& outFrameLine, - Info& info, - ICpuGpuVectorPtr& sequenceStartPositions, - ICpuGpuVectorPtr& subSequenceStartPositions) { - std::vector allIds; - info.idIndex.resize(1, 0); // first idIndex = 0 - - const int* starts = sequenceStartPositions_->getData(false); - - for (int i = 0; i < maxSequenceLength_; ++i) { - LayerPtr frame = outFrameLine.frames[i]; - size_t numSequences = frame->getOutput().getNumSequences(); - for (size_t j = 0; j < numSequences; ++j) { - int seqStart = starts[commonSeqInfo_[j].seqId]; - int seqLength = commonSeqInfo_[j].topLevelLength; - allIds.push_back(reversed_ ? 
(seqStart + seqLength - 1 - i) - : (seqStart + i)); - } - info.idIndex.push_back(allIds.size()); - } - sequenceStartPositions = sequenceStartPositions_; - copyScattedId(allIds, &info.allIds, allIds.size()); - CHECK_EQ(info.idIndex.size(), static_cast(maxSequenceLength_ + 1)); -} - -void RecurrentGradientMachine::createOutFrameInfo_subseq( - OutFrameLine& outFrameLine, - Info& info, - ICpuGpuVectorPtr& sequenceStartPositions, - ICpuGpuVectorPtr& subSequenceStartPositions) { - size_t numSequences = commonSeqInfo_.size(); - std::vector allIds; - info.idIndex.resize(1, 0); // first idIndex = 0 - - const int* starts = sequenceStartPositions_->getData(false); - std::vector subStarts(starts[numSequences] + 1); - for (int i = 0; i < maxSequenceLength_; ++i) { - LayerPtr frame = outFrameLine.frames[i]; - size_t numSequences = frame->getOutput().getNumSequences(); - const int* seqStarts = - frame->getOutput().sequenceStartPositions->getData(false); - for (size_t j = 0; j < numSequences; ++j) { - subStarts[starts[commonSeqInfo_[j].seqId] + i] = - seqStarts[j + 1] - seqStarts[j]; - } - } - lenToStarts(subStarts); - - for (int i = 0; i < maxSequenceLength_; ++i) { - LayerPtr frame = outFrameLine.frames[i]; - size_t numSequences = frame->getOutput().getNumSequences(); - for (size_t j = 0; j < numSequences; ++j) { - int pos = starts[commonSeqInfo_[j].seqId] + i; - int subSeqStart = subStarts[pos]; - int subSeqEnd = subStarts[pos + 1]; - for (int k = subSeqStart; k < subSeqEnd; ++k) { - allIds.push_back(k); - } - } - info.idIndex.push_back(allIds.size()); - } - - ICpuGpuVector::resizeOrCreate( - subSequenceStartPositions, subStarts.size(), false); - int* cpuSubSequenceStartPositions = - subSequenceStartPositions->getMutableData(false); - std::copy(subStarts.begin(), subStarts.end(), cpuSubSequenceStartPositions); - ICpuGpuVector::resizeOrCreate( - sequenceStartPositions, numSequences + 1, false); - int* cpuSequenceStartPositions = - sequenceStartPositions->getMutableData(false); - 
for (size_t i = 0; i <= numSequences; ++i) { - cpuSequenceStartPositions[i] = subStarts[starts[i]]; - } - copyScattedId(allIds, &info.allIds, allIds.size()); - CHECK_EQ(info.idIndex.size(), static_cast(maxSequenceLength_ + 1)); -} - -/* create scattered id infomation for all realLayer of inFrameLines one time. - * If hasSubseq, will also create scattered sequenceStartPositions infomation - * for all realLayer of inFrameLines one time. - */ -void RecurrentGradientMachine::createInFrameInfo(int inlinkId, - const Argument& input, - PassType passType) { - if (!input.hasSeq()) { - createInFrameInfo_nonseq(inlinkId, input, passType); - } else if (!input.hasSubseq()) { - createInFrameInfo_seq(inlinkId, input, passType); - } else { - createInFrameInfo_subseq(inlinkId, input, passType); - } -} - -void RecurrentGradientMachine::createInFrameInfo_nonseq(int inlinkId, - const Argument& input, - PassType passType) { - std::vector allIds; - - auto& seqInfo = seqInfos_[inlinkId]; - Info* inlinkInfo = &info_[inlinkId]; - inlinkInfo->idIndex.clear(); - for (size_t i = 0; i < seqInfo.size(); ++i) { - allIds.push_back(seqInfo[i].seqId); - } - // copy and check scatterId - copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize()); -} - -void RecurrentGradientMachine::createInFrameInfo_seq(int inlinkId, - const Argument& input, - PassType passType) { - std::vector allIds; - auto& seqInfo = seqInfos_[inlinkId]; - Info* inlinkInfo = &info_[inlinkId]; - inlinkInfo->idIndex.resize(1, 0); // first idIndex = 0 - - for (int i = 0; i < maxSequenceLength_; ++i) { - for (int j = 0; j < numSeqs_[i]; ++j) { - int seqLength = seqInfo[j].topLevelLength; - int seqStart = seqInfo[j].seqStart; - allIds.push_back(reversed_ ? 
(seqStart + seqLength - 1 - i) - : (seqStart + i)); - } - inlinkInfo->idIndex.push_back(allIds.size()); - } - - // copy and check scatterId - copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize()); - CHECK_EQ(inlinkInfo->idIndex.size(), - static_cast(maxSequenceLength_ + 1)); -} -void RecurrentGradientMachine::createInFrameInfo_subseq(int inlinkId, - const Argument& input, - PassType passType) { - std::vector allIds; - - auto& seqInfo = seqInfos_[inlinkId]; - - Info* inlinkInfo = &info_[inlinkId]; - inlinkInfo->idIndex.resize(1, 0); // first idIndex = 0 - std::vector sequenceStartPositions; - const int* subSequenceStartPositions = nullptr; - - subSequenceStartPositions = input.subSequenceStartPositions->getData(false); - inlinkInfo->seqStartPosIndex.clear(); - inlinkInfo->seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0 - for (int i = 0; i < maxSequenceLength_; ++i) { - sequenceStartPositions.push_back(0); // first element = 0 - for (int j = 0; j < numSeqs_[i]; ++j) { - int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i]; - int subSeqEnd = subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1]; - for (int k = subSeqStart; k < subSeqEnd; ++k) { - allIds.push_back(k); - } - sequenceStartPositions.push_back(sequenceStartPositions.back() + - subSeqEnd - subSeqStart); - } - inlinkInfo->idIndex.push_back(allIds.size()); - inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size()); - } - // inFrameLine create sequenceStartPositions one time - CHECK_EQ( - sequenceStartPositions.size(), - static_cast(maxSequenceLength_ + input.getNumSubSequences())); - CHECK_EQ(inlinkInfo->seqStartPosIndex.size(), - static_cast(maxSequenceLength_ + 1)); - createSeqPos(sequenceStartPositions, &inlinkInfo->sequenceStartPositions); - - // copy and check scatterId - copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize()); - CHECK_EQ(inlinkInfo->idIndex.size(), - static_cast(maxSequenceLength_ + 1)); -} - -/* like 
createInFrameInfo, but for all realLayer of memoryFrameLines*/ -void RecurrentGradientMachine::createMemoryFrameInfo( - MemoryFrameLine* memoryFrameLine, PassType passType) { - const Argument& input = (*memoryFrameLine).rootLayer->getOutput(); - size_t numSequences = input.getNumSequences(); - std::vector allIds; - bool seqFlag = input.hasSeq(); - CHECK(!input.hasSubseq()) - << "Subsequence boot layer for memory is not supported"; - - if (seqFlag) { // for sequenceScatterAgentLayer - std::vector sequenceStartPositions; - sequenceStartPositions.push_back(0); // first element = 0 - const int* starts = input.sequenceStartPositions->getData(false); - for (size_t i = 0; i < numSequences; ++i) { - // memory info adopt info of inlinks[0] - int seqId = seqInfos_[0][i].seqId; - for (int k = starts[seqId]; k < starts[seqId + 1]; ++k) { - allIds.push_back(k); - } - sequenceStartPositions.push_back(sequenceStartPositions.back() + - starts[seqId + 1] - starts[seqId]); - } - createSeqPos(sequenceStartPositions, - &(*memoryFrameLine).sequenceStartPositions); - - } else { // for scatterAgentLayer - for (size_t i = 0; i < numSequences; ++i) { - allIds.push_back(seqInfos_[0][i].seqId); - } - } - // copy and check scatterId - copyScattedId(allIds, &(*memoryFrameLine).allIds, input.getBatchSize()); - // memoryFrameLine select rows in real layer one time - selectRowsOneTime((*memoryFrameLine).rootLayer, - (*memoryFrameLine).allIds, - &(*memoryFrameLine).outArg, - passType); -} - -void RecurrentGradientMachine::copyScattedId(std::vector& srcIds, - IVectorPtr* dstIds, - int size) { - int idSize = srcIds.size(); - CHECK_EQ(idSize, size); - IVector::resizeOrCreate(*dstIds, idSize, useGpu_); - (*dstIds)->copyFrom(srcIds.data(), idSize); - // check - std::sort(srcIds.begin(), srcIds.end()); - for (int i = 0; i < idSize; ++i) { - CHECK_EQ(srcIds[i], i); - } -} - -void RecurrentGradientMachine::selectRowsOneTime(LayerPtr layer, - const IVectorPtr& allIds, - Argument* arg, - PassType passType) 
{ - Argument& src = layer->getOutput(); - if (src.value) { - const MatrixPtr& realV = src.value; - int height = realV->getHeight(); - int width = realV->getWidth(); - Matrix::resizeOrCreate( - arg->value, height, width, /* trans */ false, useGpu_); - arg->value->zeroMem(); - arg->value->selectRows(*realV, *allIds); - if (passType != PASS_TEST) { - Matrix::resizeOrCreate( - arg->grad, height, width, /* trans */ false, useGpu_); - arg->grad->zeroMem(); - } - } - if (src.ids) { - IVector::resizeOrCreate(arg->ids, src.ids->getSize(), useGpu_); - arg->ids->selectFrom(*src.ids, *allIds); - } -} - -void RecurrentGradientMachine::createSeqPos( - const std::vector& sequenceStartPosition, - ICpuGpuVectorPtr* sequenceStartPositions) { - int size = sequenceStartPosition.size(); - const int* data = sequenceStartPosition.data(); - ICpuGpuVector::resizeOrCreate(*sequenceStartPositions, size, false); - (*sequenceStartPositions)->copyFrom(data, size, false); -} - -size_t RecurrentGradientMachine::getGenBatchSize() { - size_t numSequences = 0; - for (auto& memoryFrameLine : memoryFrameLines_) { - if (!memoryFrameLine.rootLayer) continue; - Argument& bootArg = memoryFrameLine.rootLayer->getOutput(); - size_t batchSize = bootArg.getNumSequences(); - if (numSequences) { - CHECK_EQ(numSequences, batchSize); - } else { - numSequences = batchSize; - } - } - CHECK(numSequences) - << "Fail to get batch size in generation. 
" - "At least one of the Memory layer MUST have a layer that is NOT in " - "the layer group to boot it, and this boot layer is used to " - "decide batch_size in generation process."; - return numSequences; -} - -void RecurrentGradientMachine::generateSequence() { - CHECK_NOTNULL(eosFrameLine_.get()); - CHECK_GE(outFrameLines_.size(), 1UL); - size_t numSequences = getGenBatchSize(); - - resizeBootFrame(numSequences); - // We create only two sub-network in generation, one stores states of all - // layers in previous time step and the other storing the states at current - // time step. - resizeOrCreateFrames(2); - - // outFrameLines_.size() > 1UL - dataArgsSize_ = outFrameLines_.size() - 1; - dataArgs_.resize(dataArgsSize_); - dataArgsFrame_.clear(); - dataArgsFrame_.resize(dataArgsSize_); - - // connect boot frame memory links - std::vector ids(numSequences); - for (size_t i = 0; i < numSequences; ++i) { - ids[i] = i; - } - for (auto& memoryFrameLine : memoryFrameLines_) { - if (memoryFrameLine.rootAgent) { - auto scatterAgent = - dynamic_cast(memoryFrameLine.rootAgent.get()); - scatterAgent->setRealLayer(memoryFrameLine.rootLayer, ids); - } - NeuralNetwork::connect( - memoryFrameLine.agents[0], memoryFrameLine.bootLayer, ids.size()); - } - - // boot layer forward - AsyncGpuBlock asyncGpuBlock; - - for (auto& memoryFrameLine : memoryFrameLines_) { - memoryFrameLine.bootLayer->forward(PASS_TEST); - } - - // init outArg - size_t resultNum = generator_.config.num_results_per_sample(); - size_t maxGenWordCount = - generator_.config.max_num_frames() * numSequences * resultNum; - IVector::resizeOrCreate(generator_.outArg.ids, maxGenWordCount, false); - if (resultNum > 1) { - CHECK_LE(resultNum, static_cast(generator_.config.beam_size())); - Matrix::resizeOrCreate(generator_.outArg.in, - /* height */ numSequences, - /* width */ resultNum, - false, - /* useGpu */ false); - } - ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions, - numSequences + 1, - /* 
useGpu */ false); - if (getBeamSize() > 1) { - beamSearch(numSequences); - } else { - oneWaySearch(numSequences); - } - if (dataArgsSize_) createDataOutlink(); - - size_t size = generator_.ids.size(); - generator_.outArg.ids->resize(size); - generator_.outArg.ids->copyFrom(generator_.ids.data(), size); - - OutFrameLine& outFrameLine = outFrameLines_[0]; - auto dataAgent = dynamic_cast(outFrameLine.agentLayer.get()); - CHECK_NOTNULL(dataAgent); - dataAgent->setData(generator_.outArg); - dataAgent->prefetch(); -} - -void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { - OutFrameLine& outFrameLine = outFrameLines_[0]; - - // finalPaths_[0] stores the generated results of the - // entire batch, so its size exactly equals to batchSize. - finalPaths_.clear(); - finalPaths_.resize(1); - std::vector& finalPaths = finalPaths_[0]; - finalPaths.resize(batchSize); - - seqIds_.resize(batchSize); - std::vector scatterIds; - for (size_t i = 0; i < batchSize; ++i) { - finalPaths[i].seqId = i; - seqIds_[i] = i; - } - - // forward - for (int i = 0; i < maxSequenceLength_; ++i) { - if (i && scatterIds.empty()) break; - int machineCur = i % 2; - int machinePrev = (i - 1) % 2; - // connect memory links - if (i) { - seqIds_.clear(); - for (size_t j = 0; j < batchSize; ++j) { - if (finalPaths[j].seqId != -1) seqIds_.push_back(j); - } - - for (auto& memoryFrameLine : memoryFrameLines_) { - auto scatterAgent = dynamic_cast( - memoryFrameLine.scatterAgents[machineCur].get()); - scatterAgent->setRealLayer(memoryFrameLine.frames[machinePrev], - scatterIds); - scatterAgent->forward(PASS_TEST); - NeuralNetwork::connect(memoryFrameLine.agents[machineCur], - memoryFrameLine.scatterAgents[machineCur]); - } - } - const std::vector inArgs; - std::vector outArgs; - frames_[machineCur]->forward(inArgs, &outArgs, PASS_TEST); - - const IVectorPtr& idVec = outFrameLine.frames[machineCur]->getOutput().ids; - for (size_t j = 0; j < seqIds_.size(); ++j) { - 
finalPaths[seqIds_[j]].ids.push_back(idVec->getElement(j)); - finalPaths[seqIds_[j]].machineIdVec.push_back(j); - } - - copyDataOutlinkFrame(machineCur); - - // check eos - const IVectorPtr& eosVec = - eosFrameLine_->layers[machineCur]->getOutput().ids; - scatterIds.clear(); - for (size_t j = 0; j < seqIds_.size(); ++j) { - if (eosVec->getElement(j) == 1U) { - // path.seqId = -1 indicates end of generation - // of an input sequence - finalPaths[seqIds_[j]].seqId = -1; - } else { - scatterIds.push_back(j); - } - } - } - - batchMachineIdVec_.clear(); - batchMachineStartPos_.clear(); - int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); - starts[0] = 0; - generator_.ids.clear(); - for (size_t i = 0; i < batchSize; ++i) { - generator_.ids.insert(generator_.ids.end(), - finalPaths[i].ids.begin(), - finalPaths[i].ids.end()); - starts[i + 1] = generator_.ids.size(); - batchMachineIdVec_.insert(batchMachineIdVec_.end(), - finalPaths[i].machineIdVec.begin(), - finalPaths[i].machineIdVec.end()); - } -} - -void RecurrentGradientMachine::connectPrevFrame(int stepId, - std::vector& paths) { - int machineCur = stepId % 2; - int machinePrev = (stepId - 1) % 2; - int beam = getBeamSize(); - machineIds_.clear(); - topIds_.clear(); - seqIds_.clear(); - - for (size_t j = 0; j < paths.size(); ++j) { - machineIds_.push_back(paths[j].machineId); - topIds_.push_back(paths[j].machineId * beam + paths[j].topIndex); - seqIds_.push_back(paths[j].seqId); - } - - for (auto& memoryFrameLine : memoryFrameLines_) { - bool isOutIds = (memoryFrameLine.layerName == outFrameLines_[0].layerName); - auto scatterAgent = dynamic_cast( - memoryFrameLine.scatterAgents[machineCur].get()); - scatterAgent->setRealLayer(memoryFrameLine.frames[machinePrev], - isOutIds ? 
topIds_ : machineIds_); - scatterAgent->forward(PASS_TEST); - NeuralNetwork::connect(memoryFrameLine.agents[machineCur], - memoryFrameLine.scatterAgents[machineCur]); - } -} - -void RecurrentGradientMachine::forwardFrame(int machineCur) { - // forward - const std::vector inArgs; - std::vector outArgs; - frames_[machineCur]->forward(inArgs, &outArgs, PASS_TEST); - - copyDataOutlinkFrame(machineCur); - - IVectorPtr& ids = outFrameLines_[0].frames[machineCur]->getOutput().ids; - MatrixPtr in = outFrameLines_[0].frames[machineCur]->getOutput().in; - IVectorPtr& eos = eosFrameLine_->layers[machineCur]->getOutput().ids; - if (useGpu_) { - IVector::resizeOrCreate(cpuId_, ids->getSize(), false /* useGpu */); - cpuId_->copyFrom(*ids); - Matrix::resizeOrCreate(cpuProb_, - in->getHeight(), - in->getWidth(), - false /* trans */, - false /* useGpu */); - cpuProb_->copyFrom(*in); - IVector::resizeOrCreate(cpuEos_, eos->getSize(), false /* useGpu */); - cpuEos_->copyFrom(*eos); - } else { - cpuId_ = ids; - cpuProb_ = in; - cpuEos_ = eos; - } -} - -void RecurrentGradientMachine::singlePathExpand(Path& curPath, - size_t curPathId, - std::vector& newPaths, - size_t expandWidth) { - int calc_id = - gDiyProbStart ? gDiyProbStart(curPath.ids.size(), curPath.ids.data()) : 0; - - const int* idVec = cpuId_->getData(); - const real* probMat = cpuProb_->getData(); - const int* eosVec = cpuEos_->getData(); - - for (size_t k = 0; k < expandWidth; k++) { - int index = curPathId * expandWidth + k; - int id = idVec[index]; - real prob = probMat[index]; - /* - * Ordinarily, beam search greedily expands the most promising expandWidth - * paths that currently are ALWAYS returned by MaxIdLayer. - * In one condition, if user customizes the beam search procedure by - * restricting the expansion within a user defined subset, - * as a result, MaxIdLayer possibly COULD NOT return expandWidth - * vaild expansions, and it will use -1 to indicate the end of valid - * expansion candidates. 
- */ - if (id == -1) break; - - real newLogProb = generator_.config.log_prob() ? std::log(prob) : prob; - Path newPath( - curPath, id, newLogProb, curPathId /*machineId*/, k /*topIndex*/); - if (this->beamSearchCtrlCallbacks_) { - if (beamSearchCtrlCallbacks_->stopDetermineCandidates( - newPath.seqId, newPath.ids, newPath.probHistory)) - return; - } - // outFrameLines_.size() > 1UL - if (dataArgsSize_) { - newPath.machineIdVec = curPath.machineIdVec; - newPath.machineIdVec.push_back(curPathId); - } - bool atEos = - eosVec[index] == 1U || newPath.ids.size() >= (size_t)maxSequenceLength_; - // adjustNewPath - newPath.adjustProb(calc_id, atEos); - if (this->beamSearchCtrlCallbacks_) { - this->beamSearchCtrlCallbacks_->normOrDropNode( - newPath.seqId, newPath.ids, newPath.probHistory, &newPath.logProb); - } - if (!newPath.isDropable()) { - atEos ? finalPaths_[curPath.seqId].push_back(newPath) - : newPaths.push_back(newPath); - } - } // for expandWidth - - if (gDiyProbStop) { - gDiyProbStop(calc_id); - } -} - -void RecurrentGradientMachine::beamExpand(std::vector& paths, - std::vector& newPaths) { - size_t candidatePathCount = paths.size(); - // idVec.size() could be larger than candidatePathCount * beam, - // so user can drop some node customly. - CHECK_EQ(cpuId_->getSize() % candidatePathCount, 0UL); - size_t expandWidth = cpuId_->getSize() / candidatePathCount; - - // iterate over each sequence - size_t totalExpandCount = 0; - int prevSeqId = -1; - int curSeqId = 0; - for (size_t j = 0; j <= candidatePathCount; j++) { - // expansions of a single sequence are all processed - curSeqId = (j < candidatePathCount ? paths[j].seqId : curSeqId + 1); - if (prevSeqId != -1 && curSeqId != prevSeqId) { - totalExpandCount += beamShrink(newPaths, prevSeqId, totalExpandCount); - } - if (j == candidatePathCount) return; - singlePathExpand(paths[j], j, newPaths, expandWidth); - - prevSeqId = paths[j].seqId; - } // for paths -} - -// Drop extra nodes to beam size. 
-size_t RecurrentGradientMachine::beamShrink(std::vector& newPaths, - size_t seqId, - size_t totalExpandCount) { - size_t minNewPathSize = - std::min(getBeamSize(), newPaths.size() - totalExpandCount); - if (!minNewPathSize) { - return 0; - } - std::nth_element(newPaths.begin() + totalExpandCount, - newPaths.begin() + totalExpandCount + minNewPathSize, - newPaths.end(), - Path::greaterPath); - newPaths.resize(totalExpandCount + minNewPathSize); - - real minPathLogProb = - std::min_element(newPaths.end() - minNewPathSize, newPaths.end()) - ->logProb; - real maxPathLogProb = - std::max_element(newPaths.end() - minNewPathSize, newPaths.end()) - ->logProb; - - // Remove the already formed paths that are relatively short - finalPaths_[seqId].erase( - std::remove_if(finalPaths_[seqId].begin(), - finalPaths_[seqId].end(), - [&](Path& p) { return p.logProb < minPathLogProb; }), - finalPaths_[seqId].end()); - for (auto p : finalPaths_[seqId]) { - if (minFinalPathLogProb_[seqId] > p.logProb) { - minFinalPathLogProb_[seqId] = p.logProb; - } - } - - if (finalPaths_[seqId].size() >= getBeamSize() && - minFinalPathLogProb_[seqId] >= maxPathLogProb) { - newPaths.resize(totalExpandCount); - return 0; - } - return minNewPathSize; -} - -void RecurrentGradientMachine::fillGenOutputs() { - size_t numResults = generator_.config.num_results_per_sample(); - for (size_t i = 0; i < finalPaths_.size(); ++i) { - size_t minFinalPathsSize = std::min(numResults, finalPaths_[i].size()); - std::partial_sort(finalPaths_[i].begin(), - finalPaths_[i].begin() + minFinalPathsSize, - finalPaths_[i].end(), - Path::greaterPath); - finalPaths_[i].resize(minFinalPathsSize); - } - - generator_.ids.clear(); - int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); - starts[0] = 0; - if (numResults > 1) { - int idsProbSaveSize = 0; - for (auto inSeq : finalPaths_) { - for (auto path : inSeq) idsProbSaveSize += path.ids.size(); - idsProbSaveSize += inSeq.size(); - } - 
Matrix::resizeOrCreate( - generator_.outArg.value, idsProbSaveSize, 1, false, false); - real* idsProb = generator_.outArg.value->getData(); - - real* probs = generator_.outArg.in->getData(); - size_t curPos = 0; - for (size_t i = 0; i < finalPaths_.size(); ++i) { - for (size_t j = 0; j < finalPaths_[i].size(); ++j) { - Path& path = finalPaths_[i][j]; - size_t genLen = path.ids.size(); - generator_.ids.push_back(genLen); // sequence size - generator_.ids.insert( - generator_.ids.end(), path.ids.begin(), path.ids.end()); - generator_.ids.push_back(-1); // end of sequence - - memcpy(idsProb + curPos, path.idsProb.data(), sizeof(real) * genLen); - curPos += genLen; - idsProb[curPos++] = -1.0; - probs[i * numResults + j] = path.logProb; - } - starts[i + 1] = generator_.ids.size(); - } - } else { - for (size_t i = 0; i < finalPaths_.size(); ++i) { - CHECK(!finalPaths_[i].empty()); - Path& path = finalPaths_[i][0]; - generator_.ids.insert( - generator_.ids.end(), path.ids.begin(), path.ids.end()); - starts[i + 1] = starts[i] + path.ids.size(); - } - } -} - -void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) { - for (size_t i = 0; i < dataArgsSize_; i++) { - Argument outFrame; - outFrame.resizeAndCopyFrom( - outFrameLines_[i + 1].frames[machineCur]->getOutput(), useGpu_); - dataArgsFrame_[i].emplace_back(outFrame); - } -} - -void RecurrentGradientMachine::createDataOutlinkSelRowsInfo( - bool isSeq, std::vector& outArgs) { - batchMachineIdVec_.clear(); - - size_t seqIdx = 0; - for (size_t i = 0; i < finalPaths_.size(); ++i) { - for (size_t j = 0; j < finalPaths_[i].size(); ++j) { - std::vector& machineIdVec = finalPaths_[i][j].machineIdVec; - if (isSeq) { - for (size_t i = 0; i < machineIdVec.size(); ++i) { - size_t rowId = machineIdVec[i]; - int* seqPos = - outArgs[i].sequenceStartPositions->getMutableData(false); - batchMachineIdVec_.push_back(seqPos[rowId]); - } - } else { - batchMachineIdVec_.insert( - batchMachineIdVec_.end(), machineIdVec.begin(), 
machineIdVec.end()); - } - seqIdx++; - } - } -} - -void RecurrentGradientMachine::createDataOutlinkCopySizeInfo( - bool isSeq, std::vector& outArgs, std::vector& copySize) { - size_t totalSeqNum = std::accumulate( - finalPaths_.begin(), - finalPaths_.end(), - 0UL, - [](size_t a, const std::vector& b) { return a + b.size(); }); - copySize.resize(totalSeqNum, 1); - - batchMachineStartPos_.resize(totalSeqNum + 1, 0); - if (isSeq) { - ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions; - CHECK_EQ(static_cast(inputSeqStartPos->getSize() - 1), - getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size()); - int* starts = inputSeqStartPos->getMutableData(false); - int seqId = 0; - for (size_t i = 0; i < finalPaths_.size(); ++i) { - for (size_t j = 0; j < finalPaths_[i].size(); ++j) { - copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i] - : starts[j + 1] - starts[j]; - batchMachineStartPos_[seqId + 1] = - batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size(); - seqId++; - } - } - } else { - for (size_t i = 0; i < finalPaths_[0].size(); ++i) - batchMachineStartPos_[i + 1] = - batchMachineStartPos_[i] + finalPaths_[0][i].ids.size(); - } -} - -void RecurrentGradientMachine::createDataOutlink() { - for (size_t i = 0; i < dataArgsSize_; i++) { - bool isSeq = dataArgsFrame_[i][0].hasSeq(); - std::vector copySize; - createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize); - createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]); - - dataArgs_[i].concat(dataArgsFrame_[i], - batchMachineIdVec_, - batchMachineStartPos_, - copySize, - useGpu_, - HPPL_STREAM_1, - PASS_TEST); - auto dataAgent = - dynamic_cast(outFrameLines_[i + 1].agentLayer.get()); - CHECK_NOTNULL(dataAgent); - dataAgent->setData(dataArgs_[i]); - } -} - -void RecurrentGradientMachine::beamSearch(size_t batchSize) { - finalPaths_.clear(); - finalPaths_.resize(batchSize); - seqIds_.resize(batchSize); - minFinalPathLogProb_.clear(); - 
minFinalPathLogProb_.resize(batchSize, 0); - - std::vector paths; - std::vector newPaths; - for (size_t i = 0; i < batchSize; ++i) { - paths.push_back(Path(i)); - if (this->beamSearchCtrlCallbacks_) { - paths.back().recordHistory(); - } - } - - // restart beam search - stopBeamSearch_ = false; - for (int i = 0; i < maxSequenceLength_; ++i) { - int machineCur = i % 2; - std::unique_ptr< - ScopedCallbacks> - statisticsBlock; - if (this->beamSearchStatistics_) { - auto ptr = - new ScopedCallbacks(beamSearchStatistics_->onEachStepStarted, - beamSearchStatistics_->onEachStepStoped, - i); - statisticsBlock.reset(ptr); - } - if (stopBeamSearch_) break; - - if (i) connectPrevFrame(i, paths); - - if (this->beamSearchCtrlCallbacks_) { - std::vector*> prefixes; - prefixes.resize(paths.size()); - std::transform( - paths.begin(), paths.end(), prefixes.begin(), [](const Path& p) { - return const_cast*>(&p.ids); - }); - beamSearchCtrlCallbacks_->beamSearchCandidateAdjust( - prefixes, frames_[machineCur].get(), i); - } - - forwardFrame(machineCur); - beamExpand(paths, newPaths); - if (newPaths.empty()) break; - - paths = newPaths; - newPaths.clear(); - } // end for machineCur - fillGenOutputs(); -} - -void RecurrentGradientMachine::Path::adjustProb(int calc_id, bool atEos) { - if (gDiyProbMethod) { - logProb = gDiyProbMethod(calc_id, ids.size(), ids.data(), logProb, atEos); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.h deleted file mode 100644 index 0a13d4f6f84eb5309a1b25f039357cb8af02c35e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.h +++ /dev/null @@ -1,580 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "GradientMachine.h" -#include "NeuralNetwork.h" - -#include "paddle/legacy/utils/Locks.h" - -namespace paddle { - -/** - * Private data class declares. - * Used for user customized beam search. - */ -class BeamSearchControlCallbacks; -class BeamSearchStatisticsCallbacks; - -class RecurrentGradientMachine : public NeuralNetwork { - public: - RecurrentGradientMachine(const std::string& subModelName, - NeuralNetwork* rootNetwork); - - // Disable copy and assign. - RecurrentGradientMachine(const RecurrentGradientMachine& other) = delete; - RecurrentGradientMachine& operator=(const RecurrentGradientMachine& other) = - delete; - - virtual ~RecurrentGradientMachine() { - this->removeBeamSearchStatisticsCallbacks(); - this->removeBeamSearchControlCallbacks(); - } - - virtual void init(const ModelConfig& config, - ParamInitCallback callback, - const std::vector& parameterTypes, - bool useGpu); - - virtual void prefetch(const std::vector& inArgs); - - virtual void forward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType); - - virtual void backward(const UpdateCallback& callback = nullptr); - - void forwardBackward(const std::vector& inArgs, - std::vector* outArgs, - PassType passType, - const UpdateCallback& callback); - - virtual void resetState() {} - virtual void eval(Evaluator* evaluator) const; - - const std::vector& getParameterIds() { return parameterIds_; } - - /** - * @brief BeamSearchCandidatesAdjustCallback - * - * Adjust searching candidates to restrict beam search - * searching within a limited subset of 
all possibile paths. - * - * The first parameter is the prefixes of all formed paths in current - * beam search step, whose type is basically int[][]. - * - * The second parameter is a pointer to the network used to generate sequence, - * user can use this pointer to tranverse each layer in the network to - * modify behaivors of a particular layer. - * - * The third parameter is an integer to indicate the iteration number of - * beam search, so that user can customize different operations in different - * beam search iterations. - */ - typedef std::function*>&, NeuralNetwork*, const int)> - BeamSearchCandidatesAdjustCallback; - - /** - * @brief DropCallback - * - * Drop a whole prefix or one candidate in beam search or not. - * - * The first parameter is sequence index in a batch - * - * The second parameter is one path in beam search, - * which is made up of node indices. - * - * The third parameter is probabilites for each node in this path. - * - * Return true if this prefix or candidate is expected to be dropped. - */ - typedef std::function&, const std::vector&)> - DropCallback; - - /** - * @brief NormOrDropNodeCallback - * - * Normalize a path's probabilities or just drop it by modifying path.logProb - * - * The first parameter is sequence index in a batch - * - * The second parameter is path.ids - * - * The third parameter is probabilites for each node in this path. - * - * The fourth parameter is the probability of the whole path. - */ - typedef std::function&, std::vector&, real*)> - NormOrDropNodeCallback; - - /** - * @brief Register beam search control callbacks. Used for prediction. - * - * @param queryBeamSearch: Give the sequences already formed, return the - * nodes expected to be expanded. - * Input: A pointer to an array holding pathes which have been expanded - * Return: A pointer to an array holding nodes wanted to be expanded. - * - * @param dropOneNode: Early drop a node in one beam search step. 
- * Given the path formed and probability history, decide whether a node - * should be dropped or not. - * - * @param stopBeamSearch: Early stop a path in one beam search step. - * Given the path and probability history, decide whether a path - * should be dropped or not. - */ - void registerBeamSearchControlCallbacks( - const BeamSearchCandidatesAdjustCallback& adjustBeamSearch, - const NormOrDropNodeCallback& normOrDropNode, - const DropCallback& stopBeamSearch); - - /** - * @brief Remove user costumized beam search callbacks, - * - * make sequence generation acts like normal beam search. - */ - void removeBeamSearchControlCallbacks(); - - /** - * @brief EachStepCallback - * - * Invoke with beam search step. - */ - typedef std::function EachStepCallback; - - /** - * @brief register statistics methods for performance profile of beam search. - * - * @param onEachStepStarted: invoke once a beam search step starts. - * Its input is index of the beam search step. - * - * @param onEachStepStoped: invoke once a beam search step ends. - * Its input is index of the beam search step. - */ - void registerBeamSearchStatisticsCallbacks( - const EachStepCallback& onEachStepStarted, - const EachStepCallback& onEachStepStoped); - - /** - * @brief Remove beam search callbacks. - */ - void removeBeamSearchStatisticsCallbacks(); - - /** - * @brief Stop beam search for current source. - * - * Will restart beam search in the next forward - */ - void stopBeamSearch(); - - struct Path { - /** - * @brief ids, path of beam search. - */ - std::vector ids; - - /** - * @brief idsProb, log probability of each generated word. - */ - std::vector idsProb; - - /** - * @brief logProb, current probability of path. 
- */ - real logProb; - - int machineId; // index of sample in frame - int topIndex; // index of MaxIdLayer output in one sample - int seqId; // index of sequence in batch generation - std::vector machineIdVec; - - /** - * @brief A record of each node's probality in a formed path in beam search. - * - * @note It could be empty when history is not recorded. If the history is - * wanted to be recorded, recordHistory() MUST be invoked first. - */ - std::vector probHistory; - - /** - * @brief Path default ctor, first logProb is 0. - */ - Path() { - logProb = 0; - seqId = 0; - } - explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; } - - /** - * @brief Create a new path based on an old path and - * a new node with probability. - * - * @param old old path - * @param newId index of the new node - * @param logProb probability of the new node. - * @param machineId sample index of a frame in RNN - * @param topIndex index of MaxIdLayer output in one sample - */ - Path(Path& old, int newId, real logProb, int machineId, int topIndex) - : ids(old.ids), - idsProb(old.idsProb), - logProb(old.logProb + logProb), - machineId(machineId), - topIndex(topIndex), - seqId(old.seqId) { - ids.push_back(newId); - idsProb.push_back(logProb); - if (!old.probHistory.empty()) { - this->probHistory = old.probHistory; - // probHistory store current prob, not sum - this->probHistory.push_back(logProb); - } - } - - /** - * @brief operator < - * - * Path a < Path b means log probability of a is smaller than that of b - */ - bool operator<(const Path& other) const { - return (logProb < other.logProb); - } - - static bool greaterPath(const Path& a, const Path& b) { return (b < a); } - - /** - * @brief Start recording history in this path. - */ - void recordHistory() { this->probHistory.push_back(this->logProb); } - - /** - * @brief Adjust probability for DIY beam search interface. - * In normal situation, it will do nothing. - * - * @param calc_id: the object id for DIY beam search interface. 
- * @param atEos: at end of sequence or not. - */ - void adjustProb(int calc_id, bool atEos = false); - - /** - * @brief isDropable indacating whether the current node will be - * dropped or not in beam search. - * - * @note: if logProb is -inf, current node will be dropped. - * @return true to drop the current node. - */ - bool isDropable() const { return std::isinf(logProb) && logProb < 0; } - }; - - /** - * @brief access beam search results. - * @return beam search results. - */ - const std::vector>& getFinalPaths() const { - return this->finalPaths_; - } - - protected: - std::vector commonSeqInfo_; - ICpuGpuVectorPtr sequenceStartPositions_; - void calcSequenceStartPositions(); - void checkInputConsistency(int inlinkId, - const std::vector& seqInfo); - void reorganizeInput(PassType passType); - void reorganizeOutput(PassType passType); - void connectFrames(PassType passType); - void calcNumSequencesAtEachStep(); - - void resizeOrCreateFrames(int numFrames); - void resizeBootFrame(int numSequences); - - void generateSequence(); - void oneWaySearch(size_t batchSize); - void beamSearch(size_t batchSize); - - struct InFrameLine { - std::string linkName; - LayerPtr inLayer; - std::vector agents; // Scatter Agents to reform batch input - Argument outArg; // scatter output argument - }; - std::vector inFrameLines_; - - struct OutFrameLine { - std::string layerName; - LayerPtr agentLayer; - std::vector frames; - }; - std::vector outFrameLines_; - - struct MemoryFrameLine { - std::string layerName; - std::string linkName; - LayerPtr bootLayer; // actually used biasLayer or rootAgent - LayerPtr biasLayer; - LayerPtr rootLayer; // layer in root network to boot this memory - LayerPtr rootAgent; // agent to link rootLayer - std::vector frames; - std::vector agents; - std::vector scatterAgents; // scatter agent used by beam search - Argument outArg; // scatter output argument - // Different memoryFrameLine have different element as follows - IVectorPtr allIds; // scattered 
id of realLayer - ICpuGpuVectorPtr - sequenceStartPositions; // scattered sequenceStartPositions - }; - std::vector memoryFrameLines_; - - // Each inFrameLines(inlinks) has its own info(elements) below, - // and all outFrameLines(outlinks) share the info with one inFrameLine, - // which is assigned by targetInfoInlinkId_. - struct Info { - // The original positions in the original batch - IVectorPtr allIds; // scattered id of realLayer [batchSize] - - // index of allIds for each step [maxSequenceLength_] - // idIndex[i] is the total length of the first i sequences - std::vector idIndex; - - ICpuGpuVectorPtr - sequenceStartPositions; // scattered sequenceStartPositions - std::vector seqStartPosIndex; // index of sequenceStartPositions - }; - std::vector info_; // for input - - // numSeqs_[i] is the number sequences which is longer than i (for sequence - // data) or has more than i subsequences (for subsequence data) - // Equivalently, numSeqs_[i] is the number of sequences at step i; - std::vector numSeqs_; - - std::vector> seqInfos_; - - void checkOutputConsistency(OutFrameLine& outFrameLine); - - /* create scattered id infomation for all realLayer of inFrameLines one time. - * If hasSubseq, will also create scattered sequenceStartPositions infomation - * for all realLayer of inFrameLines one time. 
- */ - void createInFrameInfo(int inlinks_id, - const Argument& input, - PassType passType); - void createInFrameInfo_nonseq(int inlinks_id, - const Argument& input, - PassType passType); - void createInFrameInfo_seq(int inlinks_id, - const Argument& input, - PassType passType); - void createInFrameInfo_subseq(int inlinks_id, - const Argument& input, - PassType passType); - - void createOutFrameInfo(OutFrameLine& outFrameLine, - Info& info, - ICpuGpuVectorPtr& sequenceStartPositions, - ICpuGpuVectorPtr& subSequenceStartPositions); - void createOutFrameInfo_seq(OutFrameLine& outFrameLine, - Info& info, - ICpuGpuVectorPtr& sequenceStartPositions, - ICpuGpuVectorPtr& subSequenceStartPositions); - void createOutFrameInfo_subseq(OutFrameLine& outFrameLine, - Info& info, - ICpuGpuVectorPtr& sequenceStartPositions, - ICpuGpuVectorPtr& subSequenceStartPositions); - - void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine, - PassType passType); - - void copyScattedId(std::vector& srcIds, IVectorPtr* dstIds, int size); - - void selectRowsOneTime(LayerPtr layer, - const IVectorPtr& allIds, - Argument* arg, - PassType passType); - - void createSeqPos(const std::vector& sequenceStartPosition, - ICpuGpuVectorPtr* sequenceStartPositions); - - // for generator - struct EosFrameLine { - std::vector layers; - }; - std::unique_ptr eosFrameLine_; - - struct Generator { - GeneratorConfig config; - std::vector ids; // store generated sequences - std::vector idsProb; // log probability of each generated word - Argument outArg; // final output argument - }; - bool generating_; - Generator generator_; - - std::vector> frames_; - - NeuralNetwork* rootNetwork_; - bool reversed_; - - int maxSequenceLength_; // Max top-level length - bool useGpu_; - bool stopBeamSearch_; - - std::vector - parameterIds_; // parameters actually used by this Layer Group - - // store final argument of outFrameLines_ - std::vector dataArgs_; - // store each frame's output argument of outFrameLines_ - 
std::vector> dataArgsFrame_; - size_t dataArgsSize_; // size of dataArgs_ = size of dataArgsFrame_ - - IVectorPtr cpuId_; - MatrixPtr cpuProb_; - IVectorPtr cpuEos_; - - private: - /* - * @return beam size in beam search - */ - size_t getBeamSize() { return generator_.config.beam_size(); } - - /* - * @return number of sequence in a batch in generation - */ - size_t getGenBatchSize(); - - /* - * @brief store output of the machineCur-th frame during generation, for - * creating the final outlink after the entire generation process is finished. - * - * In generation, if the layer group has more than 1 outlink, the first - * one is reserved to store the generated word indices, the others are data - * outlinks, that can be used like a common layer in the network. - * - * @param machineCur : index to access the layer group frame in - * currrent generation step. - */ - void copyDataOutlinkFrame(size_t machineCur); - - /* - * @brief In generation, if the layer group has more than 1 outlink, outlink - * except the first one is a data outlink. In RecurrentLayerGroup, each time - * step is a separate Network, outputs of a layer inside the - * RecurrentLayerGroup are stored in separate Arguments. If one layer is - * specified as an outlink of RecurrentLayerGroup. This function will - * collect outputs in each time step of each generated sequence which are - * dispersed in separate Arguments to form a new single Argument as output of - * RecurrentLayerGroup. - */ - void createDataOutlink(); - - /* - * @brief decide to select how many rows from the Matrix stored the forward - * pass results from a start position. - * - * @param isSeq: a flag indicating whetehr the layer to be output of the - * RecurrentGradientMachine is a sequence or not - * @param outArgs: all of the the returned Arguments of the forward pass - * during the generation process. 
- * @param copySize: the returned result, number of rows to select from the - * Matrix stored the forward pass results from a start position. - */ - void createDataOutlinkCopySizeInfo(bool isSeq, - std::vector& outArgs, - std::vector& copySize); - - /* - * @brief decide index of the start row for each time step of a generated - * sequence in Matrix stored the entire beam search batch's forward pass - * results. - * - * @param isSeq: a flag indicating whether the layer to be output of the - * RecurrentGradientMachine is a sequence or not - * @param outArgs: all of the returned Arguments of the forward pass - * during the generation process. - */ - void createDataOutlinkSelRowsInfo(bool isSeq, std::vector& outArgs); - - /* - * @brief used in beam search, connect previous frame to form recurrent link - * @param stepId : iteration number of generation process. - * It equals to the length of longest half-generated sequence. - * @param paths : half-generated paths that are going to be expanded - * in current beam search iteration. - */ - void connectPrevFrame(int stepId, std::vector& paths); - - /* - * @brief used in beam search, forward current recurrent frame - * @param machineCur : index to access the layer group frame in - * currrent generation step. - */ - void forwardFrame(int machineCur); - - /* - * @brief reduce all expanded paths to beam size. 
- * - * @param newPaths : newPaths[totalExpandCount : ] stores all expanded paths - * for the seqId-th sequence - * @param seqId : sequence index in a batch - * @param totalExpandCount : number of already shrinked paths in newPaths - * @return size of retained paths at the end of a beam search iteration - */ - size_t beamShrink(std::vector& newPaths, - size_t seqId, - size_t totalExpandCount); - - /* - * @brief expand a single path to expandWidth new paths - * with highest probability - * @param curPath : path to be expanded - * @param curPathId : index of curPath in member newPaths - * @param expandWidth : number of paths to be expanded - */ - void singlePathExpand(Path& curPath, - size_t curPathId, - std::vector& newPaths, - size_t expandWidth); - - /* - * @brief A new beam search iteration. Each half-generated paths in previous - * beam search iteration are further expanded to beam_size new paths - * with highest probabilities, and then all the expanded paths are again - * reduced to beam_size paths according to their log probabilities. - * @param paths : half-generated paths in previous iteration. - * @param newPaths : paths expanded and then reduces in current iteration. - */ - void beamExpand(std::vector& paths, std::vector& newPaths); - - /* - * @brief fill sequence start positions and some other information that are - * uesed by the "text_printer" evaluator. 
- */ - void fillGenOutputs(); - - std::vector machineIds_; - std::vector topIds_; - std::vector seqIds_; - std::vector batchMachineIdVec_; - std::vector batchMachineStartPos_; - std::vector> finalPaths_; - std::vector minFinalPathLogProb_; - BeamSearchControlCallbacks* beamSearchCtrlCallbacks_; - BeamSearchStatisticsCallbacks* beamSearchStatistics_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/AddtoLayer.cpp b/paddle/legacy/gserver/layers/AddtoLayer.cpp deleted file mode 100644 index 39c5603d9389b433b77e2876f34b3061c62f68f0..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/AddtoLayer.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "AddtoLayer.h" - -#include "paddle/legacy/utils/Logging.h" - -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(addto, AddtoLayer); - -bool AddtoLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - - return true; -} - -void AddtoLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInputValue(0)->getHeight(); - int size = getSize(); - - reserveOutput(batchSize, size); - - MatrixPtr outV = getOutputValue(); - for (size_t i = 0; i != inputLayers_.size(); ++i) { - MatrixPtr input = getInputValue(i); - i == 0 ? outV->assign(*input) : outV->add(*input); - } - /* add the bias-vector */ - if (biases_.get() != NULL) { - outV->addBias(*(biases_->getW()), 1); - } - - /* activation */ { forwardActivation(); } -} - -void AddtoLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { backwardActivation(); } - - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - /* Calculate the input layers error */ - MatrixPtr preGrad = getInputGrad(i); - if (NULL != preGrad) { - preGrad->add(*getOutputGrad()); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/AddtoLayer.h b/paddle/legacy/gserver/layers/AddtoLayer.h deleted file mode 100644 index ad3cefe1a4d27953b2fef535e1b865175a2cadc2..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/AddtoLayer.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -/** - * This layer just simply add all input layers together, then activate - * the sum inputs. Each input of this layer should be the same size, - * which is also the output size of this layer. - * \f[ - * y=f(\sum_{i}x_i + b) - * \f] - * where \f$y\f$ is output, \f$x\f$ is input, \f$b\f$ is bias, and \f$f\f$ is - * activation function. - * - * The config file api is addto_layer. - */ -class AddtoLayer : public Layer { - protected: - std::unique_ptr biases_; - - public: - explicit AddtoLayer(const LayerConfig& config) : Layer(config) {} - - ~AddtoLayer() {} - - /** - * Intialization of AddtoLayer. - */ - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - /** - * Forward propagation. - * @note There is no weight matrix for each input, - * because it just a simple add operation. - */ - void forward(PassType passType) override; - - /** - * Backward propagation. 
- */ - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/AgentLayer.cpp b/paddle/legacy/gserver/layers/AgentLayer.cpp deleted file mode 100644 index bae89b2fa34d156adae1305d78d6c1465ccdd0ae..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/AgentLayer.cpp +++ /dev/null @@ -1,281 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "AgentLayer.h" - -#include "paddle/legacy/utils/Logging.h" - -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(agent, AgentLayer); - -bool AgentLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - CHECK_EQ(config_.inputs_size(), 0); - if (!Layer::init(layerMap, parameterMap)) { - return false; - } - setNeedGradient(true); - return true; -} - -void AgentLayer::forward(PassType passType) { - Layer::forward(passType); - - Argument& realOutput = realLayer_->getOutput(); - int realNumSequences = realOutput.getNumSequences(); - CHECK_LE(numSamples_, realNumSequences); - - // get Arguments from real layers - if (numSamples_ > 0 && numSamples_ < realNumSequences) { - if (realOutput.hasSeq()) { - int numRows = - realOutput.sequenceStartPositions->getData(false)[numSamples_]; - output_.subArgFrom(realOutput, - /* offset */ 0, - numRows, - getSize(), - useGpu_, - /* trans */ false, - /* seqFlag */ true, - /* seqStart */ 0, - 
/* seqSize */ numSamples_ + 1); - } else { - output_.subArgFrom( - realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_); - } - } else { - output_ = realOutput; - } -} - -bool GatherAgentLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - CHECK_EQ(config_.inputs_size(), 0); - if (!Layer::init(layerMap, parameterMap)) { - return false; - } - setNeedGradient(true); - return true; -} - -void GatherAgentLayer::copyIdAndSequenceInfo( - ICpuGpuVectorPtr sequenceStartPositions, - ICpuGpuVectorPtr subSequenceStartPositions, - const IVectorPtr& ids, - const std::vector& idIndex) { - output_.sequenceStartPositions = sequenceStartPositions; - output_.subSequenceStartPositions = subSequenceStartPositions; - allIds_ = ids; - idIndex_ = idIndex; -} - -void GatherAgentLayer::forward(PassType passType) { - Layer::forward(passType); - forwardIds(passType); - forwardValue(passType); -} - -void GatherAgentLayer::forwardValue(PassType passType) { - MatrixPtr valueReal = realLayers_[0]->getOutputValue(); - if (!valueReal) return; - - int height = allIds_->getSize(); - int width = this->getSize(); - resetOutput(height, width); - idsVec_.resize(idIndex_.size()); - - const MatrixPtr& outV = getOutputValue(); - - for (size_t i = 0; i < realLayers_.size(); ++i) { - const MatrixPtr& realV = realLayers_[i]->getOutputValue(); - idsVec_[i] = IVector::create(allIds_->getData() + idIndex_[i], - /* size */ realV->getHeight(), - useGpu_); - realV->addToRows(*outV, *idsVec_[i]); - } -} - -namespace { - -// dest[index[i]] <- src[i] for each i -void copyElements(const IVector& srcVec, - const IVector& indexVec, - IVector& destVec) { - const int* src = srcVec.getData(); - const int* index = indexVec.getData(); - int* dest = destVec.getData(); - int len = indexVec.getSize(); - CHECK_EQ(srcVec.getSize(), indexVec.getSize()); - for (int i = 0; i < len; ++i) { - dest[index[i]] = src[i]; - } -} -} // namespace - -void GatherAgentLayer::forwardIds(PassType passType) { - 
IVectorPtr realId = realLayers_[0]->getOutputLabel(); - if (!realId) return; - - IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_); - IVectorPtr outId = output_.ids; - idsVec_.resize(idIndex_.size()); - - for (size_t i = 0; i < realLayers_.size(); ++i) { - const IVectorPtr& realId = realLayers_[i]->getOutputLabel(); - idsVec_[i] = IVector::create(allIds_->getData() + idIndex_[i], - /* size */ realId->getSize(), - useGpu_); - execViaCpu(©Elements, *realId, *idsVec_[i], *outId); - } -} - -void GatherAgentLayer::backward(const UpdateCallback& callback) { - (void)callback; - const MatrixPtr& outputGrad = getOutputGrad(); - - for (size_t i = 0; i < realLayers_.size(); ++i) { - const MatrixPtr& realG = realLayers_[i]->getOutputGrad(); - if (realG) { - realG->selectRows(*outputGrad, *idsVec_[i]); - } - } -} - -bool ScatterAgentLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - CHECK_EQ(config_.inputs_size(), 0); - if (!Layer::init(layerMap, parameterMap)) { - return false; - } - setNeedGradient(true); - return true; -} - -void ScatterAgentLayer::forward(PassType passType) { - Layer::forward(passType); - CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); - - int width = this->getSize(); - if (selectionMode_) { - forwardWithSelection(passType); - } else { - if (realOutArg_.hasSeq()) { - output_.subArgFrom(realOutArg_, - /* offset */ idIndex_, - idSize_, - width, - useGpu_, - /* trans */ false, - /* seqFlag */ true, - /* seqStart */ seqStartPosIndex_, - /* seqSize */ numSequences_); - } else { - output_.subArgFrom( - realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_); - } - } -} - -void ScatterAgentLayer::backward(const UpdateCallback& callback) { - (void)callback; - - CHECK(!selectionMode_); - - const MatrixPtr& outputGrad = realOutArg_.grad; - const MatrixPtr& realGrad = realLayer_->getOutputGrad(); - if (realGrad) { - // for agent in inFrameLines and memoryFrameLines, - // only first scatterAgentLayer should do 
addToRows in backward - if (handleBackward_) { - outputGrad->addToRows(*realGrad, *ids_); - } - } -} - -REGISTER_LAYER(gather_agent, GatherAgentLayer); -REGISTER_LAYER(scatter_agent, ScatterAgentLayer); - -void ScatterAgentLayer::forwardWithSelection(PassType passType) { - Layer::forward(passType); - CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); - - const Argument& input = realLayer_->getOutput(); - CHECK_EQ(realLayer_->getSize(), this->getSize()); - int width = this->getSize(); - - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str()); - - if (!input.hasSeq()) { - if (realLayer_->getOutput().ids) { - IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_); - output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_); - } - if (realLayer_->getOutput().value) { - int height = ids_->getSize(); - resetOutput(height, width); - - const MatrixPtr& outV = getOutputValue(); - const MatrixPtr& realV = realLayer_->getOutputValue(); - outV->selectRows(*realV, *ids_); - } - } else { - // Putting the generation logic here is really an ugly hack! - // used in generation - int height = 0; - size_t numSequences = ids_->getSize(); - const int* starts = input.getCpuStartPositions(); - size_t size = input.hasSubseq() ? 
input.getNumSubSequences() - : input.getNumSequences(); - const int* cpuIds = cpuIds_->getData(); - - for (size_t i = 0; i < numSequences; ++i) { - size_t seqId = cpuIds[i]; - CHECK_LT(seqId, size); - height += starts[seqId + 1] - starts[seqId]; - } - reserveOutput(height, width); - - const MatrixPtr& outputValue = getOutputValue(); - - CHECK_NE(input.sequenceStartPositions.get(), - output_.sequenceStartPositions.get()); - ICpuGpuVector::resizeOrCreate( - output_.sequenceStartPositions, numSequences + 1, false); - int* outStarts = output_.sequenceStartPositions->getMutableData(false); - - ICpuGpuVector::resizeOrCreate(inputStartPos_, height, false); - int* inStarts = inputStartPos_->getMutableData(false); - - size_t offsetOut = 0; - for (size_t i = 0; i < numSequences; ++i) { - outStarts[i] = offsetOut; - size_t seqId = cpuIds[i]; - int size = starts[seqId + 1] - starts[seqId]; - for (int j = 0; j < size; j++) { - inStarts[offsetOut + j] = starts[seqId] + j; - } - offsetOut += size; - } - outStarts[numSequences] = offsetOut; - - outputValue->copyByRowIndex(*input.value, - *inputStartPos_->getVector(useGpu_)); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/AgentLayer.h b/paddle/legacy/gserver/layers/AgentLayer.h deleted file mode 100644 index a05eac5e704466df02a74ce6e5364ab6f03f7446..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/AgentLayer.h +++ /dev/null @@ -1,177 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -/** - * AgentLayer use as a virtual input of another layer in config, - * before execute forward/backward, setRealLayer() should be - * called to set one and only one real layer - */ -class AgentLayer : public Layer { - protected: - LayerPtr realLayer_; - int numSamples_; - - public: - explicit AgentLayer(const LayerConfig& config) : Layer(config) {} - - ~AgentLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - // if *numSamples* set, - // real layer output will only use first *numSamples* rows - void setRealLayer(LayerPtr layer, int numSamples = 0) { - realLayer_ = layer; - numSamples_ = numSamples; - } - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override {} -}; - -/** - * Like AgentLayer, but it can gather many real layers. Each real - * layer give a few rows of a sequence, after gather all real layers, - * GatherAgentLayer collect a complete sequence. 
- */ -class GatherAgentLayer : public Layer { - protected: - std::vector realLayers_; - std::vector idsVec_; - // we don't clear idsVec_ vector to aviod IVector alloc/free - IVectorPtr allIds_; - std::vector idIndex_; - - public: - explicit GatherAgentLayer(const LayerConfig& config) : Layer(config) {} - - virtual ~GatherAgentLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - // call before addRealLayer - void clearRealLayers() { realLayers_.clear(); } - - void copyIdAndSequenceInfo(ICpuGpuVectorPtr sequenceStartPositions, - ICpuGpuVectorPtr subSequenceStartPositions, - const IVectorPtr& allIds, - const std::vector& idIndex); - - // add one real layer, can call many times - void addRealLayer(LayerPtr layer) { realLayers_.push_back(layer); } - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - void forwardValue(PassType passType); - void forwardIds(PassType passType); -}; - -/** - * Like AgentLayer, but only select a few rows in real layer. - * [idIndex, idIndex + idSize) of *ids* in setRealLayerAndOutput() - * are the selected row ids. It's used to scatter one layer's output - * to many small submodels. ScatterAgentLayer can support ids real layer, - * if it is, the agent will select a few ids in real layer. - */ -class ScatterAgentLayer : public Layer { - protected: - LayerPtr realLayer_; - IVectorPtr ids_; - IVectorPtr cpuIds_; - Argument realOutArg_; - int idIndex_; - int idSize_; - int seqStartPosIndex_; - int numSequences_; // number of sequences in this scatterAgentLayer - bool handleBackward_; - - // use to store expanded cpuStartPositions or subSequenceStartPositions - // of real layer. 
- ICpuGpuVectorPtr inputStartPos_; - - // true for setRealLayer, false for setRealLayerAndOutput - bool selectionMode_; - - public: - explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {} - - virtual ~ScatterAgentLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - /** - * @brief set real layer in generation - * - * @param layer[input] realLayer - * @param ids[input] row id in real layer - * @param copyId[input] whether to copy a cpu version of ids, - * false(default) in ScatterAgentLayer, and - * true in SequenceScatterAgentLayer. - */ - void setRealLayer(LayerPtr layer, const std::vector& ids) { - realLayer_ = layer; - IVector::resizeOrCreate(ids_, ids.size(), useGpu_); - ids_->copyFrom(ids.data(), ids.size()); - if (useGpu_) { - IVector::resizeOrCreate(cpuIds_, ids.size(), false); - cpuIds_->copyFrom(ids.data(), ids.size()); - } else { - cpuIds_ = ids_; - } - selectionMode_ = true; - } - - // set real layer and output, [idIndex, idIndex + idSize) of *ids* - // are selected row for realOutArg in realLayer - void setRealLayerAndOutput(LayerPtr layer, - const Argument& outArg, - const IVectorPtr& ids, - int idIndex, - int idSize, - bool handleBackward) { - realLayer_ = layer; - realOutArg_ = outArg; - ids_ = ids; - idIndex_ = idIndex; - idSize_ = idSize; - handleBackward_ = handleBackward; - selectionMode_ = false; - } - - void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions, - int seqStartPosIndex, - int numSequences) { - realOutArg_.sequenceStartPositions = sequenceStartPositions; - seqStartPosIndex_ = seqStartPosIndex; - numSequences_ = numSequences; - } - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - - void forwardWithSelection(PassType passType); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/AverageLayer.cpp b/paddle/legacy/gserver/layers/AverageLayer.cpp deleted file mode 100644 index 
0539da793712527c72792603ae28a1d0aa903bcc..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/AverageLayer.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "AverageLayer.h" - -#include "paddle/legacy/utils/Logging.h" - -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(average, AverageLayer); - -bool AverageLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - SequencePoolLayer::init(layerMap, parameterMap); - - // average strategy - if (config_.average_strategy() == "average") { - mode_ = kAverage; - } else if (config_.average_strategy() == "sum") { - mode_ = kSum; - } else if (config_.average_strategy() == "squarerootn") { - mode_ = kAverageSquareRootN; - } else { - LOG(FATAL) << "Unknown average strategy: " << config_.average_strategy(); - } - return true; -} - -void AverageLayer::forward(PassType passType) { - SequencePoolLayer::forward(passType); - - MatrixPtr inputValue = getInputValue(0); - getOutputValue()->sequenceAvgForward( - *inputValue, *startPositions_->getVector(useGpu_), mode_); - - /* add the bias-vector AFTER average operation */ - if (biases_.get() != NULL) { - MatrixPtr outV = getOutputValue(); - outV->addBias(*(biases_->getW()), 1); - } - - /* activation */ { forwardActivation(); } -} - -void AverageLayer::backward(const UpdateCallback& callback) { - 
SequencePoolLayer::backward(callback); - - if (getInputGrad(0)) { - getInputGrad(0)->sequenceAvgBackward( - *getOutputGrad(), *startPositions_->getVector(useGpu_), mode_); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/AverageLayer.h b/paddle/legacy/gserver/layers/AverageLayer.h deleted file mode 100644 index a0d457d35f4bce99860cf45e94525f323f45e286..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/AverageLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "SequencePoolLayer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * A layer for "internal average" for sequence input. - * Input: one or more sequences. Each sequence contains some instances. - * If SequenceLevel = kNonSeq: - * Output: output size is the number of input sequences (NOT input instances) - * output[i] = average_{for each instance in this sequence}{input[i]} - * If stride_ > 0: - * Output: a shorten sequence. Stride is the step size by which we slide a - * window upon the input sequence, and the average pooling - * operation is then applied to each interval independently. 
- * If SequenceLevel = kSeq: - * Check input sequence must has sub-sequence - * Output: output size is the number of input sub-sequences - * output[i] = average_{for each instance in this sub-sequence}{input[i]} - * - * The config file api is pooling_layer. - */ -class AverageLayer : public SequencePoolLayer { - public: - enum AverageStrategy { kAverage = 0, kSum = 1, kAverageSquareRootN = 2 }; - explicit AverageLayer(const LayerConfig& config) - : SequencePoolLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - int mode_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/BatchNormBaseLayer.cpp b/paddle/legacy/gserver/layers/BatchNormBaseLayer.cpp deleted file mode 100644 index 4dcbd8dc270d5e5329b33b366ac937894833085f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/BatchNormBaseLayer.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "BatchNormBaseLayer.h" -#include "BatchNormalizationLayer.h" -#include "Layer.h" -#include "paddle/legacy/utils/Stat.h" -#ifdef PADDLE_WITH_CUDA -#include "CudnnBatchNormLayer.h" -#endif - -namespace paddle { - -bool BatchNormBaseLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - if (!Layer::init(layerMap, parameterMap)) return false; - - /* initialize the weightList */ - // first is Input in configure - // other two is created in config_parser.py - CHECK_EQ(inputLayers_.size(), 3U); - CHECK_EQ(inputLayers_.size(), parameters_.size()); - CHECK_EQ(inputLayers_.size(), size_t(config_.inputs_size())); - const ImageConfig& conf = config_.inputs(0).image_conf(); - channels_ = conf.channels(); - calFeatureMapSize(); - - if (config_.has_use_global_stats()) { - useGlobalStats_ = config_.use_global_stats(); - } - movingAvgFraction_ = config_.moving_average_fraction(); - epsilon_ = config_.epsilon(); - - weight_.reset(new Weight(1, channels_, parameters_[0])); - movingMean_.reset(new Weight(1, channels_, parameters_[1])); - movingVar_.reset(new Weight(1, channels_, parameters_[2])); - - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, channels_, biasParameter_)); - } - - savedMean_ = Matrix::create(1, channels_, false, useGpu_); - savedInvVar_ = Matrix::create(1, channels_, false, useGpu_); - savedMean_->zeroMem(); - savedInvVar_->zeroMem(); - - return true; -} - -void BatchNormBaseLayer::calFeatureMapSize() { - const ImageConfig& conf = config_.inputs(0).image_conf(); - imageH_ = inputLayers_[0]->getOutput().getFrameHeight(); - imageW_ = inputLayers_[0]->getOutput().getFrameWidth(); - imageD_ = inputLayers_[0]->getOutput().getFrameDepth(); - - if (0 == imageD_) imageD_ = conf.img_size_z(); - if (imageH_ == 0 && imageW_ == 0) { - imageH_ = conf.has_img_size_y() ? 
conf.img_size_y() : conf.img_size(); - imageW_ = conf.img_size(); - } else { - getOutput().setFrameHeight(imageH_); - getOutput().setFrameWidth(imageW_); - getOutput().setFrameDepth(imageD_); - } - imgPixels_ = imageH_ * imageW_ * imageD_; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/BatchNormBaseLayer.h b/paddle/legacy/gserver/layers/BatchNormBaseLayer.h deleted file mode 100644 index 8dc1d7883767b4aabc8501531996036c2def9481..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/BatchNormBaseLayer.h +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief Batch normalization layer use to normalizes the input to across the - * batch. - * - * By default, calculating global mean and variance statistics via a running - * average in the training peroid. Then the pre-calculated global mean and - * variance are used for testing. - * - * Moving mean and variance are located in Parameter object when constructing - * and the calculation will change them. Now we only save global mean and - * variance of one thread in first node for GPU. - * But the calculation in CPU is different, because parameters are shared by - * multiple threads. Here using ShareCpuMatrix with lock to calculate. 
We - * still save global mean and variance in first node in CPU when multi machine. - * - * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network - * Training by Reducing Internal Covariate Shift." arXiv preprint - * arXiv:1502.03167 (2015). - */ - -class BatchNormBaseLayer : public Layer { - public: - explicit BatchNormBaseLayer(const LayerConfig& config) : Layer(config) {} - - ~BatchNormBaseLayer() {} - - /** - * @brief Create BatchNorm layer by norm_type, including batch_norm and - * cudnn_batch_norm. If do not set norm_type, it will automatically select - * cudnn_batch_norm for GPU and batch_norm for CPU. - */ - static Layer* create(const LayerConfig& config); - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - /** - * @brief Calculate feature map size. Some input uses frameHeight and - * frameWidth to store feature size - */ - void calFeatureMapSize(); - - protected: - /// Batch normalization scale parameter, which is referred to as gamma in - /// in original paper. - std::unique_ptr weight_; - /// Moving average of mean. - std::unique_ptr movingMean_; - /// Moving average of variance. - std::unique_ptr movingVar_; - /// Batch normalization bias parameter, which is referred to as beta in - /// in original paper. - std::unique_ptr biases_; - - /// Save intermediate results computed during the forward pass, - /// these can then be reused to speed up the backward pass. - MatrixPtr savedMean_; - MatrixPtr savedInvVar_; - - /// Height or width of input image feature. - /// Both of them are 1 if the input is fully-connected layer. - int imageD_; - int imageH_; - int imageW_; - /// Height * Width. - int imgPixels_; - /// Feature dimension. If the input layer is conv layer, it is the channels - /// of feature map of the conv layer. If the input layer is fully-connected - /// layer, it is the dimension of fc layer. - int channels_; - // if useGlobalStats_ is true, will use the loaded mean and variance. 
- // otherwise, calculate mean and variance in this mini-batch. - bool useGlobalStats_; - // use to compute moving mean and variance. - real movingAvgFraction_; - // Epsilon is a small random noise used in batch normalization for stability. - real epsilon_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/BatchNormalizationLayer.cpp b/paddle/legacy/gserver/layers/BatchNormalizationLayer.cpp deleted file mode 100644 index 0297bd44c7b0485f34598f6926e5337da452460d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/BatchNormalizationLayer.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/Stat.h" -#ifdef PADDLE_WITH_CUDA -#include "hl_batch_transpose.h" -#endif -#include "BatchNormalizationLayer.h" - -namespace paddle { - -REGISTER_LAYER(batch_norm, BatchNormalizationLayer); - -bool BatchNormalizationLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - if (!BatchNormBaseLayer::init(layerMap, parameterMap)) return false; - - return true; -} - -void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) { - int numSamples = mat->getHeight(); - Matrix::resizeOrCreate(tmpMat_, numSamples, channels_, false, useGpu_); - savedMean_->zeroMem(); - savedMean_->accumulateColSum(*mat); - savedMean_->mulScalar(1.0 / numSamples); // E[x] - - tmpMat_->assign(*mat); - tmpMat_->square2(); - savedInvVar_->zeroMem(); - savedInvVar_->accumulateColSum(*tmpMat_); - savedInvVar_->mulScalar(1.0 / numSamples); // E[x^2] - savedInvVar_->addSquare(*savedMean_, -1.0); // E[x^2] - E^2[x] - - // Variance may be small negative value - // because of the subtraction operation. - // Here using clipping. 
- savedInvVar_->downClip(real(0.0)); - - calMovingMeanAndVar(); - - savedInvVar_->subScalar(-epsilon_); - savedInvVar_->sqrt2(*savedInvVar_); -} - -void BatchNormalizationLayer::calMovingMeanAndVar() { - // calculating and saving moving mean and variance - auto& movingMean = movingMean_->getW(); - auto& movingVar = movingVar_->getW(); - // movingMean = movingMean * movingAvgFraction_ - // + savedMean_ * (1 - movingAvgFraction_) - movingMean->add(*savedMean_, movingAvgFraction_, 1.0 - movingAvgFraction_); - // movingVar = movingVar * movingAvgFraction_ - // + savedInvVar_ * (1 - movingAvgFraction_) - movingVar->add(*savedInvVar_, movingAvgFraction_, 1.0 - movingAvgFraction_); -} - -void BatchNormalizationLayer::setMeanAndStd() { - savedMean_->copyFrom(*(movingMean_->getW())); - savedInvVar_->copyFrom(*(movingVar_->getW())); - savedInvVar_->downClip(real(0.0)); - - savedInvVar_->subScalar(-epsilon_); - savedInvVar_->sqrt2(*savedInvVar_); -} - -void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) { - CHECK_EQ(in->getWidth(), static_cast(channels_ * imgPixels_)); - CHECK_EQ(out->getWidth(), static_cast(channels_)); - CHECK(!in->isTransposed()); - CHECK(!out->isTransposed()); - if (imgPixels_ == 1) { - out->assign(*in); - return; - } - size_t batchSize = in->getHeight(); - CHECK_EQ(out->getHeight(), batchSize * imgPixels_); - if (useGpu_) { -#ifndef PADDLE_WITH_CUDA - LOG(FATAL) << "paddle is compiled only for cpu"; -#else - batchTranspose( - in->getData(), out->getData(), imgPixels_, channels_, batchSize); -#endif - } else { - for (size_t i = 0; i < batchSize; i++) { - const MatrixPtr inTmp = - Matrix::create(in->getData() + i * imgPixels_ * channels_, - channels_, - imgPixels_, - false, - useGpu_); - MatrixPtr outTmp = - Matrix::create(out->getData() + i * imgPixels_ * channels_, - imgPixels_, - channels_, - false, - useGpu_); - inTmp->transpose(outTmp, false); - } - } -} - -void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, 
MatrixPtr& out) { - CHECK_EQ(in->getWidth(), static_cast(channels_)); - CHECK_EQ(out->getWidth(), static_cast(channels_ * imgPixels_)); - size_t batchSize = out->getHeight(); - CHECK(!in->isTransposed()); - CHECK(!out->isTransposed()); - if (imgPixels_ == 1) { - out->assign(*in); - return; - } - CHECK_EQ(in->getHeight(), static_cast(batchSize * imgPixels_)); - if (useGpu_) { -#ifndef PADDLE_WITH_CUDA - LOG(FATAL) << "paddle is compiled only for cpu"; -#else - batchTranspose( - in->getData(), out->getData(), channels_, imgPixels_, batchSize); -#endif - } else { - for (size_t i = 0; i < batchSize; i++) { - const MatrixPtr inTmp = - Matrix::create(in->getData() + i * channels_ * imgPixels_, - imgPixels_, - channels_, - false, - useGpu_); - MatrixPtr outTmp = - Matrix::create(out->getData() + i * imgPixels_ * channels_, - channels_, - imgPixels_, - useGpu_); - inTmp->transpose(outTmp, false); - } - } -} - -void BatchNormalizationLayer::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = getInputValue(0)->getHeight(); - calFeatureMapSize(); - resetOutput(batchSize, getInputValue(0)->getWidth()); - - // for testing in training peroid. - useGlobalStats_ = (passType == PASS_TEST); - if (passType == PASS_TEST && config_.has_use_global_stats()) { - useGlobalStats_ = config_.use_global_stats(); - } - - Matrix::resizeOrCreate( - expandedIn_, batchSize * imgPixels_, channels_, false, useGpu_); - Matrix::resizeOrCreate( - normIn_, batchSize * imgPixels_, channels_, false, useGpu_); - Matrix::resizeOrCreate( - expandedOut_, batchSize * imgPixels_, channels_, false, useGpu_); - expandMat(getInputValue(0), expandedIn_); - - if (useGlobalStats_) { - if (firstTest_) { - setMeanAndStd(); - firstTest_ = false; - } - } else { - calMeanAndStd(expandedIn_); - firstTest_ = true; - } - - normIn_->assign(*expandedIn_); - normIn_->addBias(*savedMean_, -1); // subtract mean. - normIn_->divRowVector(*savedInvVar_); // divide std. 
- - expandedOut_->assign(*normIn_); - expandedOut_->mulRowVector(*weight_->getW()); // multiple gamma. - if (biases_) { - expandedOut_->addBias(*(biases_->getW()), 1); // add beta. - } - MatrixPtr out = getOutputValue(); - shrinkMat(expandedOut_, out); - - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void BatchNormalizationLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - int batchSize = getInputValue(0)->getHeight(); - - Matrix::resizeOrCreate(meanGrad_, 1, channels_, false, useGpu_); - Matrix::resizeOrCreate(stdGrad_, 1, channels_, false, useGpu_); - - Matrix::resizeOrCreate( - expandedInGrad_, batchSize * imgPixels_, channels_, false, useGpu_); - Matrix::resizeOrCreate( - inGrad_, batchSize, imgPixels_ * channels_, false, useGpu_); - Matrix::resizeOrCreate( - normInGrad_, batchSize * imgPixels_, channels_, false, useGpu_); - Matrix::resizeOrCreate( - expandedOutGrad_, batchSize * imgPixels_, channels_, false, useGpu_); - Matrix::resizeOrCreate( - tmpMat_, batchSize * imgPixels_, channels_, false, useGpu_); - Matrix::resizeOrCreate( - tmpGrad_, batchSize * imgPixels_, channels_, false, useGpu_); - - expandMat(getOutputGrad(), expandedOutGrad_); - - // compute derivatives. - if (biases_ && biases_->getWGrad()) { - REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str()); - biases_->getWGrad()->collectBias(*expandedOutGrad_, 1); - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - if (weight_->getWGrad()) { - tmpMat_->dotMul(*expandedOutGrad_, *normIn_); - weight_->getWGrad()->collectBias(*tmpMat_, 1); - } - - // compute input gradients. - normInGrad_->assign(*expandedOutGrad_); - normInGrad_->mulRowVector(*(weight_->getW())); // multiple gamma. 
- // normInGrad * (x - \mu)/ \sqrt(\delta^2) - tmpMat_->dotMul(*normInGrad_, *normIn_); - stdGrad_->zeroMem(); - stdGrad_->collectBias(*tmpMat_, -1.0 / (batchSize * imgPixels_)); - tmpGrad_->assign(*normIn_); - tmpGrad_->mulRowVector(*stdGrad_); - - meanGrad_->zeroMem(); - meanGrad_->collectBias(*normInGrad_, -1.0 / (batchSize * imgPixels_)); - - expandedInGrad_->zeroMem(); - expandedInGrad_->add(*normInGrad_, *tmpGrad_); - expandedInGrad_->addRowVector(*meanGrad_); - expandedInGrad_->divRowVector(*savedInvVar_); - - shrinkMat(expandedInGrad_, inGrad_); - if (getInputGrad(0)) { - getInputGrad(0)->add(*getInputGrad(0), *inGrad_); - } - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weight_->getParameterPtr()->incUpdate(callback); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/BatchNormalizationLayer.h b/paddle/legacy/gserver/layers/BatchNormalizationLayer.h deleted file mode 100644 index e5e4e690b6017f32de0f4d7557065c02c03d689f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/BatchNormalizationLayer.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "BatchNormBaseLayer.h" -#include "Layer.h" - -namespace paddle { - -/** - * @brief A Inheritance class of Batch normalization layer. - * It supports both CPU and GPU. - * - * The config file api is batch_norm_layer. 
- */ - -class BatchNormalizationLayer : public BatchNormBaseLayer { - public: - explicit BatchNormalizationLayer(const LayerConfig& config) - : BatchNormBaseLayer(config), firstTest_(true) {} - - ~BatchNormalizationLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - /// Load pre-calculated mean and std. - void setMeanAndStd(); - - /// Calculate mean and std. - void calMeanAndStd(const MatrixPtr& mat); - - /// Calculate moving mean and variance. - void calMovingMeanAndVar(); - - /// expand a Matrix from batch, channels* imagePixels to - /// batch * ImagePixels * channels. - void expandMat(const MatrixPtr& in, MatrixPtr& out); - - /// Shrink a Matrix from from batch * ImagePixels * channels - /// to batch, channels* imagePixels. - void shrinkMat(const MatrixPtr& in, MatrixPtr& out); - - void onPassEnd() override { firstTest_ = true; } - - MatrixPtr tmpMat_, tmpGrad_; - MatrixPtr expandedIn_, expandedOut_; - MatrixPtr expandedInGrad_, expandedOutGrad_, inGrad_; - MatrixPtr normIn_, normInGrad_, meanGrad_, stdGrad_; - - /// Load mean and variance only once flag. - bool firstTest_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/BilinearInterpLayer.cpp b/paddle/legacy/gserver/layers/BilinearInterpLayer.cpp deleted file mode 100644 index a091f51bc20e219c3111fb07058b5adea5a3fc38..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/BilinearInterpLayer.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "BilinearInterpLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(bilinear_interp, BilinearInterpLayer); - -size_t BilinearInterpLayer::getSize() { - inImgH_ = inputLayers_[0]->getOutput().getFrameHeight(); - inImgW_ = inputLayers_[0]->getOutput().getFrameWidth(); - - const BilinearInterpConfig& conf = config_.inputs(0).bilinear_interp_conf(); - if (inImgH_ == 0) { - inImgH_ = conf.image_conf().img_size_y(); - } - if (inImgW_ == 0) { - inImgW_ = conf.image_conf().img_size(); - } - - outImgH_ = conf.out_size_y(); - outImgW_ = conf.out_size_x(); - numChannels_ = conf.image_conf().channels(); - - CHECK(outImgH_ > 0 && outImgW_ > 0); - CHECK(inImgH_ > 0 && inImgW_ > 0); - CHECK(numChannels_); - - ratioH_ = - (outImgH_ > 1) ? static_cast(inImgH_ - 1) / (outImgH_ - 1) : 0.f; - ratioW_ = - (outImgW_ > 1) ? 
static_cast(inImgW_ - 1) / (outImgW_ - 1) : 0.f; - - getOutput().setFrameHeight(outImgH_); - getOutput().setFrameWidth(outImgW_); - return outImgH_ * outImgW_ * numChannels_; -} - -bool BilinearInterpLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(1, config_.inputs_size()); - - return true; -} - -void BilinearInterpLayer::forward(PassType passType) { - Layer::forward(passType); - - size_t batchSize = getInput(0).getBatchSize(); - size_t size = getSize(); - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - resetOutput(batchSize, size); - } - - MatrixPtr inV = getInputValue(0); - MatrixPtr outV = getOutputValue(); - { - REGISTER_TIMER_INFO("FwBilinearInterpTimer", getName().c_str()); - outV->bilinearForward(*inV, - inImgH_, - inImgW_, - outImgH_, - outImgW_, - numChannels_, - ratioH_, - ratioW_); - } -} - -void BilinearInterpLayer::backward(const UpdateCallback& callback) { - (void)callback; - - MatrixPtr inputG = getInputGrad(0); - MatrixPtr outG = getOutputGrad(); - { - REGISTER_TIMER_INFO("BwBilinearInterpTimer", getName().c_str()); - if (inputG) { - inputG->bilinearBackward(*outG, - outImgH_, - outImgW_, - inImgH_, - inImgW_, - numChannels_, - ratioH_, - ratioW_); - } - } -} -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/BilinearInterpLayer.h b/paddle/legacy/gserver/layers/BilinearInterpLayer.h deleted file mode 100644 index c585a5ed10d9c8f241b5a5ff3a671752fda6d432..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/BilinearInterpLayer.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief A layer for bilinear interpolation which is - * used on conv layer output. - * - * @note The config file api is bilinear_interp_layer. - */ -class BilinearInterpLayer : public Layer { - protected: - size_t outImgH_, outImgW_; - size_t inImgH_, inImgW_; - real ratioH_, ratioW_; - size_t numChannels_; - - public: - explicit BilinearInterpLayer(const LayerConfig& config) : Layer(config) {} - - virtual ~BilinearInterpLayer() {} - - size_t getSize(); - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/BlockExpandLayer.cpp b/paddle/legacy/gserver/layers/BlockExpandLayer.cpp deleted file mode 100644 index 24b5af67d40958c940eb0864994e7e81464f6c70..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/BlockExpandLayer.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "BlockExpandLayer.h" - -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -REGISTER_LAYER(blockexpand, BlockExpandLayer); - -bool BlockExpandLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(config_.inputs_size(), 1); - const BlockExpandConfig& blockConf = config_.inputs(0).block_expand_conf(); - blockH_ = blockConf.block_y(); - blockW_ = blockConf.block_x(); - strideH_ = blockConf.stride_y(); - strideW_ = blockConf.stride_x(); - paddingH_ = blockConf.padding_y(); - paddingW_ = blockConf.padding_x(); - channels_ = blockConf.channels(); - imgSizeH_ = blockConf.img_size_y(); - imgSizeW_ = blockConf.img_size_x(); - - std::vector strides = {(size_t)strideH_, (size_t)strideW_}; - std::vector paddings = {(size_t)paddingH_, (size_t)paddingW_}; - std::vector blocks = {(size_t)blockH_, (size_t)blockW_}; - createFunction(forward_, - "BlockExpand", - FuncConfig() - .set("strides", strides) - .set("paddings", paddings) - .set("blocks", blocks)); - createFunction(backward_, - "BlockExpandGrad", - FuncConfig() - .set("strides", strides) - .set("paddings", paddings) - .set("blocks", blocks)); - - return true; -} - -size_t BlockExpandLayer::getBlockNum() { - CHECK_EQ(inputLayers_.size(), 1UL); - const BlockExpandConfig& blockConf = config_.inputs(0).block_expand_conf(); - imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); - imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (imgSizeH_ == 0) { - imgSizeH_ = 
blockConf.img_size_y(); - } - if (imgSizeW_ == 0) { - imgSizeW_ = blockConf.img_size_x(); - } - size_t tmpH = 2 * paddingH_ + imgSizeH_ - blockH_; - outputH_ = (int)tmpH < 0 ? 1 : 1 + (tmpH + strideH_ - 1) / strideH_; - size_t tmpW = 2 * paddingW_ + imgSizeW_ - blockW_; - outputW_ = (int)tmpW < 0 ? 1 : 1 + (tmpW + strideW_ - 1) / strideW_; - - return outputH_ * outputW_; -} - -void BlockExpandLayer::forward(PassType passType) { - Layer::forward(passType); - - size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - size_t blockNum = getBlockNum(); - size_t blockSize = blockH_ * blockW_ * channels_; - resetOutput(blockNum * batchSize, blockSize); - - // calculate output_.value - inputShape_ = TensorShape({batchSize, channels_, imgSizeH_, imgSizeW_}); - outputShape_ = TensorShape({batchSize, blockNum, blockSize}); - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(0), inputShape_); - outputs.addArg(*getOutputValue(), outputShape_, ASSIGN_TO); - forward_[0]->calc(inputs, outputs); - - // calculate output_.sequenceStartPositions and output_.cpuSequenceDims - Argument& out = getOutput(); - ICpuGpuVector::resizeOrCreate( - out.sequenceStartPositions, batchSize + 1, false); - IVector::resizeOrCreate(out.cpuSequenceDims, 2 * batchSize, false); - int* start = out.sequenceStartPositions->getMutableData(false); - int* dims = out.cpuSequenceDims->getData(); - for (size_t i = 0; i < batchSize; i++) { - start[i] = i * blockNum; - dims[2 * i] = outputH_; - dims[2 * i + 1] = outputW_; - } - start[batchSize] = batchSize * blockNum; -} - -void BlockExpandLayer::backward(const UpdateCallback& callback) { - /* Calculate the input layers error */ - if (getInputGrad(0)) { - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), outputShape_); - outputs.addArg(*getInputGrad(0), inputShape_, ADD_TO); - backward_[0]->calc(inputs, outputs); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/BlockExpandLayer.h 
b/paddle/legacy/gserver/layers/BlockExpandLayer.h deleted file mode 100644 index 8b90249bfb0958f0081e7c668cd3b38a53c39951..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/BlockExpandLayer.h +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief Expand feature map to minibatch matrix. - * - matrix width is: blockH_ * blockW_ * channels_ - * - matirx height is: outputH_ * outputW_ - * - * \f[ - * outputH\_ = 1 + (2 * paddingH\_ + imgSizeH\_ - blockH\_ + strideH\_ - 1) / - * strideH\_ \\ - * outputW\_ = 1 + (2 * paddingW\_ + imgSizeW\_ - blockW\_ + strideW\_ - 1) / - * strideW\_ - * \f] - * - * The expand method is the same with ExpandConvLayer, but saved the transposed - * value. After expanding, output_.sequenceStartPositions will store timeline. - * The number of time steps are outputH_ * outputW_ and the dimension of each - * time step is blockH_ * blockW_ * channels_. This layer can be used after - * convolution neural network, and before recurrent neural network. - * - * The config file api is block_expand_layer. - */ -class BlockExpandLayer : public Layer { - protected: - /** - * @brief Calculate outputH_ and outputW_ and return block number which - * actually is time steps. - * @return time steps, outoutH_ * outputW_. 
- */ - size_t getBlockNum(); - size_t blockH_, blockW_, strideH_, strideW_, paddingH_, paddingW_; - size_t imgSizeH_, imgSizeW_, outputH_, outputW_, channels_; - - TensorShape inputShape_; - TensorShape outputShape_; - - public: - explicit BlockExpandLayer(const LayerConfig& config) : Layer(config) {} - - ~BlockExpandLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CRFDecodingLayer.cpp b/paddle/legacy/gserver/layers/CRFDecodingLayer.cpp deleted file mode 100644 index 4afed7e29565eae662506116e9b7aff03b61a9f9..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CRFDecodingLayer.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CRFDecodingLayer.h" - -namespace paddle { - -REGISTER_LAYER(crf_decoding, CRFDecodingLayer); - -bool CRFDecodingLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!CRFLayer::init(layerMap, parameterMap)) { - return false; - } - crf_.reset(new LinearChainCRF( - numClasses_, parameter_->getBuf(PARAMETER_VALUE)->getData())); - return true; -} - -void CRFDecodingLayer::forward(PassType passType) { - Layer::forward(passType); - - CHECK(!useGpu_) << "GPU is not supported"; - - const Argument& output = getInput(0); - CHECK(output.sequenceStartPositions); - - size_t batchSize = output.getBatchSize(); - size_t numSequences = output.sequenceStartPositions->getSize() - 1; - - IVector::resizeOrCreate(output_.ids, batchSize, useGpu_); - const int* starts = output.sequenceStartPositions->getData(false); - CHECK_EQ(starts[numSequences], (int)batchSize); - - for (size_t i = 0; i < numSequences; ++i) { - crf_->decode(output.value->getData() + numClasses_ * starts[i], - output_.ids->getData() + starts[i], - starts[i + 1] - starts[i]); - } - - if (inputLayers_.size() == 2) { - const Argument& label = getInput(1); - resizeOutput(batchSize, 1); - CHECK(label.ids); - real* error = output_.value->getData(); - int* ids = label.ids->getData(); - int* result = output_.ids->getData(); - for (size_t i = 0; i < batchSize; ++i) { - error[i] = ids[i] == result[i] ? 0 : 1; - } - } -} - -void CRFDecodingLayer::backward(const UpdateCallback& callback) { - parameter_->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CRFDecodingLayer.h b/paddle/legacy/gserver/layers/CRFDecodingLayer.h deleted file mode 100644 index 018162e146fa93725fe84bdf2da9a6124f3cea6f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CRFDecodingLayer.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "CRFLayer.h" -#include "LinearChainCRF.h" - -namespace paddle { - -/** - * A layer for calculating the decoding sequence of sequential conditional - * random field model. - * The decoding sequence is stored in output_.ids - * It also calculate error, output_.value[i] is 1 for incorrect decoding - * or 0 for correct decoding) - * See LinearChainCRF.h for the detail of the CRF formulation. - */ -class CRFDecodingLayer : public CRFLayer { - public: - explicit CRFDecodingLayer(const LayerConfig& config) : CRFLayer(config) {} - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - - protected: - std::unique_ptr crf_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CRFLayer.cpp b/paddle/legacy/gserver/layers/CRFLayer.cpp deleted file mode 100644 index 8b87a533a2ba832dc8882196046898e10708c916..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CRFLayer.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "CRFLayer.h" - -namespace paddle { - -REGISTER_LAYER(crf, CRFLayer); - -bool CRFLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - if (config_.type() == "crf") { - CHECK_GE(inputLayers_.size(), 2UL); - // the third output is sequence weight. one weight for each sequence - CHECK_LE(inputLayers_.size(), 3UL); - } - - // coeff only affect bp, keep consistent with CostLayer - coeff_ = config_.coeff(); - if (inputLayers_.size() == 3) { - weightLayer_ = inputLayers_[2]; - } - - numClasses_ = inputLayers_[0]->getSize(); - - CHECK_GE(numClasses_, 2UL); - - CHECK_EQ(parameters_[0]->getSize(), numClasses_ * (numClasses_ + 2)); - - parameter_ = parameters_[0]; - weight_.reset(new Weight(numClasses_ + 2, numClasses_, parameter_)); - - // We don't need sequenceStartPositions because each sample of output_ is - // for the cost of one sequence. 
- setNeedSequenceInfo(false); - - return true; -} - -void CRFLayer::forward(PassType passType) { - Layer::forward(passType); - - CHECK(!useGpu_) << "GPU is not supported"; - - const Argument& output = getInput(0); - const Argument& label = getInput(1); - CHECK(label.sequenceStartPositions); - CHECK(label.ids); - - int batchSize = output.getBatchSize(); - size_t numSequences = label.sequenceStartPositions->getSize() - 1; - resizeOutput(numSequences, 1); - - const int* starts = label.sequenceStartPositions->getData(false); - CHECK_EQ(starts[numSequences], batchSize); - - for (size_t i = 0; i < numSequences; ++i) { - if (i >= crfs_.size()) { - crfs_.emplace_back(numClasses_, weight_->getW()->getData()); - } - output_.value->getData()[i] = - crfs_[i].forward(output.value->getData() + numClasses_ * starts[i], - label.ids->getData() + starts[i], - starts[i + 1] - starts[i]); - } - - if (weightLayer_) { - const MatrixPtr& weight = getInputValue(*weightLayer_); - getOutputValue()->dotMul(*getOutputValue(), *weight); - } -} - -void CRFLayer::backward(const UpdateCallback& callback) { - const Argument& output = getInput(0); - const Argument& label = getInput(1); - const int* starts = label.sequenceStartPositions->getData(false); - int numSequences = label.sequenceStartPositions->getSize() - 1; - - bool needWGrad = weight_->getWGrad() ? true : false; - for (int i = 0; i < numSequences; ++i) { - crfs_[i].backward(output.value->getData() + numClasses_ * starts[i], - label.ids->getData() + starts[i], - starts[i + 1] - starts[i], - needWGrad); - real instanceWeight = weightLayer_ - ? 
getInputValue(*weightLayer_)->getElement(i, 0) - : real(1.0f); - instanceWeight *= coeff_; - - if (output.grad) { - MatrixPtr grad = output.grad->subRowMatrix(starts[i], starts[i + 1]); - grad->add(*crfs_[i].getXGrad(), real(1.0f), instanceWeight); - } - if (needWGrad) { - weight_->getWGrad()->add( - *crfs_[i].getWGrad(), real(1.0f), instanceWeight); - } - } - - parameter_->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CRFLayer.h b/paddle/legacy/gserver/layers/CRFLayer.h deleted file mode 100644 index 88c2ed343ad5743068c871fe351437270d85f223..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CRFLayer.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "Layer.h" -#include "LinearChainCRF.h" - -namespace paddle { - -/** - * A layer for calculating the cost of sequential conditional random field - * model. - * See class LinearChainCRF for the detail of the CRF formulation. 
- */ -class CRFLayer : public Layer { - public: - explicit CRFLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - - protected: - size_t numClasses_; - ParameterPtr parameter_; - std::vector crfs_; - LayerPtr weightLayer_; // weight for each sequence - std::unique_ptr weight_; // parameters - real coeff_; // weight for the layer -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CTCLayer.cpp b/paddle/legacy/gserver/layers/CTCLayer.cpp deleted file mode 100644 index 64eb15cd0dd23e180c61664a2ae24999e41b9bfb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CTCLayer.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CTCLayer.h" - -/* Please reference the Chapter7 in - * "Alex graves, Supervised Sequence Labelling with - * Recurrent Neural Networks" */ -namespace paddle { -REGISTER_LAYER(ctc, CTCLayer); - -bool CTCLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2UL); - - /* The inputLayers_[0] must be softmax output */ - numClasses_ = inputLayers_[0]->getSize(); - normByTimes_ = config_.norm_by_times(); - CHECK_GE(numClasses_, 2UL); - - // We don't need sequenceStartPositions because each sample of output_ is - // for the cost of one sequence. - setNeedSequenceInfo(false); - if (useGpu_) { - tmpCpuInput_.reserve(inputLayers_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_.push_back(Argument()); - } - } - return true; -} - -void CTCLayer::forward(PassType passType) { - Layer::forward(passType); - if (useGpu_) { - for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_[i].resizeAndCopyFrom( - getInput(i), false, HPPL_STREAM_DEFAULT); - } - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]); - } else { - forwardImp(getInput(0), getInput(1)); - } -} - -void CTCLayer::forwardImp(const Argument& softmaxSeqs, - const Argument& labelSeqs) { - CHECK(softmaxSeqs.sequenceStartPositions); - CHECK(labelSeqs.sequenceStartPositions); - CHECK(labelSeqs.ids); - - size_t numSequences = labelSeqs.sequenceStartPositions->getSize() - 1; - CHECK_EQ(numSequences, softmaxSeqs.sequenceStartPositions->getSize() - 1); - - resizeOutput(numSequences, 1); - std::vector out(numSequences); - - const int* labelSeqsStarts = labelSeqs.sequenceStartPositions->getData(false); - const int* softmaxSeqsStarts = - softmaxSeqs.sequenceStartPositions->getData(false); - - for (size_t i = 0; i < numSequences; i++) { - if (i >= ctcs_.size()) { - ctcs_.emplace_back(numClasses_, 
normByTimes_); - } - out[i] = ctcs_[i].forward( - softmaxSeqs.value->getData() + numClasses_ * softmaxSeqsStarts[i], - softmaxSeqsStarts[i + 1] - softmaxSeqsStarts[i], - labelSeqs.ids->getData() + labelSeqsStarts[i], - labelSeqsStarts[i + 1] - labelSeqsStarts[i]); - } - output_.value->copyFrom(out.data(), numSequences); -} - -void CTCLayer::backward(const UpdateCallback& callback) { - (void)callback; - if (useGpu_) { - backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]); - const_cast(getInput(0)) - .resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_DEFAULT); - const_cast(getInput(1)) - .resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_DEFAULT); - } else { - backwardImp(callback, getInput(0), getInput(1)); - } -} - -void CTCLayer::backwardImp(const UpdateCallback& callback, - const Argument& softmaxSeqs, - const Argument& labelSeqs) { - size_t numSequences = labelSeqs.sequenceStartPositions->getSize() - 1; - - const int* labelSeqsStarts = labelSeqs.sequenceStartPositions->getData(false); - const int* softmaxSeqsStarts = - softmaxSeqs.sequenceStartPositions->getData(false); - - for (size_t i = 0; i < numSequences; ++i) { - ctcs_[i].backward( - softmaxSeqs.value->getData() + numClasses_ * softmaxSeqsStarts[i], - softmaxSeqs.grad->getData() + numClasses_ * softmaxSeqsStarts[i], - labelSeqs.ids->getData() + labelSeqsStarts[i], - labelSeqsStarts[i + 1] - labelSeqsStarts[i]); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CTCLayer.h b/paddle/legacy/gserver/layers/CTCLayer.h deleted file mode 100644 index 5d70b1f4ceb03028865378d1d01b5706b35b10de..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CTCLayer.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "LinearChainCTC.h" - -namespace paddle { - -class CTCLayer : public Layer { - public: - explicit CTCLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void forwardImp(const Argument& softmaxSeqs, const Argument& labelSeqs); - void backward(const UpdateCallback& callback) override; - void backwardImp(const UpdateCallback& callback, - const Argument& softmaxSeqs, - const Argument& labelSeqs); - - protected: - size_t numClasses_; - bool normByTimes_; - std::vector ctcs_; - std::vector tmpCpuInput_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ClipLayer.cpp b/paddle/legacy/gserver/layers/ClipLayer.cpp deleted file mode 100644 index 6aa3c8fe64f5a59e82f3271baed99fd17fd6653f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ClipLayer.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" - -namespace paddle { - -/** - * A layer for clipping the input value by the threshold. - * \f[ - * out[i] = \min\left(\max\left(in[i],p_{1}\right),p_{2}\right) - * \f] - */ - -class ClipLayer : public Layer { - protected: - double min_; - double max_; - - public: - explicit ClipLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(clip, ClipLayer); - -bool ClipLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 1U); - auto layerConf = config_.inputs(0).clip_conf(); - min_ = layerConf.min(); - max_ = layerConf.max(); - CHECK_LT(min_, max_); - return true; -} - -void ClipLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV = getInputValue(0); - resetOutput(inV->getHeight(), inV->getWidth()); - MatrixPtr outV = getOutputValue(); - outV->copyFrom(*inV); - outV->clip(min_, max_); -} - -void ClipLayer::backward(const UpdateCallback& callback) { - MatrixPtr inV = getInputValue(0); - MatrixPtr inG = getInputGrad(0); - if (inG) { - MatrixPtr outV = getOutputValue(); - MatrixPtr outG = getOutputGrad(); - MatrixPtr tmpMtx; - Matrix::resizeOrCreate( - tmpMtx, outG->getHeight(), outG->getWidth(), false, useGpu_); - tmpMtx->clipDerivative(*inV, min_, max_); - inG->addDotMul(*outG, *tmpMtx, 1, 1); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConcatenateLayer.cpp b/paddle/legacy/gserver/layers/ConcatenateLayer.cpp deleted file mode 100644 index ce3f2ca950bf87e287163f1cfc8b15d815a68cf4..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConcatenateLayer.cpp +++ 
/dev/null @@ -1,208 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "Projection.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * A concatenate layer has multiple input layers. It concatenates rows of - * each input as one row for the output of this layer and apply activation. - */ -class ConcatenateLayer : public Layer { - public: - explicit ConcatenateLayer(const LayerConfig& config) : Layer(config) {} - - ~ConcatenateLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(concat, ConcatenateLayer); - -bool ConcatenateLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - if (!Layer::init(layerMap, parameterMap)) return false; - - CHECK(!biasParameter_); - - return true; -} - -void ConcatenateLayer::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = getInput(0).getBatchSize(); - int size = getSize(); - reserveOutput(batchSize, size); - - const MatrixPtr& out = getOutputValue(); - int offset = 0; - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - const MatrixPtr& in = getInputValue(i); - size_t inSize = in->getWidth(); - out->assignAtOffset(*in, offset); - offset += inSize; - } - 
CHECK_EQ(size, offset); - - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void ConcatenateLayer::backward(const UpdateCallback& callback) { - (void)callback; - - /* Do activation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - - const MatrixPtr& out = getOutputGrad(); - int offset = 0; - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - const MatrixPtr& in = getInputGrad(i); - size_t inSize = getInputValue(i)->getWidth(); - if (in) { - in->addAtOffset(*out, offset); - } - offset += inSize; - } -} - -/** - * concat2 layer is like concat layer, but each input layer was - * processed by a Projection. - */ -class ConcatenateLayer2 : public Layer { - public: - explicit ConcatenateLayer2(const LayerConfig& config) : Layer(config) {} - - ~ConcatenateLayer2() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - std::vector> projections_; - std::vector projOutput_; - std::vector> projCol_; - bool sharedBias_; - std::unique_ptr biases_; -}; - -REGISTER_LAYER(concat2, ConcatenateLayer2); - -bool ConcatenateLayer2::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - if (!Layer::init(layerMap, parameterMap)) return false; - - CHECK_EQ(inputLayers_.size(), parameters_.size()); - projections_.reserve(inputLayers_.size()); - projCol_.reserve(inputLayers_.size()); - projOutput_.resize(inputLayers_.size()); - - size_t startCol = 0; - size_t endCol = 0; - for (size_t i = 0; i < inputLayers_.size(); i++) { - projections_.emplace_back(Projection::create( - config_.inputs(i).proj_conf(), parameters_[i], useGpu_)); - - endCol += projections_[i]->getOutputSize(); - projCol_.push_back(std::make_pair(startCol, endCol)); - startCol = endCol; - } - 
CHECK_EQ(getSize(), endCol); - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - sharedBias_ = config_.shared_biases(); - size_t psize = config_.bias_size(); - biases_ = std::unique_ptr(new Weight(1, psize, biasParameter_)); - } - - return true; -} - -void ConcatenateLayer2::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = getInput(0).getBatchSize(); - int size = getSize(); - resetOutput(batchSize, size); - - for (size_t i = 0; i < projections_.size(); i++) { - size_t startCol = projCol_[i].first; - size_t endCol = projCol_[i].second; - projOutput_[i].value = output_.value->subColMatrix(startCol, endCol); - if (output_.grad) { - projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol); - } - } - - { - AsyncGpuBlock block; - for (size_t i = 0; i != inputLayers_.size(); ++i) { - projections_[i]->forward(&getInput(i), &projOutput_[i], passType); - } - } - - /* add the bias-vector */ - if (biases_) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); - output_.value->addBias(*(biases_->getW()), 1, sharedBias_); - } - - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void ConcatenateLayer2::backward(const UpdateCallback& callback) { - /* Do activation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - - AsyncGpuBlock block; - if (biases_ && biases_->getWGrad()) { - REGISTER_TIMER_INFO("Concat2BpBiasTimer", getName().c_str()); - biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_); - biases_->getParameterPtr()->incUpdate(callback); - } - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - if (projections_[i]) { - projections_[i]->backward(callback); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ContextProjection.cpp b/paddle/legacy/gserver/layers/ContextProjection.cpp deleted file mode 100644 index 
8bcf32663eb381a7d7700270efcaa08f9ff86356..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ContextProjection.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ContextProjection.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_PROJECTION(context, ContextProjection); - -ContextProjection::ContextProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - CHECK(config.has_context_start()); - CHECK(config.has_context_length()); - if (config.context_start() == 0 && config.context_length() == 1) { - config_.set_trainable_padding(false); - } - if (config_.trainable_padding()) { - CHECK(parameter); - beginPad_ = std::max(0, -config.context_start()); - endPad_ = std::max(0, config.context_start() + config.context_length() - 1); - size_t totalPad = beginPad_ + endPad_; - size_t inputDim = parameter->getSize() / totalPad; - CHECK_EQ(config.input_size(), inputDim); - CHECK_EQ(inputDim * totalPad, parameter->getSize()); - weight_.reset(new Weight(totalPad, inputDim, parameter)); - } - // init forward_ and backward_ functions - init(); -} - -bool ContextProjection::init() { - size_t context_length = config_.context_length(); - int context_start = config_.context_start(); - bool is_padding = config_.trainable_padding(); - size_t total_pad = is_padding ? 
beginPad_ + endPad_ : 0; - - createFunction(forward_, - "ContextProjectionForward", - FuncConfig() - .set("context_length", context_length) - .set("context_start", context_start) - .set("begin_pad", beginPad_)); - createFunction(backward_, - "ContextProjectionBackward", - FuncConfig() - .set("context_length", context_length) - .set("context_start", context_start) - .set("begin_pad", beginPad_) - .set("is_padding", is_padding) - .set("total_pad", total_pad)); - - return true; -} - -void ContextProjection::resetState() { - CHECK_LE(config_.context_start() + config_.context_length(), 1) - << "state is not allowed for future context"; - if (config_.context_start() >= 0) return; - Matrix::resizeOrCreate(state_, - -config_.context_start(), - config_.input_size(), - false, // trans - useGpu_); - Matrix::resizeOrCreate(state2_, - -config_.context_start(), - config_.input_size(), - false, // trans - useGpu_); - if (config_.trainable_padding()) { - state_->assign(*weight_->getW()->subMatrix(0, -config_.context_start())); - } else { - state_->zeroMem(); - } -} - -void ContextProjection::setState(LayerStatePtr state) { - CHECK(state->value.size() == 1) - << "one matrix is expected for ContextProjection state"; - state_->copyFrom(*(state->value[0])); -} - -LayerStatePtr ContextProjection::getState() { - if (state_ == nullptr) { - return nullptr; - } - LayerStatePtr res = std::make_shared(); - res->value.push_back(state_->clone(0, 0, false)); - res->value[0]->copyFrom(*state_); - return res; -} - -void ContextProjection::forward() { - CHECK(in_->value && out_->value); - CHECK(in_->sequenceStartPositions); - - size_t input_dim = in_->value->getWidth(); - size_t dim = out_->value->getWidth(); - CHECK_EQ(dim, input_dim * config_.context_length()); - // size_t batch_size = in_->value->getHeight(); - CHECK_EQ(forward_.size(), (size_t)1) << "Only one forward function here"; - - REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str()); - bool is_padding = 
config_.trainable_padding(); - /// first use state_, otherwise use weight_(padding false === w nullptr) - auto w_ptr = - state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr; - const auto start_pos = in_->sequenceStartPositions->getVector(useGpu_); - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*in_->value, *start_pos); - if (w_ptr) { - inputs.addArg(CpuMatrix(w_ptr->getData(), w_ptr->getHeight(), input_dim), - *start_pos); - } - outputs.addArg(*out_->value, *start_pos, ADD_TO); - forward_[0]->calc(inputs, outputs); - - if (state_ && config_.context_start() < 0) { - CHECK_EQ(1, in_->getNumSequences()); - const int* starts = in_->sequenceStartPositions->getData(false); - int length = starts[1] - starts[0]; - if (-config_.context_start() <= length) { - MatrixPtr sub = in_->value->subMatrix(starts[1] + config_.context_start(), - -config_.context_start()); - state_->copyFrom(*sub); - } else { - int prevLength = -config_.context_start() - length; - state2_->subMatrix(0, prevLength) - ->copyFrom(*state_->subMatrix(length, prevLength)); - state2_->subMatrix(prevLength, length) - ->copyFrom(*in_->value->subMatrix(starts[0], length)); - std::swap(state_, state2_); - } - } -} - -void ContextProjection::backward(const UpdateCallback& callback) { - CHECK(in_->value && out_->value && out_->grad); - size_t input_dim = in_->value->getWidth(); - size_t dim = out_->value->getWidth(); - CHECK_EQ(dim, input_dim * config_.context_length()); - size_t batch_size = in_->value->getHeight(); - CHECK_EQ(batch_size, out_->value->getHeight()); - CHECK_EQ(static_cast(backward_.size()), 1) - << "Only one backward function here"; - - REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str()); - bool is_padding = config_.trainable_padding(); - auto start_pos = in_->sequenceStartPositions; - auto w_ptr = is_padding ? 
weight_->getWGrad() : nullptr; - - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*out_->grad, *in_->sequenceStartPositions->getVector(useGpu_)); - outputs.addArg( - CpuMatrix( - in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim), - *in_->sequenceStartPositions->getVector(useGpu_), - ADD_TO); - outputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, - w_ptr ? w_ptr->getHeight() : 0, - input_dim), - ADD_TO); - backward_[0]->calc(inputs, outputs); - - if (config_.trainable_padding()) { - weight_->getParameterPtr()->incUpdate(callback); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ContextProjection.h b/paddle/legacy/gserver/layers/ContextProjection.h deleted file mode 100644 index 9c217145419048282a9a09ad899dc970e7c9704f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ContextProjection.h +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Projection.h" - -namespace paddle { - -/** - * @brief Context projection concatenate features in adjacent time steps in - * a sequence. The i-th row of the output is the concatenation of - * context_length rows of the input. The context_length rows are the - * consecutive rows from the i+shift_start row. - * - * For example, assumed input (x) has 4 words and the dimension of each word - * representation is 2. 
If we use zero to pad instead of learned weight to pad, - * and the context_lenth is 3, the output (y) is: - * - * @code - * x = [a1, a2; - * b1, b2; - * c1, c2; - * d1, d2] - * y = [0, 0, a1, a2, b1, b2; - * a1, a2, b1, b2, c1, c2; - * b1, b2, c1, c2, d1, d2; - * c1, c2, d1, d2, 0, 0] - * @endcode - * - * The config file api is context_projection. - */ -class ContextProjection : public Projection { - public: - /** - * Constructor. If context_start is zero and context_lenth is one, it will - * set trainable_padding false. trainable_padding is an optional arguments - * and if it is set, constructor will set learned weight, which is used to - * pad output. - */ - ContextProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu); - virtual void forward(); - virtual void backward(const UpdateCallback& callback); - - virtual void resetState(); - - virtual void setState(LayerStatePtr state); - - virtual LayerStatePtr getState(); - - virtual bool init(); - - protected: - std::unique_ptr weight_; - /// number of extra timesteps added at the beginning - size_t beginPad_; - /// number of extra timesteps added at the end - size_t endPad_; - /// state_ and state2_ are used in sequence generating and saved - /// previous inputs. - MatrixPtr state_; - MatrixPtr state2_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Conv3DLayer.cpp b/paddle/legacy/gserver/layers/Conv3DLayer.cpp deleted file mode 100644 index d072a74234b43e06c1194acc2ec2b3f961b4a97e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Conv3DLayer.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Conv3DLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(conv3d, Conv3DLayer); - -bool Conv3DLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { - if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; - int index = 0; - for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - M_.push_back(numFilters_ / conf.groups()); - K_.push_back(filterPixels_[index] * filterChannels_[index]); - - // create a new weight - size_t height, width; - width = filterPixels_[index] * filterChannels_[index]; - height = numFilters_; - CHECK_EQ(parameters_[index]->getSize(), width * height); - Weight *w = new Weight(height, width, parameters_[index]); - weights_.emplace_back(w); - ++index; - } - if (biasParameter_.get()) { - if (sharedBiases_) { - CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); - biases_ = - std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); - } else { - biases_ = - std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); - } - } - return true; -} - -size_t Conv3DLayer::getSize() { - CHECK_NE(inputLayers_.size(), 0UL); - outputH_.clear(); - outputW_.clear(); - outputD_.clear(); - N_.clear(); - size_t layerSize = 0; - for (size_t i = 0; i < inputLayers_.size(); ++i) { - outputW_.push_back(outputSize( - imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); - outputH_.push_back(outputSize( - imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); - outputD_.push_back(outputSize( 
- imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); - - N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); - CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); - layerSize += N_[i] * numFilters_; - } - getOutput().setFrameHeight(outputH_[0]); - getOutput().setFrameWidth(outputW_[0]); - getOutput().setFrameDepth(outputD_[0]); - return layerSize; -} - -void Conv3DLayer::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - int outWidth = getSize(); - resetOutput(batchSize, outWidth); - - REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); - for (size_t i = 0; i != inputLayers_.size(); ++i) { - const MatrixPtr &inMat = getInputValue(i); - const MatrixPtr &outMat = getOutputValue(); - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - MatrixPtr wMat = weights_[i]->getW(); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(), - channels_[i], - imgSizeD_[i], - imgSizeH_[i], - imgSizeW_[i], - filterSizeZ_[i], - filterSizeY_[i], - filterSize_[i], - strideZ_[i], - strideY_[i], - stride_[i], - paddingZ_[i], - paddingY_[i], - padding_[i]); - - real *outData = outMat->getData() + n * outMat->getStride(); - MatrixPtr outMatSub = - Matrix::create(outData, groups_[i] * M, N, false, useGpu_); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr wMatSub = wMat->subMatrix(g * M, M); - MatrixPtr in = colBuf_->subMatrix(g * K, K); - MatrixPtr out = outMatSub->subMatrix(g * M, M); - out->mul(*wMatSub, *in, 1.0, 1.0); - } - } - } - if (nullptr != this->biasParameter_) { - this->addBias(); - } - forwardActivation(); -} - -void Conv3DLayer::backward(const UpdateCallback &callback) { - backwardActivation(); - - if (biases_ && biases_->getWGrad()) { - bpropBiases(); - biases_->getParameterPtr()->incUpdate(callback); - } - - REGISTER_TIMER_INFO("BwdConv3D", 
getName().c_str()); - for (size_t i = 0; i != inputLayers_.size(); ++i) { - if (weights_[i]->getWGrad()) { - bpropWeights(i); - } - if (getInputGrad(i)) { - bpropData(i); - } - weights_[i]->getParameterPtr()->incUpdate(callback); - } -} - -void Conv3DLayer::bpropWeights(int i) { - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - const MatrixPtr &inMat = getInputValue(i); - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - MatrixPtr wGradMat = weights_[i]->getWGrad(); - int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(), - channels_[i], - imgSizeD_[i], - imgSizeH_[i], - imgSizeW_[i], - filterSizeZ_[i], - filterSizeY_[i], - filterSize_[i], - strideZ_[i], - strideY_[i], - stride_[i], - paddingZ_[i], - paddingY_[i], - padding_[i]); - - real *outGradData = - getOutputGrad()->getData() + n * getOutputGrad()->getStride(); - MatrixPtr outGradSub = - Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); - MatrixPtr outG = outGradSub->subMatrix(g * M, M); - MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); - wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); - } - } -} - -void Conv3DLayer::bpropData(int i) { - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - MatrixPtr wMat = weights_[i]->getW(); - int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - for (int n = 0; n < batchSize; ++n) { - real *outGradData = - getOutputGrad()->getData() + n * getOutputGrad()->getStride(); - real *preGradData = - getInputGrad(i)->getData() + n * getInputGrad(i)->getStride(); - MatrixPtr outGradSub = - Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr wMatSub = wMat->subMatrix(g * M, M); - 
MatrixPtr outG = outGradSub->subMatrix(g * M, M); - MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); - inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); - } - colBuf_->col2Vol(preGradData, - channels_[i], - imgSizeD_[i], - imgSizeH_[i], - imgSizeW_[i], - filterSizeZ_[i], - filterSizeY_[i], - filterSize_[i], - strideZ_[i], - strideY_[i], - stride_[i], - paddingZ_[i], - paddingY_[i], - padding_[i], - 1.0, - 1.0); - } -} - -void Conv3DLayer::bpropBiases() { - MatrixPtr biases = Matrix::create(biases_->getWGrad()->getData(), - 1, - biases_->getWGrad()->getElementCnt(), - false, - useGpu_); - MatrixPtr outGradMat = getOutputGrad(); - - if (this->sharedBiases_) { - biases->collectSharedBias(*outGradMat, 1.0f); - } else { - biases->collectBias(*outGradMat, 1.0f); - } -} - -void Conv3DLayer::addBias() { - MatrixPtr outMat = getOutputValue(); - MatrixPtr bias = Matrix::create(biases_->getW()->getData(), - 1, - biases_->getW()->getElementCnt(), - false, - useGpu_); - if (this->sharedBiases_) { - outMat->addSharedBias(*(bias), 1.0f); - } else { - outMat->addBias(*(bias), 1.0f); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Conv3DLayer.h b/paddle/legacy/gserver/layers/Conv3DLayer.h deleted file mode 100644 index cb42a2f36d31365b473d7f593fd27dc063c83c47..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Conv3DLayer.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include "ConvBaseLayer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief A subclass of convolution layer. - * This layer expands input and use matrix multiplication to - * calculate convolution operation. - */ -class Conv3DLayer : public ConvBaseLayer { - public: - explicit Conv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} - ~Conv3DLayer() {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void addBias(); - void backward(const UpdateCallback& callback); - void bpropBiases(); - void bpropData(int i); - void bpropWeights(int i); - size_t getSize(); - - protected: - // Figure out the dimensions for individual gemms. - IntV M_; /// numFilters_ / filter_group_; - IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ - IntV K_; /// outputD_ * outputH_ * outputW_ - MatrixPtr colBuf_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvBaseLayer.cpp b/paddle/legacy/gserver/layers/ConvBaseLayer.cpp deleted file mode 100644 index 76120915e48661a9b14fb6b9bb99e9ec9dd71e4b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvBaseLayer.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ConvBaseLayer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/utils/Logging.h" -namespace paddle { - -bool ConvBaseLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv") - ? false - : true; - - /* Initialize the convolutional layer parameter */ - numFilters_ = config_.num_filters(); - sharedBiases_ = config_.shared_biases(); - for (auto& inputConfig : config_.inputs()) { - const ConvConfig& conf = inputConfig.conv_conf(); - padding_.push_back(conf.padding()); - stride_.push_back(conf.stride()); - dilation_.push_back(conf.dilation()); - filterSize_.push_back(conf.filter_size()); - paddingY_.push_back(conf.padding_y()); - strideY_.push_back(conf.stride_y()); - dilationY_.push_back(conf.dilation_y()); - filterSizeY_.push_back(conf.filter_size_y()); - channels_.push_back(conf.channels()); - imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y() - : conf.img_size()); - imgSizeW_.push_back(conf.img_size()); - groups_.push_back(conf.groups()); - filterChannels_.push_back(conf.filter_channels()); - outputH_.push_back(conf.has_output_y() ? 
conf.output_y() : conf.output_x()); - outputW_.push_back(conf.output_x()); - - paddingZ_.push_back(conf.padding_z()); - strideZ_.push_back(conf.stride_z()); - filterSizeZ_.push_back(conf.filter_size_z()); - imgSizeD_.push_back(conf.img_size_z()); - outputD_.push_back(conf.output_z()); - filterPixels_.push_back(filterSize_.back() * filterSizeY_.back() * - filterSizeZ_.back()); - } - - CHECK(inputLayers_.size() == parameters_.size()); - - // create new weights_ in derived class - // create new biases_ in derived class - - // default caffe model - caffeMode_ = true; - - return true; -} - -size_t ConvBaseLayer::calOutputSize() { - auto clearAndReserve = [this](IntV* vec) { - vec->clear(); - vec->reserve(this->inputLayers_.size()); - }; - clearAndReserve(&imgSizeH_); - clearAndReserve(&imgSizeW_); - clearAndReserve(&outputH_); - clearAndReserve(&outputW_); - size_t layerSize = 0; - - auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) { - size_t filterSizeY; - size_t filterSize; - for (size_t i = 0; i < inputLayers_.size(); i++) { - filterSizeY = (filterSizeY_[i] - 1) * dilationY_[i] + 1; - filterSize = (filterSize_[i] - 1) * dilation_[i] + 1; - inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - const ConvConfig& conf = config_.inputs(i).conv_conf(); - if (isDeconv_) { - if (inH[i] == 0) - inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x(); - if (inW[i] == 0) inW[i] = conf.output_x(); - outH.push_back(imageSize( - inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_)); - outW.push_back( - imageSize(inW[i], filterSize, padding_[i], stride_[i], caffeMode_)); - } else { - if (inH[i] == 0) - inH[i] = conf.has_img_size_y() ? 
conf.img_size_y() : conf.img_size(); - if (inW[i] == 0) inW[i] = conf.img_size(); - outH.push_back(outputSize( - inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_)); - outW.push_back(outputSize( - inW[i], filterSize, padding_[i], stride_[i], caffeMode_)); - } - CHECK_EQ(outH[i], outH[0]); - CHECK_EQ(outW[i], outW[0]); - } - getOutput().setFrameHeight(outH[0]); - getOutput().setFrameWidth(outW[0]); - layerSize = outH[0] * outW[0] * size_t(numFilters_); - }; - - setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_); - - return layerSize; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvBaseLayer.h b/paddle/legacy/gserver/layers/ConvBaseLayer.h deleted file mode 100644 index 01e90e999625f986b0f13d2b73a883297c097841..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvBaseLayer.h +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/MathUtils.h" -namespace paddle { - -/** - * @brief A Base Convolution Layer, which convolves the input image - * with learned filters and (optionally) adds biases. - */ - -class ConvBaseLayer : public Layer { - protected: - typedef std::vector IntV; - - /// True if it's deconv layer, false if it's convolution layer - bool isDeconv_; - - /// The number of filters. - int numFilters_; - /// The x dimension of the padding. 
- IntV padding_; - /// The y dimension of the padding. - IntV paddingY_; - /// The x dimension of the stride. - IntV stride_; - /// The y dimension of the stride. - IntV strideY_; - /// The x dimension of the dilation. - IntV dilation_; - /// The y dimension of the dilation. - IntV dilationY_; - /// The x dimension of a filter kernel. - IntV filterSize_; - /// The y dimension of a filter kernel. - IntV filterSizeY_; - /// The spatial dimensions of the convolution input. - IntV channels_; - /// The spatial dimensions of input feature map height. - IntV imgSizeH_; - /// The spatial dimensions of input feature map width. - IntV imgSizeW_; - /// filterPixels_ = filterSizeX_ * filterSizeY_. - IntV filterPixels_; - /// filterChannels_ = channels_/groups_. - IntV filterChannels_; - /// The spatial dimensions of output feature map height. - IntV outputH_; - /// The spatial dimensions of output feature map width. - IntV outputW_; - - IntV outputD_; - IntV imgSizeD_; - IntV filterSizeZ_; - IntV strideZ_; - IntV paddingZ_; - - /// Group size, refer to grouped convolution in - /// Alex Krizhevsky's paper: when group=2, the first half of the - /// filters are only connected to the first half of the input channels, - /// and the second half only connected to the second half. - IntV groups_; - /// Whether the bias is shared for feature in each channel. - bool sharedBiases_; - - /// shape of weight: (numChannels * filterPixels_, numFilters) - WeightList weights_; - /// If shared_biases is false shape of bias: (numFilters_, 1) - /// If shared_biases is ture shape of bias: - /// (numFilters_ * outputX * outputY, 1) - std::unique_ptr biases_; - - /// True by default. The only difference is the calculation - /// of output size. 
- bool caffeMode_; - - public: - explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - /** - * imgSizeH_ and imgSizeW_ will be set according to the previous input layers - * in this function. Then it will calculate outputH_ and outputW_ and set them - * into output argument. - */ - virtual size_t calOutputSize(); - - Weight& getWeight(int idx) { return *weights_[idx]; } -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvBaseOperator.cpp b/paddle/legacy/gserver/layers/ConvBaseOperator.cpp deleted file mode 100644 index e8e59b3bfe9d8a9e54e5c11906707d10ec346a4d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvBaseOperator.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ConvBaseOperator.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief ConvBaseOperator takes two inputs to perform the convolution. - * The first input is the image, and the second input is the convolution kernel. - * The height of data for two inputs are the same. Each data of the first input - * is convolved with each data of the second input indepedently. - * - * The config file api is conv_operator. 
- */ - -ConvBaseOperator::ConvBaseOperator(const OperatorConfig &config, bool useGpu) - : Operator(config, useGpu) { - CHECK(useGpu); - CHECK_EQ(config_.input_indices_size(), 2L); - - caffeMode_ = true; - getConvParams(); - computeConvSizes(); - - // initialize all to default algorithms - fwdAlgo_ = 0; - bwdFilterAlgo_ = 0; - bwdDataAlgo_ = 0; - fwdLimitBytes_ = 0; - bwdDataLimitBytes_ = 0; - bwdFilterLimitBytes_ = 0; - workSpaceInBytes_ = 0; - workSpace_ = nullptr; - - isSelectAlgo_ = false; -} - -void ConvBaseOperator::allocConvWorkSpace() { - hl_conv_workspace(imageDesc_, - outputDesc_, - filterDesc_, - convDesc_, - &fwdAlgo_, - &fwdLimitBytes_, - &bwdDataAlgo_, - &bwdDataLimitBytes_, - &bwdFilterAlgo_, - &bwdFilterLimitBytes_, - /*useDilation*/ false); - - size_t maxWorkSpace = 0; - maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); - maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_); - - if (maxWorkSpace > workSpaceInBytes_) { - if (workSpaceInBytes_ != 0) { - hl_free_mem_device(workSpace_); - } - // total amount of storage needed - workSpace_ = hl_malloc_device(maxWorkSpace); - workSpaceInBytes_ = maxWorkSpace; - } -} - -void ConvBaseOperator::computeConvSizes() { - hl_create_filter_descriptor( - &filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_); - hl_create_tensor_descriptor(&imageDesc_); - hl_create_tensor_descriptor(&outputDesc_); - hl_create_convolution_descriptor(&convDesc_, - imageDesc_, - filterDesc_, - paddingY_, - padding_, - strideY_, - stride_); -} - -void ConvBaseOperator::reshapeImageDescriptors() { - hl_tensor_reshape(imageDesc_, - 1, - channels_, - imageH_, - imageW_, - channels_ * imageH_ * imageW_, - imageH_ * imageW_, - imageW_, - 1); - hl_tensor_reshape(outputDesc_, - 1, - numFilters_, - outputH_, - outputW_, - numFilters_ * outputH_ * outputW_, - outputH_ * outputW_, - outputW_, - 1); - hl_reset_convolution_descriptor(convDesc_, - imageDesc_, - filterDesc_, - paddingY_, - padding_, - strideY_, - stride_); 
-} - -void ConvBaseOperator::getConvParams() { - configNumFilters_ = config_.num_filters(); - const ConvConfig &conf = config_.conv_conf(); - padding_ = conf.padding(); - stride_ = conf.stride(); - filterSize_ = conf.filter_size(); - paddingY_ = conf.padding_y(); - strideY_ = conf.stride_y(); - filterSizeY_ = conf.filter_size_y(); - filterPixels_ = filterSize_ * filterSizeY_; - configChannels_ = conf.channels(); - imgSize_ = conf.img_size(); - imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - imgPixels_ = imgSize_ * imgSizeY_; - CHECK_EQ(conf.groups(), 1U); - filterChannels_ = conf.filter_channels(); - outputX_ = conf.output_x(); - outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - outputs_ = outputX_ * outputX_; - - isDeconv_ = (config_.type() == "conv") ? false : true; - if (isDeconv_) { - channels_ = configNumFilters_; - numFilters_ = configChannels_; - } else { - channels_ = configChannels_; - numFilters_ = configNumFilters_; - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvBaseOperator.h b/paddle/legacy/gserver/layers/ConvBaseOperator.h deleted file mode 100644 index 4ac77f2d743abd6f01e8e3f1e2f4e730c0e6fb39..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvBaseOperator.h +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#pragma once - -#include "Operator.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief ConvOperator takes two inputs to perform the convolution. - * The first input is the image, and the second input is the convolution kernel. - * The height of data for two inputs are the same. Each data of the first input - * is convolved with each data of the second input indepedently. - * - * The config file api is conv_operator. - */ - -class ConvBaseOperator : public Operator { - public: - ConvBaseOperator(const OperatorConfig &config, bool useGpu); - /** - * Free workspace in device and destroy cudnn tensor descriptor. - */ - virtual ~ConvBaseOperator() { - if (workSpaceInBytes_ != 0) { - hl_free_mem_device(workSpace_); - workSpaceInBytes_ = 0; - } - - hl_destroy_tensor_descriptor(imageDesc_); - hl_destroy_tensor_descriptor(outputDesc_); - hl_destroy_filter_descriptor(filterDesc_); - hl_destroy_convolution_descriptor(convDesc_); - } - - protected: - /** - * Get convolution parameters from layer config and - * initialize member variables. - */ - void getConvParams(); - - /** - * Allocate Gpu Memory for cudnn convolution algorithms. - */ - void allocConvWorkSpace(); - - /** - * Create cudnn tensor descriptor for convolution operation. - */ - void computeConvSizes(); - - /** - * Reshape cudnn tensor descriptor. - */ - void reshapeImageDescriptors(); - - /** - * Reshape cudnn tensor descriptor. - */ - virtual void reshape(int batchSize) = 0; - - /** - * Check filter size is equal to the size calculated by parameters from - * layer config. - */ - void checkFilterSize(const MatrixPtr &filter) { - CHECK_EQ(static_cast(filter->getWidth()), - filterSize_ * filterSizeY_ * channels_ * numFilters_); - } - - /// Most of member variables are same with CudnnConvLayer. - /// There is no explanation here. 
- bool isDeconv_; - int imageH_, imageW_, outputH_, outputW_; - hl_tensor_descriptor imageDesc_; - hl_tensor_descriptor outputDesc_; - hl_filter_descriptor filterDesc_; - hl_convolution_descriptor convDesc_; - bool caffeMode_; - int inputOffset_, outputOffset_, weightOffset_; - int numFilters_, channels_; - - /// from parsing config - int configNumFilters_, configChannels_; - int padding_, stride_, filterSize_, imgSize_, imgSizeY_; - int paddingY_, strideY_, filterSizeY_; - int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_; - - /// Following member variables are same with CudnnConvLayer. - /// There is no explanation here. - int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_; - size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_; - size_t workSpaceInBytes_; - void *workSpace_; - bool isSelectAlgo_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvBaseProjection.cpp b/paddle/legacy/gserver/layers/ConvBaseProjection.cpp deleted file mode 100644 index ff5d3412de1c2940cdd9dcf9397370153c24b0c6..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvBaseProjection.cpp +++ /dev/null @@ -1,199 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ConvBaseProjection.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -ThreadLocalD> ConvBaseProjection::convMem_; - -ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config, - ParameterPtr parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - CHECK(useGpu); // only support GPU - getConvParams(); - initCudnn(); - - size_t height = filterH_ * filterW_ * channels_ / groups_; - size_t width = numFilters_; - weight_.reset(new Weight(height, width, parameter)); - weightOffset_ = height * width / groups_; -} - -void ConvBaseProjection::getConvParams() { - const ConvConfig &conf = config_.conv_conf(); - paddingH_ = conf.padding_y(); - paddingW_ = conf.padding(); - - strideH_ = conf.stride_y(); - strideW_ = conf.stride(); - - dilationH_ = conf.dilation_y(); - dilationW_ = conf.dilation(); - CHECK_GT(dilationH_, 0); - CHECK_GT(dilationW_, 0); - - filterH_ = conf.filter_size_y(); - filterW_ = conf.filter_size(); - - configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - configImgW_ = conf.img_size(); - - configOutH_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - configOutW_ = conf.output_x(); - - configChannels_ = conf.channels(); - configNumFilters_ = config_.num_filters(); - - isDeconv_ = (config_.type() == "conv") ? false : true; - - channels_ = (isDeconv_) ? configNumFilters_ : configChannels_; - numFilters_ = (isDeconv_) ? 
configChannels_ : configNumFilters_; - - groups_ = conf.groups(); - CHECK_EQ(channels_ % groups_, 0); - CHECK_EQ(numFilters_ % groups_, 0); -} - -void ConvBaseProjection::initCudnn() { - hl_create_filter_descriptor(&filterDesc_, - channels_ / groups_, - numFilters_ / groups_, - filterH_, - filterW_); - hl_create_tensor_descriptor(&imageDesc_); - hl_create_tensor_descriptor(&outputDesc_); - hl_create_convolution_descriptor(&convDesc_, - imageDesc_, - filterDesc_, - paddingH_, - paddingW_, - strideH_, - strideW_, - dilationH_, - dilationW_); - - // initialize all to default algorithms - fwdAlgo_ = 0; - bwdFilterAlgo_ = 0; - bwdDataAlgo_ = 0; - fwdLimitBytes_ = 0; - bwdDataLimitBytes_ = 0; - bwdFilterLimitBytes_ = 0; - workSpaceInBytes_ = 0; -} - -void ConvBaseProjection::reshapeTensorDesc(int batchSize) { - // The stride between two consecutive samples in the output of ConvProjection - // may not be numFilters_ * outputH_ * outputW_ (conv) or - // channels_ * imageH_ * imageW_ (deconv) - // for example, in the case of layer ConcatenateLayer2 with two - // ConvProjection, the stride is the output_size of layer ConcatenateLayer2. - // So the calculation of nStride is different from CudnnConvLayer. 
- size_t nStrideImage, nStrideOutput; - if (isDeconv_) { - nStrideImage = out_->value->getStride(); - nStrideOutput = numFilters_ * outputH_ * outputW_; - } else { - nStrideImage = channels_ * imageH_ * imageW_; - nStrideOutput = out_->value->getStride(); - } - - hl_tensor_reshape(imageDesc_, - batchSize, - channels_ / groups_, - imageH_, - imageW_, - nStrideImage, - imageH_ * imageW_, - imageW_, - 1); - - hl_tensor_reshape(outputDesc_, - batchSize, - numFilters_ / groups_, - outputH_, - outputW_, - nStrideOutput, - outputH_ * outputW_, - outputW_, - 1); - - hl_reset_convolution_descriptor(convDesc_, - imageDesc_, - filterDesc_, - paddingH_, - paddingW_, - strideH_, - strideW_, - dilationH_, - dilationW_); -} - -void ConvBaseProjection::reshape(int batchSize) { - size_t width = calOutputSize(); - CHECK_EQ(width, out_->value->getWidth()); - CHECK_EQ(calInputSize(), in_->value->getWidth()); - - reshapeTensorDesc(batchSize); - bool useDilation = false; - if (dilationH_ > 1 || dilationW_ > 1) { - useDilation = true; - } - hl_conv_workspace(imageDesc_, - outputDesc_, - filterDesc_, - convDesc_, - &fwdAlgo_, - &fwdLimitBytes_, - &bwdDataAlgo_, - &bwdDataLimitBytes_, - &bwdFilterAlgo_, - &bwdFilterLimitBytes_, - useDilation); - - size_t maxWorkSpace = 0; - maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); - maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_); - workSpaceInBytes_ = maxWorkSpace; - - VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_ - << " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_; -} - -void *ConvBaseProjection::getSpaceBytes(size_t size) { - std::vector &convMem = *convMem_; - if (convMem.empty()) { - int numDevices = hl_get_device_count(); - convMem.resize(numDevices); - } - - int devId = hl_get_device(); - MemoryHandlePtr localMem = convMem[devId]; - if (NULL == localMem || size > localMem->getAllocSize()) { - localMem = std::make_shared(size); - } - return localMem->getBuf(); -} - 
-ConvBaseProjection::~ConvBaseProjection() { - hl_destroy_tensor_descriptor(imageDesc_); - hl_destroy_tensor_descriptor(outputDesc_); - hl_destroy_filter_descriptor(filterDesc_); - hl_destroy_convolution_descriptor(convDesc_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvBaseProjection.h b/paddle/legacy/gserver/layers/ConvBaseProjection.h deleted file mode 100644 index dcf5ce0f48daac396bab0ec7620303f6c1236fc2..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvBaseProjection.h +++ /dev/null @@ -1,111 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Projection.h" -#include "paddle/legacy/math/MathUtils.h" - -namespace paddle { - -/** - * @brief Base class for ConvProjection and ConvTransProjection. - */ -class ConvBaseProjection : public Projection { - public: - /** - * Constructor. 
- */ - ConvBaseProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu); - - ~ConvBaseProjection(); - - protected: - void getConvParams(); - void initCudnn(); - - void reshapeTensorDesc(int batchSize); - void reshape(int batchSize); - - virtual size_t calOutputSize() = 0; - virtual size_t calInputSize() = 0; - - static void* getSpaceBytes(size_t size); - - /// True if it's deconv projection layer, false if it's ConvProjection layer - bool isDeconv_; - /// imageH_ and imageW_ / outputH_ and outputW_ - /// is calculated from the input layer. - int imageH_, imageW_; - int outputH_, outputW_; - /// configImgH_ and configImgW_ / configOutH_ and configOutW_ - /// is obtained from config. - int configImgH_, configImgW_; - int configOutH_, configOutW_; - /// channels_ and numFilters_ are defined in terms of convolution semantics - int channels_, numFilters_; - /// configChannels and configNumFilters_ are obtained from config - /// For Conv they are the same as channels_ and numFilters - /// For ConvTrans they are opposite to channels_ and numFilters - int configChannels_, configNumFilters_; - int paddingH_, paddingW_; - int strideH_, strideW_; - int dilationH_, dilationW_; - int filterH_, filterW_; - /// One group offset of input data. - int inputOffset_; - /// One group offset of output data. - int outputOffset_; - /// One group offset of weight. - int weightOffset_; - int groups_; - - /// Cudnn tensor descriptor for input. - hl_tensor_descriptor imageDesc_; - /// Cudnn tensor descriptor for output. - hl_tensor_descriptor outputDesc_; - /// Cudnn tensor descriptor for filter. - hl_filter_descriptor filterDesc_; - /// Cudnn tensor descriptor for a convolution operation. - hl_convolution_descriptor convDesc_; - - /// Record the algorithm for forward convolution, which is obtained by cudnn - /// api to search the best suited algorithm. 
- int fwdAlgo_; - /// Record the algorithm for computing convolution gradient with respect to - /// filter coefficients. - int bwdFilterAlgo_; - /// Record the algorithm for computing convolution gradient with respect to - /// the output. - int bwdDataAlgo_; - /// Amount of GPU memory needed as workspace to be able to execute a - /// forward convolution with the specified algo. - size_t fwdLimitBytes_; - /// Amount of GPU memory needed as workspace to be able to execute a - /// backwardFilter with the specified algo. - size_t bwdDataLimitBytes_; - /// Amount of GPU memory needed as workspace to be able to execute a - /// backwardData with the specified algo. - size_t bwdFilterLimitBytes_; - /// Size of total work space. - size_t workSpaceInBytes_; - bool bias_; - - std::unique_ptr weight_; - static ThreadLocalD> convMem_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvOperator.cpp b/paddle/legacy/gserver/layers/ConvOperator.cpp deleted file mode 100644 index 5276b2c3920eee923f13a47d40b4498c6846f94b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvOperator.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ConvOperator.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief ConvOperator takes two inputs to perform the convolution. 
- * The first input is the image, and the second input is the convolution kernel. - * The height of data for two inputs are the same. Each data of the first input - * is convolved with each data of the second input indepedently. - * - * The config file api is conv_operator. - */ - -REGISTER_OPERATOR(conv, ConvOperator); - -void ConvOperator::reshape(int batchSize) { - imageH_ = ins_[0]->getFrameHeight(); - imageW_ = ins_[0]->getFrameWidth(); - if (imageH_ == 0) imageH_ = imgSizeY_; - if (imageW_ == 0) imageW_ = imgSize_; - outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_); - outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_); - /// Check that the outputSizes are consistent with config - CHECK_EQ(outputH_, outputY_); - CHECK_EQ(outputW_, outputX_); - out_->setFrameHeight(outputH_); - out_->setFrameWidth(outputW_); - - reshapeImageDescriptors(); - - inputOffset_ = channels_ * imageH_ * imageW_; - outputOffset_ = numFilters_ * outputH_ * outputW_; - weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSizeY_; - - if (!isSelectAlgo_) { - allocConvWorkSpace(); - } - - isSelectAlgo_ = true; -} - -void ConvOperator::forward() { - size_t batchSize = ins_[0]->value->getHeight(); - reshape(batchSize); - CHECK_EQ(ins_[1]->value->getHeight(), batchSize); - checkFilterSize(ins_[1]->value); - Matrix::resizeOrCreate(out_->value, - batchSize, - outputH_ * outputW_ * numFilters_, - false, - useGpu_); - { - AsyncGpuBlock block; - for (size_t batchId = 0; batchId < batchSize; ++batchId) { - real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; - real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; - real *outData = out_->value->getData() + outputOffset_ * batchId; - hl_convolution_forward(imageDesc_, - inputData, - outputDesc_, - outData, - filterDesc_, - wgtData, - convDesc_, - workSpace_, - workSpaceInBytes_, - fwdAlgo_); - } - } -} - -void ConvOperator::backward() { - size_t batchSize = 
ins_[0]->value->getHeight(); - { - AsyncGpuBlock block; - for (size_t batchId = 0; batchId < batchSize; ++batchId) { - real *outGrad = out_->grad->getData() + outputOffset_ * batchId; - if (ins_[1]->grad) { - real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; - real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId; - hl_convolution_backward_filter(imageDesc_, - inputData, - outputDesc_, - outGrad, - filterDesc_, - weightGrad, - convDesc_, - workSpace_, - workSpaceInBytes_, - bwdFilterAlgo_); - } - - MatrixPtr preGrad = ins_[0]->grad; - if (NULL != preGrad) { - real *inputGrad = preGrad->getData() + inputOffset_ * batchId; - real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; - hl_convolution_backward_data(imageDesc_, - inputGrad, - outputDesc_, - outGrad, - filterDesc_, - wgtData, - convDesc_, - workSpace_, - workSpaceInBytes_, - bwdDataAlgo_); - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvOperator.h b/paddle/legacy/gserver/layers/ConvOperator.h deleted file mode 100644 index 8f31620111c8ff3818d83145e16012d22b067a12..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvOperator.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#pragma once - -#include "ConvBaseOperator.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief ConvOperator takes two inputs to perform the convolution. - * The first input is the image, and the second input is the convolution kernel. - * The height of data for two inputs are the same. Each data of the first input - * is convolved with each data of the second input indepedently. - * - * The config file api is conv_operator. - */ - -class ConvOperator : public ConvBaseOperator { - public: - ConvOperator(const OperatorConfig &config, bool useGpu) - : ConvBaseOperator(config, useGpu) {} - /** - * Free workspace in device and destroy cudnn tensor descriptor. - */ - virtual ~ConvOperator() {} - void forward() override; - void backward() override; - void reshape(int batchSize) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvProjection.cpp b/paddle/legacy/gserver/layers/ConvProjection.cpp deleted file mode 100644 index b40cdac2587d1fc0fec00801414560d2a27bd34a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvProjection.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ConvProjection.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_PROJECTION(conv, ConvProjection); - -size_t ConvProjection::calOutputSize() { - imageH_ = in_->getFrameHeight(); - imageW_ = in_->getFrameWidth(); - if (imageH_ == 0) imageH_ = configImgH_; - if (imageW_ == 0) imageW_ = configImgW_; - outputH_ = outputSize(imageH_, - (filterH_ - 1) * dilationH_ + 1, - paddingH_, - strideH_, - /* caffeMode */ true); - outputW_ = outputSize(imageW_, - (filterW_ - 1) * dilationW_ + 1, - paddingW_, - strideW_, - /* caffeMode */ true); - - const_cast(out_)->setFrameHeight(outputH_); - const_cast(out_)->setFrameWidth(outputW_); - - inputOffset_ = (configChannels_ / groups_) * imageH_ * imageW_; - outputOffset_ = (configNumFilters_ / groups_) * outputH_ * outputW_; - return outputH_ * outputW_ * configNumFilters_; -} - -size_t ConvProjection::calInputSize() { - return static_cast(configChannels_ * imageH_ * imageW_); -} - -void ConvProjection::forward() { - int batchSize = in_->value->getHeight(); - reshape(batchSize); - - void *workSpace = NULL; - if (workSpaceInBytes_ > 0) { - workSpace = getSpaceBytes(workSpaceInBytes_); - } - - for (int g = 0; g < groups_; ++g) { - REGISTER_TIMER_INFO("CudnnConvFwTimer", getName().c_str()); - - real *inputData = in_->value->getData() + g * inputOffset_; - real *wgtData = weight_->getW()->getData() + g * weightOffset_; - real *outData = out_->value->getData() + g * outputOffset_; - hl_convolution_forward(imageDesc_, - inputData, - outputDesc_, - outData, - filterDesc_, - wgtData, - convDesc_, - workSpace, - fwdLimitBytes_, - fwdAlgo_); - } -} - -void ConvProjection::backward(const UpdateCallback &callback) { - REGISTER_TIMER_INFO("CudnnConvBpTimer", getName().c_str()); - - void *workSpace = NULL; - if (workSpaceInBytes_ > 0) { - workSpace = getSpaceBytes(workSpaceInBytes_); - } - - for (int g = 0; g < groups_; ++g) { - real *outGrad = out_->grad->getData() + g * outputOffset_; - if 
(weight_->getWGrad()) { - real *inputData = in_->value->getData() + g * inputOffset_; - real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_; - hl_convolution_backward_filter(imageDesc_, - inputData, - outputDesc_, - outGrad, - filterDesc_, - weightGrad, - convDesc_, - workSpace, - bwdFilterLimitBytes_, - bwdFilterAlgo_); - } - - MatrixPtr preGrad = in_->grad; - if (NULL != preGrad) { - real *inputGrad = preGrad->getData() + g * inputOffset_; - real *wgtData = weight_->getW()->getData() + g * weightOffset_; - hl_convolution_backward_data(imageDesc_, - inputGrad, - outputDesc_, - outGrad, - filterDesc_, - wgtData, - convDesc_, - workSpace, - bwdDataLimitBytes_, - bwdDataAlgo_); - } - } - - weight_->getParameterPtr()->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvProjection.h b/paddle/legacy/gserver/layers/ConvProjection.h deleted file mode 100644 index 890a17e2f8d2d05001f825f374e8ab6420f7b3ea..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvProjection.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "ConvBaseProjection.h" -#include "paddle/legacy/math/MathUtils.h" - -namespace paddle { - -/** - * @brief Convolution projection do the same calculation with CudnnConvLayer. - */ -class ConvProjection : public ConvBaseProjection { - public: - /** - * Constructor. 
- */ - ConvProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) - : ConvBaseProjection(config, parameter, useGpu) {} - - ~ConvProjection() {} - - virtual void forward(); - virtual void backward(const UpdateCallback& callback); - virtual size_t calOutputSize(); - virtual size_t calInputSize(); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvShiftLayer.cpp b/paddle/legacy/gserver/layers/ConvShiftLayer.cpp deleted file mode 100644 index b7ecbe556c59b32cc5833617717b40c730392506..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvShiftLayer.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief A layer for circular convluation of two vectors, - * which is used in NEURAL TURING MACHINE. - * - Input: two vectors, the first is data (batchSize x dataDim) - * the second is shift weights (batchSize x shiftDim) - * - Output: a vector (batchSize x dataDim) - * Assumed that: - * - a[in]: contains M elements. - * - b[in]: contains N elements (N should be odd). - * - c[out]: contains M elements. - * - * \f[ - * c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j} - * \f] - * - * In this formula: - * - a's index is computed modulo M. 
- * - b's index is comupted modulo N. - * - * The config file api is conv_shift_layer. - */ - -class ConvShiftLayer : public Layer { - public: - explicit ConvShiftLayer(const LayerConfig& config) : Layer(config) {} - - ~ConvShiftLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(conv_shift, ConvShiftLayer); - -bool ConvShiftLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2U); - - return true; -} - -void ConvShiftLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - - size_t batchSize = inV0->getHeight(); - size_t dataDim = inV0->getWidth(); - - CHECK_EQ(batchSize, inV1->getHeight()); - CHECK_EQ(dataDim, getSize()); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - resetOutput(batchSize, dataDim); - } - - MatrixPtr outV = getOutputValue(); - - REGISTER_TIMER_INFO("FwConvShiftTimer", getName().c_str()); - outV->circularConv(*inV0, *inV1); -} - -void ConvShiftLayer::backward(const UpdateCallback& callback) { - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr outG = getOutputGrad(); - MatrixPtr inG0 = getInputGrad(0); - MatrixPtr inG1 = getInputGrad(1); - - REGISTER_TIMER_INFO("BwConvShiftTimer", getName().c_str()); - - if (inG0 && inG1) { - outG->circularConvDerivative(*outG, *inV0, *inV1, *inG0, *inG1); - } else { - CHECK(!inG0 || !inG1) << "Not supported"; - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvTransOperator.cpp b/paddle/legacy/gserver/layers/ConvTransOperator.cpp deleted file mode 100644 index 
f4ce2affb144152ed41a9d4be9fa87f800c83dbb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvTransOperator.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ConvTransOperator.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief ConvTransOperator takes two inputs to perform the convolution. - * The first input is the image, and the second input is the convolution kernel. - * The height of data for two inputs are the same. Each data of the first input - * is convolved with each data of the second input indepedently. - * - * The config file api is conv_operator. 
- */ - -REGISTER_OPERATOR(convt, ConvTransOperator); - -void ConvTransOperator::reshape(int batchSize) { - outputH_ = ins_[0]->getFrameHeight(); - outputW_ = ins_[0]->getFrameWidth(); - if (outputH_ == 0) outputH_ = outputY_; - if (outputW_ == 0) outputW_ = outputX_; - imageH_ = imageSize(outputH_, filterSizeY_, paddingY_, strideY_, caffeMode_); - imageW_ = imageSize(outputW_, filterSize_, padding_, stride_, caffeMode_); - /// Check that the imageSizes are consistent with config - CHECK_EQ(imageH_, imgSizeY_); - CHECK_EQ(imageW_, imgSize_); - out_->setFrameHeight(imageH_); - out_->setFrameWidth(imageW_); - - reshapeImageDescriptors(); - - inputOffset_ = numFilters_ * outputH_ * outputW_; - outputOffset_ = channels_ * imageH_ * imageW_; - weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSizeY_; - - if (!isSelectAlgo_) { - allocConvWorkSpace(); - } - - isSelectAlgo_ = true; -} - -void ConvTransOperator::forward() { - size_t batchSize = ins_[0]->value->getHeight(); - reshape(batchSize); - CHECK_EQ(ins_[1]->value->getHeight(), batchSize); - checkFilterSize(ins_[1]->value); - Matrix::resizeOrCreate( - out_->value, batchSize, imageH_ * imageW_ * channels_, false, useGpu_); - { - AsyncGpuBlock block; - for (size_t batchId = 0; batchId < batchSize; ++batchId) { - real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; - real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; - real *outData = out_->value->getData() + outputOffset_ * batchId; - hl_convolution_backward_data(imageDesc_, - outData, - outputDesc_, - inputData, - filterDesc_, - wgtData, - convDesc_, - workSpace_, - workSpaceInBytes_, - bwdDataAlgo_); - } - } -} - -void ConvTransOperator::backward() { - size_t batchSize = ins_[0]->value->getHeight(); - { - AsyncGpuBlock block; - for (size_t batchId = 0; batchId < batchSize; ++batchId) { - real *outGrad = out_->grad->getData() + outputOffset_ * batchId; - if (ins_[1]->grad) { - real *inputData = ins_[0]->value->getData() + 
inputOffset_ * batchId; - real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId; - hl_convolution_backward_filter(imageDesc_, - outGrad, - outputDesc_, - inputData, - filterDesc_, - weightGrad, - convDesc_, - workSpace_, - workSpaceInBytes_, - bwdFilterAlgo_); - } - - MatrixPtr preGrad = ins_[0]->grad; - if (NULL != preGrad) { - real *inputGrad = preGrad->getData() + inputOffset_ * batchId; - real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; - hl_convolution_forward(imageDesc_, - outGrad, - outputDesc_, - inputGrad, - filterDesc_, - wgtData, - convDesc_, - workSpace_, - workSpaceInBytes_, - fwdAlgo_); - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvTransOperator.h b/paddle/legacy/gserver/layers/ConvTransOperator.h deleted file mode 100644 index 206335a01ff7509eaa5528002c6c9686f05c931b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvTransOperator.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#pragma once - -#include "ConvBaseOperator.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief ConvTransOperator takes two inputs to perform the convolution. - * The first input is the image, and the second input is the convolution kernel. - * The height of data for two inputs are the same. 
Each data of the first input - * is convolved with each data of the second input indepedently. - * - * The config file api is conv_operator. - */ - -class ConvTransOperator : public ConvBaseOperator { - public: - ConvTransOperator(const OperatorConfig &config, bool useGpu) - : ConvBaseOperator(config, useGpu) {} - /** - * Free workspace in device and destroy cudnn tensor descriptor. - */ - virtual ~ConvTransOperator() {} - void forward() override; - void backward() override; - void reshape(int batchSize) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvTransProjection.cpp b/paddle/legacy/gserver/layers/ConvTransProjection.cpp deleted file mode 100644 index 00e34c8f2dcd2ea9698779f8b4425561f979cfef..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvTransProjection.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ConvTransProjection.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_PROJECTION(convt, ConvTransProjection); -size_t ConvTransProjection::calOutputSize() { - outputH_ = in_->getFrameHeight(); - outputW_ = in_->getFrameWidth(); - if (outputH_ == 0) outputH_ = configOutH_; - if (outputW_ == 0) outputW_ = configOutW_; - imageH_ = imageSize(outputH_, - (filterH_ - 1) * dilationH_ + 1, - paddingH_, - strideH_, - /* caffeMode */ true); - - imageW_ = imageSize(outputW_, - (filterW_ - 1) * dilationW_ + 1, - paddingW_, - strideW_, - /* caffeMode */ true); - - const_cast(out_)->setFrameHeight(imageH_); - const_cast(out_)->setFrameWidth(imageW_); - - inputOffset_ = (configChannels_ / groups_) * outputH_ * outputW_; - outputOffset_ = (configNumFilters_ / groups_) * imageH_ * imageW_; - return imageH_ * imageW_ * configNumFilters_; -} - -size_t ConvTransProjection::calInputSize() { - return static_cast(configChannels_ * outputH_ * outputW_); -} - -void ConvTransProjection::forward() { - int batchSize = in_->value->getHeight(); - reshape(batchSize); - - void *workSpace = NULL; - if (workSpaceInBytes_ > 0) { - workSpace = getSpaceBytes(workSpaceInBytes_); - } - - for (int g = 0; g < groups_; ++g) { - REGISTER_TIMER_INFO("CudnnConvTransFwTimer", getName().c_str()); - - real *inData = in_->value->getData() + g * inputOffset_; - real *wgtData = weight_->getW()->getData() + g * weightOffset_; - real *outData = out_->value->getData() + g * outputOffset_; - hl_convolution_backward_data(imageDesc_, - outData, - outputDesc_, - inData, - filterDesc_, - wgtData, - convDesc_, - workSpace, - bwdDataLimitBytes_, - bwdDataAlgo_); - } -} - -void ConvTransProjection::backward(const UpdateCallback &callback) { - REGISTER_TIMER_INFO("CudnnConvTransBpTimer", getName().c_str()); - - void *workSpace = NULL; - if (workSpaceInBytes_ > 0) { - workSpace = getSpaceBytes(workSpaceInBytes_); - } - - for (int g = 0; g < groups_; ++g) { - real *outGrad = 
out_->grad->getData() + g * outputOffset_; - if (weight_->getWGrad()) { - real *inData = in_->value->getData() + g * inputOffset_; - real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_; - hl_convolution_backward_filter(imageDesc_, - outGrad, - outputDesc_, - inData, - filterDesc_, - weightGrad, - convDesc_, - workSpace, - bwdFilterLimitBytes_, - bwdFilterAlgo_); - } - - MatrixPtr preGrad = in_->grad; - if (NULL != preGrad) { - real *inGrad = preGrad->getData() + g * inputOffset_; - real *wgtData = weight_->getW()->getData() + g * weightOffset_; - hl_convolution_forward(imageDesc_, - outGrad, - outputDesc_, - inGrad, - filterDesc_, - wgtData, - convDesc_, - workSpace, - fwdLimitBytes_, - fwdAlgo_); - } - } - - weight_->getParameterPtr()->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvTransProjection.h b/paddle/legacy/gserver/layers/ConvTransProjection.h deleted file mode 100644 index 9b63dd47352b9f24810d9406b314fbfa15ae13c3..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvTransProjection.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "ConvBaseProjection.h" -#include "paddle/legacy/math/MathUtils.h" - -namespace paddle { - -/** - * @brief Convolution projection do the same calculation with CudnnConvLayer. 
- */ -class ConvTransProjection : public ConvBaseProjection { - public: - /** - * Constructor. - */ - ConvTransProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) - : ConvBaseProjection(config, parameter, useGpu) {} - - ~ConvTransProjection() {} - - virtual void forward(); - virtual void backward(const UpdateCallback& callback); - virtual size_t calOutputSize(); - virtual size_t calInputSize(); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ConvexCombinationLayer.cpp b/paddle/legacy/gserver/layers/ConvexCombinationLayer.cpp deleted file mode 100644 index c38ab251f18728425d01479b82630550d29e9b61..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ConvexCombinationLayer.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief A layer for weighted sum of vectors, - * which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND - * TRANSLATE - * - Input: the the size of the first input is weightDim, - * and the size of the second input is weightdim * dataDim. 
- * - Output: the sizeof the output is dataDim - * \f[ - * out(j) = \sum_{i}(in0(i) * in1(i,j + i * dataDim)), - * i = 0,1,...,(weightDim-1); j = 0, 1,...,(dataDim-1) - * \f] - * Note that the above computation is for one sample. Multiple samples are - * processed in one batch. - * - * The config file api is linear_comb_layer. - */ -class ConvexCombinationLayer : public Layer { - protected: - /// A matrix pointer pointing to second input. - MatrixPtr tmpMtx0; - /// A matrix pointer pointing to first input. - MatrixPtr tmpRow0; - /// A matrix pointer pointing to output. - MatrixPtr tmpRow1; - - public: - explicit ConvexCombinationLayer(const LayerConfig& config) : Layer(config) {} - - ~ConvexCombinationLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(convex_comb, ConvexCombinationLayer); - -bool ConvexCombinationLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(2U, inputLayers_.size()); - size_t dataDim = getSize(); - size_t weightDim = inputLayers_[0]->getSize(); - - CHECK_EQ(weightDim * dataDim, inputLayers_[1]->getSize()) - << "Dimension mismatch"; - - tmpRow0 = Matrix::create(nullptr, - /* height= */ 1, - weightDim, - /* trans= */ false, - useGpu_); - tmpRow1 = Matrix::create(nullptr, - /* height= */ 1, - dataDim, - /* trans= */ false, - useGpu_); - tmpMtx0 = Matrix::create(nullptr, - /* height= */ weightDim, - dataDim, - /* trans= */ false, - useGpu_); - - return true; -} - -void ConvexCombinationLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - - size_t batchSize = inV0->getHeight(); - size_t weightDim = inV0->getWidth(); - size_t dataDim = getSize(); - - 
CHECK_EQ(batchSize, inV1->getHeight()); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - reserveOutput(batchSize, dataDim); - } - - MatrixPtr outV = getOutputValue(); - - REGISTER_TIMER_INFO("FwCvxCombTimer", getName().c_str()); - for (size_t i = 0; i < batchSize; i++) { - tmpMtx0->setData(inV1->getData() + i * weightDim * dataDim); - tmpRow0->setData(inV0->getData() + i * weightDim); - tmpRow1->setData(outV->getData() + i * dataDim); - - tmpRow1->mul(*tmpRow0, *tmpMtx0, 1, 0); - } -} - -void ConvexCombinationLayer::backward(const UpdateCallback& callback) { - MatrixPtr outG = getOutputGrad(); - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr inG0 = getInputGrad(0); - MatrixPtr inG1 = getInputGrad(1); - - size_t batchSize = inV0->getHeight(); - size_t weightDim = inV0->getWidth(); - size_t dataDim = getSize(); - - REGISTER_TIMER_INFO("BwCvxCombTimer", getName().c_str()); - - if (inG0) { - for (size_t i = 0; i < batchSize; i++) { - tmpRow0->setData(inG0->getData() + i * weightDim); - tmpRow1->setData(outG->getData() + i * dataDim); - tmpMtx0->setData(inV1->getData() + i * weightDim * dataDim); - - tmpRow0->mul(*tmpRow1, *(tmpMtx0->getTranspose()), 1, 1); - } - } - - if (inG1) { - for (size_t i = 0; i < batchSize; i++) { - tmpRow0->setData(inV0->getData() + i * weightDim); - tmpRow1->setData(outG->getData() + i * dataDim); - tmpMtx0->setData(inG1->getData() + i * weightDim * dataDim); - - tmpMtx0->mul(*(tmpRow0->getTranspose()), *tmpRow1, 1, 1); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CosSimLayer.cpp b/paddle/legacy/gserver/layers/CosSimLayer.cpp deleted file mode 100644 index ab8d7cc1f61823890676e8f647f784cfa9a0775e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CosSimLayer.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "CosSimLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(cos, CosSimLayer); - -bool CosSimLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2LU); - - createFunction(forward_, - "CosSimForward", - FuncConfig().set("scale", (real)config_.cos_scale())); - createFunction(backward_, - "CosSimBackward", - FuncConfig().set("scale", (real)config_.cos_scale())); - - return true; -} - -void CosSimLayer::forward(PassType passType) { - Layer::forward(passType); - /* malloc memory for the output_ if necessary */ - int batchSize = getInputValue(0)->getHeight(); - int size = getSize(); - CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed"; - - { - REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str()); - reserveOutput(batchSize, size); - } - - MatrixPtr outV = getOutputValue(); - /* activation */ { - REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str()); - MatrixPtr prevOut1 = getInputValue(0); - MatrixPtr prevOut2 = getInputValue(1); - - CHECK(outV && prevOut1 && prevOut2); - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*prevOut1); - inputs.addArg(*prevOut2); - outputs.addArg(*outV, ASSIGN_TO); - forward_[0]->calc(inputs, outputs); - } -} - -void CosSimLayer::backward(const UpdateCallback& 
callback) { - /* activation */ { - REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str()); - CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed"; - - const auto outG = this->getOutputGrad(); - const auto outV = this->getOutputValue(); - const auto inV1 = this->getInputValue(0); - const auto inV2 = this->getInputValue(1); - auto inG1 = this->getInputGrad(0); - auto inG2 = this->getInputGrad(1); - CHECK(outG && outV && inV1 && inV2 && inG1 && inG2); - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*outG); - inputs.addArg(*outV); - inputs.addArg(*inV1); - inputs.addArg(*inV2); - outputs.addArg(*inG1, ADD_TO); - outputs.addArg(*inG2, ADD_TO); - - backward_[0]->calc(inputs, outputs); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CosSimLayer.h b/paddle/legacy/gserver/layers/CosSimLayer.h deleted file mode 100644 index b08e2c6a35369832732706d64f209f85a5292a6f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CosSimLayer.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { -/** - * @brief A layer for calculating cosine similarity between two vector - * \f[ - * f(x,y)=scale\frac{x_1y_1+x_2y_2+...+x_ny_n}{\sqrt{x_1^2+x_2^2+... 
- * +x_n^2}\sqrt{y_1^2+y_2^2+...+y_n^2}} - * \f] - * - * - Input1: A vector (batchSize * dataDim) * - * - Input2: A vector (batchSize * dataDim) or (1 * dataDim) * - * - Output: A vector (batchSize * 1) - * - * The config file api is cos_sim. - */ -class CosSimLayer : public Layer { - public: - explicit CosSimLayer(const LayerConfig& config) : Layer(config) {} - - ~CosSimLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CosSimVecMatLayer.cpp b/paddle/legacy/gserver/layers/CosSimVecMatLayer.cpp deleted file mode 100644 index 03de0be815a1fb5eeb7ffab31b1721dc5951a469..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CosSimVecMatLayer.cpp +++ /dev/null @@ -1,182 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { -/** - * @brief A layer for computing cosine similarity between a vector - * and each row of a matrix - * out[i] = cos_scale * cos(in1, in2(i,:)); - * @note used in NEURAL TURING MACHINE - * - * Input1: a vector (batchSize * dataDim) - * - * Input2: a matrix in vector form (batchSize * (weightDim*dataDim)) - * - * Output: a vector (batchSize * weightDim) - */ - -class CosSimVecMatLayer : public Layer { - protected: - MatrixPtr tmpMtx0; - MatrixPtr tmpMtx1; - MatrixPtr tmpRow0; - MatrixPtr tmpRow1; - MatrixPtr tmpRow2; - MatrixPtr tmpRow3; - - public: - explicit CosSimVecMatLayer(const LayerConfig& config) : Layer(config) {} - - ~CosSimVecMatLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(cos_vm, CosSimVecMatLayer); - -bool CosSimVecMatLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2U); - - size_t dataDim = inputLayers_[0]->getSize(); - size_t numKeys = getSize(); - size_t memoryDim = inputLayers_[1]->getSize(); - - CHECK_EQ(dataDim * numKeys, memoryDim) << "Dimension mismatch"; - - tmpRow0 = Matrix::create(nullptr, - /* height= */ 1, - dataDim, - /* trans= */ false, - useGpu_); - tmpRow1 = Matrix::create(nullptr, - /* height= */ 1, - dataDim, - /* trans= */ false, - useGpu_); - tmpRow2 = Matrix::create(nullptr, - /* height= */ numKeys, - 1, - /* trans= */ false, - useGpu_); - tmpRow3 = Matrix::create(nullptr, - /* height= */ numKeys, - 1, - /* trans= */ false, - useGpu_); - - tmpMtx0 = Matrix::create(nullptr, - /* height= */ numKeys, - dataDim, - /* trans= */ false, - useGpu_); - tmpMtx1 = Matrix::create(nullptr, - /* 
height= */ numKeys, - dataDim, - /* trans= */ false, - useGpu_); - - CHECK(tmpRow0 && tmpRow1 && tmpRow2 && tmpRow3 && tmpMtx0 && tmpMtx1); - - createFunction(forward_, - "CosSimForward", - FuncConfig().set("scale", (real)config_.cos_scale())); - createFunction(backward_, - "CosSimBackward", - FuncConfig().set("scale", (real)config_.cos_scale())); - - return true; -} - -void CosSimVecMatLayer::forward(PassType passType) { - Layer::forward(passType); - CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed"; - - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - - size_t batchSize = inV0->getHeight(); - size_t numKeys = getSize(); - - CHECK_EQ(batchSize, inV1->getHeight()); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - reserveOutput(batchSize, numKeys); - } - - MatrixPtr outV = getOutputValue(); - CHECK(outV && inV0 && inV1); - REGISTER_TIMER_INFO("FwCosVMTimer", getName().c_str()); - for (size_t i = 0; i < batchSize; i++) { - tmpRow0->setData(inV0->rowBuf(i)); - tmpMtx0->setData(inV1->rowBuf(i)); - tmpRow2->setData(outV->rowBuf(i)); - - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*tmpMtx0); - inputs.addArg(*tmpRow0); - outputs.addArg(*tmpRow2, ASSIGN_TO); - forward_[0]->calc(inputs, outputs); - } -} - -void CosSimVecMatLayer::backward(const UpdateCallback& callback) { - CHECK_EQ(backward_.size(), 1UL) << "Only one forward function needed"; - - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr inG0 = getInputGrad(0); - MatrixPtr inG1 = getInputGrad(1); - MatrixPtr outV = getOutputValue(); - MatrixPtr outG = getOutputGrad(); - - size_t batchSize = inV0->getHeight(); - CHECK(inV0 && inV1 && inG0 && inG1 && outV && outG); - REGISTER_TIMER_INFO("BwCosVMTimer", getName().c_str()); - - for (size_t i = 0; i < batchSize; i++) { - tmpRow0->setData(inV0->rowBuf(i)); - tmpRow1->setData(inG0->rowBuf(i)); - tmpMtx0->setData(inV1->rowBuf(i)); - tmpMtx1->setData(inG1->rowBuf(i)); 
- tmpRow2->setData(outV->rowBuf(i)); - tmpRow3->setData(outG->rowBuf(i)); - - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*tmpRow3); - inputs.addArg(*tmpRow2); - inputs.addArg(*tmpMtx0); - inputs.addArg(*tmpRow0); - outputs.addArg(*tmpMtx1, ADD_TO); - outputs.addArg(*tmpRow1, ADD_TO); - - backward_[0]->calc(inputs, outputs); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CostLayer.cpp b/paddle/legacy/gserver/layers/CostLayer.cpp deleted file mode 100644 index 18b5b77bde9dee97cb6971624007307ff06411c7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CostLayer.cpp +++ /dev/null @@ -1,748 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CostLayer.h" -#include -#include -#include -#include "paddle/legacy/utils/Logging.h" - -#include "paddle/legacy/math/SparseMatrix.h" - -namespace paddle { - -bool CostLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - bool ret = Layer::init(layerMap, parameterMap); - coeff_ = config_.coeff(); - if (!ret) return ret; - CHECK_GE(inputLayers_.size(), 2UL); - CHECK_LE(inputLayers_.size(), 3UL); - if (inputLayers_.size() == 3) { - weightLayer_ = inputLayers_[2]; - } - return true; -} - -void CostLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInputValue(*getOutputLayer())->getHeight(); - int size = 1; - resetOutput(batchSize, size); - - const MatrixPtr& output = getInputValue(*getOutputLayer()); - Argument label = getInput(*getLabelLayer()); - - /* get the cost value for each sample*/ - forwardImp(*output, label, *getOutputValue()); - if (weightLayer_) { - const MatrixPtr& weight = getInputValue(*weightLayer_); - getOutputValue()->dotMul(*getOutputValue(), *weight); - } -} - -void CostLayer::backward(const UpdateCallback& callback) { - (void)callback; - - const Argument& output = getInput(*getOutputLayer()); - Argument label = getInput(*getLabelLayer()); - - bool support = true; - if (weightLayer_) { - support = output.grad->getAbsSum() == 0; - } - - backwardImp(*output.value, label, *output.grad); - - if (weightLayer_) { - CHECK(support) << "Weighted cost layer '" << getName() - << "' must be the last layer " - "connected to the output layer '" - << getOutputLayer()->getName() << "'"; - output.grad->rowScale(0, *output.grad, *getInputValue(*weightLayer_)); - } - if (coeff_ != real(1.0f)) { - output.grad->add(coeff_, 0); - } -} - -// -// class MultiClassCrossEntropy -// -bool MultiClassCrossEntropy::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - return CostLayer::init(layerMap, parameterMap); -} - -void 
MultiClassCrossEntropy::forwardImp(Matrix& output, - Argument& label, - Matrix& target) { - target.oneHotCrossEntropy(output, *label.ids); -} - -void MultiClassCrossEntropy::backwardImp(Matrix& output, - Argument& label, - Matrix& outputG) { - outputG.oneHotCrossEntropyBp(output, *label.ids); -} - -// -// class MultiClassCrossEntropyWithSelfNorm -// -REGISTER_LAYER(multi_class_cross_entropy_with_selfnorm, - MultiClassCrossEntropyWithSelfNorm); - -bool MultiClassCrossEntropyWithSelfNorm::init( - const LayerMap& layerMap, const ParameterMap& parameterMap) { - return CostLayer::init(layerMap, parameterMap); -} - -void MultiClassCrossEntropyWithSelfNorm::forwardImp(Matrix& output, - Argument& label, - Matrix& target) { - Matrix::resizeOrCreate(sftMaxSum_, output.getHeight(), 1, false, useGpu_); - output.rowSum(*sftMaxSum_); - sftMaxSum_->log2(); - - target.oneHotCrossEntropy(output, *label.ids); - target.add(*sftMaxSum_); - - sftMaxSum_->square2(); - target.add(*sftMaxSum_, config_.softmax_selfnorm_alpha()); -} - -void MultiClassCrossEntropyWithSelfNorm::backwardImp(Matrix& output, - Argument& label, - Matrix& outputG) { - Matrix::resizeOrCreate(sftMaxSum_, output.getHeight(), 1, false, useGpu_); - output.rowSum(*sftMaxSum_); - - Matrix::resizeOrCreate(sumInv_, output.getHeight(), 1, false, useGpu_); - sftMaxSum_->reciprocal2(*sumInv_); - - outputG.oneHotCrossEntropyBp(output, *label.ids); - outputG.addColumnVector(*sumInv_); - - sftMaxSum_->log2(); - sumInv_->dotMul(*sumInv_, *sftMaxSum_); - sumInv_->mulScalar(2 * config_.softmax_selfnorm_alpha()); - - outputG.addColumnVector(*sumInv_); -} - -// -// class SoftBinaryClassCrossEntropy -// -REGISTER_LAYER(soft_binary_class_cross_entropy, SoftBinaryClassCrossEntropy); - -bool SoftBinaryClassCrossEntropy::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - return CostLayer::init(layerMap, parameterMap); -} - -void SoftBinaryClassCrossEntropy::forwardImp(Matrix& output, - Argument& label, - Matrix& 
target) { - Matrix::resizeOrCreate( - targetPerDim_, output.getHeight(), output.getWidth(), false, useGpu_); - - targetPerDim_->softCrossEntropy(output, *label.value); - targetPerDim_->rowSum(target); -} - -void SoftBinaryClassCrossEntropy::backwardImp(Matrix& output, - Argument& label, - Matrix& outputG) { - outputG.softCrossEntropyBp(output, *label.value); -} - -// -// class SumOfSquaresCostLayer -// - -REGISTER_LAYER(square_error, SumOfSquaresCostLayer); - -bool SumOfSquaresCostLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - return CostLayer::init(layerMap, parameterMap); -} - -void SumOfSquaresCostLayer::forwardImp(Matrix& output, - Argument& label, - Matrix& target) { - target.sumOfSquares(output, *label.value); -} - -void SumOfSquaresCostLayer::backwardImp(Matrix& output, - Argument& label, - Matrix& outputG) { - outputG.sumOfSquaresBp(output, *label.value); -} - -// -// class SmoothL1CostLayer -// - -REGISTER_LAYER(smooth_l1, SmoothL1CostLayer); - -bool SmoothL1CostLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - return CostLayer::init(layerMap, parameterMap); -} - -void SmoothL1CostLayer::forwardImp(Matrix& output, - Argument& label, - Matrix& target) { - MatrixPtr targetCpu, outputCpu, labelCpu; - if (useGpu_) { - targetCpu = - Matrix::create(target.getHeight(), target.getWidth(), false, false); - outputCpu = - Matrix::create(output.getHeight(), output.getWidth(), false, false); - labelCpu = Matrix::create( - label.value->getHeight(), label.value->getWidth(), false, false); - targetCpu->copyFrom(target); - outputCpu->copyFrom(output); - labelCpu->copyFrom(*label.value); - targetCpu->smoothL1(*outputCpu, *labelCpu, 1.0); - target.copyFrom(*targetCpu); - } else { - target.smoothL1(output, *label.value, 1.0); - } -} - -void SmoothL1CostLayer::backwardImp(Matrix& output, - Argument& label, - Matrix& outputG) { - MatrixPtr outputGCpu, outputCpu, labelCpu; - if (useGpu_) { - outputGCpu = - 
Matrix::create(outputG.getHeight(), outputG.getWidth(), false, false); - outputCpu = - Matrix::create(output.getHeight(), output.getWidth(), false, false); - labelCpu = Matrix::create( - label.value->getHeight(), label.value->getWidth(), false, false); - outputGCpu->copyFrom(outputG); - outputCpu->copyFrom(output); - labelCpu->copyFrom(*label.value); - outputGCpu->smoothL1Bp(*outputCpu, *labelCpu, 1.0); - outputG.copyFrom(*outputGCpu); - } else { - outputG.smoothL1Bp(output, *label.value, 1.0); - } -} - -// -// class RankingCost -// -bool RankingCost::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - posPairCount_ = 0; - negPairCount_ = 0; - - bool ret = Layer::init(layerMap, parameterMap); - if (!ret) return ret; - CHECK_GE(inputLayers_.size(), 3UL); - CHECK_LE(inputLayers_.size(), 4UL); - if (inputLayers_.size() == 4) { - weightLayer_ = inputLayers_[3]; - } - return true; -} - -void RankingCost::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInputValue(*getOutputLayer(0))->getHeight(); - int size = 1; - resizeOutput(batchSize, size); - Matrix::resizeOrCreate(margin_, batchSize, size, /* trans= */ false, useGpu_); - MatrixPtr label = getInputValue(*getLabelLayer()); - if (!label) { - // input label is not in value, try ids - IVectorPtr idLabel = getInput(*getLabelLayer()).ids; - CHECK(idLabel) << "label layer has neither value nor ids"; - CHECK_EQ((size_t)batchSize, idLabel->getSize()); - Matrix::resizeOrCreate( - labelBuf_, batchSize, /*width*/ 1, /*trans*/ false, useGpu_); - labelBuf_->copyFrom(*idLabel); - label = labelBuf_; - } - - MatrixPtr output[] = {getInputValue(*getOutputLayer(0)), - getInputValue(*getOutputLayer(1))}; - MatrixPtr target = this->getOutputValue(); - margin_->sub(*output[0], *output[1]); - - // for validation - size_t height = output[0]->getHeight(); - target->biggerThan(*(output[0]), *(output[1]), *label); - double total = 
static_cast(height); - if (weightLayer_) { - const MatrixPtr& weight = getInputValue(*weightLayer_); - target->dotMul(*target, *weight); - total = weight->getSum(); - } - double pos = target->getSum(); - posPairCount_ += pos; - negPairCount_ += (total - pos); - - // forward - target->logisticRegressionLoss(*margin_, *label); - if (weightLayer_) { - const MatrixPtr& weight = getInputValue(*weightLayer_); - target->dotMul(*target, *weight); - } -} - -void RankingCost::backward(const UpdateCallback& callback) { - (void)callback; - - MatrixPtr label = getInputValue(*getLabelLayer()); - if (!label) { - // input label is not in value, but in ids - // use labelBuf_ (should already resized and copied during forward) - label = labelBuf_; - } - - Matrix::resizeOrCreate( - marginGrad_, label->getHeight(), 1, /* trans= */ false, useGpu_); - marginGrad_->zeroMem(); - marginGrad_->logisticRegressionLossBp(*margin_, *label); - if (weightLayer_) { - const MatrixPtr& weight = getInputValue(*weightLayer_); - marginGrad_->dotMul(*marginGrad_, *weight); - } - - getInputGrad(0)->add(*marginGrad_); - getInputGrad(1)->sub(*marginGrad_); -} - -void RankingCost::onPassEnd() { - double ratio = posPairCount_ / ((negPairCount_ <= 0) ? 
1.0 : negPairCount_); - LOG(INFO) << "calc pos/neg: " << ratio << " pos= " << posPairCount_ - << " neg= " << negPairCount_; - - posPairCount_ = 0; - negPairCount_ = 0; -} - -// -// class LambdaCost -// -REGISTER_LAYER(lambda_cost, LambdaCost); - -bool LambdaCost::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - truncationSize_ = config_.ndcg_num(); - maxSortSize_ = config_.max_sort_size(); - if (maxSortSize_ != -1) { - CHECK_GE(maxSortSize_, truncationSize_) - << "maxSortSize must be greater than or equal to NDCG size!"; - } - LOG(INFO) << "LambdaRank v1.3, NDCG size = " << truncationSize_ - << ", Max partial sort size = " << maxSortSize_; - CHECK(!useGpu_) << "LambdaRank supports CPU only!"; - return Layer::init(layerMap, parameterMap); -} - -void LambdaCost::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInputValue(*getOutputLayer())->getHeight(); - resizeOutput(batchSize, 1); - - MatrixPtr score = getInputValue(*getScoreLayer()); - MatrixPtr output = getInputValue(*getOutputLayer()); - MatrixPtr target = this->getOutputValue(); - - real* scoreData = score->getData(); - real* outputData = output->getData(); - real* targetData = target->getData(); - - auto startPos = getInput(*getOutputLayer()).sequenceStartPositions; - const int* startPosData = startPos->getData(false); - size_t batchNum = startPos->getSize() - 1; - for (size_t i = 0; i < batchNum; ++i) { - int beginPos = startPosData[i]; - int endPos = startPosData[i + 1]; - real NDCG = calcNDCG( - outputData + beginPos, scoreData + beginPos, endPos - beginPos); - for (int j = beginPos; j < endPos; ++j) { - targetData[j] = NDCG; - } - } -} - -void LambdaCost::backward(const UpdateCallback& callback) { - (void)callback; - MatrixPtr score = getInputValue(*getScoreLayer()); - MatrixPtr output = getInputValue(*getOutputLayer()); - Matrix::resizeOrCreate(marginGrad_, - score->getHeight(), - 1, - /* trans= */ 
false, - useGpu_); - marginGrad_->zeroMem(); - - real* gradData = marginGrad_->getData(); - real* scoreData = score->getData(); - real* outputData = output->getData(); - - auto startPos = getInput(*getOutputLayer()).sequenceStartPositions; - const int* startPosData = startPos->getData(false); - size_t batchNum = startPos->getSize() - 1; - - for (size_t i = 0; i < batchNum; ++i) { - int beginPos = startPosData[i]; - int endPos = startPosData[i + 1]; - calcGrad(outputData + beginPos, - scoreData + beginPos, - gradData + beginPos, - endPos - beginPos); - } - - getInputGrad(0)->add(*marginGrad_); -} - -void LambdaCost::calcGrad(const real* outputScore, - const real* score, - real* gradData, - int size) { - CHECK_GE(size, truncationSize_) - << "Invalid: (Sample num in the same list) < (NDCG truncation num) !"; - int sortSize = maxSortSize_ == -1 ? size : std::min(maxSortSize_, size); - - scorePair_.clear(); - for (int i = 0; i < size; ++i) { - scorePair_.push_back(std::make_pair(score[i], i)); - } - if (size <= sortSize) { - std::sort(scorePair_.begin(), - scorePair_.end(), - [](const std::pair& a, const std::pair& b) { - return a.first > b.first; - }); - } else { - std::partial_sort( - scorePair_.begin(), - scorePair_.begin() + sortSize, - scorePair_.end(), - [](const std::pair& a, const std::pair& b) { - return a.first > b.first; - }); - } - - real maxDCG = 0; - for (int i = 0; i < truncationSize_; ++i) { - maxDCG += (std::pow(2, scorePair_[i].first) - 1) / std::log(i + 2); - } - CHECK_GT(maxDCG, 0) << "Invalid: max DCG = 0!"; - - for (int i = 0; i < sortSize; ++i) { - for (int j = i + 1; j < size; ++j) { - int index_i = scorePair_[i].second; - int index_j = scorePair_[j].second; - real score_i = score[index_i]; - real score_j = score[index_j]; - real dcgDif = 0; - if (j < sortSize) { - dcgDif = (std::pow(2, score_i) - std::pow(2, score_j)) * - (1 / std::log(i + 2) - 1 / std::log(j + 2)); - } else { - dcgDif = - (std::pow(2, score_i) - std::pow(2, score_j)) / 
std::log(i + 2); - } - - real lambda_ij = - -std::abs(dcgDif) / - (1 + std::exp(outputScore[index_i] - outputScore[index_j])); - gradData[index_i] += lambda_ij / maxDCG; - gradData[index_j] -= lambda_ij / maxDCG; - } - } -} - -real LambdaCost::calcNDCG(const real* outputScore, - const real* score, - int size) { - CHECK_GE(size, truncationSize_) - << "Invalid: (Sample num in the same list) < (NDCG truncation num) !"; - - outputScorePair_.clear(); - for (int i = 0; i < size; ++i) { - outputScorePair_.push_back(std::make_pair(outputScore[i], i)); - } - std::partial_sort( - outputScorePair_.begin(), - outputScorePair_.begin() + truncationSize_, - outputScorePair_.end(), - [](const std::pair& a, const std::pair& b) { - return a.first > b.first; - }); - - real DCG = 0; - for (int i = 0; i < truncationSize_; ++i) { - DCG += - (std::pow(2, score[outputScorePair_[i].second]) - 1) / std::log(i + 2); - } - - scoreVec_.resize(size); - std::copy(score, score + size, scoreVec_.begin()); - real maxDCG = 0; - std::partial_sort(scoreVec_.begin(), - scoreVec_.begin() + truncationSize_, - scoreVec_.end(), - std::greater()); - for (int i = 0; i < truncationSize_; ++i) { - maxDCG += (std::pow(2, scoreVec_[i]) - 1) / std::log(i + 2); - } - CHECK_GT(maxDCG, 0) << "Invalid: max DCG = 0!"; - - return DCG / maxDCG; -} - -// -// class MultiBinaryLabelCrossEntropy -// - -REGISTER_LAYER(multi_binary_label_cross_entropy, MultiBinaryLabelCrossEntropy); - -bool MultiBinaryLabelCrossEntropy::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - return CostLayer::init(layerMap, parameterMap); -} - -void MultiBinaryLabelCrossEntropy::forwardImp(Matrix& output, - Argument& label, - Matrix& target) { - MatrixPtr value = nullptr; - if (label.ids) { - CHECK(!label.value); - value = label.ids->toOneHotSparseMatrix(output.getWidth(), useGpu_); - } else { - CHECK(label.value); - value = label.value; - } - - if (dynamic_cast(value.get()) || - dynamic_cast(value.get())) { - 
target.multiBinaryLabelCrossEntropy(output, *value); - } else { - Matrix::resizeOrCreate( - targetPerDim_, output.getHeight(), output.getWidth(), false, useGpu_); - - targetPerDim_->binaryLabelCrossEntropy(output, *value); - targetPerDim_->rowSum(target); - } -} - -void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output, - Argument& label, - Matrix& outputG) { - MatrixPtr value = nullptr; - if (label.ids) { - CHECK(!value); - value = label.ids->toOneHotSparseMatrix(output.getWidth(), useGpu_); - } else { - CHECK(label.value); - value = label.value; - } - - if (dynamic_cast(value.get()) || - dynamic_cast(value.get())) { - outputG.multiBinaryLabelCrossEntropyBp(output, *value); - } else { - outputG.binaryLabelCrossEntropyBp(output, *value); - } -} - -bool HuberCost::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - CostLayer::init(layerMap, parameterMap); - if (useGpu_) { - tmpCpuInput_.reserve(inputLayers_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_.push_back(Argument()); - } - } - return true; -} - -void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) { - if (useGpu_) { - for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_[i].resizeAndCopyFrom( - getInput(i), false, HPPL_STREAM_DEFAULT); - } - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - } -} - -// -// Huber loss for robust regression. 
-// -REGISTER_LAYER(huber_regression, HuberRegressionLoss); - -bool HuberRegressionLoss::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - HuberCost::init(layerMap, parameterMap); - delta_ = config_.delta(); - return true; -} - -void HuberRegressionLoss::forwardImp(Matrix& output, - Argument& label, - Matrix& target) { - HuberCost::forwardImp(output, label, target); - size_t numSamples = target.getHeight(); - size_t dim = output.getWidth(); - CHECK(label.value); - CHECK_EQ((*label.value).getHeight(), numSamples); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(dim, (*label.value).getWidth()); - CHECK_EQ(target.getWidth(), (size_t)1); - - real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); - real* lbl = - useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); - std::vector cost(numSamples, 0); - for (size_t i = 0; i < numSamples; ++i) { - for (size_t j = 0; j < dim; ++j) { - int index = i * dim + j; - real a = std::abs(lbl[index] - out[index]); - if (a <= delta_) - cost[i] += a * a / 2; - else - cost[i] += delta_ * (a - delta_ / 2); - } - } - target.copyFrom(cost.data(), numSamples); -} - -void HuberRegressionLoss::backwardImp(Matrix& output, - Argument& label, - Matrix& outputG) { - size_t numSamples = output.getHeight(); - size_t dim = output.getWidth(); - real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); - real* lbl = - useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData(); - real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); - for (size_t i = 0; i < numSamples; ++i) { - for (size_t j = 0; j < dim; ++j) { - int index = i * dim + j; - real a = lbl[index] - out[index]; - if (std::abs(a) <= delta_) - grad[index] += -a; - else - grad[index] += a > 0 ? 
-delta_ : delta_; - } - } - if (useGpu_) outputG.copyFrom(grad, numSamples * dim); -} - -// -// Huber loss for robust 2-classes classification -// -REGISTER_LAYER(huber_classification, HuberTwoClassification); - -bool HuberTwoClassification::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - return HuberCost::init(layerMap, parameterMap); -} - -void HuberTwoClassification::forwardImp(Matrix& output, - Argument& label, - Matrix& target) { - HuberCost::forwardImp(output, label, target); - size_t numSamples = target.getHeight(); - CHECK(label.ids); - CHECK_EQ((*label.ids).getSize(), numSamples); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(output.getWidth(), (size_t)1); - CHECK_EQ(target.getWidth(), (size_t)1); - - real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); - int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); - std::vector cost(numSamples, 0); - for (size_t i = 0; i < numSamples; ++i) { - int y = 2 * lbl[i] - 1; - real a = out[i] * y; - if (a < -1) - cost[i] = -4 * a; - else if (a < 1) - cost[i] = (1 - a) * (1 - a); - } - target.copyFrom(cost.data(), numSamples); -} - -void HuberTwoClassification::backwardImp(Matrix& output, - Argument& label, - Matrix& outputG) { - size_t numSamples = output.getHeight(); - real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData(); - int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData(); - real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData(); - for (size_t i = 0; i < numSamples; ++i) { - int y = 2 * lbl[i] - 1; - real a = out[i] * y; - if (a < -1) - grad[i] += -4 * y; - else if (a < 1) - grad[i] += -2 * (1 - a) * y; - } - if (useGpu_) outputG.copyFrom(grad, numSamples); -} -/** - * This cost layer compute the sum of its input as loss. 
- * \f[ - * o(i) = \sum_{j=1}^D y_{ij} - * \f] - */ -class SumCostLayer : public Layer { - public: - explicit SumCostLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - bool ret = Layer::init(layerMap, parameterMap); - if (!ret) return ret; - CHECK_EQ(inputLayers_.size(), 1UL); - return true; - } - - void forward(PassType passType) override { - Layer::forward(passType); - const MatrixPtr& input = getInputValue(0); - - /* malloc memory for the output_ if necessary */ - int batchSize = input->getHeight(); - int size = 1; - resizeOutput(batchSize, size); - output_.value->sumRows(*input, /* scaleSum= */ 1, /* scaleDest= */ 0); - } - - void backward(const UpdateCallback& callback = nullptr) override { - getInputGrad(0)->add((real)1); - } -}; - -REGISTER_LAYER(sum_cost, SumCostLayer); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CostLayer.h b/paddle/legacy/gserver/layers/CostLayer.h deleted file mode 100644 index 9bfec0e2b169fac4f235fd13347be687c4f1a222..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CostLayer.h +++ /dev/null @@ -1,374 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "Layer.h" - -namespace paddle { - -/** - * Base class for a particular type of cost layer. 
- * This type of cost should have one data layer, one label layer - * and an optional weight layer as input. - * The derived class should implemnt forwardImp() and backwardImp() - * which calculate the cost for data and label. The weight is automatically - * handled by the base class. - */ -class CostLayer : public Layer { - public: - explicit CostLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - LayerPtr getOutputLayer() { return inputLayers_[0]; } - - LayerPtr getLabelLayer() { return inputLayers_[1]; } - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback = nullptr) override; - - virtual void forwardImp(Matrix& outputValue, - Argument& label, - Matrix& cost) = 0; - - virtual void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) = 0; - - protected: - LayerPtr weightLayer_; - real coeff_; -}; - -/** - * The cross-entropy loss for multi-class classification task. - * The loss function is: - * - * \f[ - * L = - \sum_{i}{t_{k} * log(P(y=k))} - * \f] - */ -class MultiClassCrossEntropy : public CostLayer { - public: - explicit MultiClassCrossEntropy(const LayerConfig& config) - : CostLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override; -}; - -/** - * The cross-entropy with self-normalization for multi-class classification. - * - * The loss function is: - * \f[ - * L = \sum_{i}[-log(P(x_{i})) + alpha * log(Z(x_{i})^2)] - * \f] - * - * The \f$Z(x)\f$ is the softmax normalizer. - * - * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar, - * Richard Schwartz, and John Makhoul. Fast and robust neural - * network joint models for statistical machine translation. 
- * In Proceedings of the ACL 2014 Conference. - */ -class MultiClassCrossEntropyWithSelfNorm : public CostLayer { - public: - explicit MultiClassCrossEntropyWithSelfNorm(const LayerConfig& config) - : CostLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override; - - protected: - MatrixPtr sftMaxSum_; - MatrixPtr sumInv_; -}; - -/** - * The cross-entropy for soft binary class. - * \f[ - * L = \sum_i (\sum_j -y_j(i)*log(x_j(i))-(1-y_j(i))*log(1-x_j(i))) - * \f] - */ -class SoftBinaryClassCrossEntropy : public CostLayer { - public: - explicit SoftBinaryClassCrossEntropy(const LayerConfig& config) - : CostLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override; - - protected: - MatrixPtr targetPerDim_; -}; - -/** - * This cost layer compute Euclidean (L2) loss for real-valued regression - * tasks. - * \f[ - * L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2} - * \f] - */ -class SumOfSquaresCostLayer : public CostLayer { - public: - explicit SumOfSquaresCostLayer(const LayerConfig& config) - : CostLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override; -}; - -/** - * This cost layer compute smooth L1 loss for real-valued regression - * tasks. 
- * \f[ - * L = - * 0.5 * x^2 if / -1 < |x| < 1 / - * |x| - 0.5 / otherwise / - * \f] - * - * x = output - label - */ -class SmoothL1CostLayer : public CostLayer { - public: - explicit SmoothL1CostLayer(const LayerConfig& config) : CostLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override; -}; - -/** - * A cost layer for learning to rank (LTR) task. This layer contains at leat - * three inputs. - * \f[ - * C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\ - * o_{i,j} = o_i - o_j \\ - * \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \} - * \f] - * - * [1]. Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to - * Rank useing Gradient Descent. - */ -class RankingCost : public Layer { - public: - explicit RankingCost(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - LayerPtr getOutputLayer(size_t i) { return inputLayers_[i]; } - - LayerPtr getLabelLayer() { return inputLayers_[2]; } - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback = nullptr) override; - - void onPassEnd() override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) { - (void)output; - (void)label; - (void)cost; - } - - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) { - (void)outputValue; - (void)label; - (void)outputGrad; - } - - private: - double posPairCount_; - double negPairCount_; - MatrixPtr margin_; - MatrixPtr marginGrad_; - /// if input label is put in ids (not value), copy to this buffer. - MatrixPtr labelBuf_; - LayerPtr weightLayer_; -}; - -/** - * LambdaRank os a method for learning arbitrary information retrieval - * measures. 
It can be applied to any algorithm that learns through gradient - * descent. LambdaRank is a listwise method, in that the cost depends on the - * sorted order of the documents. LambdaRank gives the gradient of cost - * function: - * - * \f[ - * \lambda_{ij} = \frac{1}{1 + e^{o_i - o_j}} \left| \Delta_{NDCG} \right| - * \f] - * - * [1] Christopher J.C. Burges, Robert Ragno, Quoc Viet Le. Learning to Rank - * with Nonsmooth Cost Functions. - */ -class LambdaCost : public Layer { - public: - explicit LambdaCost(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - LayerPtr getOutputLayer() { return inputLayers_[0]; } - - LayerPtr getScoreLayer() { return inputLayers_[1]; } - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback = nullptr) override; - - real calcNDCG(const real* outputScore, const real* score, int size); - void calcGrad(const real* outputScore, - const real* score, - real* gradData, - int size); - - private: - MatrixPtr marginGrad_; - int truncationSize_; - int maxSortSize_; - std::vector> scorePair_; - std::vector> outputScorePair_; - std::vector scoreVec_; -}; - -/** - * Cross entropy for multi binary labels. - * \f[ - * cost[i] = -sum(label[i][j]*log(output[i][j]) + - * (1-label[i][j])*log(1-output[i][j])) - * \f] - */ -class MultiBinaryLabelCrossEntropy : public CostLayer { - protected: - MatrixPtr targetPerDim_; - - public: - explicit MultiBinaryLabelCrossEntropy(const LayerConfig& config) - : CostLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override; -}; - -/* - * A base layer for HuberRegressionLoss and HuberTwoClassification. 
- */ -class HuberCost : public CostLayer { - public: - std::vector tmpCpuInput_; - - explicit HuberCost(const LayerConfig& config) : CostLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override {} -}; - -/** - * Huber loss for robust regression. - * - * Given output f(x), label y and delta, the loss is: - * Loss = 0.5 * (1 - y * f)^2, if abs(y - f) <= delta \\ - * Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise - */ -class HuberRegressionLoss : public HuberCost { - public: - explicit HuberRegressionLoss(const LayerConfig& config) : HuberCost(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override; - - protected: - real delta_; -}; - -/** - * Huber loss for robust 2-classes classification. - * - * For label={0, 1}, let y=2*label-1. 
Given output f(x), the loss is: - * Loss = 4 * y * f, if y* f < -1 \\ - * Loss = (1 - y * f)^2, if -1 < y * f < 1 \\ - * Loss = 0, otherwise - */ -class HuberTwoClassification : public HuberCost { - public: - explicit HuberTwoClassification(const LayerConfig& config) - : HuberCost(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - - void backwardImp(Matrix& outputValue, - Argument& label, - Matrix& outputGrad) override; -}; - -typedef std::shared_ptr CostLayerPtr; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CropLayer.cpp b/paddle/legacy/gserver/layers/CropLayer.cpp deleted file mode 100644 index d891375ecce0371503ba3034f0584f3b1e553a55..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CropLayer.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CropLayer.h" -#include "paddle/legacy/utils/Stat.h" -namespace paddle { - -REGISTER_LAYER(crop, CropLayer); - -bool CropLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - CHECK_LE(static_cast(inputLayers_.size()), 2); - CHECK_GE(static_cast(inputLayers_.size()), 1); - crop_axis_ = config_.axis(); - for (int i = 0; i < config_.offset_size(); i++) { - crop_offsets_.push_back(config_.offset(i)); - } - - // 1. get input_0 shape - auto& input0_img_conf = config_.inputs(0).image_conf(); - inDims_ = TensorShape({0, - input0_img_conf.channels(), - input0_img_conf.has_img_size_y() - ? input0_img_conf.img_size_y() - : input0_img_conf.img_size(), - input0_img_conf.img_size()}); - // 2. get target dims from config - if (config_.inputs_size() == 1) { - targetDims_ = TensorShape({config_.shape(0), - config_.shape(1), - config_.shape(2), - config_.shape(3)}); - } else { - // 2. get input_1 shape - auto& input1_img_conf = config_.inputs(1).image_conf(); - targetDims_ = TensorShape({0, - input1_img_conf.channels(), - input1_img_conf.has_img_size_y() - ? input1_img_conf.img_size_y() - : input1_img_conf.img_size(), - input1_img_conf.img_size()}); - } - - // 3. 
get final crop corner - int dimSize = 4; - crop_corner_ = {0, 0, 0, 0}; - for (int i = 0; i < dimSize; i++) { - if (i >= crop_axis_) { - if (crop_offsets_.size() > 1) { - crop_corner_[i] = crop_offsets_[i - crop_axis_]; - } else { - crop_corner_[i] = crop_offsets_[0]; - } - } - } - - outDims_ = TensorShape(4); - - createFunction( - forward_, "Crop", FuncConfig().set("crop_corner", crop_corner_)); - createFunction( - backward_, "CropGrad", FuncConfig().set("crop_corner", crop_corner_)); - - return true; -} - -void CropLayer::setOutDims() { - MatrixPtr input = inputLayers_[1]->getOutputValue(); - size_t batchSize = input->getHeight(); - // get target dims from input_1 - if (config_.inputs_size() == 2) { - targetDims_.setDim(0, batchSize); - int ch = config_.inputs(0).image_conf().channels(); - if (ch != 0) targetDims_.setDim(1, ch); - int h = inputLayers_[1]->getOutput().getFrameHeight(); - if (h != 0) targetDims_.setDim(2, h); - int w = inputLayers_[1]->getOutput().getFrameWidth(); - if (w != 0) targetDims_.setDim(3, w); - } - // get final crop shape from target dims and crop axis - std::vector crop_shape; - int dimSize = 4; - for (int i = 0; i < dimSize; i++) { - if (i >= crop_axis_) { - crop_shape.push_back(targetDims_[i]); - } else { - crop_shape.push_back(inDims_[i]); - } - } - - outDims_.reshape( - {crop_shape[0], crop_shape[1], crop_shape[2], crop_shape[3]}); - output_.setFrameHeight(crop_shape[2]); - output_.setFrameWidth(crop_shape[3]); -} - -void CropLayer::setInDims() { - MatrixPtr input = inputLayers_[0]->getOutputValue(); - size_t batchSize = input->getHeight(); - inDims_.setDim(0, batchSize); - int h = inputLayers_[0]->getOutput().getFrameHeight(); - if (h != 0) inDims_.setDim(2, h); - int w = inputLayers_[0]->getOutput().getFrameWidth(); - if (w != 0) inDims_.setDim(3, w); -} - -void CropLayer::forward(PassType passType) { - Layer::forward(passType); - setInDims(); - setOutDims(); - int size = outDims_[1] * outDims_[2] * outDims_[3]; - 
resetOutput(outDims_[0], size); - MatrixPtr outV = getOutputValue(); - REGISTER_TIMER_INFO("CropForward", getName().c_str()); - - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(0), inDims_); - outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO); - forward_[0]->calc(inputs, outputs); -} - -void CropLayer::backward(const UpdateCallback& callback) { - (void)callback; - REGISTER_TIMER_INFO("CropBackward", getName().c_str()); - - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), outDims_); - outputs.addArg(*getInputGrad(0), inDims_, ADD_TO); - backward_[0]->calc(inputs, outputs); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CropLayer.h b/paddle/legacy/gserver/layers/CropLayer.h deleted file mode 100644 index ef88bc483d157406a0f5a7924c14c345ea0df8c4..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CropLayer.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" - -namespace paddle { - -/** - * \brief This layer crop input according to the specify conf. 
- * input_0: input to be cropped - * input_1: optional reference input - * axis: start dimension to be croped - * offset: offset of cropping in each dimension - * shape: if reference input layer was not setted, - * crop input as this shape conf - */ -class CropLayer : public Layer { - public: - explicit CropLayer(const LayerConfig& config) : Layer(config) {} - - ~CropLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - void setOutDims(); - void setInDims(); - - int32_t crop_axis_; - std::vector crop_offsets_; - std::vector crop_corner_; - TensorShape inDims_; - TensorShape targetDims_; - TensorShape outDims_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CrossChannelNormLayer.cpp b/paddle/legacy/gserver/layers/CrossChannelNormLayer.cpp deleted file mode 100644 index 0fe100a96c01713f6c8d10d4eff428e7e743b002..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CrossChannelNormLayer.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Layer.h" -#include "NormLayer.h" -#include "paddle/legacy/math/BaseMatrix.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -MatrixPtr CrossChannelNormLayer::createSampleMatrix(MatrixPtr data, - size_t iter, - size_t spatialDim) { - return Matrix::create(data->getData() + iter * channels_ * spatialDim, - channels_, - spatialDim, - false, - useGpu_); -} - -MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data, - size_t iter, - size_t spatialDim) { - return Matrix::create( - data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_); -} - -bool CrossChannelNormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - CHECK(parameters_[0]); - const NormConfig& conf = config_.inputs(0).norm_conf(); - channels_ = conf.channels(); - scale_.reset(new Weight(channels_, 1, parameters_[0])); - return true; -} - -void CrossChannelNormLayer::forward(PassType passType) { - Layer::forward(passType); - MatrixPtr inV = getInputValue(0); - - size_t batchSize = inV->getHeight(); - size_t dataDim = inV->getWidth(); - CHECK_EQ(getSize(), dataDim); - - reserveOutput(batchSize, dataDim); - MatrixPtr outV = getOutputValue(); - size_t spatialDim = dataDim / channels_; - - Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_); - Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_); - Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_); - - inV->square2(*dataBuffer_); - for (size_t i = 0; i < batchSize; i++) { - const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim); - const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim); - MatrixPtr outVTmp = createSampleMatrix(outV, i, spatialDim); - MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim); - - // compute norm. 
- spatialBuffer_->sumCols(*dataTmp, 1, 0); - // add eps to avoid overflow - spatialBuffer_->add(1e-6); - spatialBuffer_->sqrt2(*spatialBuffer_); - normTmp->copyFrom(*spatialBuffer_); - outVTmp->copyFrom(*inVTmp); - outVTmp->divRowVector(*spatialBuffer_); - // scale the layer. - outVTmp->mulColVector(*scale_->getW()); - } -} - -void CrossChannelNormLayer::backward(const UpdateCallback& callback) { - MatrixPtr inG = getInputGrad(0); - MatrixPtr inV = getInputValue(0); - MatrixPtr outG = getOutputGrad(); - MatrixPtr outV = getOutputValue(); - - size_t batchSize = inG->getHeight(); - size_t dataDim = inG->getWidth(); - size_t spatialDim = dataDim / channels_; - - MatrixPtr inGBuffer; - Matrix::resizeOrCreate(inGBuffer, channels_, spatialDim, false, useGpu_); - - dataBuffer_->dotMul(*outG, *outV); - Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_); - Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_); - Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_); - scaleDiff_->zeroMem(); - for (size_t i = 0; i < batchSize; i++) { - MatrixPtr outGTmp = createSampleMatrix(outG, i, spatialDim); - const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim); - const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim); - const MatrixPtr inGTmp = createSampleMatrix(inG, i, spatialDim); - const MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim); - - channelBuffer_->sumRows(*dataTmp, 1, 0); - channelBuffer_->dotDiv(*channelBuffer_, *(scale_->getW())); - // store a / scale[i] in scaleDiff_ temporary - scaleDiff_->add(*channelBuffer_, 1.); - - sampleBuffer_->dotMul(*inVTmp, *outGTmp); - spatialBuffer_->sumCols(*sampleBuffer_, 1., 0.); - // scale the grad - inGBuffer->copyFrom(*inVTmp); - inGBuffer->mulRowVector(*spatialBuffer_); - // divide by square of norm - spatialBuffer_->dotMul(*normTmp, *normTmp); - inGBuffer->divRowVector(*spatialBuffer_); - // subtract - inGBuffer->add(*outGTmp, -1, 1); 
- // divide by norm - inGBuffer->divRowVector(*normTmp); - // scale the diff - inGBuffer->mulColVector(*scale_->getW()); - - inGTmp->add(*inGBuffer); - } - // updata scale - if (scale_->getWGrad()) scale_->getWGrad()->add(*scaleDiff_); - scale_->getParameterPtr()->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/legacy/gserver/layers/CrossEntropyOverBeam.cpp deleted file mode 100644 index f3bf214858702ec820020bc554359c58b1ffcfe3..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CrossEntropyOverBeam.cpp +++ /dev/null @@ -1,393 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CrossEntropyOverBeam.h" - -namespace paddle { - -void CostForOneSequence::calValidExpandStep() { - validExpansionCount_ = 0; - goldAsExtraPath_ = true; - - for (size_t i = 0; i < beams_->expansionCount; ++i) { - real gold = static_cast(beams_->gold[i]); - if (i) { - real* start = beams_->candidateIds[i - 1]->getData(); - goldRowIds_[i] = std::count_if( - start, - start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1], - [](const real& val) { return val != -1.; }); - } else { - goldRowIds_[i] = 0; - } - - real* start = - beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_; - real* findEnd = std::find(start, start + beamSize_, gold); - validExpansionCount_++; - - if (start + beamSize_ == findEnd) return; - goldColIds_[i] = findEnd - start; - } - if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false; -} - -size_t CostForOneSequence::initLastExpansion() { - int beamId = validExpansionCount_ - 1; - const MatrixPtr candidates = beams_->candidateIds[beamId]; - size_t height = candidates->getHeight(); - - /* initialization the last expansion. */ - size_t pathCount = std::count_if(candidates->getData(), - candidates->getData() + height * beamSize_, - [](const real& val) { return val != -1; }); - /* - * if the gold sequence falls off the beam during search, add the gold - * sequence as the last path into the all expanded candidates. - */ - if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++; - - pathRowIdsInEachBeam_.clear(); - pathRowIdsInEachBeam_.resize(validExpansionCount_, - std::vector(pathCount, 0)); - parentIdsInBeam_.clear(); - parentIdsInBeam_.resize(pathCount, 0); - - if (goldAsExtraPath_) { - /* add gold sequence into the total expansion. 
*/ - pathRowIdsInEachBeam_[beamId].back() = - beams_->gold[beamId] + - getSeqStartPos(beamId, goldRowIds_[validExpansionCount_ - 1]); - parentIdsInBeam_.back() = goldRowIds_[validExpansionCount_ - 1]; - } else { - size_t goldOffset = goldRowIds_[beamId] * beamSize_ + goldColIds_[beamId]; - goldIdsInFinalExpansion_ = - std::count_if(candidates->getData(), - candidates->getData() + goldOffset, - [](const real& val) { return val != -1.; }); - } - - /* - * TODO(caoying): fix this, store the indices of selected candidate - * paths into Argument.ids - */ - real* ids = candidates->getData(); - size_t curIdx = 0; - for (size_t i = 0; i < height; ++i) { - int basePos = getSeqStartPos(beamId, i); - for (size_t j = 0; j < beamSize_; ++j) { - int id = ids[i * beamSize_ + j]; - if (id == -1) continue; - pathRowIdsInEachBeam_[beamId][curIdx] = id + basePos; - parentIdsInBeam_[curIdx++] = i; - } - } - return pathCount; -} - -void CostForOneSequence::constructTotalExpansion() { - /* - * construct the entire expanded beam by begining with the last search - * in which gold falls off the beam. - */ - size_t totalPathCount = initLastExpansion(); - - for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) { - const MatrixPtr candidates = beams_->candidateIds[beamId]; - real* ids = candidates->getData(); - - int lastParentIdInBeam = -1; - int basePos = -1; - for (size_t i = 0; - i < (goldAsExtraPath_ ? 
totalPathCount - 1 : totalPathCount); - ++i) { - int id = ids[parentIdsInBeam_[i]]; - int parentRowId = std::div(parentIdsInBeam_[i], beamSize_).quot; - if (parentIdsInBeam_[i] != lastParentIdInBeam) - basePos = getSeqStartPos(beamId, parentRowId); - - pathRowIdsInEachBeam_[beamId][i] = id + basePos; - lastParentIdInBeam = parentIdsInBeam_[i]; - parentIdsInBeam_[i] = parentRowId; - - if (goldAsExtraPath_) - pathRowIdsInEachBeam_[beamId][totalPathCount - 1] = - beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]); - } - } -} - -real CostForOneSequence::globallyNormalizedScore() { - expandedPathScores_.resize(validExpansionCount_); - - Matrix::resizeOrCreate( - softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false); - softmaxOut_->zeroMem(); - MatrixPtr tmp = Matrix::create( - softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); - - for (size_t i = 0; i < validExpansionCount_; ++i) { - Matrix::resizeOrCreate(expandedPathScores_[i], - pathRowIdsInEachBeam_[i].size(), - 1, - false, - false); - expandedPathScores_[i]->zeroMem(); - - IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), - pathRowIdsInEachBeam_[i].size(), - false); - expandedPathScores_[i]->selectRows(*(beams_->scores[i]), *rowIds); - tmp->add(*expandedPathScores_[i]); - } - - softmaxOut_->softmax(*softmaxOut_); - return -std::log(softmaxOut_->getData()[goldIdsInFinalExpansion_]); -} - -real CostForOneSequence::forward() { - calValidExpandStep(); - constructTotalExpansion(); - return globallyNormalizedScore(); -} - -void CostForOneSequence::backward() { - /* - * when softmax layer is the output layer, and it is combined with - * cross-entropy as cost. The derivate with regard to softmax's input - * is simply: - * - * grad_i = softmax_out_i - target_i, - * - * and here hard label is used. 
- */ - softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.; - - MatrixPtr tmp = Matrix::create( - softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); - - for (size_t i = 0; i < validExpansionCount_; ++i) { - IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), - pathRowIdsInEachBeam_[i].size(), - false); - /* - beams_->scoreGrad[i] has been intialized outside this class, this - class only keeps a pointer pointing to the original input gradients, - so here does not need to allocate or initalize the memory. - */ - tmp->addToRows(*beams_->scoreGrad[i], *rowIds); - } -} - -REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); - -bool CrossEntropyOverBeam::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; - - beamExpanCount_ = inputLayers_.size() / 3; - - candidateScores_.resize(beamExpanCount_); - candidateScoreGrad_.resize(beamExpanCount_); - - candidateInBeam_.resize(beamExpanCount_); - goldSequence_.resize(beamExpanCount_); - gradToInputs_.resize(beamExpanCount_); - - setNeedSequenceInfo(false); - return true; -} - -void CrossEntropyOverBeam::checkInputs() { - batchSize_ = 0; - for (size_t i = 0; i < beamExpanCount_; ++i) { - const Argument& scores = getInput(i * 3); - const Argument& selCandidates = getInput(i * 3 + 1); - const Argument& goldSeq = getInput(i * 3 + 2); - - if (i) { - CHECK(scores.hasSubseq()) << "input " << i << " " - << inputLayers_[i * 3]->getName() - << " should be a nested sequence"; - CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); - CHECK_EQ(batchSize_, static_cast(scores.getNumSequences())); - CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); - } else { - CHECK(scores.hasSeq()) << "input " << i << " " - << inputLayers_[i]->getName() - << " should be a sequence"; - batchSize_ = scores.getNumSequences(); - 
beamSize_ = getInputValue(i * 3 + 1)->getWidth(); - CHECK_EQ(batchSize_, static_cast(selCandidates.getBatchSize())); - } - CHECK_EQ(1U, scores.value->getWidth()); - CHECK_EQ(batchSize_, static_cast(goldSeq.getBatchSize())); - } -} - -void CrossEntropyOverBeam::copyInputsToCpu() { - auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) { - if (dynamic_cast(src.get())) { - Matrix::resizeOrCreate( - trg, src->getHeight(), src->getWidth(), false, false); - trg->copyFrom(*src); - } else { - trg = std::move(src); - } - }; - - auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) { - if (dynamic_cast(src.get())) { - IVector::resizeOrCreate(trg, src->getSize(), false); - trg->copyFrom(*src); - } else { - trg = std::move(src); - } - }; - - beamSplitPos_.clear(); - beamSplitPos_.resize(batchSize_, std::vector(beamExpanCount_, 0)); - for (size_t i = 0; i < beamExpanCount_; ++i) { - copyValue(getInputValue(i * 3), candidateScores_[i]); - copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]); - copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]); - - if (i) { - ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions; - const int* seqStarts = seqInfo->getMutableData(false); - ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions; - const int* subSeqStarts = subSeqInfo->getMutableData(false); - - size_t seqId = 1; - for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1; - ++subSeqId) { - CHECK_LT(seqId, seqInfo->getSize()); - if (subSeqStarts[subSeqId] == seqStarts[seqId]) { - beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i]; - seqId++; - } - beamSplitPos_[seqId - 1][i]++; - } - } else { - for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1; - } - } -} - -void CrossEntropyOverBeam::splitBatchBeams() { - beamCosts_.resize(batchSize_); - beamPerSeq_.resize(batchSize_, BeamExpansion(beamExpanCount_)); - - for (size_t i = 0; i < beamExpanCount_; ++i) { - int* seqStarts = - getInput(i * 
3).sequenceStartPositions->getMutableData(false); - - int* subSeqStarts = nullptr; - int maxLen = 0; - if (i) { - subSeqStarts = - getInput(i * 3).subSequenceStartPositions->getMutableData(false); - maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1; - } else { - maxLen = getInput(i).sequenceStartPositions->getSize() - 1; - } - - for (size_t j = 0; j < batchSize_; ++j) { - beamPerSeq_[j].scores[i] = - Matrix::create(candidateScores_[i]->getData() + seqStarts[j], - seqStarts[j + 1] - seqStarts[j], - 1, - false, - false); - beamPerSeq_[j].scoreGrad[i] = - Matrix::create(candidateScoreGrad_[i]->getData() + seqStarts[j], - seqStarts[j + 1] - seqStarts[j], - 1, - false, - false); - - int offset = j ? beamSplitPos_[j - 1][i] : 0; - int height = beamSplitPos_[j][i] - (j ? beamSplitPos_[j - 1][i] : 0); - CHECK_GE(maxLen, offset + height); - beamPerSeq_[j].seqInfo[i] = IVector::create( - (i ? subSeqStarts : seqStarts) + offset, height + 1, false); - - beamPerSeq_[j].candidateIds[i] = - Matrix::create(candidateInBeam_[i]->getData() + offset * beamSize_, - height, - beamSize_, - false, - false); - beamPerSeq_[j].gold[i] = goldSequence_[i]->getData()[j]; - - CHECK_LE(beamPerSeq_[j].gold[i], seqStarts[j + 1] - seqStarts[j]); - } - } -} - -void CrossEntropyOverBeam::resizeOutput() { - Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false); - output_.value->zeroMem(); - - for (size_t i = 0; i < beamExpanCount_; ++i) { - MatrixPtr inGrad = getInputGrad(i * 3); - if (dynamic_cast(inGrad.get())) { - Matrix::resizeOrCreate(candidateScoreGrad_[i], - inGrad->getHeight(), - inGrad->getWidth(), - false, - false); - } else { - candidateScoreGrad_[i] = std::move(inGrad); - } - candidateScoreGrad_[i]->zeroMem(); - } -} - -void CrossEntropyOverBeam::copyGradToGpu(size_t copyCount) { - for (size_t i = 0; i < beamExpanCount_; ++i) { - if (dynamic_cast(getInputGrad(i * 3).get())) - getInputGrad(i * 3)->copyFrom(*candidateScoreGrad_[i]); - - if (i == copyCount - 1) 
break; - } -} - -void CrossEntropyOverBeam::forward(PassType passType) { - Layer::forward(passType); - - checkInputs(); - copyInputsToCpu(); - - resizeOutput(); - splitBatchBeams(); - - MatrixPtr outputValue = getOutputValue(); - for (size_t i = 0; i < batchSize_; ++i) { - BeamExpansionPtr ptr = std::make_shared(beamPerSeq_[i]); - beamCosts_[i].setData(std::move(ptr), beamSize_); - outputValue->getData()[i] = beamCosts_[i].forward(); - } -} - -void CrossEntropyOverBeam::backward(const UpdateCallback& callback) { - for (size_t i = 0; i < batchSize_; ++i) { - beamCosts_[i].backward(); - copyGradToGpu(beamCosts_[i].getValidExpansionCount()); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CrossEntropyOverBeam.h b/paddle/legacy/gserver/layers/CrossEntropyOverBeam.h deleted file mode 100644 index c8702b16165eee8d552c563082ffc708ce443deb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CrossEntropyOverBeam.h +++ /dev/null @@ -1,135 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "CrossEntropyOverBeam.h" -#include "Layer.h" - -namespace paddle { - -/* This struct stores the beams in all search steps for a single sequence. 
*/ -struct BeamExpansion { - std::vector scores; - std::vector seqInfo; - - std::vector candidateIds; - std::vector gold; - - std::vector scoreGrad; - - size_t expansionCount; - - explicit BeamExpansion(int n) { - expansionCount = n; - scores.resize(expansionCount); - seqInfo.resize(expansionCount); - candidateIds.resize(expansionCount); - scoreGrad.resize(expansionCount); - - gold.resize(expansionCount); - } -}; -typedef std::shared_ptr BeamExpansionPtr; - -class CostForOneSequence { - public: - CostForOneSequence() - : beamSize_(0), validExpansionCount_(0), goldAsExtraPath_(false) {} - void setData(const BeamExpansionPtr bPtr, size_t beamSize) { - beams_ = bPtr; - beamSize_ = beamSize; - - expandedPathScores_.clear(); - expandedPathScores_.resize(beams_->expansionCount); - - goldRowIds_.clear(); - goldRowIds_.resize(beams_->expansionCount, 0); - goldColIds_.clear(); - goldColIds_.resize(beams_->expansionCount, -1); - } - size_t getValidExpansionCount() { return validExpansionCount_; } - - real forward(); - void backward(); - - private: - void calValidExpandStep(); - void constructTotalExpansion(); - size_t initLastExpansion(); - real globallyNormalizedScore(); - - int getSeqStartPos(size_t beamId, size_t rowId) { - CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId); - int* starts = beams_->seqInfo[beamId]->getData(); - return starts[rowId] - starts[0]; - } - - size_t beamSize_; - size_t validExpansionCount_; - bool goldAsExtraPath_; - std::vector goldRowIds_; - std::vector goldColIds_; - - BeamExpansionPtr beams_; - std::vector> pathRowIdsInEachBeam_; - std::vector parentIdsInBeam_; - size_t goldIdsInFinalExpansion_; - - std::vector expandedPathScores_; - - MatrixPtr softmaxOut_; -}; - -class CrossEntropyOverBeam : public Layer { - public: - explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void 
backward(const UpdateCallback& callback) override; - - private: - void checkInputs(); - void copyInputsToCpu(); - void resizeOutput(); - void copyGradToGpu(size_t copyCount); - void splitBatchBeams(); - - size_t beamExpanCount_; - size_t batchSize_; - size_t beamSize_; - - /* - * the process of constructing beams is not friendly to GPU, currently, this - * layer only runs on CPU, if any of its inputs is on GPU memory, then copy - * it to CPU memory. - */ - std::vector candidateScores_; - std::vector candidateScoreGrad_; - std::vector candidateInBeam_; - std::vector gradToInputs_; - std::vector goldSequence_; - std::vector> beamSplitPos_; - - /* - * split entire bath of beams into beam per sequnence and store the result - * into this member. - */ - std::vector beamPerSeq_; - /* beamCosts_ is used to propagate error in one sequence. */ - std::vector beamCosts_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/legacy/gserver/layers/CudnnBatchNormLayer.cpp deleted file mode 100644 index 051155e0d2c1b4910c6627a902a4150cbfb15800..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CudnnBatchNormLayer.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CudnnBatchNormLayer.h" -#include "Layer.h" -#include "paddle/legacy/cuda/include/hl_batch_norm.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(cudnn_batch_norm, CudnnBatchNormLayer); - -bool CudnnBatchNormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - if (!BatchNormBaseLayer::init(layerMap, parameterMap)) return false; - CHECK(useGpu_) << "CudnnBatchNorm only support GPU"; - - hl_create_tensor_descriptor(&ioDesc_); - hl_create_tensor_descriptor(&bnParamDesc_); - hl_tensor_reshape(bnParamDesc_, 1, channels_, 1, 1); - - return true; -} - -void CudnnBatchNormLayer::reshape(int batchSize) { - hl_tensor_reshape(ioDesc_, batchSize, channels_, imageH_ * imageD_, imageW_); -} - -void CudnnBatchNormLayer::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = getInputValue(0)->getHeight(); - calFeatureMapSize(); - reshape(batchSize); - resetOutput(batchSize, getInputValue(0)->getWidth()); - - // for testing in training peroid. - useGlobalStats_ = (passType == PASS_TEST); - if (passType == PASS_TEST && config_.has_use_global_stats()) { - useGlobalStats_ = config_.use_global_stats(); - } - - real* input = getInputValue(0)->getData(); - real* output = getOutputValue()->getData(); - real* gamma = weight_->getW()->getData(); - real* beta = biases_->getW()->getData(); - real* movingMean = movingMean_->getW()->getData(); - real* movingVar = movingVar_->getW()->getData(); - - // cuDNN does not allow an epsilon value less than CUDNN_BN_MIN_EPSILON. 
- eps_ = std::max(CUDNN_BN_MIN_EPSILON, static_cast(epsilon_)); - - if (!useGlobalStats_) { - REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str()); - real* savedMean = savedMean_->getData(); - real* savedInvVar = savedInvVar_->getData(); - hl_batch_norm_forward_training(ioDesc_, - input, - ioDesc_, - output, - bnParamDesc_, - gamma, - beta, - 1.0 - movingAvgFraction_, - movingMean, - movingVar, - eps_, - savedMean, - savedInvVar); - } else { - // used movingMean and movingVar in testing - if (batchSize <= 1024) { - hl_batch_norm_forward_inference(ioDesc_, - input, - ioDesc_, - output, - bnParamDesc_, - gamma, - beta, - movingMean, - movingVar, - eps_); - } else { - // There is a limitation in cudnn library. - // When the batch size is larger than 1024 in cuDNN v5.1, - // the cudnnBatchNormalizationForwardInference will fail. - hl_batch_norm_cuda_inference(input, - output, - gamma, - beta, - movingMean, - movingVar, - eps_, - batchSize, - channels_, - imageH_ * imageD_, - imageW_); - } - } - - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - - real* input = getInputValue(0)->getData(); - real* outGrad = getOutputGrad()->getData(); - real* inGrad = getInputGrad(0)->getData(); - real* gamma = weight_->getW()->getData(); - real* savedMean = savedMean_->getData(); - real* savedInvVar = savedInvVar_->getData(); - - // cuDNN does not allow an epsilon value less than CUDNN_BN_MIN_EPSILON. 
- eps_ = std::max(CUDNN_BN_MIN_EPSILON, static_cast(epsilon_)); - - auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) { - Matrix::resizeOrCreate(m, h, w, false, true); - m->zeroMem(); - *p = m->getData(); - }; - - real* gammaGrad = nullptr; - real* betaGrad = nullptr; - if (weight_->getWGrad()) { - gammaGrad = weight_->getWGrad()->getData(); - } else { - create(tmpWGrad_, 1, channels_, &gammaGrad); - } - if (biases_ && biases_->getWGrad()) { - betaGrad = biases_->getWGrad()->getData(); - } else { - create(tmpBiasGrad_, 1, channels_, &betaGrad); - } - - hl_batch_norm_backward(ioDesc_, - input, - ioDesc_, - outGrad, - ioDesc_, - inGrad, - bnParamDesc_, - gamma, - gammaGrad, - betaGrad, - eps_, - savedMean, - savedInvVar); - - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - biases_->getParameterPtr()->incUpdate(callback); - weight_->getParameterPtr()->incUpdate(callback); - } -} - -CudnnBatchNormLayer::~CudnnBatchNormLayer() { - hl_destroy_tensor_descriptor(ioDesc_); - hl_destroy_tensor_descriptor(bnParamDesc_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CudnnBatchNormLayer.h b/paddle/legacy/gserver/layers/CudnnBatchNormLayer.h deleted file mode 100644 index 3b33b983b31173ab941df5f2e66eac51aabc6315..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CudnnBatchNormLayer.h +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "BatchNormBaseLayer.h" -#include "Layer.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief Cudnn Batch normalization layer use to cuDNN lib to implentment. - * @note Cudnn version must >= v4.0, and better to use the latest version - * (v5.1). - * - * The config file api is batch_norm_layer. - */ - -class CudnnBatchNormLayer : public BatchNormBaseLayer { - public: - explicit CudnnBatchNormLayer(const LayerConfig& config) - : BatchNormBaseLayer(config) {} - - ~CudnnBatchNormLayer(); - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - /** - * reshape tensor of ioDesc_. - */ - void reshape(int batchSize); - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - /// Epsilon value used in the batch normalization formula. - /// Same epsilon value should be used in forward and backward functions. - double eps_; - - /// Input/output tensor descriptor desc - hl_tensor_descriptor ioDesc_; - /// Shared tensor descriptor desc for the 6 tenros: - /// bnScale, bnBias, running mean/var, save_mean/var - hl_tensor_descriptor bnParamDesc_; - - /** - * @brief The gradient of weight and bias in cudnn api can not be empty. - * If set is_static for weight or bias, it will not allocate memory for them, - * and the gradient is NULL. In this case, will use two matrix. - */ - MatrixPtr tmpWGrad_, tmpBiasGrad_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CudnnConvBaseLayer.cpp b/paddle/legacy/gserver/layers/CudnnConvBaseLayer.cpp deleted file mode 100644 index 9353cca9c83bd90a454b2be56dc08b8eadee0bf7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CudnnConvBaseLayer.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "CudnnConvBaseLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { -REGISTER_LAYER(cudnn_conv, CudnnConvBaseLayer); -REGISTER_LAYER(cudnn_convt, CudnnConvBaseLayer); - -bool CudnnConvBaseLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { - if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; - CHECK(useGpu_) << "CudnnConvLayer only support gpu"; - - CHECK_EQ(inputLayers_.size(), parameters_.size()); - projections_.reserve(inputLayers_.size()); - projConf_.reserve(inputLayers_.size()); - - numFilters_ = config_.num_filters(); - CHECK(config_.shared_biases()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - ProjectionConfig *conf = new ProjectionConfig(); - if (isDeconv_) { - conf->set_type("convt"); - } else { - conf->set_type("conv"); - } - conf->set_num_filters(numFilters_); - ConvConfig *convConf = conf->mutable_conv_conf(); - *convConf = *(config_.mutable_inputs(i)->mutable_conv_conf()); - conf->set_input_size(getPrev(i)->getSize()); - conf->set_output_size(getSize()); - projConf_.emplace_back(conf); - projections_.emplace_back( - Projection::create(*projConf_[i], parameters_[i], useGpu_)); - - // create a new weight - size_t height, width; - height = filterPixels_[i] * filterChannels_[i]; - width = (!isDeconv_) ? 
numFilters_ : channels_[i]; - CHECK_EQ(parameters_[i]->getSize(), width * height); - Weight *w = new Weight(height, width, parameters_[i]); - weights_.emplace_back(w); - } - - if (biasParameter_.get()) { - if (sharedBiases_) { - CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); - biases_ = - std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); - } else { - biases_ = - std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); - } - } - if (biases_.get() && sharedBiases_) { - hl_create_tensor_descriptor(&biasDesc_); - hl_create_tensor_descriptor(&outputDesc_); - hl_tensor_reshape(biasDesc_, 1, numFilters_, 1, 1); - } - - return true; -} - -void CudnnConvBaseLayer::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = getInput(0).getBatchSize(); - resetOutput(batchSize, calOutputSize()); - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - projections_[i]->forward(&getInput(i), &getOutput(), passType); - } - - if (biases_) { - REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str()); - int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - int outH = outputH_[0]; - int outW = outputW_[0]; - - hl_tensor_reshape(outputDesc_, - batchSize, - numFilters_, - outH, - outW, - numFilters_ * outH * outW, - outH * outW, - outW, - 1); - real *outData = getOutputValue()->getData(); - real *biasData = biases_->getW()->getData(); - hl_convolution_forward_add_bias(biasDesc_, biasData, outputDesc_, outData); - } - - forwardActivation(); -} - -void CudnnConvBaseLayer::backward(const UpdateCallback &callback) { - backwardActivation(); - - if (biases_ && biases_->getWGrad()) { - REGISTER_TIMER_INFO("CudnnConvBpBiasTimer", getName().c_str()); - real *biasGrad = biases_->getWGrad()->getData(); - real *outGrad = getOutputGrad()->getData(); - hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad); - - biases_->getParameterPtr()->incUpdate(callback); - } - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - 
projections_[i]->backward(callback); - } -} - -CudnnConvBaseLayer::~CudnnConvBaseLayer() { - if (biases_) { - hl_destroy_tensor_descriptor(biasDesc_); - hl_destroy_tensor_descriptor(outputDesc_); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CudnnConvBaseLayer.h b/paddle/legacy/gserver/layers/CudnnConvBaseLayer.h deleted file mode 100644 index d050183eb7838bed803995985383e0ee4e9731a1..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CudnnConvBaseLayer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "ConvBaseLayer.h" -#include "Projection.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief A 2-dimension conv layer implemented by cuDNN. It only - * supports GPU mode. We automatic select CudnnConvLayer for GPU - * mode and ExpandConvLayer for CPU mode if you set type of "conv". - * User also can specfiy type of "exconv" or "cudnn_conv" for - * particular type. - * - * The config file api is img_conv_layer. 
- */ -class CudnnConvBaseLayer : public ConvBaseLayer { - protected: - std::vector> projConf_; - std::vector> projections_; - - hl_tensor_descriptor biasDesc_; - hl_tensor_descriptor outputDesc_; - - public: - explicit CudnnConvBaseLayer(const LayerConfig& config) - : ConvBaseLayer(config) {} - - ~CudnnConvBaseLayer(); - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CudnnPoolLayer.cpp b/paddle/legacy/gserver/layers/CudnnPoolLayer.cpp deleted file mode 100644 index c790dfd71efbee1a2a0afa69e6c336c4330737d0..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CudnnPoolLayer.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CudnnPoolLayer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -bool CudnnPoolLayer::typeCheck(const std::string &poolType, - hl_pooling_mode_t *mode) { - if (poolType == "cudnn-max-pool") { - if (mode) { - *mode = HL_POOLING_MAX; - } - } else if (poolType == "cudnn-avg-pool") { - if (mode) { - *mode = HL_POOLING_AVERAGE; - } - } else if (poolType == "cudnn-avg-incl-pad-pool") { - if (mode) { - *mode = HL_POOLING_AVERAGE_INCLUDE_PADDING; - } - } else { - return false; - } - - return true; -} - -CudnnPoolLayer::CudnnPoolLayer(const LayerConfig &config) : PoolLayer(config) { - const std::string &pool_type = config.inputs(0).pool_conf().pool_type(); - CHECK_EQ(CudnnPoolLayer::typeCheck(pool_type, &mode_), true); -} - -bool CudnnPoolLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { - PoolLayer::init(layerMap, parameterMap); - - CHECK(useGpu_) << "CudnnPoolLayer only support gpu"; - - hl_create_tensor_descriptor(&inputDesc_); - hl_create_tensor_descriptor(&outputDesc_); - - windowHeight = sizeY_; - windowWidth = sizeX_; - heightPadding = confPaddingY_; - widthPadding = confPadding_; - strideHeight = strideY_; - strideWidth = stride_; - - hl_create_pooling_descriptor(&poolingDesc_, - mode_, - windowHeight, - windowWidth, - heightPadding, - widthPadding, - strideHeight, - strideWidth); - - return true; -} - -void CudnnPoolLayer::reshape(int batchSize) { - imageH_ = inputLayers_[0]->getOutput().getFrameHeight(); - imageW_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (imageH_ == 0) { - imageH_ = imgSizeY_; - } - if (imageW_ == 0) { - imageW_ = imgSize_; - } - CHECK_EQ(inputLayers_[0]->getOutput().value->getWidth(), - channels_ * imageH_ * imageW_); - outputH_ = outputSize(imageH_, - sizeY_, - confPaddingY_, - strideY_, - /* caffeMode */ false); - outputW_ = - outputSize(imageW_, sizeX_, confPadding_, stride_, /* caffeMode */ 
false); - getOutput().setFrameHeight(outputH_); - getOutput().setFrameWidth(outputW_); - - hl_tensor_reshape(inputDesc_, batchSize, channels_, imageH_, imageW_); - hl_tensor_reshape(outputDesc_, batchSize, channels_, outputH_, outputW_); -} - -void CudnnPoolLayer::forward(PassType passType) { - Layer::forward(passType); - - CHECK(inputLayers_[0]->getOutputValue()->useGpu()); - int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - reshape(batchSize); - resetOutput(batchSize, outputH_ * outputW_ * channels_); - - real *inputData = getInputValue(0)->getData(); - real *outData = getOutputValue()->getData(); - hl_pooling_forward(inputDesc_, inputData, outputDesc_, outData, poolingDesc_); -} - -void CudnnPoolLayer::backward(const UpdateCallback &callback) { - (void)callback; - if (NULL == getInputGrad(0)) { - return; - } - - real *inputData = getInputValue(0)->getData(); - real *inputGrad = getInputGrad(0)->getData(); - real *outData = getOutputValue()->getData(); - real *outGrad = getOutputGrad()->getData(); - hl_pooling_backward(inputDesc_, - inputData, - inputGrad, - outputDesc_, - outData, - outGrad, - poolingDesc_); -} - -CudnnPoolLayer::~CudnnPoolLayer() { - hl_destroy_tensor_descriptor(inputDesc_); - hl_destroy_tensor_descriptor(outputDesc_); - hl_destroy_pooling_descriptor(poolingDesc_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/CudnnPoolLayer.h b/paddle/legacy/gserver/layers/CudnnPoolLayer.h deleted file mode 100644 index fc249354d10333211691b6844bffa3c8da8a79ee..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/CudnnPoolLayer.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "PoolLayer.h" - -namespace paddle { - -/** - * @brief CudnnPoolLayer is subclass of PoolLayer, which is implemented by - * cudnn api and only supports GPU. - * - * The config file api is img_pool_layer. - */ - -class CudnnPoolLayer : public PoolLayer { - protected: - int windowHeight, windowWidth; - int heightPadding, widthPadding, strideHeight, strideWidth; - int imageH_, imageW_, outputH_, outputW_; - /// mode_ is poolint type, inlcuding "cudnn-max-pool", "cudnn-avg-pool" - /// "cudnn-avg-excl-pad-pool". - hl_pooling_mode_t mode_; - /// cudnn tensor descriptor for input. - hl_tensor_descriptor inputDesc_; - /// cudnn tensor descriptor for output. - hl_tensor_descriptor outputDesc_; - /// A description of a pooling operation. - hl_pooling_descriptor poolingDesc_; - - public: - static bool typeCheck(const std::string& poolType, - hl_pooling_mode_t* mode = nullptr); - explicit CudnnPoolLayer(const LayerConfig& config); - ~CudnnPoolLayer(); - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - /** - * Reshape input and output tensor descriptor. - * The batch size maybe change during training in last batch of each pass. - * So reshaping is needed. 
- */ - void reshape(int batchSize); - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DataLayer.cpp b/paddle/legacy/gserver/layers/DataLayer.cpp deleted file mode 100644 index 4cadaa76631ed793d041a5c1f9aa3a543c8e134f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DataLayer.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "DataLayer.h" - -namespace paddle { - -REGISTER_LAYER(data, DataLayer); - -void DataLayer::copyDataToOutput(Argument& output) { - if (output.deviceId == data_.deviceId) { - output.value = data_.value; - output.in = data_.in; - output.grad = data_.grad; - output.ids = data_.ids; - } else { - SetDevice device(output.deviceId); - if (data_.value) { - if (!output.value) { - output.value = data_.value->clone(data_.value->getHeight(), - data_.value->getWidth(), - useGpu(output.deviceId)); - } else { - output.value->resize(data_.value->getHeight(), data_.value->getWidth()); - } - output.value->copyFrom(*data_.value); - } - if (data_.grad) { - Matrix::resizeOrCreate(output.grad, - data_.grad->getHeight(), - data_.grad->getWidth(), - /* trans= */ false, - useGpu(output.deviceId)); - } - if (data_.ids) { - IVector::resizeOrCreate( - output.ids, data_.ids->getSize(), useGpu(output.deviceId)); - output.ids->copyFrom(*data_.ids); - } - } - if (config_.height() && config_.width()) { - output.setFrameHeight(config_.height()); - output.setFrameWidth(config_.width()); - } else { - output.setFrameHeight(data_.getFrameHeight()); - output.setFrameWidth(data_.getFrameWidth()); - } - output.cpuSequenceDims = data_.cpuSequenceDims; - output.sequenceStartPositions = data_.sequenceStartPositions; - output.subSequenceStartPositions = data_.subSequenceStartPositions; - output.strs = data_.strs; - - output.notifyValueReady(); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DataLayer.h b/paddle/legacy/gserver/layers/DataLayer.h deleted file mode 100644 index d02f5a4697b9067f7d34e4d0b2d34f8c63ffe020..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DataLayer.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "Layer.h" - -namespace paddle { -/** - * This layer just copy data to output, and has no backward propagation. - * - * The config file api is data_layer. - */ -class DataLayer : public Layer { - public: - explicit DataLayer(const LayerConfig& config) : Layer(config) {} - - virtual void setData(const Argument& data) { data_ = data; } - - /** - * Prefetch sparse matrix/ids only. - */ - void prefetch() override { output_ = data_; } - - /** - * Forward propagation. Copy data_ (value, in, grad, ids, cpuSequenceDims, - * sequenceStartPositions, subSequenceStartPositions, strs) to output_. - */ - void forward(PassType passType) override { - Layer::forward(passType); - copyDataToOutput(output_); - if (FLAGS_show_layer_stat) { - showOutputStats(); - } - } - - /** - * Data layer's backward propagation do nothing. 
- */ - void backward(const UpdateCallback& callback) override { (void)callback; } - - void copyOutputToOtherDevice() override { - for (size_t i = 0; i != outputOtherDevice_.size(); i++) { - copyDataToOutput(outputOtherDevice_[i]); - } - } - - private: - void copyDataToOutput(Argument& output); - - protected: - Argument data_; -}; - -typedef std::shared_ptr DataLayerPtr; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DataNormLayer.cpp b/paddle/legacy/gserver/layers/DataNormLayer.cpp deleted file mode 100644 index 6820dfa4d4dcf90b2318a190ad4cc082c26fc180..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DataNormLayer.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "DataNormLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(data_norm, DataNormLayer); - -bool DataNormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* initialize the weight */ - CHECK(!biasParameter_) << "DataNormLayer does not need bias"; - CHECK(inputLayers_.size() == 1 && inputLayers_[0]->getType() == "data") - << "DataNormLayer accepts one and only one DataLayer as its input layer"; - CHECK_EQ(inputLayers_.size(), parameters_.size()); - CHECK_EQ(inputLayers_[0]->getSize(), getSize()); - CHECK_EQ(parameters_[0]->getSize(), 5 * getSize()); - CHECK(parameters_[0]->isStatic()) - << "The parameter of DataNormLayer must be static"; - - weight_ = std::unique_ptr(new Weight(5, getSize(), parameters_[0])); - min_ = Matrix::create( - nullptr, /* height= */ 1, getSize(), /* trans= */ false, useGpu_); - rangeReciprocal_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - mean_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - stdReciprocal_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - decimalReciprocal_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - - min_->setData(weight_->getW()->getData()); - rangeReciprocal_->setData(weight_->getW()->getData() + getSize()); - mean_->setData(weight_->getW()->getData() + 2 * getSize()); - stdReciprocal_->setData(weight_->getW()->getData() + 3 * getSize()); - decimalReciprocal_->setData(weight_->getW()->getData() + 4 * getSize()); - - /* normalization strategy */ - if (config_.data_norm_strategy() == "z-score") { - mode_ = kZScore; - } else if (config_.data_norm_strategy() == "min-max") { - mode_ = kMinMax; - } else if (config_.data_norm_strategy() == 
"decimal-scaling") { - mode_ = kDecimalScaling; - } else { - LOG(FATAL) << "Unknown data normalization strategy: " - << config_.data_norm_strategy(); - } - - return true; -} - -void DataNormLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInput(0).getBatchSize(); - int size = getSize(); - reserveOutput(batchSize, size); - - const MatrixPtr inValue = getInputValue(0); - MatrixPtr outValue = getOutputValue(); - outValue->copyFrom(*inValue); - switch (mode_) { - case kZScore: { - outValue->addBias(*mean_, -1.0); - outValue->colScale(0, *outValue, *stdReciprocal_); - break; - } - case kMinMax: { - outValue->addBias(*min_, -1.0); - outValue->colScale(0, *outValue, *rangeReciprocal_); - break; - } - case kDecimalScaling: { - outValue->colScale(0, *outValue, *decimalReciprocal_); - break; - } - default: - LOG(FATAL) << "should not reach here"; - } -} - -void DataNormLayer::backward(const UpdateCallback& callback) { - // The parameter for DataNormLayer is static, and does not need to be updated - (void)callback; - - /* Calculate the input layers error */ - const MatrixPtr& outGrad = getOutputGrad(); - MatrixPtr inGrad = getInputGrad(0); - if (inGrad) { - switch (mode_) { - case kZScore: { - inGrad->addColScale(0, *outGrad, *stdReciprocal_); - break; - } - case kMinMax: { - inGrad->addColScale(0, *outGrad, *rangeReciprocal_); - break; - } - case kDecimalScaling: { - inGrad->addColScale(0, *outGrad, *decimalReciprocal_); - break; - } - default: { LOG(FATAL) << "should not reach here"; } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DataNormLayer.h b/paddle/legacy/gserver/layers/DataNormLayer.h deleted file mode 100644 index 7bb8e928248355cb7ae78dc16e467b77a42e02fc..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DataNormLayer.h +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -/** - * @brief A layer for data normalization - * - Input: One and only one input layer is accepted. The input layer must - * be DataLayer with dense data type. - * - Output: The normalization of the input data - * - * Reference: - * LA Shalabi, Z Shaaban, B Kasasbeh. Data mining: A preprocessing engine - * - * Three data normalization methoeds are considered - * - z-score: y = (x-mean)/std - * - min-max: y = (x-min)/(max-min) - * - decimal-scaling: y = x/10^j, where j is the smallest integer such that - *max(|y|)<1 - */ - -class DataNormLayer : public Layer { - public: - enum NormalizationStrategy { kZScore = 0, kMinMax = 1, kDecimalScaling = 2 }; - - explicit DataNormLayer(const LayerConfig& config) : Layer(config) {} - - ~DataNormLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - int mode_; - std::unique_ptr weight_; - MatrixPtr min_; - MatrixPtr rangeReciprocal_; // 1/(max-min) - MatrixPtr mean_; - MatrixPtr stdReciprocal_; // 1/std - MatrixPtr decimalReciprocal_; // 1/10^j -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DeConv3DLayer.cpp b/paddle/legacy/gserver/layers/DeConv3DLayer.cpp deleted 
file mode 100644 index 2cd635564c4cd9f42d27cd58694cff381d1ce224..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DeConv3DLayer.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "DeConv3DLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(deconv3d, DeConv3DLayer); - -bool DeConv3DLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { - if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; - // for Deconv, the dimension of Kernel is - // channel * output * depth * height * weigth - // Matrix storage format: (output * depth * height * weigth) x channel - for (int index = 0; index < config_.inputs().size(); ++index) { - M_.push_back(filterChannels_[index]); - K_.push_back(filterPixels_[index] * (numFilters_ / groups_[index])); - - // create a new weight - size_t height, width; - height = filterPixels_[index] * numFilters_; - width = filterChannels_[index]; - CHECK_EQ(parameters_[index]->getSize(), width * height); - Weight *w = new Weight(height, width, parameters_[index]); - weights_.emplace_back(w); - } - if (biasParameter_.get()) { - if (sharedBiases_) { - CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); - biases_ = - std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); - } else { - biases_ = - std::unique_ptr(new Weight(getSize(), 1, 
biasParameter_)); - } - } - return true; -} - -size_t DeConv3DLayer::getSize() { - CHECK_NE(inputLayers_.size(), 0UL); - imgSizeW_.clear(); - imgSizeH_.clear(); - imgSizeD_.clear(); - N_.clear(); - NOut_.clear(); - size_t layerSize = 0; - for (size_t i = 0; i < inputLayers_.size(); ++i) { - imgSizeW_.push_back( - imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i], true)); - imgSizeH_.push_back(imageSize( - outputH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); - imgSizeD_.push_back(imageSize( - outputD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); - NOut_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); - N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); - CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); - layerSize += NOut_[i] * numFilters_; - } - getOutput().setFrameHeight(imgSizeH_[0]); - getOutput().setFrameWidth(imgSizeW_[0]); - getOutput().setFrameDepth(imgSizeD_[0]); - return layerSize; -} - -void DeConv3DLayer::forward(PassType passType) { - Layer::forward(passType); - int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - int outWidth = getSize(); - resetOutput(batchSize, outWidth); - const MatrixPtr outMat = getOutputValue(); - - REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); - for (size_t i = 0; i != inputLayers_.size(); ++i) { - const MatrixPtr &inMat = getInputValue(i); - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - MatrixPtr wMat = weights_[i]->getW(); - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - for (int n = 0; n < batchSize; ++n) { - real *inData = inMat->getData() + n * inMat->getStride(); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_); - MatrixPtr wMatSub = wMat->subMatrix(g * K, K); - MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); - colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); - inData += M * N; - } - colBuf_->col2Vol(outMat->getData() + n * 
outMat->getStride(), - numFilters_, - imgSizeD_[i], - imgSizeH_[i], - imgSizeW_[i], - filterSizeZ_[i], - filterSizeY_[i], - filterSize_[i], - strideZ_[i], - strideY_[i], - stride_[i], - paddingZ_[i], - paddingY_[i], - padding_[i], - 1.0, - 1.0); - } - } - if (nullptr != this->biasParameter_) { - this->addBias(); - } - forwardActivation(); -} - -void DeConv3DLayer::backward(const UpdateCallback &callback) { - backwardActivation(); - int batchSize = getOutputGrad()->getHeight(); - if (biases_ && biases_->getWGrad()) { - bpropBiases(); - biases_->getParameterPtr()->incUpdate(callback); - } - REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); - for (size_t i = 0; i < inputLayers_.size(); ++i) { - if (weights_[i]->getWGrad() || this->needGradient_) { - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - const MatrixPtr &inMat = getInputValue(i); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col( - getOutputGrad()->getData() + n * getOutputGrad()->getStride(), - numFilters_, - imgSizeD_[i], - imgSizeH_[i], - imgSizeW_[i], - filterSizeZ_[i], - filterSizeY_[i], - filterSize_[i], - strideZ_[i], - strideY_[i], - stride_[i], - paddingZ_[i], - paddingY_[i], - padding_[i]); - if (weights_[i]->getWGrad()) { - real *inData = inMat->getData() + n * inMat->getStride(); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); - MatrixPtr wGradMatSub = - weights_[i]->getWGrad()->subMatrix(g * K, K); - MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_); - wGradMatSub->mul( - *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0); - inData += M * N; - } - } - if (getInputGrad(i)) { - real *preGrad = - getInputGrad(i)->getData() + n * getInputGrad(i)->getStride(); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); - MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); - MatrixPtr inGradMatSub = - 
Matrix::create(preGrad, M, N, false, useGpu_); - inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0); - preGrad += M * N; - } - } - } - weights_[i]->getParameterPtr()->incUpdate(callback); - } - } -} -void DeConv3DLayer::bpropWeights(int i) {} -void DeConv3DLayer::bpropData(int i) {} - -void DeConv3DLayer::bpropBiases() { - MatrixPtr biases = Matrix::create(biases_->getWGrad()->getData(), - 1, - biases_->getWGrad()->getElementCnt(), - false, - useGpu_); - const MatrixPtr &outGradMat = getOutputGrad(); - - if (this->sharedBiases_) { - biases->collectSharedBias(*outGradMat, 1.0f); - } else { - biases->collectBias(*outGradMat, 1.0f); - } -} - -void DeConv3DLayer::addBias() { - MatrixPtr outMat = getOutputValue(); - MatrixPtr bias = Matrix::create(biases_->getW()->getData(), - 1, - biases_->getW()->getElementCnt(), - false, - useGpu_); - if (this->sharedBiases_) { - outMat->addSharedBias(*(bias), 1.0f); - } else { - outMat->addBias(*(bias), 1.0f); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DeConv3DLayer.h b/paddle/legacy/gserver/layers/DeConv3DLayer.h deleted file mode 100644 index 9931bccb1284111e299206883847045edaae4ded..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DeConv3DLayer.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "ConvBaseLayer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief A subclass of deconvolution3D layer. - * This layer expands input and use matrix multiplication to - * calculate deconvolution3D operation. - */ -class DeConv3DLayer : public ConvBaseLayer { - public: - explicit DeConv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {} - ~DeConv3DLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void addBias(); - void backward(const UpdateCallback& callback); - void bpropBiases(); - void bpropData(int i); - void bpropWeights(int i); - size_t getSize(); - - protected: - // Figure out the dimensions for individual gemms. - IntV M_; /// numFilters_ / filter_group_; - IntV N_; /// channels_ * filterSizeZ_ * filterSize_ * filterSizeY_ - IntV K_; /// outputD_ * outputH_ * outputW_ - IntV NOut_; - MatrixPtr colBuf_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DetectionOutputLayer.cpp b/paddle/legacy/gserver/layers/DetectionOutputLayer.cpp deleted file mode 100644 index 93fe046c6a87f26c4db7b8e17df1c4dc4343884f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DetectionOutputLayer.cpp +++ /dev/null @@ -1,160 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "DetectionOutputLayer.h" - -namespace paddle { - -REGISTER_LAYER(detection_output, DetectionOutputLayer); - -bool DetectionOutputLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - auto& layerConf = config_.inputs(0).detection_output_conf(); - numClasses_ = layerConf.num_classes(); - inputNum_ = layerConf.input_num(); - nmsThreshold_ = layerConf.nms_threshold(); - confidenceThreshold_ = layerConf.confidence_threshold(); - nmsTopK_ = layerConf.nms_top_k(); - keepTopK_ = layerConf.keep_top_k(); - backgroundId_ = layerConf.background_id(); - return true; -} - -void DetectionOutputLayer::forward(PassType passType) { - Layer::forward(passType); - size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); - - locSizeSum_ = 0; - confSizeSum_ = 0; - for (size_t n = 0; n < inputNum_; ++n) { - const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); - const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); - locSizeSum_ += inLoc->getElementCnt(); - confSizeSum_ += inConf->getElementCnt(); - } - - Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_); - Matrix::resizeOrCreate( - confTmpBuffer_, confSizeSum_ / numClasses_, numClasses_, false, useGpu_); - - size_t locOffset = 0; - size_t confOffset = 0; - auto& layerConf = config_.inputs(0).detection_output_conf(); - for (size_t n = 0; n < inputNum_; ++n) { - const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); - const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); - - size_t height = getInput(*getLocInputLayer(n)).getFrameHeight(); - if (!height) height = layerConf.height(); - size_t width = getInput(*getLocInputLayer(n)).getFrameWidth(); - if (!width) width = layerConf.width(); - locOffset += appendWithPermute(*inLoc, - height, - width, - locSizeSum_, - locOffset, - batchSize, - *locTmpBuffer_, - kNCHWToNHWC); - confOffset += appendWithPermute(*inConf, - height, - width, - confSizeSum_, - 
confOffset, - batchSize, - *confTmpBuffer_, - kNCHWToNHWC); - } - CHECK_EQ(locOffset, locSizeSum_ / batchSize); - CHECK_EQ(confOffset, confSizeSum_ / batchSize); - - MatrixPtr priorValue; - if (useGpu_) { - Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false); - Matrix::resizeOrCreate( - confCpuBuffer_, confSizeSum_ / numClasses_, numClasses_, false, false); - MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer()); - Matrix::resizeOrCreate( - priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false); - - locCpuBuffer_->copyFrom(*locTmpBuffer_); - confCpuBuffer_->copyFrom(*confTmpBuffer_); - priorCpuValue_->copyFrom(*priorTmpValue); - - locBuffer_ = locCpuBuffer_; - confBuffer_ = confCpuBuffer_; - priorValue = priorCpuValue_; - } else { - priorValue = getInputValue(*getPriorBoxLayer()); - locBuffer_ = locTmpBuffer_; - confBuffer_ = confTmpBuffer_; - } - confBuffer_->softmax(*confBuffer_); - - size_t numPriors = priorValue->getElementCnt() / 8; - std::vector> allDecodedBBoxes; - for (size_t n = 0; n < batchSize; ++n) { - std::vector decodedBBoxes; - for (size_t i = 0; i < numPriors; ++i) { - size_t priorOffset = i * 8; - size_t locPredOffset = n * numPriors * 4 + i * 4; - std::vector priorBBoxVec; - getBBoxFromPriorData( - priorValue->getData() + priorOffset, 1, priorBBoxVec); - std::vector> priorBBoxVar; - getBBoxVarFromPriorData( - priorValue->getData() + priorOffset, 1, priorBBoxVar); - std::vector locPredData; - for (size_t j = 0; j < 4; ++j) - locPredData.push_back(*(locBuffer_->getData() + locPredOffset + j)); - NormalizedBBox bbox = - decodeBBoxWithVar(priorBBoxVec[0], priorBBoxVar[0], locPredData); - decodedBBoxes.push_back(bbox); - } - allDecodedBBoxes.push_back(decodedBBoxes); - } - - std::vector>> allIndices; - size_t numKept = getDetectionIndices(confBuffer_->getData(), - numPriors, - numClasses_, - backgroundId_, - batchSize, - confidenceThreshold_, - nmsTopK_, - nmsThreshold_, - keepTopK_, - allDecodedBBoxes, - &allIndices); 
- - if (numKept > 0) { - resetOutput(numKept, 7); - } else { - MatrixPtr outV = getOutputValue(); - if (outV) outV->resize(0, 0); - return; - } - MatrixPtr outV = getOutputValue(); - getDetectionOutput(confBuffer_->getData(), - numKept, - numPriors, - numClasses_, - batchSize, - allIndices, - allDecodedBBoxes, - *outV); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DetectionOutputLayer.h b/paddle/legacy/gserver/layers/DetectionOutputLayer.h deleted file mode 100644 index b0270ed33141993665aeabdc53829600a4403643..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DetectionOutputLayer.h +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "DetectionUtil.h" -#include "Layer.h" - -namespace paddle { - -/** - * The detection output layer for a SSD detection task. This layer applies the - * Non-maximum suppression to the all predicted bounding box and keeps the - * Top-K bounding boxes. - * - Input: This layer needs three input layers: The first input layer - * is the priorbox layer. The rest two input layers are convolution - * layers for generating bbox location offset and the classification - * confidence. - * - Output: The predict bounding box locations. 
- */ - -class DetectionOutputLayer : public Layer { - public: - explicit DetectionOutputLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - - void backward(const UpdateCallback& callback = nullptr) {} - - protected: - inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; } - - inline LayerPtr getLocInputLayer(size_t index) { - return inputLayers_[1 + index]; - } - - inline LayerPtr getConfInputLayer(size_t index) { - return inputLayers_[1 + inputNum_ + index]; - } - - private: - size_t numClasses_; // number of classes - size_t inputNum_; // number of input layers - real nmsThreshold_; - real confidenceThreshold_; - size_t nmsTopK_; - size_t keepTopK_; - size_t backgroundId_; - - size_t locSizeSum_; - size_t confSizeSum_; - - MatrixPtr locBuffer_; - MatrixPtr confBuffer_; - MatrixPtr locTmpBuffer_; - MatrixPtr confTmpBuffer_; - MatrixPtr priorCpuValue_; - MatrixPtr locCpuBuffer_; - MatrixPtr confCpuBuffer_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DetectionUtil.cpp b/paddle/legacy/gserver/layers/DetectionUtil.cpp deleted file mode 100644 index 0dc45e5a7517820369dba245822fd9f880f96757..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DetectionUtil.cpp +++ /dev/null @@ -1,576 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "DetectionUtil.h" - -namespace paddle { - -size_t appendWithPermute(const Matrix& inMatrix, - size_t height, - size_t width, - size_t outTotalSize, - size_t outOffset, - size_t batchSize, - Matrix& outMatrix, - PermMode permMode) { - CHECK_EQ(inMatrix.useGpu(), outMatrix.useGpu()); - bool useGpu = inMatrix.useGpu(); - if (permMode == kNCHWToNHWC) { - size_t inElementCnt = inMatrix.getElementCnt(); - size_t channels = inElementCnt / (height * width * batchSize); - size_t imgSize = height * width; - for (size_t i = 0; i < batchSize; ++i) { - size_t offset = i * (outTotalSize / batchSize) + outOffset; - const MatrixPtr inTmp = Matrix::create( - const_cast(inMatrix.getData()) + i * channels * imgSize, - channels, - imgSize, - false, - useGpu); - MatrixPtr outTmp = - Matrix::create(const_cast(outMatrix.getData()) + offset, - imgSize, - channels, - false, - useGpu); - inTmp->transpose(outTmp, false); - } - return channels * imgSize; - } else { - LOG(FATAL) << "Unkown permute mode"; - } -} - -size_t decomposeWithPermute(const Matrix& inMatrix, - size_t height, - size_t width, - size_t inTotalSize, - size_t inOffset, - size_t batchSize, - Matrix& outMatrix, - PermMode permMode) { - CHECK_EQ(inMatrix.useGpu(), outMatrix.useGpu()); - bool useGpu = inMatrix.useGpu(); - if (permMode == kNHWCToNCHW) { - size_t outElementCnt = outMatrix.getElementCnt(); - size_t channels = outElementCnt / (height * width * batchSize); - size_t imgSize = height * width; - for (size_t i = 0; i < batchSize; ++i) { - size_t offset = i * (inTotalSize / batchSize) + inOffset; - const MatrixPtr inTmp = - Matrix::create(const_cast(inMatrix.getData()) + offset, - imgSize, - channels, - false, - useGpu); - MatrixPtr outTmp = Matrix::create( - const_cast(outMatrix.getData()) + i * channels * imgSize, - channels, - imgSize, - false, - useGpu); - inTmp->transpose(outTmp, false); - } - return channels * imgSize; - } else { - LOG(FATAL) << "Unkown permute mode"; - } -} - -real 
jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2) { - if (bbox2.xMin > bbox1.xMax || bbox2.xMax < bbox1.xMin || - bbox2.yMin > bbox1.yMax || bbox2.yMax < bbox1.yMin) { - return 0.0; - } else { - real interXMin = std::max(bbox1.xMin, bbox2.xMin); - real interYMin = std::max(bbox1.yMin, bbox2.yMin); - real interXMax = std::min(bbox1.xMax, bbox2.xMax); - real interYMax = std::min(bbox1.yMax, bbox2.yMax); - - real interWidth = interXMax - interXMin; - real interHeight = interYMax - interYMin; - real interArea = interWidth * interHeight; - - real bboxArea1 = bbox1.getArea(); - real bboxArea2 = bbox2.getArea(); - - return interArea / (bboxArea1 + bboxArea2 - interArea); - } -} - -void encodeBBoxWithVar(const NormalizedBBox& priorBBox, - const vector& priorBBoxVar, - const NormalizedBBox& gtBBox, - vector& outVec) { - real priorBBoxWidth = priorBBox.getWidth(); - real priorBBoxHeight = priorBBox.getHeight(); - real priorBBoxCenterX = priorBBox.getCenterX(); - real priorBBoxCenterY = priorBBox.getCenterY(); - - real gtBBoxWidth = gtBBox.getWidth(); - real gtBBoxHeight = gtBBox.getHeight(); - real gtBBoxCenterX = gtBBox.getCenterX(); - real gtBBoxCenterY = gtBBox.getCenterY(); - - outVec.clear(); - outVec.push_back((gtBBoxCenterX - priorBBoxCenterX) / priorBBoxWidth / - priorBBoxVar[0]); - outVec.push_back((gtBBoxCenterY - priorBBoxCenterY) / priorBBoxHeight / - priorBBoxVar[1]); - outVec.push_back(std::log(std::fabs(gtBBoxWidth / priorBBoxWidth)) / - priorBBoxVar[2]); - outVec.push_back(std::log(std::fabs(gtBBoxHeight / priorBBoxHeight)) / - priorBBoxVar[3]); -} - -NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox, - const vector& priorBBoxVar, - const vector& locPredData) { - real priorBBoxWidth = priorBBox.getWidth(); - real priorBBoxHeight = priorBBox.getHeight(); - real priorBBoxCenterX = priorBBox.getCenterX(); - real priorBBoxCenterY = priorBBox.getCenterY(); - - real decodedBBoxCenterX = - priorBBoxVar[0] * locPredData[0] * 
priorBBoxWidth + priorBBoxCenterX; - real decodedBBoxCenterY = - priorBBoxVar[1] * locPredData[1] * priorBBoxHeight + priorBBoxCenterY; - real decodedBBoxWidth = - std::exp(priorBBoxVar[2] * locPredData[2]) * priorBBoxWidth; - real decodedBBoxHeight = - std::exp(priorBBoxVar[3] * locPredData[3]) * priorBBoxHeight; - - NormalizedBBox decodedBBox; - decodedBBox.xMin = decodedBBoxCenterX - decodedBBoxWidth / 2; - decodedBBox.yMin = decodedBBoxCenterY - decodedBBoxHeight / 2; - decodedBBox.xMax = decodedBBoxCenterX + decodedBBoxWidth / 2; - decodedBBox.yMax = decodedBBoxCenterY + decodedBBoxHeight / 2; - - return decodedBBox; -} - -void getBBoxFromPriorData(const real* priorData, - const size_t numBBoxes, - vector& bboxVec) { - size_t outOffset = bboxVec.size(); - bboxVec.resize(bboxVec.size() + numBBoxes); - for (size_t i = 0; i < numBBoxes; ++i) { - NormalizedBBox bbox; - bbox.xMin = *(priorData + i * 8); - bbox.yMin = *(priorData + i * 8 + 1); - bbox.xMax = *(priorData + i * 8 + 2); - bbox.yMax = *(priorData + i * 8 + 3); - bboxVec[outOffset + i] = bbox; - } -} - -void getBBoxVarFromPriorData(const real* priorData, - const size_t num, - vector>& varVec) { - size_t outOffset = varVec.size(); - varVec.resize(varVec.size() + num); - for (size_t i = 0; i < num; ++i) { - vector var; - var.push_back(*(priorData + i * 8 + 4)); - var.push_back(*(priorData + i * 8 + 5)); - var.push_back(*(priorData + i * 8 + 6)); - var.push_back(*(priorData + i * 8 + 7)); - varVec[outOffset + i] = var; - } -} - -void getBBoxFromLabelData(const real* labelData, - const size_t numBBoxes, - vector& bboxVec) { - size_t outOffset = bboxVec.size(); - bboxVec.resize(bboxVec.size() + numBBoxes); - for (size_t i = 0; i < numBBoxes; ++i) { - NormalizedBBox bbox; - bbox.xMin = *(labelData + i * 6 + 1); - bbox.yMin = *(labelData + i * 6 + 2); - bbox.xMax = *(labelData + i * 6 + 3); - bbox.yMax = *(labelData + i * 6 + 4); - real isDifficult = *(labelData + i * 6 + 5); - if (std::abs(isDifficult - 0.0) < 
1e-6) - bbox.isDifficult = false; - else - bbox.isDifficult = true; - bboxVec[outOffset + i] = bbox; - } -} - -void getBBoxFromDetectData(const real* detectData, - const size_t numBBoxes, - vector& labelVec, - vector& scoreVec, - vector& bboxVec) { - size_t outOffset = bboxVec.size(); - labelVec.resize(outOffset + numBBoxes); - scoreVec.resize(outOffset + numBBoxes); - bboxVec.resize(outOffset + numBBoxes); - for (size_t i = 0; i < numBBoxes; ++i) { - labelVec[outOffset + i] = *(detectData + i * 7 + 1); - scoreVec[outOffset + i] = *(detectData + i * 7 + 2); - NormalizedBBox bbox; - bbox.xMin = *(detectData + i * 7 + 3); - bbox.yMin = *(detectData + i * 7 + 4); - bbox.xMax = *(detectData + i * 7 + 5); - bbox.yMax = *(detectData + i * 7 + 6); - bboxVec[outOffset + i] = bbox; - } -} - -void matchBBox(const vector& priorBBoxes, - const vector& gtBBoxes, - real overlapThreshold, - vector* matchIndices, - vector* matchOverlaps) { - map> overlaps; - size_t numPriors = priorBBoxes.size(); - size_t numGTs = gtBBoxes.size(); - - matchIndices->clear(); - matchIndices->resize(numPriors, -1); - matchOverlaps->clear(); - matchOverlaps->resize(numPriors, 0.0); - - // Store the positive overlap between predictions and ground truth - for (size_t i = 0; i < numPriors; ++i) { - for (size_t j = 0; j < numGTs; ++j) { - real overlap = jaccardOverlap(priorBBoxes[i], gtBBoxes[j]); - if (overlap > 1e-6) { - (*matchOverlaps)[i] = std::max((*matchOverlaps)[i], overlap); - overlaps[i][j] = overlap; - } - } - } - // Bipartite matching - vector gtPool; - for (size_t i = 0; i < numGTs; ++i) { - gtPool.push_back(i); - } - while (gtPool.size() > 0) { - // Find the most overlapped gt and corresponding predictions - int maxPriorIdx = -1; - int maxGTIdx = -1; - real maxOverlap = -1.0; - for (map>::iterator it = overlaps.begin(); - it != overlaps.end(); - ++it) { - size_t i = it->first; - if ((*matchIndices)[i] != -1) { - // The prediction already has matched ground truth or is ignored - continue; - } 
- for (size_t p = 0; p < gtPool.size(); ++p) { - int j = gtPool[p]; - if (it->second.find(j) == it->second.end()) { - // No overlap between the i-th prediction and j-th ground truth - continue; - } - // Find the maximum overlapped pair - if (it->second[j] > maxOverlap) { - maxPriorIdx = (int)i; - maxGTIdx = (int)j; - maxOverlap = it->second[j]; - } - } - } - if (maxPriorIdx == -1) { - break; - } else { - (*matchIndices)[maxPriorIdx] = maxGTIdx; - (*matchOverlaps)[maxPriorIdx] = maxOverlap; - gtPool.erase(std::find(gtPool.begin(), gtPool.end(), maxGTIdx)); - } - } - - // Get most overlaped for the rest prediction bboxes - for (map>::iterator it = overlaps.begin(); - it != overlaps.end(); - ++it) { - size_t i = it->first; - if ((*matchIndices)[i] != -1) { - // The prediction already has matched ground truth or is ignored - continue; - } - int maxGTIdx = -1; - real maxOverlap = -1; - for (size_t j = 0; j < numGTs; ++j) { - if (it->second.find(j) == it->second.end()) { - // No overlap between the i-th prediction and j-th ground truth - continue; - } - // Find the maximum overlapped pair - real overlap = it->second[j]; - if (overlap > maxOverlap && overlap >= overlapThreshold) { - maxGTIdx = j; - maxOverlap = overlap; - } - } - if (maxGTIdx != -1) { - (*matchIndices)[i] = maxGTIdx; - (*matchOverlaps)[i] = maxOverlap; - } - } -} - -pair generateMatchIndices( - const Matrix& priorValue, - const size_t numPriorBBoxes, - const Matrix& gtValue, - const int* gtStartPosPtr, - const size_t seqNum, - const vector>& maxConfScore, - const size_t batchSize, - const real overlapThreshold, - const real negOverlapThreshold, - const size_t negPosRatio, - vector>* matchIndicesVecPtr, - vector>* negIndicesVecPtr) { - vector priorBBoxes; // share same prior bboxes - getBBoxFromPriorData(priorValue.getData(), numPriorBBoxes, priorBBoxes); - size_t totalPos = 0; - size_t totalNeg = 0; - for (size_t n = 0; n < batchSize; ++n) { - vector matchIndices; - vector negIndices; - vector 
matchOverlaps; - matchIndices.resize(numPriorBBoxes, -1); - matchOverlaps.resize(numPriorBBoxes, 0.0); - size_t numGTBBoxes = 0; - if (n < seqNum) numGTBBoxes = gtStartPosPtr[n + 1] - gtStartPosPtr[n]; - if (!numGTBBoxes) { - matchIndicesVecPtr->push_back(matchIndices); - negIndicesVecPtr->push_back(negIndices); - continue; - } - vector gtBBoxes; - getBBoxFromLabelData( - gtValue.getData() + gtStartPosPtr[n] * 6, numGTBBoxes, gtBBoxes); - - matchBBox( - priorBBoxes, gtBBoxes, overlapThreshold, &matchIndices, &matchOverlaps); - - size_t numPos = 0; - size_t numNeg = 0; - for (size_t i = 0; i < matchIndices.size(); ++i) - if (matchIndices[i] != -1) ++numPos; - totalPos += numPos; - vector> scoresIndices; - for (size_t i = 0; i < matchIndices.size(); ++i) - if (matchIndices[i] == -1 && matchOverlaps[i] < negOverlapThreshold) { - scoresIndices.push_back(std::make_pair(maxConfScore[n][i], i)); - ++numNeg; - } - numNeg = std::min(static_cast(numPos * negPosRatio), numNeg); - std::sort(scoresIndices.begin(), - scoresIndices.end(), - sortScorePairDescend); - for (size_t i = 0; i < numNeg; ++i) - negIndices.push_back(scoresIndices[i].second); - totalNeg += numNeg; - matchIndicesVecPtr->push_back(matchIndices); - negIndicesVecPtr->push_back(negIndices); - } - return std::make_pair(totalPos, totalNeg); -} - -void getMaxConfidenceScores(const real* confData, - const size_t batchSize, - const size_t numPriorBBoxes, - const size_t numClasses, - const size_t backgroundId, - vector>* maxConfScoreVecPtr) { - maxConfScoreVecPtr->clear(); - for (size_t i = 0; i < batchSize; ++i) { - vector maxConfScore; - for (size_t j = 0; j < numPriorBBoxes; ++j) { - int offset = j * numClasses; - real maxVal = -FLT_MAX; - real maxPosVal = -FLT_MAX; - real maxScore = 0.0; - for (size_t c = 0; c < numClasses; ++c) { - maxVal = std::max(confData[offset + c], maxVal); - if (c != backgroundId) - maxPosVal = std::max(confData[offset + c], maxPosVal); - } - real sum = 0.0; - for (size_t c = 0; c < 
numClasses; ++c) - sum += std::exp(confData[offset + c] - maxVal); - maxScore = std::exp(maxPosVal - maxVal) / sum; - maxConfScore.push_back(maxScore); - } - confData += numPriorBBoxes * numClasses; - maxConfScoreVecPtr->push_back(maxConfScore); - } -} - -template -bool sortScorePairDescend(const pair& pair1, - const pair& pair2) { - return pair1.first > pair2.first; -} - -template <> -bool sortScorePairDescend(const pair& pair1, - const pair& pair2) { - return pair1.first > pair2.first; -} - -void applyNMSFast(const vector& bboxes, - const real* confScoreData, - size_t classIdx, - size_t topK, - real confThreshold, - real nmsThreshold, - size_t numPriorBBoxes, - size_t numClasses, - vector* indices) { - vector> scores; - for (size_t i = 0; i < numPriorBBoxes; ++i) { - size_t confOffset = i * numClasses + classIdx; - if (confScoreData[confOffset] > confThreshold) - scores.push_back(std::make_pair(confScoreData[confOffset], i)); - } - std::stable_sort(scores.begin(), scores.end(), sortScorePairDescend); - if (topK > 0 && topK < scores.size()) scores.resize(topK); - while (scores.size() > 0) { - const size_t idx = scores.front().second; - bool keep = true; - for (size_t i = 0; i < indices->size(); ++i) { - if (keep) { - const size_t savedIdx = (*indices)[i]; - real overlap = jaccardOverlap(bboxes[idx], bboxes[savedIdx]); - keep = overlap <= nmsThreshold; - } else { - break; - } - } - if (keep) indices->push_back(idx); - scores.erase(scores.begin()); - } -} - -size_t getDetectionIndices( - const real* confData, - const size_t numPriorBBoxes, - const size_t numClasses, - const size_t backgroundId, - const size_t batchSize, - const real confThreshold, - const size_t nmsTopK, - const real nmsThreshold, - const size_t keepTopK, - const vector>& allDecodedBBoxes, - vector>>* allDetectionIndices) { - size_t totalKeepNum = 0; - for (size_t n = 0; n < batchSize; ++n) { - const vector& decodedBBoxes = allDecodedBBoxes[n]; - size_t numDetected = 0; - map> indices; - size_t 
confOffset = n * numPriorBBoxes * numClasses; - for (size_t c = 0; c < numClasses; ++c) { - if (c == backgroundId) continue; - applyNMSFast(decodedBBoxes, - confData + confOffset, - c, - nmsTopK, - confThreshold, - nmsThreshold, - numPriorBBoxes, - numClasses, - &(indices[c])); - numDetected += indices[c].size(); - } - if (keepTopK > 0 && numDetected > keepTopK) { - vector>> scoreIndexPairs; - for (size_t c = 0; c < numClasses; ++c) { - const vector& labelIndices = indices[c]; - for (size_t i = 0; i < labelIndices.size(); ++i) { - size_t idx = labelIndices[i]; - scoreIndexPairs.push_back( - std::make_pair((confData + confOffset)[idx * numClasses + c], - std::make_pair(c, idx))); - } - } - std::sort(scoreIndexPairs.begin(), - scoreIndexPairs.end(), - sortScorePairDescend>); - scoreIndexPairs.resize(keepTopK); - map> newIndices; - for (size_t i = 0; i < scoreIndexPairs.size(); ++i) { - size_t label = scoreIndexPairs[i].second.first; - size_t idx = scoreIndexPairs[i].second.second; - newIndices[label].push_back(idx); - } - allDetectionIndices->push_back(newIndices); - totalKeepNum += keepTopK; - } else { - allDetectionIndices->push_back(indices); - totalKeepNum += numDetected; - } - } - return totalKeepNum; -} - -void getDetectionOutput(const real* confData, - const size_t numKept, - const size_t numPriorBBoxes, - const size_t numClasses, - const size_t batchSize, - const vector>>& allIndices, - const vector>& allDecodedBBoxes, - Matrix& out) { - MatrixPtr outBuffer; - Matrix::resizeOrCreate(outBuffer, numKept, 7, false, false); - real* bufferData = outBuffer->getData(); - size_t count = 0; - for (size_t n = 0; n < batchSize; ++n) { - for (map>::const_iterator it = allIndices[n].begin(); - it != allIndices[n].end(); - ++it) { - size_t label = it->first; - const vector& indices = it->second; - const vector& decodedBBoxes = allDecodedBBoxes[n]; - for (size_t i = 0; i < indices.size(); ++i) { - size_t idx = indices[i]; - size_t confOffset = n * numPriorBBoxes * 
numClasses + idx * numClasses; - bufferData[count * 7] = n; - bufferData[count * 7 + 1] = label; - bufferData[count * 7 + 2] = (confData + confOffset)[label]; - NormalizedBBox clippedBBox = clipBBox(decodedBBoxes[idx]); - bufferData[count * 7 + 3] = clippedBBox.xMin; - bufferData[count * 7 + 4] = clippedBBox.yMin; - bufferData[count * 7 + 5] = clippedBBox.xMax; - bufferData[count * 7 + 6] = clippedBBox.yMax; - ++count; - } - } - } - out.copyFrom(bufferData, numKept * 7); -} - -NormalizedBBox clipBBox(const NormalizedBBox& bbox) { - real realOne = static_cast(1.0); - real realZero = static_cast(0.0); - NormalizedBBox clippedBBox; - clippedBBox.xMin = std::max(std::min(bbox.xMin, realOne), realZero); - clippedBBox.yMin = std::max(std::min(bbox.yMin, realOne), realZero); - clippedBBox.xMax = std::max(std::min(bbox.xMax, realOne), realZero); - clippedBBox.yMax = std::max(std::min(bbox.yMax, realOne), realZero); - return clippedBBox; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DetectionUtil.h b/paddle/legacy/gserver/layers/DetectionUtil.h deleted file mode 100644 index c1e0bb809ad290613159f558e9b1860476b3b5f2..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DetectionUtil.h +++ /dev/null @@ -1,307 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include -#include "paddle/legacy/math/Matrix.h" - -using std::vector; -using std::pair; -using std::map; - -namespace paddle { - -template -struct BBoxBase { - BBoxBase(T xMin, T yMin, T xMax, T yMax) - : xMin(xMin), yMin(yMin), xMax(xMax), yMax(yMax), isDifficult(false) {} - - BBoxBase() {} - - T getWidth() const { return xMax - xMin; } - - T getHeight() const { return yMax - yMin; } - - T getCenterX() const { return (xMin + xMax) / 2; } - - T getCenterY() const { return (yMin + yMax) / 2; } - - T getArea() const { return getWidth() * getHeight(); } - - // coordinate of bounding box - T xMin; - T yMin; - T xMax; - T yMax; - // whether difficult object (e.g. object with heavy occlusion is difficult) - bool isDifficult; -}; - -struct NormalizedBBox : BBoxBase { - NormalizedBBox() : BBoxBase() {} -}; - -enum PermMode { kNCHWToNHWC, kNHWCToNCHW }; - -/** - * @brief First permute input maxtrix then append to output matrix - */ -size_t appendWithPermute(const Matrix& inMatrix, - size_t height, - size_t width, - size_t outTotalSize, - size_t outOffset, - size_t batchSize, - Matrix& outMatrix, - PermMode permMode); - -/** - * @brief First permute input maxtrix then decompose to output - */ -size_t decomposeWithPermute(const Matrix& inMatrix, - size_t height, - size_t width, - size_t totalSize, - size_t offset, - size_t batchSize, - Matrix& outMatrix, - PermMode permMode); - -/** - * @brief Compute jaccard overlap between two bboxes. 
- * @param bbox1 The first bbox - * @param bbox2 The second bbox - */ -real jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2); - -/** - * @brief Compute offset parameters between prior bbox and ground truth bbox - * and variances of prior bbox are considered - * @param priorBBox Input prior bbox - * @param priorBBoxVar Variance parameters of prior bbox - * @param gtBBox Groundtruth bbox - * @param outVec Output vector - */ -void encodeBBoxWithVar(const NormalizedBBox& priorBBox, - const vector& priorBBoxVar, - const NormalizedBBox& gtBBox, - vector& outVec); - -/** - * @brief Decode prior bbox with offset parameters - * and variances of prior bbox are considered - * @param priorBBox Prior bbox to be decoded - * @param priorBBoxVar Variance parameters of prior bbox - * @param locPredData Offset parameters - */ -NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox, - const vector& priorBBoxVar, - const vector& locPredData); - -/** - * @brief Extract bboxes from prior matrix, the layout is - * xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ... 
- * @param priorData Matrix of prior value - * @param numBBoxes Number of bbox to be extracted - * @param bboxVec Append to the vector - */ -void getBBoxFromPriorData(const real* priorData, - const size_t numBBoxes, - vector& bboxVec); - -/** - * @brief Extract labels, scores and bboxes from detection matrix, the layout is - * imageId | label | score | xmin | ymin | xmax | ymax - * @param detectData Matrix of detection value - * @param numBBoxes Number of bbox to be extracted - * @param labelVec Label of bbox - * @param scoreVec Score of bbox - * @param bboxVec Append to the vector - */ -void getBBoxFromDetectData(const real* detectData, - const size_t numBBoxes, - vector& labelVec, - vector& scoreVec, - vector& bboxVec); - -/** - * @brief Extract variances from prior matrix, the layout is - * xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ... - * @param priorData Matrix of prior value - * @param num Number to be extracted - * @param varVec Append to the vector - */ -void getBBoxVarFromPriorData(const real* priorData, - const size_t num, - vector>& varVec); - -/** - * @brief Extract bboxes from label matrix, the layout is - * class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ... - * @param labelData Matrix of label value - * @param numBBoxes Number to be extracted - * @param bboxVec Append to the vector - */ -void getBBoxFromLabelData(const real* labelData, - const size_t numBBoxes, - vector& bboxVec); - -/** -* @brief Match prior bbox to groundtruth bbox, the strategy is: -1. Find the most overlaped bbox pair (prior and groundtruth) -2. 
For rest of prior bboxes find the most overlaped groundtruth bbox -* @param priorBBoxes prior bbox -* @param gtBBoxes groundtruth bbox -* @param overlapThreshold Low boundary of overlap (judge whether matched) -* @param matchIndices For each prior bbox, groundtruth bbox index if matched -otherwise -1 -* @param matchOverlaps For each prior bbox, overap with all groundtruth bboxes -*/ -void matchBBox(const vector& priorBBoxes, - const vector& gtBBoxes, - real overlapThreshold, - vector* matchIndices, - vector* matchOverlaps); - -/** -* @brief Generate positive bboxes and negative bboxes, -|positive bboxes|/|negative bboxes| is negPosRatio -* @param priorValue Prior value -* @param numPriorBBoxes Number of prior bbox -* @param gtValue Groundtruth value -* @param gtStartPosPtr Since groundtruth value stored as sequence type, -this parameter indicates start position of each record -* @param seqNum Number of sequence -* @param maxConfScore Classification score for prior bbox, used to mine -negative examples -* @param batchSize Image number -* @param overlapThreshold Low boundary of overap -* @param negOverlapThreshold Upper boundary of overap (judge negative example) -* @param negPosRatio Control number of negative bboxes -* @param matchIndicesVecPtr Save indices of matched prior bbox -* @param negIndicesVecPtr Save indices of negative prior bbox -*/ -pair generateMatchIndices( - const Matrix& priorValue, - const size_t numPriorBBoxes, - const Matrix& gtValue, - const int* gtStartPosPtr, - const size_t seqNum, - const vector>& maxConfScore, - const size_t batchSize, - const real overlapThreshold, - const real negOverlapThreshold, - const size_t negPosRatio, - vector>* matchIndicesVecPtr, - vector>* negIndicesVecPtr); - -/** - * @brief Get max confidence score for each prior bbox - * @param confData Confidence scores, layout is - * class1 score | class2 score | ... | classN score ... 
- * @param batchSize Image number - * @param numPriorBBoxes Prior bbox number - * @param numClasses Classes number - * @param backgroundId Background id - * @param maxConfScoreVecPtr Ouput - */ -void getMaxConfidenceScores(const real* confData, - const size_t batchSize, - const size_t numPriorBBoxes, - const size_t numClasses, - const size_t backgroundId, - vector>* maxConfScoreVecPtr); - -template -bool sortScorePairDescend(const pair& pair1, - const pair& pair2); - -template <> -bool sortScorePairDescend(const pair& pair1, - const pair& pair2); - -/** - * @brief Do NMS for bboxes to remove duplicated bboxes - * @param bboxes BBoxes to apply NMS - * @param confScoreData Confidence scores - * @param classIdx Class to do NMS - * @param topK Number to keep - * @param confThreshold Low boundary of confidence score - * @param nmsThreshold Threshold of overlap - * @param numPriorBBoxes Total number of prior bboxes - * @param numClasses Total class number - * @param indices Indices of high quality bboxes - */ -void applyNMSFast(const vector& bboxes, - const real* confScoreData, - size_t classIdx, - size_t topK, - real confThreshold, - real nmsThreshold, - size_t numPriorBBoxes, - size_t numClasses, - vector* indices); - -/** - * @brief Get detection results which satify requirements - * @param numPriorBBoxes Prior bbox number - * @param numClasses Class number - * @param backgroundId Background class - * @param batchSize Image number - * @param confThreshold Threshold of class confidence - * @param nmsTopK Used in NMS operation to keep top k bbox - * @param nmsThreshold Used in NMS, threshold of overlap - * @param keepTopK How many bboxes keeped in an image - * @param allDecodedBBoxes Decoded bboxes for all images - * @param allDetectionIndices Save detection bbox indices - */ -size_t getDetectionIndices( - const real* confData, - const size_t numPriorBBoxes, - const size_t numClasses, - const size_t backgroundId, - const size_t batchSize, - const real confThreshold, - 
const size_t nmsTopK, - const real nmsThreshold, - const size_t keepTopK, - const vector>& allDecodedBBoxes, - vector>>* allDetectionIndices); - -/** - * @brief Get detection results - * @param confData Confidence scores - * @param numPriorBBoxes Prior bbox number - * @param numClasses Class number - * @param batchSize Image number - * @param allIndices Indices of predicted bboxes - * @param allDecodedBBoxes BBoxes decoded - * @param out Output matrix - * image number | label | confidence score | xMin | yMin | xMax | yMax - */ -void getDetectionOutput(const real* confData, - const size_t numKept, - const size_t numPriorBBoxes, - const size_t numClasses, - const size_t batchSize, - const vector>>& allIndices, - const vector>& allDecodedBBoxes, - Matrix& out); - -NormalizedBBox clipBBox(const NormalizedBBox& bbox); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DotMulOperator.cpp b/paddle/legacy/gserver/layers/DotMulOperator.cpp deleted file mode 100644 index 03d18d9b239e57dc41334462f2324ae2d0505a62..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DotMulOperator.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Operator.h" - -namespace paddle { - -/** - * DotMulOperator takes two inputs, performs element-wise multiplication: - * \f[ - * out.row[i] += scale * (in1.row[i] .* in2.row[i]) - * \f] - * where \f$.*\f$ means element-wise multiplication, - * and scale is a config scalar, its default value is one. - * - * The config file api is dotmul_operator. - */ -class DotMulOperator : public Operator { - public: - DotMulOperator(const OperatorConfig& config, bool useGpu); - virtual void forward(); - virtual void backward(); -}; - -REGISTER_OPERATOR(dot_mul, DotMulOperator); - -DotMulOperator::DotMulOperator(const OperatorConfig& config, bool useGpu) - : Operator(config, useGpu) { - CHECK_EQ(config_.input_indices_size(), 2L); -} - -void DotMulOperator::forward() { - out_->value->addDotMul( - *ins_[0]->value, *ins_[1]->value, 1, config_.dotmul_scale()); -} - -void DotMulOperator::backward() { - const MatrixPtr& inV0 = ins_[0]->value; - const MatrixPtr& inV1 = ins_[1]->value; - const MatrixPtr& inG0 = ins_[0]->grad; - const MatrixPtr& inG1 = ins_[1]->grad; - - if (inG0) { - inG0->addDotMul(*out_->grad, *inV1, 1, config_.dotmul_scale()); - } - if (inG1) { - inG1->addDotMul(*out_->grad, *inV0, 1, config_.dotmul_scale()); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DotMulProjection.cpp b/paddle/legacy/gserver/layers/DotMulProjection.cpp deleted file mode 100644 index d7780387670e83af24fa342be3d596b618b1f677..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DotMulProjection.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Projection.h" - -namespace paddle { - -/** - * DotMulProjection performs element-wise multiplication with weight: - * \f[ - * out.row[i] += in.row[i] .* weight - * \f] - * where \f$.*\f$ means element-wise multiplication. - * - * The config file api is dotmul_projection. - */ -class DotMulProjection : public Projection { - public: - DotMulProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu); - virtual void forward(); - virtual void backward(const UpdateCallback& callback); - - protected: - /// shared memory with parameter - std::unique_ptr weight_; -}; - -REGISTER_PROJECTION(dot_mul, DotMulProjection); - -DotMulProjection::DotMulProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - weight_.reset(new Weight(1LU, config.output_size(), parameter)); -} - -void DotMulProjection::forward() { - out_->value->addDotMulMMV(*in_->value, *(weight_->getW())); -} - -void DotMulProjection::backward(const UpdateCallback& callback) { - /* Calculate the W-gradient for the current layer */ - if (weight_->getWGrad()) { - weight_->getWGrad()->addDotMulVMM(*out_->grad, *in_->value); - } - - /* Calculate the input layers error */ - if (in_->grad) { - in_->grad->addDotMulMMV(*out_->grad, *(weight_->getW())); - } - - parameter_->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/DotProdLayer.cpp b/paddle/legacy/gserver/layers/DotProdLayer.cpp deleted file mode 100644 index 
06060d93f76c18d893852a5f5c99c36fe5641b2e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/DotProdLayer.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief A layer for computing the dot product of two vectors. 
- * Input1: vector (batchSize * dim) - * Input2: vector (batchSize * dim) - * Output: a matrix: (batchSize * 1) - */ - -class DotProdLayer : public Layer { - public: - explicit DotProdLayer(const LayerConfig& config) : Layer(config) {} - - ~DotProdLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(dot_prod, DotProdLayer); - -bool DotProdLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2U); - CHECK_EQ(1UL, getSize()) - << "The output dimensionality of this layer should be fixed to 1."; - - return true; -} - -void DotProdLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - - size_t batchSize = inV0->getHeight(); - CHECK_EQ(inV1->getHeight(), batchSize); - CHECK_EQ(inV0->getWidth(), inV1->getWidth()); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - reserveOutput(batchSize, 1); - } - - MatrixPtr outV = getOutputValue(); - { - REGISTER_TIMER_INFO("FwDotProdTimer", getName().c_str()); - outV->sumOfProducts(*inV0, *inV1, 1, 0); - } -} - -void DotProdLayer::backward(const UpdateCallback& callback) { - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr outG = getOutputGrad(); - MatrixPtr inG0 = getInputGrad(0); - MatrixPtr inG1 = getInputGrad(1); - - { - REGISTER_TIMER_INFO("BwDotProdTimer", getName().c_str()); - - if (inG0) { - inG0->addRowScale(0, *inV1, *outG); - } - - if (inG1) { - inG1->addRowScale(0, *inV0, *outG); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/EosIdCheckLayer.cpp b/paddle/legacy/gserver/layers/EosIdCheckLayer.cpp deleted file mode 100644 index 
38671126c62ba36e22496dcbe1ff3c8d6dcea742..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/EosIdCheckLayer.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { -/** - * A layer for checking EOS for each sample: - * - output_id = (input_id == conf.eos_id) - * - * The result is stored in output_.ids. - * It is used by recurrent layer group. 
- */ -class EosIdCheckLayer : public Layer { - public: - explicit EosIdCheckLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - bool ret = Layer::init(layerMap, parameterMap); - CHECK_EQ(1UL, inputLayers_.size()); - return ret; - } - - void forward(PassType passType) override { - Layer::forward(passType); - - const Argument& input = getInput(0); - IVector::resizeOrCreate(output_.ids, input.ids->getSize(), useGpu_); - output_.ids->isEqualTo(*input.ids, config_.eos_id()); - } - - void backward(const UpdateCallback& callback) override {} -}; - -REGISTER_LAYER(eos_id, EosIdCheckLayer); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ExpandConvLayer.cpp b/paddle/legacy/gserver/layers/ExpandConvLayer.cpp deleted file mode 100644 index 8a53db380686cea2ad121c948c45a0fa1154381e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ExpandConvLayer.cpp +++ /dev/null @@ -1,248 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ExpandConvLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -DEFINE_bool(use_nnpack, - false, - "Whether to use nnpack for convolution calculation."); - -namespace paddle { - -/* - * The calculation of the exconvt(convolution transpose (deconv) operation) - * is a swap of forward and backward of the calculation of exconv. 
- * */ -REGISTER_LAYER(exconv, ExpandConvLayer); -REGISTER_LAYER(exconvt, ExpandConvLayer); - -inline bool isDepthwiseConv(int channels, int groups) { - return channels == groups; -} - -bool ExpandConvLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { - /* Initialize the basic convolutional parent class */ - ConvBaseLayer::init(layerMap, parameterMap); - - int index = 0; - for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - /* Consistent caffe mode for multiple input */ - caffeMode_ = conf.caffe_mode(); - - // create a new weight - size_t height, width; - height = filterPixels_[index] * filterChannels_[index]; - width = (!isDeconv_) ? numFilters_ : channels_[index]; - CHECK_EQ(parameters_[index]->getSize(), width * height); - Weight *w = new Weight(height, width, parameters_[index]); - weights_.emplace_back(w); - index++; - } - - if (biasParameter_.get()) { - if (sharedBiases_) { - CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); - biases_ = std::unique_ptr( - new Weight(1, numFilters_, biasParameter_, 0)); - } else { - biases_ = - std::unique_ptr(new Weight(1, getSize(), biasParameter_, 0)); - } - } - - getOutputSize(); - - size_t numInputs = config_.inputs_size(); - inputShape_.resize(numInputs); - filterShape_.resize(numInputs); - outputShape_.resize(numInputs); - - std::string convType; - std::string convGradInputType; - std::string convGradFilterType; - - for (int i = 0; i < config_.inputs_size(); i++) { - std::vector paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; - std::vector strides = {(size_t)strideY_[i], (size_t)stride_[i]}; - std::vector dilations = {(size_t)dilationY_[i], - (size_t)dilation_[i]}; - - bool useDilation = ((size_t)dilationY_[i] > 1 || (size_t)dilation_[i] > 1); - - // Convolution Layer uses the GemmConv function by default. 
- convType = "GemmConv"; - convGradInputType = "GemmConvGradInput"; - convGradFilterType = "GemmConvGradFilter"; - - // If depth wise convolution and useGpu == true - if (useGpu_ && isDepthwiseConv(channels_[i], groups_[i]) && !isDeconv_) { - convType = "DepthwiseConv"; - convGradInputType = "DepthwiseConvGradInput"; - convGradFilterType = "DepthwiseConvGradFilter"; - } - - // If depth wise convolution and useGpu == false and ARM-NEON - if (!useGpu_ && isDepthwiseConv(channels_[i], groups_[i]) && !isDeconv_) { -#if defined(__ARM_NEON__) || defined(__ARM_NEON) - if ((filterSize_[i] == filterSizeY_[i]) && - (filterSize_[i] == 3 || filterSize_[i] == 4) && - (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2) && - !useDilation) { - convType = "NeonDepthwiseConv"; - } -#endif - } - - if (FLAGS_use_nnpack && !isDeconv_ && !useDilation) { - createFunction(forward_, - "NNPACKConv", - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", (size_t)groups_[i]) - .set("algo", std::string("auto"))); - } else { - createFunction(forward_, - !isDeconv_ ? convType : convGradInputType, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("dilations", dilations) - .set("groups", (size_t)groups_[i])); - - createFunction(backward_, - !isDeconv_ ? 
convGradInputType : convType, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("dilations", dilations) - .set("groups", (size_t)groups_[i])); - - createFunction(backward_, - convGradFilterType, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("dilations", dilations) - .set("groups", (size_t)groups_[i])); - } - } - return true; -} - -size_t ExpandConvLayer::getOutputSize() { - CHECK_NE(inputLayers_.size(), 0UL); - size_t layerSize = ConvBaseLayer::calOutputSize(); - return layerSize; -} - -// i is the index of input layers -#define BACKWARD_INPUT(i, inputs, outputs) \ - backward_[2 * i]->calc(inputs, outputs) -#define BACKWARD_FILTER(i, inputs, outputs) \ - backward_[2 * i + 1]->calc(inputs, outputs) - -void ExpandConvLayer::forward(PassType passType) { - Layer::forward(passType); - - size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - resetOutput(batchSize, getOutputSize()); - - // Calculate the shape of the input, output, and filter. - for (size_t i = 0; i < inputLayers_.size(); ++i) { - inputShape_[i] = TensorShape({(size_t)batchSize, - (size_t)channels_[i], - (size_t)imgSizeH_[i], - (size_t)imgSizeW_[i]}); - filterShape_[i] = - TensorShape({(size_t)groups_[i], - !isDeconv_ ? (size_t)numFilters_ / groups_[i] - : (size_t)channels_[i] / groups_[i], - !isDeconv_ ? (size_t)channels_[i] / groups_[i] - : (size_t)numFilters_ / groups_[i], - (size_t)filterSizeY_[i], - (size_t)filterSize_[i]}); - outputShape_[i] = TensorShape({(size_t)batchSize, - (size_t)numFilters_, - (size_t)outputH_[i], - (size_t)outputW_[i]}); - } - - // Calculate the output value. - for (size_t i = 0; i < inputLayers_.size(); ++i) { - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(i), inputShape_[i]); - inputs.addArg(*weights_[i]->getW(), filterShape_[i]); - outputs.addArg(*getOutputValue(), - outputShape_[i], - !isDeconv_ && i == 0 ? 
ASSIGN_TO : ADD_TO); - - forward_[i]->calc(inputs, outputs); - } - - /* add the bias-vector */ - if (biases_.get()) { - output_.value->addBias(*biases_->getW(), 1.0, sharedBiases_); - } - - /* activation */ - forwardActivation(); -} - -void ExpandConvLayer::backward(const UpdateCallback &callback) { - backwardActivation(); - - MatrixPtr outGrad = getOutputGrad(); - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBiases_); - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - // Calculate the input grad and filter grad. - for (size_t i = 0; i < inputLayers_.size(); ++i) { - if (getInputGrad(i)) { - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), outputShape_[i]); - inputs.addArg(*weights_[i]->getW(), filterShape_[i]); - outputs.addArg(*getInputGrad(i), inputShape_[i], ADD_TO); - BACKWARD_INPUT(i, inputs, outputs); - } - - if (weights_[i]->getWGrad()) { - BufferArgs inputs; - BufferArgs outputs; - if (!isDeconv_) { - inputs.addArg(*getOutputGrad(), outputShape_[i]); - inputs.addArg(*getInputValue(i), inputShape_[i]); - } else { - inputs.addArg(*getInputValue(i), inputShape_[i]); - inputs.addArg(*getOutputGrad(), outputShape_[i]); - } - outputs.addArg(*weights_[i]->getWGrad(), filterShape_[i], ADD_TO); - BACKWARD_FILTER(i, inputs, outputs); - - /* Increasing the number of gradient */ - weights_[i]->getParameterPtr()->incUpdate(callback); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ExpandConvLayer.h b/paddle/legacy/gserver/layers/ExpandConvLayer.h deleted file mode 100644 index c0eff3ab061949bd583e0deaf121912ed993be76..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ExpandConvLayer.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "ConvBaseLayer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief A subclass of convolution layer. - * This layer expands input and use matrix multiplication to - * calculate convolution operation. - * - * The config file api is img_conv_layer. - */ - -class ExpandConvLayer : public ConvBaseLayer { - public: - explicit ExpandConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} - - ~ExpandConvLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - - size_t getOutputSize(); - - protected: - std::vector inputShape_; - std::vector filterShape_; - std::vector outputShape_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ExpandLayer.cpp b/paddle/legacy/gserver/layers/ExpandLayer.cpp deleted file mode 100644 index 074fbab8ef9d1453160058031be370e991459fa5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ExpandLayer.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ExpandLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(expand, ExpandLayer); - -bool ExpandLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - CHECK_EQ(inputLayers_.size(), 2UL); - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - // which sequence type of input[0] - if (config_.trans_type() == "non-seq") { - type_ = kNonSeq; - } else if (config_.trans_type() == "seq") { - type_ = kSeq; - } else { - LOG(FATAL) << "Unknown trans_type: " << config_.trans_type(); - } - setNeedSequenceInfo(false); - return true; -} - -void ExpandLayer::forward(PassType passType) { - Layer::forward(passType); - // Expand layer should have exactly 2 input, one for data, one for size - CHECK_EQ(2U, inputLayers_.size()); - - // using two input: - // * first one for data; - // * second one only for sequence info - const Argument& shapeInput = getInput(1); - const Argument& dataInput = getInput(0); - size_t outputBatchSize = shapeInput.getBatchSize(); - auto startPositions = type_ ? 
shapeInput.subSequenceStartPositions - : shapeInput.sequenceStartPositions; - size_t numSequences = startPositions->getSize() - 1; - const int* starts = startPositions->getData(false); - - CHECK_EQ(starts[numSequences], shapeInput.getBatchSize()); - if (type_) { - // when trans_type = seq, input[1] must hasSubseq - CHECK_EQ(shapeInput.hasSubseq(), 1UL); - CHECK_EQ(dataInput.getNumSequences(), shapeInput.getNumSequences()); - } else { - CHECK_EQ(dataInput.getBatchSize(), shapeInput.getNumSequences()); - } - - // set output sequence info as shape sequence - output_.sequenceStartPositions = shapeInput.sequenceStartPositions; - if (shapeInput.hasSubseq()) { - output_.subSequenceStartPositions = shapeInput.subSequenceStartPositions; - } - - // reserve output: Expand output to batchsize of sequence data. - reserveOutput(outputBatchSize, dataInput.value->getWidth()); - - MatrixPtr inputValue = getInputValue(0); - MatrixPtr outputValue = getOutputValue(); - - ICpuGpuVector::resizeOrCreate(expandStartsPos_, outputBatchSize, false); - int* expandStarts = expandStartsPos_->getMutableData(false); - for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) { - int sequenceLength = starts[sequenceId + 1] - starts[sequenceId]; - for (int j = 0; j < sequenceLength; j++) { - expandStarts[starts[sequenceId] + j] = sequenceId; - } - } - - outputValue->copyByRowIndex(*inputValue, - *expandStartsPos_->getVector(useGpu_)); - - if (biases_.get() != NULL) { - outputValue->addBias(*(biases_->getW()), 1); - } -} - -void ExpandLayer::backward(const UpdateCallback& callback) { - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - if (!getInputGrad(0)) return; - MatrixPtr inputGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - auto cpuSeqStartPos = type_ ? 
getInput(1).subSequenceStartPositions - : getInput(1).sequenceStartPositions; - size_t numSequences = cpuSeqStartPos->getSize() - 1; - const int* starts = cpuSeqStartPos->getData(false); - - CHECK_EQ(inputGrad->getWidth(), outputGrad->getWidth()); - CHECK_EQ(outputGrad->getHeight(), (size_t)starts[numSequences]); - - AsyncGpuBlock asyncGpuBlock; - - // sum to get the grad - real scale = 1; - for (size_t sequenceId = 0; sequenceId < numSequences; sequenceId++) { - // TODO(Dangqingqing) optimization for GPU - int sequenceLength = starts[sequenceId + 1] - starts[sequenceId]; - if (sequenceLength == 0) { - // empty sequence - continue; - } - MatrixPtr copyData = inputGrad->subMatrix(sequenceId, 1); - copyData->collectBias( - *outputGrad->subMatrix(starts[sequenceId], sequenceLength), scale); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ExpandLayer.h b/paddle/legacy/gserver/layers/ExpandLayer.h deleted file mode 100644 index 75a1ec75688cdbc61a117da7d4be47848c30425a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ExpandLayer.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * A layer for "Expand Dense data or (sequence data where the length of each - * sequence is one) to sequence data." 
- * - * It should have exactly 2 input, one for data, one for size: - * - first one for data - * - If ExpandLevel = kNonSeq: dense data - * - If ExpandLevel = kSeq: sequence data where the length of each sequence is - * one - * - second one only for sequence info - * - should be sequence data with or without sub-sequence. - * - * And the output size is the batch size(not instances) of second input. - * - * The config file api is expand_layer. - */ - -class ExpandLayer : public Layer { - protected: - std::unique_ptr biases_; - /// if input[0] is dense data, ExpandLevel=kNonSeq; - /// if input[0] is sequence data, ExpandLevel=kSeq - enum ExpandLevel { kNonSeq = 0, kSeq = 1 }; - /// store the ExpandLevel - int type_; - /// expanded sequenceStartPositions or subSequenceStartPositions - /// of input[1] - ICpuGpuVectorPtr expandStartsPos_; - - public: - explicit ExpandLayer(const LayerConfig& config) : Layer(config) {} - - ~ExpandLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/FactorizationMachineLayer.cpp b/paddle/legacy/gserver/layers/FactorizationMachineLayer.cpp deleted file mode 100644 index 6cf269fa3ffb3f4a2864aea4225d9401930e73b1..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/FactorizationMachineLayer.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "FactorizationMachineLayer.h" -#include -#include -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(factorization_machine, FactorizationMachineLayer); - -bool FactorizationMachineLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - factorSize_ = config_.factor_size(); - - /* initialize the latentVectors_ */ - CHECK_EQ(inputLayers_.size(), 1UL); - size_t inputSize = inputLayers_[0]->getSize(); - CHECK_EQ(parameters_[0]->getSize(), inputSize * factorSize_); - latentVectors_ = std::unique_ptr( - new Weight(inputSize, factorSize_, parameters_[0])); - - return true; -} - -void FactorizationMachineLayer::forward(PassType passType) { - Layer::forward(passType); - - const MatrixPtr& inputV = getInputValue(0); - - size_t batchSize = inputV->getHeight(); - size_t outputSize = getSize(); - size_t inputSize = inputLayers_[0]->getSize(); - reserveOutput(batchSize, outputSize); - - MatrixPtr outV = getOutputValue(); - - Matrix::resizeOrCreate( - latentVectorsSquare_, inputSize, factorSize_, false, useGpu_); - Matrix::resizeOrCreate( - inputMulFactor_, batchSize, factorSize_, false, useGpu_); - Matrix::resizeOrCreate(tmpOut_, batchSize, factorSize_, false, useGpu_); - - REGISTER_TIMER_INFO("FmInputMulFactorTimer", getName().c_str()); - inputMulFactor_->mul(*inputV, *latentVectors_->getW()); - inputMulFactor_->square2(*tmpOut_); - 
outV->sumRows(*tmpOut_, 0.5, 0); - - if (dynamic_cast(inputV.get())) { - Matrix::resizeOrCreateSparseMatrix(inputSquare_, - inputV->getHeight(), - inputV->getWidth(), - inputV->getElementCnt(), - inputV->getValueType()); - inputSquare_->copyFrom(*inputV); - (dynamic_cast(inputSquare_.get()))->square2(); - } else { - Matrix::resizeOrCreate( - inputSquare_, inputV->getHeight(), inputV->getWidth(), false, useGpu_); - inputV->square2(*inputSquare_); - } - latentVectors_->getW()->square2(*latentVectorsSquare_); - tmpOut_->mul(*inputSquare_, *latentVectorsSquare_); - outV->sumRows(*tmpOut_, -0.5, 1.0); - - /* activation */ { - REGISTER_TIMER_INFO("FmFwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void FactorizationMachineLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { backwardActivation(); } - - const MatrixPtr& inputV = getInputValue(0); - const MatrixPtr& oGrad = getOutputGrad(); - - Matrix::resizeOrCreate( - tmpSum_, 1, latentVectors_->getW()->getHeight(), false, useGpu_); - MatrixPtr tmpSumTrans = Matrix::create(tmpSum_->getRowBuf(0), - latentVectors_->getW()->getHeight(), - 1, - false, - useGpu_); - - /* Calculate the gradients of the latentVectors_ matrix */ - if (latentVectors_->getWGrad()) { - if (dynamic_cast(inputV.get())) { - Matrix::resizeOrCreateSparseMatrix(tmpInput_, - inputV->getHeight(), - inputV->getWidth(), - inputV->getElementCnt()); - - CpuSparseMatrix* sparseInputV = - dynamic_cast(inputV.get()); - CpuSparseMatrix* sparseInputSquare = - dynamic_cast(inputSquare_.get()); - CpuSparseMatrix* sparseTmpInput = - dynamic_cast(tmpInput_.get()); - sparseTmpInput->copyFrom(*sparseInputV); - - sparseTmpInput->rowScale(0, *sparseInputV, *oGrad); - latentVectors_->getWGrad()->mul( - *sparseTmpInput->getTranspose(), *inputMulFactor_, 1, 1); - sparseTmpInput->rowScale(0, *sparseInputSquare, *oGrad); - - Matrix::resizeOrCreate(negOnes_, 1, inputV->getHeight(), false, useGpu_); - negOnes_->zeroMem(); - negOnes_->add(-1); 
- tmpSum_->mul(*negOnes_, *sparseTmpInput, 1, 0); - } else { - Matrix::resizeOrCreate( - tmpInput_, inputV->getHeight(), inputV->getWidth(), false, useGpu_); - - tmpInput_->rowScale(0, *inputV, *oGrad); - latentVectors_->getWGrad()->mul( - *tmpInput_->getTranspose(), *inputMulFactor_, 1, 1); - tmpInput_->rowScale(0, *inputSquare_, *oGrad); - - tmpSum_->sumCols(*tmpInput_, -1, 0); - } - - latentVectors_->getWGrad()->addRowScale( - 0, *latentVectors_->getW(), *tmpSumTrans); - - /* Increasing the number of gradient */ - latentVectors_->getParameterPtr()->incUpdate(callback); - } - - /* Calculate the input layers gradient */ - MatrixPtr inGrad = getInputGrad(0); - if (inGrad != NULL) { - inGrad->mul( - *inputMulFactor_, *latentVectors_->getW()->getTranspose(), 1, 1); - tmpSumTrans->sumRows(*latentVectorsSquare_, -1, 0); - inGrad->addColScale(0, *inputV, *tmpSum_); - inGrad->rowScale(0, *inGrad, *oGrad); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/FactorizationMachineLayer.h b/paddle/legacy/gserver/layers/FactorizationMachineLayer.h deleted file mode 100644 index fc015ed727bbd8781bb50a22b8e745d8896837e1..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/FactorizationMachineLayer.h +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { -/** - * @brief The Factorization Machine models pairwise (order-2) feature - * interactions as inner product of the learned latent vectors corresponding - * to each input feature. - * - * The Factorization Machine can effectively capture feature interactions - * especially when the input is sparse. While in principle FM can model higher - * order feature interaction, in practice usually only order-2 feature - * interactions are considered. The Factorization Machine Layer here only - * computes the order-2 interations with the formula: - * - * \f[ - * y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j - * \f] - * - * The detailed calculation for forward and backward can be found at this paper: - * - * Factorization machines. - * - * The config file api is factorization_machine. - */ - -class FactorizationMachineLayer : public Layer { - protected: - // The latent vectors, shape: (size, factorSize_) - // Each row of the latentVectors_ matrix is the latent vector - // corresponding to one input feature dimension - std::unique_ptr latentVectors_; - // The hyperparameter that defines the dimensionality of the factorization - size_t factorSize_; - - private: - // Store the square values of the letent vectors matrix - MatrixPtr latentVectorsSquare_; - // Store the square values of input matrix - MatrixPtr inputSquare_; - // The result of input matrix * latent vector matrix that will be used in - // both forward and backward step - MatrixPtr inputMulFactor_; - // Store temporary calculation result - MatrixPtr tmpOut_; - MatrixPtr tmpSum_; - MatrixPtr tmpInput_; - // Negative identity matrix - MatrixPtr negOnes_; - - public: - explicit FactorizationMachineLayer(const LayerConfig& config) - : Layer(config) {} - ~FactorizationMachineLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& 
parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/FeatureMapExpandLayer.cpp b/paddle/legacy/gserver/layers/FeatureMapExpandLayer.cpp deleted file mode 100644 index a3fe1433e4b5fd7bd77f8d6bb73378243d391dd5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/FeatureMapExpandLayer.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief A layer for expanding a batch of images to feature maps. - * Each data of the input is a 2 dimensional matrix. Each element of the matrix - * is replicated num_filters times to create a feature map with num_filters - * channels. - * - Input: Input one should be dense image data. - * - Output: expanded fature maps. 
- * \f[ - * y.row[i] = x.row[i \mod x.width], i = 0,1,..., (x.width * num\_filters - 1) - * \f] - * For example, num_filters = 4: - * @code - * x = [a1,a2; - * b1,b2] - * y = [a1, a2, a1, a2, a1, a2, a1, a2; - * b1, b2, b1, b2, b1, b2, b1, b2;] - * @endcode - */ - -class FeatureMapExpandLayer : public Layer { - private: - int numFilters_; - bool asRowVector_; - - public: - explicit FeatureMapExpandLayer(const LayerConfig& config) : Layer(config) {} - - ~FeatureMapExpandLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(featmap_expand, FeatureMapExpandLayer); - -bool FeatureMapExpandLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 1UL); - numFilters_ = config_.num_filters(); - asRowVector_ = config_.user_arg() != "as_col_vec"; - return true; -} - -void FeatureMapExpandLayer::forward(PassType passType) { - Layer::forward(passType); - MatrixPtr inputV = getInputValue(0); - size_t batchSize = getInput(0).getBatchSize(); - int imgSize = inputV->getWidth(); - resetOutput(batchSize, imgSize * numFilters_); - - MatrixPtr outputV = getOutputValue(); - - { - AsyncGpuBlock asyncGpuBlock; - if (asRowVector_) { - for (size_t i = 0; i < batchSize; i++) { - MatrixPtr outVTmp = - Matrix::create(outputV->getData() + i * imgSize * numFilters_, - numFilters_, - imgSize, - false, - useGpu_); - MatrixPtr inVTmp = Matrix::create( - inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_); - outVTmp->addRowVector(*inVTmp); - } - } else { - for (size_t i = 0; i < batchSize; i++) { - MatrixPtr outVTmp = - Matrix::create(outputV->getData() + i * imgSize * numFilters_, - imgSize, - numFilters_, - false, - useGpu_); - MatrixPtr inVTmp = Matrix::create( - 
inputV->getData() + i * imgSize, imgSize, 1, false, useGpu_); - outVTmp->addColVector(*inVTmp); - } - } - } - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void FeatureMapExpandLayer::backward(const UpdateCallback& callback) { - MatrixPtr inGrad = getInputGrad(0); - if (NULL == inGrad) { - return; - } - MatrixPtr outGrad = getOutputGrad(); - size_t batchSize = getInput(0).getBatchSize(); - int imgSize = inGrad->getWidth(); - /* Do activation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - { - AsyncGpuBlock asyncGpuBlock; - if (asRowVector_) { - for (size_t i = 0; i < batchSize; i++) { - MatrixPtr outGradTmp = - Matrix::create(outGrad->getData() + i * imgSize * numFilters_, - numFilters_, - imgSize, - false, - useGpu_); - MatrixPtr inGradTmp = Matrix::create( - inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_); - inGradTmp->collectBias(*outGradTmp, 1); - } - } else { - for (size_t i = 0; i < batchSize; i++) { - MatrixPtr outGradTmp = - Matrix::create(outGrad->getData() + i * imgSize * numFilters_, - imgSize, - numFilters_, - false, - useGpu_); - MatrixPtr inGradTmp = Matrix::create( - inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_); - inGradTmp->sumRows(*outGradTmp, 1, 1); - } - } - } -} - -} // namespace paddle. diff --git a/paddle/legacy/gserver/layers/FullMatrixProjection.cpp b/paddle/legacy/gserver/layers/FullMatrixProjection.cpp deleted file mode 100644 index b9f1bc99fab506cc616503608a581702b8e41d01..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/FullMatrixProjection.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "FullMatrixProjection.h" - -namespace paddle { - -REGISTER_PROJECTION(fc, FullMatrixProjection); - -FullMatrixProjection::FullMatrixProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - weight_.reset( - new Weight(config.input_size(), config.output_size(), parameter)); -} - -void FullMatrixProjection::forward() { - REGISTER_TIMER_INFO("FwMulTimer", getName().c_str()); - out_->value->mul(*(in_->value), *(weight_->getW()), 1, 1); -} - -void FullMatrixProjection::backward(const UpdateCallback& callback) { - bool syncFlag = hl_get_sync_flag(); - - /* Calculate the W-gradient for the current layer */ - if (weight_->getWGrad()) { - REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); - weight_->getWGrad()->mul( - *(in_->value->getTranspose()), *(out_->grad), 1, 1); - } - - // If callback does not change value, backward propagation error - // asynchronously, so that we can do the callback concurrently. 
- hl_set_sync_flag(false); - - /* Calculate the input layers error */ - if (in_->grad) { - REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); - in_->grad->mul(*(out_->grad), *(weight_->getW()->getTranspose()), 1, 1); - } - - hl_set_sync_flag(syncFlag); - if (weight_->getWGrad()) { - parameter_->incUpdate(callback); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/FullMatrixProjection.h b/paddle/legacy/gserver/layers/FullMatrixProjection.h deleted file mode 100644 index c33d02a3aeac8e83f613e61320cb6cd63baeae83..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/FullMatrixProjection.h +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/legacy/utils/Stat.h" - -#include "Projection.h" - -namespace paddle { - -/** - * FullMatrixProjection performs full matrix multiplication: - * \f[ - * out.row[i] += in.row[i] * weight - * \f] - * - * The config file api is full_matrix_projection. 
- */ -class FullMatrixProjection : public Projection { - public: - FullMatrixProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu); - virtual void forward(); - virtual void backward(const UpdateCallback& callback); - - protected: - std::unique_ptr weight_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/FullyConnectedLayer.cpp b/paddle/legacy/gserver/layers/FullyConnectedLayer.cpp deleted file mode 100644 index 07f4dfbe39c6b9bc233b3c75b4b5891a1ec9b2ec..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/FullyConnectedLayer.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "FullyConnectedLayer.h" -#include -#include -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(fc, FullyConnectedLayer); - -bool FullyConnectedLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* initialize the weightList */ - CHECK(inputLayers_.size() == parameters_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - // Option the parameters - size_t height = inputLayers_[i]->getSize(); - size_t width = getSize(); - - // create a new weight - if (parameters_[i]->isSparse()) { - CHECK_LE(parameters_[i]->getSize(), width * height); - } else { - CHECK_EQ(parameters_[i]->getSize(), width * height); - } - Weight* w = new Weight(height, width, parameters_[i]); - - // append the new weight to the list - weights_.emplace_back(w); - } - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - - return true; -} - -void FullyConnectedLayer::prefetch() { - for (size_t i = 0; i != inputLayers_.size(); ++i) { - auto* sparseParam = - dynamic_cast(weights_[i]->getW().get()); - if (sparseParam) { - MatrixPtr input = getInputValue(i); - sparseParam->addRows(input); - } - } -} - -void FullyConnectedLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInput(0).getBatchSize(); - int size = getSize(); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - reserveOutput(batchSize, size); - } - - MatrixPtr outV = getOutputValue(); - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - auto input = getInput(i); - CHECK(input.value) << "The input of 'fc' layer must be matrix"; - REGISTER_TIMER_INFO("FwMulTimer", getName().c_str()); - i == 0 ? 
outV->mul(*input.value, *weights_[i]->getW(), 1, 0) - : outV->mul(*input.value, *weights_[i]->getW(), 1, 1); - } - - /* add the bias-vector */ - if (biases_.get() != NULL) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); - outV->addBias(*(biases_->getW()), 1); - } - - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void FullyConnectedLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - - if (biases_ && biases_->getWGrad()) { - REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str()); - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - bool syncFlag = hl_get_sync_flag(); - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - /* Calculate the W-gradient for the current layer */ - if (weights_[i]->getWGrad()) { - MatrixPtr input_T = getInputValue(i)->getTranspose(); - MatrixPtr oGrad = getOutputGrad(); - { - REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); - weights_[i]->getWGrad()->mul(*input_T, *oGrad, 1, 1); - } - } - - // If callback does not change value, backprop error asynchronously so that - // we can do the callback concurrently. 
- hl_set_sync_flag(false); - - /* Calculate the input layers error */ - MatrixPtr preGrad = getInputGrad(i); - if (NULL != preGrad) { - MatrixPtr weights_T = weights_[i]->getW()->getTranspose(); - REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); - preGrad->mul(*getOutputGrad(), *weights_T, 1, 1); - } - - hl_set_sync_flag(syncFlag); - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weights_[i]->getParameterPtr()->incUpdate(callback); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/FullyConnectedLayer.h b/paddle/legacy/gserver/layers/FullyConnectedLayer.h deleted file mode 100644 index 7e29cac0437a8ae735ffb71e5ee901edd79fa7f3..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/FullyConnectedLayer.h +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { -/** - * A layer has full connections to all neurons in the previous layer. - * It computes an inner product with a set of learned weights, and - * (optionally) adds biases. - * - * The config file api is fc_layer. 
- */ - -class FullyConnectedLayer : public Layer { - protected: - WeightList weights_; - std::unique_ptr biases_; - - public: - explicit FullyConnectedLayer(const LayerConfig& config) : Layer(config) {} - ~FullyConnectedLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - Weight& getWeight(int idx) { return *weights_[idx]; } - - void prefetch() override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/GatedRecurrentLayer.cpp b/paddle/legacy/gserver/layers/GatedRecurrentLayer.cpp deleted file mode 100644 index bdcd445cb47de346a8ca496fdaecf7d1f841f51e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/GatedRecurrentLayer.cpp +++ /dev/null @@ -1,414 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "GatedRecurrentLayer.h" -#include "Layer.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(gated_recurrent, GatedRecurrentLayer); - -bool GatedRecurrentLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!Layer::init(layerMap, parameterMap)) return false; - CHECK_EQ(1U, inputLayers_.size()); - CHECK_EQ(1U, parameters_.size()); - CHECK_EQ(getSize() * getSize() * 3, parameters_[0]->getSize()); - CHECK_EQ(getSize() * 3, biasParameter_->getSize()); - weight_.reset(new Weight(getSize(), getSize() * 3, parameters_[0])); - gateWeight_.reset(new Weight(getSize(), getSize() * 2, parameters_[0], 0)); - stateWeight_.reset(new Weight( - getSize(), getSize(), parameters_[0], 2 * getSize() * getSize())); - if (biasParameter_.get() != NULL) { - bias_.reset(new Weight(1, getSize() * 3, biasParameter_)); - } - - reversed_ = config_.reversed(); - activationGate_.reset(ActivationFunction::create(config_.active_gate_type())); - - GruCompute::init(config_); - useBatch_ = true; - - return true; -} - -void GatedRecurrentLayer::resetState() { - CHECK(!reversed_) << "state is not allowed for reversed gated " - "recurrent layer"; - Matrix::resizeOrCreate( - prevOutput_, 1, getSize(), /* trans= */ false, useGpu_); - prevOutput_->zeroMem(); - - // TODO(hedaoyuan): support prev_batch_state - CHECK(!FLAGS_prev_batch_state) << "Not supported"; - - useBatch_ = false; -} - -void GatedRecurrentLayer::setState(LayerStatePtr state) { - CHECK(state->value.size() == 1) - << "one matrix is expected for GatedRecurrentLayer state"; - prevOutput_->copyFrom(*(state->value[0])); -} - -LayerStatePtr GatedRecurrentLayer::getState() { - LayerStatePtr res = std::make_shared(); - res->value.push_back(prevOutput_->clone(0, 0, useGpu_)); - res->value[0]->copyFrom(*prevOutput_); - return res; -} - -void GatedRecurrentLayer::forward(PassType passType) { - REGISTER_TIMER_INFO("GruFwTimer", getName().c_str()); - Layer::forward(passType); - - 
const Argument& input = getInput(0); - CHECK(input.sequenceStartPositions); - int batchSize = input.getBatchSize(); - size_t numSequences = input.getNumSequences(); - resetOutput(batchSize, getSize()); - CHECK_EQ(getSize() * 3, input.value->getWidth()); - const int* starts = input.sequenceStartPositions->getData(false); - // batchSize = length of total frames in a batch (NOT size of mini-batch) - CHECK_EQ(starts[numSequences], batchSize); - - Matrix::resizeOrCreate(gate_.value, - /* height= */ batchSize, - getSize() * 3, - /* trans= */ false, - useGpu_); - Matrix::resizeOrCreate(resetOutput_.value, - /* height= */ batchSize, - getSize(), - /* trans= */ false, - useGpu_); - - if (useBatch_) { - forwardBatch(batchSize, numSequences, starts, input.value); - } else { - forwardSequence(batchSize, numSequences, starts, input.value); - } -} - -void GatedRecurrentLayer::backward(const UpdateCallback& callback) { - REGISTER_TIMER_INFO("GruBwTimer", getName().c_str()); - const Argument& input = getInput(0); - CHECK(input.sequenceStartPositions); - int batchSize = input.getBatchSize(); - const int* starts = input.sequenceStartPositions->getData(false); - size_t numSequences = input.getNumSequences(); - - Matrix::resizeOrCreate(gate_.grad, - /* height= */ batchSize, - getSize() * 3, - /* trans= */ false, - useGpu_); - Matrix::resizeOrCreate(resetOutput_.grad, - /* height= */ batchSize, - getSize(), - /* trans= */ false, - useGpu_); - - if (useBatch_) { - backwardBatch(batchSize, input.grad); - } else { - backwardSequence(batchSize, numSequences, starts, input.grad); - } - - if (bias_) { - bias_->getParameterPtr()->incUpdate(callback); - } - - weight_->getParameterPtr()->incUpdate(callback); -} - -void GatedRecurrentLayer::forwardSequence(int batchSize, - size_t numSequences, - const int* starts, - MatrixPtr inputValue) { - REGISTER_TIMER_INFO("GruFwSequenceTime", getName().c_str()); - gate_.value->assign(*inputValue); - if (bias_) { - gate_.value->addBias(*(bias_->getW()), 1); 
- } - - hl_gru_value gruValue; - gruValue.gateWeight = (gateWeight_->getW())->getData(); - gruValue.stateWeight = (stateWeight_->getW())->getData(); - gruValue.gateValue = gate_.value->getData(); - gruValue.resetOutputValue = resetOutput_.value->getData(); - gruValue.outputValue = output_.value->getData(); - gruValue.prevOutValue = nullptr; - - if (reversed_) { - gruValue.gateValue += (batchSize - 1) * getSize() * 3; - gruValue.resetOutputValue += (batchSize - 1) * getSize(); - gruValue.outputValue += (batchSize - 1) * getSize(); - } - - auto nextFrame = [&gruValue](bool reversed, int frameSize) { - gruValue.prevOutValue = gruValue.outputValue; - if (!reversed) { - gruValue.gateValue += frameSize * 3; - gruValue.resetOutputValue += frameSize; - gruValue.outputValue += frameSize; - } else { - gruValue.gateValue -= frameSize * 3; - gruValue.resetOutputValue -= frameSize; - gruValue.outputValue -= frameSize; - } - }; - - if (!reversed_) { - if (prevOutput_) { - gruValue.prevOutValue = prevOutput_->getData(); - } - } - AsyncGpuBlock asyncGpuBlock; - for (size_t n = 0; n < numSequences; ++n) { - int length; - if (!reversed_) { - length = starts[n + 1] - starts[n]; - } else { - length = starts[numSequences - n] - starts[numSequences - n - 1]; - } - for (int l = 0; l < length; ++l) { - if (useGpu_) { - GruCompute::forward<1>(gruValue, getSize()); - } else { - GruCompute::forward<0>(gruValue, getSize()); - } - - nextFrame(reversed_, getSize()); - } - if (!reversed_) { - if (!prevOutput_) gruValue.prevOutValue = nullptr; - } else { - gruValue.prevOutValue = nullptr; - } - } - - if (!reversed_) { - if (prevOutput_) { - prevOutput_->assign(*output_.value->subMatrix(batchSize - 1, 1)); - } - } -} - -void GatedRecurrentLayer::backwardSequence(int batchSize, - size_t numSequences, - const int* starts, - MatrixPtr inputGrad) { - REGISTER_TIMER_INFO("GruBwSequenceTime", getName().c_str()); - - hl_gru_value gruValue; - gruValue.gateWeight = (gateWeight_->getW())->getData(); - 
gruValue.stateWeight = (stateWeight_->getW())->getData(); - gruValue.gateValue = gate_.value->getData(); - gruValue.resetOutputValue = resetOutput_.value->getData(); - gruValue.outputValue = output_.value->getData(); - - hl_gru_grad gruGrad; - gruGrad.gateWeightGrad = - (gateWeight_->getWGrad() ? gateWeight_->getWGrad()->getData() : nullptr); - gruGrad.stateWeightGrad = - (stateWeight_->getWGrad() ? stateWeight_->getWGrad()->getData() - : nullptr); - gruGrad.gateGrad = gate_.grad->getData(); - gruGrad.resetOutputGrad = resetOutput_.grad->getData(); - gruGrad.outputGrad = output_.grad->getData(); - - if (!reversed_) { - gruValue.gateValue += (batchSize - 1) * getSize() * 3; - gruValue.resetOutputValue += (batchSize - 1) * getSize(); - gruValue.outputValue += (batchSize - 1) * getSize(); - gruGrad.gateGrad += (batchSize - 1) * getSize() * 3; - gruGrad.resetOutputGrad += (batchSize - 1) * getSize(); - gruGrad.outputGrad += (batchSize - 1) * getSize(); - gruValue.prevOutValue = gruValue.outputValue - getSize(); - gruGrad.prevOutGrad = gruGrad.outputGrad - getSize(); - } else { - gruValue.prevOutValue = gruValue.outputValue + getSize(); - gruGrad.prevOutGrad = gruGrad.outputGrad + getSize(); - } - - auto nextFrame = [&gruValue, &gruGrad](bool reversed, int frameSize) { - if (reversed) { - gruValue.gateValue += frameSize * 3; - gruValue.resetOutputValue += frameSize; - gruValue.outputValue += frameSize; - gruGrad.gateGrad += frameSize * 3; - gruGrad.resetOutputGrad += frameSize; - gruGrad.outputGrad += frameSize; - gruValue.prevOutValue = gruValue.outputValue + frameSize; - gruGrad.prevOutGrad = gruGrad.outputGrad + frameSize; - } else { - gruValue.gateValue -= frameSize * 3; - gruValue.resetOutputValue -= frameSize; - gruValue.outputValue -= frameSize; - gruGrad.gateGrad -= frameSize * 3; - gruGrad.resetOutputGrad -= frameSize; - gruGrad.outputGrad -= frameSize; - gruValue.prevOutValue = gruValue.outputValue - frameSize; - gruGrad.prevOutGrad = gruGrad.outputGrad - 
frameSize; - } - }; - - { - AsyncGpuBlock asyncGpuBlock; - for (size_t n = 0; n < numSequences; ++n) { - int length; - if (reversed_) { - length = starts[n + 1] - starts[n]; - } else { - length = starts[numSequences - n] - starts[numSequences - n - 1]; - } - for (int l = 0; l < length; ++l) { - if (l == length - 1) { - gruValue.prevOutValue = nullptr; - gruGrad.prevOutGrad = nullptr; - } - if (useGpu_) { - GruCompute::backward<1>(gruValue, gruGrad, getSize()); - } else { - GruCompute::backward<0>(gruValue, gruGrad, getSize()); - } - nextFrame(reversed_, getSize()); - } - } - } - - if (inputGrad) { - inputGrad->add(*gate_.grad); - } - if (bias_ && bias_->getWGrad()) { - bias_->getWGrad()->collectBias(*gate_.grad, 1); - } -} - -void GatedRecurrentLayer::forwardBatch(int batchSize, - size_t numSequences, - const int* starts, - MatrixPtr inputValue) { - REGISTER_TIMER_INFO("GruFwBatchTime", getName().c_str()); - hl_gru_value gruValue; - gruValue.gateWeight = (gateWeight_->getW())->getData(); - gruValue.stateWeight = (stateWeight_->getW())->getData(); - - if (!batchValue_) { - batchValue_.reset(new SequenceToBatch(useGpu_)); - } - batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts, reversed_); - - batchValue_->resizeOrCreate(*output_.value); - batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */ true); - if (bias_) { - gate_.value->addBias(*(bias_->getW()), 1); - } - - { - int numBatch = batchValue_->getNumBatch(); - int curBatchSize = 0; - AsyncGpuBlock asyncGpuBlock; - for (int n = 0; n < numBatch; n++) { - MatrixPtr outputValueTmp = batchValue_->getBatchValue(n); - gruValue.outputValue = outputValueTmp->getData(); - gruValue.gateValue = - (batchValue_->getBatchValue(*gate_.value, n))->getData(); - gruValue.resetOutputValue = - (batchValue_->getBatchValue(*resetOutput_.value, n))->getData(); - - curBatchSize = outputValueTmp->getHeight(); - gruValue.prevOutValue = - (n == 0 - ? 
nullptr - : (batchValue_->getBatchValue(n - 1, curBatchSize))->getData()); - - { - if (useGpu_) { - GruCompute::forward<1>(gruValue, getSize(), curBatchSize); - } else { - GruCompute::forward<0>(gruValue, getSize(), curBatchSize); - } - } - } - } - { batchValue_->copyBackSeq(*output_.value); } -} - -void GatedRecurrentLayer::backwardBatch(int batchSize, MatrixPtr inputGrad) { - REGISTER_TIMER_INFO("GruBwBatchTime", getName().c_str()); - hl_gru_value gruValue; - gruValue.gateWeight = (gateWeight_->getW())->getData(); - gruValue.stateWeight = (stateWeight_->getW())->getData(); - - hl_gru_grad gruGrad; - gruGrad.gateWeightGrad = - (gateWeight_->getWGrad() ? gateWeight_->getWGrad()->getData() : nullptr); - gruGrad.stateWeightGrad = - (stateWeight_->getWGrad() ? stateWeight_->getWGrad()->getData() - : nullptr); - - if (!batchGrad_) { - batchGrad_.reset(new SequenceToBatch(useGpu_)); - } - batchGrad_->shareIndexWith(*batchValue_); - - { batchGrad_->copyFromSeq(*output_.grad); } - - { - int numBatch = batchGrad_->getNumBatch(); - int batchSize = 0; - AsyncGpuBlock asyncGpuBlock; - for (int n = (int)numBatch - 1; n >= 0; n--) { - gruValue.gateValue = - (batchGrad_->getBatchValue(*gate_.value, n))->getData(); - gruValue.resetOutputValue = - (batchGrad_->getBatchValue(*resetOutput_.value, n))->getData(); - - MatrixPtr outputGradTmp = batchGrad_->getBatchValue(n); - gruGrad.outputGrad = outputGradTmp->getData(); - gruGrad.gateGrad = (batchGrad_->getBatchValue(*gate_.grad, n))->getData(); - gruGrad.resetOutputGrad = - (batchGrad_->getBatchValue(*resetOutput_.grad, n))->getData(); - - { - batchSize = outputGradTmp->getHeight(); - gruValue.prevOutValue = - (n == 0 - ? nullptr - : (batchValue_->getBatchValue(n - 1, batchSize))->getData()); - gruGrad.prevOutGrad = - (n == 0 ? 
nullptr - : (batchGrad_->getBatchValue(n - 1, batchSize))->getData()); - - if (useGpu_) { - GruCompute::backward<1>(gruValue, gruGrad, getSize(), batchSize); - } else { - GruCompute::backward<0>(gruValue, gruGrad, getSize(), batchSize); - } - } - } - } - - if (inputGrad) { - batchGrad_->add(*inputGrad, *gate_.grad, /* seq2batch */ false); - } - if (bias_ && bias_->getWGrad()) { - bias_->getWGrad()->collectBias(*gate_.grad, /* scale */ 1); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/GatedRecurrentLayer.h b/paddle/legacy/gserver/layers/GatedRecurrentLayer.h deleted file mode 100644 index 8bbf01ce200c9922f49508b0499aa9422745f474..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/GatedRecurrentLayer.h +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "GruCompute.h" -#include "Layer.h" -#include "SequenceToBatch.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief Please refer to "Junyoung Chung, Empirical Evaluation - * of Gated Recurrent Neural Networks on Sequence Modeling". - * - * GatedRecurrentLayer takes 1 input layer with size * 3. - * Input layer is diveded into 3 equal parts: (xz_t, xr_t, xi_t). 
- * parameter and biasParameter is also diveded into 3 equal parts: - * - parameter consists of (U_z, U_r, U) - * - baisParameter consists of (bias_z, bias_r, bias_o) - * - * \f[ - * update \ gate: z_t = actGate(xz_t + U_z * h_{t-1} + bias_z) \\ - * reset \ gate: r_t = actGate(xr_t + U_r * h_{t-1} + bias_r) \\ - * output \ candidate: {h}_t = actNode(xi_t + U * dot(r_t, h_{t-1}) + bias_o) \\ - * hidden \ activation: h_t = dot((1-z_t), h_{t-1}) + dot(z_t, {h}_t) \\ - * \f] - * - * @note - * - dot denotes "element-wise multiplication". - * - actNode is defined by config active_type - * - actGate is defined by config actvie_gate_type - * - * The config file is grumemory. - */ - -class GatedRecurrentLayer : public Layer, public GruCompute { - public: - explicit GatedRecurrentLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback) override; - - void resetState() override; - - void setState(LayerStatePtr state) override; - - LayerStatePtr getState() override; - - protected: - void forwardSequence(int batchSize, - size_t numSequences, - const int* starts, - MatrixPtr inputValue); - void backwardSequence(int batchSize, - size_t numSequences, - const int* starts, - MatrixPtr inputGrad); - - void forwardBatch(int batchSize, - size_t numSequences, - const int* starts, - MatrixPtr inputValue); - void backwardBatch(int batchSize, MatrixPtr inputGrad); - - protected: - std::unique_ptr weight_; - std::unique_ptr gateWeight_; - std::unique_ptr stateWeight_; - std::unique_ptr bias_; - - Argument gate_; - Argument resetOutput_; - - bool reversed_; - bool useBatch_; - std::unique_ptr batchValue_; - std::unique_ptr batchGrad_; - std::unique_ptr activationGate_; - - MatrixPtr prevOutput_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/GetOutputLayer.cpp 
b/paddle/legacy/gserver/layers/GetOutputLayer.cpp deleted file mode 100644 index 7c1e3c407cca374c7aa238d07e2263c4a142b6a5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/GetOutputLayer.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" - -namespace paddle { - -class GetOutputLayer : public Layer { - public: - explicit GetOutputLayer(const LayerConfig& config) : Layer(config) {} - - ~GetOutputLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - if (!Layer::init(layerMap, parameterMap)) return false; - CHECK_EQ(1U, inputLayers_.size()); - CHECK_NE(inputArgument_[0], ""); - return true; - } - - void forward(PassType passType) override { - output_ = getPrev(0)->getOutput(inputArgument_[0]); - } - void backward(const UpdateCallback& callback = nullptr) override {} -}; - -REGISTER_LAYER(get_output, GetOutputLayer); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/GruCompute.cpp b/paddle/legacy/gserver/layers/GruCompute.cpp deleted file mode 100644 index adad6285b7d5acd8780444ffeab6627531683cb7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/GruCompute.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "GruCompute.h" -#include "hl_recurrent_apply.cuh" -#include "paddle/legacy/function/GruFunctor.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -void GruCompute::init(LayerConfig &config) { - activeNode_ = hlActiveType(config.active_type()); - activeGate_ = hlActiveType(config.active_gate_type()); -} - -template <> -void GruCompute::forward<0>(hl_gru_value value, int frameSize, int batchSize) { - GruFunctor::compute(hppl::forward::gru_resetOutput(), - hppl::forward::gru_finalOutput(), - value, - frameSize, - batchSize, - activeNode_, - activeGate_); -} - -template <> -void GruCompute::backward<0>(hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize) { - GruGradFunctor::compute( - hppl::backward::gru_stateGrad(), - hppl::backward::gru_resetGrad(), - value, - grad, - frameSize, - batchSize, - activeNode_, - activeGate_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/GruCompute.cu b/paddle/legacy/gserver/layers/GruCompute.cu deleted file mode 100644 index 54be6b804753df76c0ffe3edee234dc8842f1df4..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/GruCompute.cu +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "GruCompute.h" - -#include "hl_recurrent_apply.cuh" - -namespace paddle { - -template <> -void GruCompute::forward<1>(hl_gru_value value, int frameSize, int batchSize) { - hl_gpu_gru_forward(hppl::forward::gru_resetOutput(), - hppl::forward::gru_finalOutput(), - value, - frameSize, - batchSize, - activeNode_, - activeGate_); -} - -template <> -void GruCompute::backward<1>(hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize) { - hl_gpu_gru_backward(hppl::backward::gru_stateGrad(), - hppl::backward::gru_resetGrad(), - value, - grad, - frameSize, - batchSize, - activeNode_, - activeGate_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/GruCompute.h b/paddle/legacy/gserver/layers/GruCompute.h deleted file mode 100644 index 6feea7aca81b8618071893581a4e16d8ad38101c..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/GruCompute.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "ModelConfig.pb.h" -#include "hl_gpu.h" -#include "paddle/legacy/utils/Common.h" - -namespace paddle { - -class GruCompute { - public: - void init(LayerConfig &config); - - template - void forward(hl_gru_value value, int frameSize, int batchSize = 1); - - template - void backward(hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize = 1); - - public: - hl_activation_mode_t activeNode_; - hl_activation_mode_t activeGate_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/GruStepLayer.cpp b/paddle/legacy/gserver/layers/GruStepLayer.cpp deleted file mode 100644 index 2480e42d68b87ee406efc2b220b9ad6bf5cacbd6..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/GruStepLayer.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "GruCompute.h" -#include "Layer.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief GruStepLayer is like GatedRecurrentLayer, but used in recurrent - * layer group. GruStepLayer takes 2 input layer. - * - input[0] with size * 3 and diveded into 3 equal parts: (xz_t, xr_t, xi_t). - * - input[1] with size: {prev_out}. 
- * - * parameter and biasParameter is also diveded into 3 equal parts: - * - parameter consists of (U_z, U_r, U) - * - baisParameter consists of (bias_z, bias_r, bias_o) - * - * \f[ - * update \ gate: z_t = actGate(xz_t + U_z * prev_out + bias_z) \\ - * reset \ gate: r_t = actGate(xr_t + U_r * prev_out + bias_r) \\ - * output \ candidate: {h}_t = actNode(xi_t + U * dot(r_t, prev_out) + bias_o) - * \\ - * output: h_t = dot((1-z_t), prev_out) + dot(z_t, prev_out) - * \f] - * - * @note - * - dot denotes "element-wise multiplication". - * - actNode is defined by config active_type - * - actGate is defined by config actvie_gate_type - * - * The config file api if gru_step_layer. - */ -class GruStepLayer : public Layer, public GruCompute { - protected: - Argument gate_; - Argument resetOutput_; - std::unique_ptr weight_; - std::unique_ptr bias_; - - public: - explicit GruStepLayer(const LayerConfig& config) : Layer(config) {} - - ~GruStepLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(gru_step, GruStepLayer); - -bool GruStepLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!Layer::init(layerMap, parameterMap)) return false; - CHECK_EQ(2U, inputLayers_.size()); - - CHECK_EQ(getSize() * getSize() * 3, parameters_[0]->getSize()); - weight_.reset(new Weight(getSize(), getSize() * 3, parameters_[0])); - - if (biasParameter_.get() != NULL) { - CHECK_EQ(getSize() * 3, biasParameter_->getSize()); - bias_.reset(new Weight(1, getSize() * 3, biasParameter_)); - } - - GruCompute::init(config_); - return true; -} - -void GruStepLayer::forward(PassType passType) { - REGISTER_TIMER_INFO("GruStepFwTime", getName().c_str()); - Layer::forward(passType); - - const Argument& input = getInput(0); - const Argument& prevOutput = getInput(1); - CHECK_EQ(getSize() * 3, 
input.value->getWidth()); - CHECK_EQ(getSize(), prevOutput.value->getWidth()); - - int batchSize = input.getBatchSize(); - resetOutput(batchSize, getSize()); - resetSpecifyOutput(gate_, - batchSize, - getSize() * 3, - /* isValueClean */ false, - /* isGradClean */ false); - resetSpecifyOutput(resetOutput_, - batchSize, - getSize(), - /* isValueClean */ false, - /* isGradClean */ false); - gate_.value->assign(*input.value); - if (bias_) { - gate_.value->addBias(*(bias_->getW()), 1); - } - - hl_gru_value gruValue; - gruValue.gateWeight = weight_->getW()->getData(); - gruValue.stateWeight = weight_->getW()->getData() + getSize() * getSize() * 2; - gruValue.gateValue = gate_.value->getData(); - gruValue.resetOutputValue = resetOutput_.value->getData(); - gruValue.outputValue = output_.value->getData(); - gruValue.prevOutValue = prevOutput.value->getData(); - - if (useGpu_) { - GruCompute::forward<1>(gruValue, getSize(), batchSize); - } else { - GruCompute::forward<0>(gruValue, getSize(), batchSize); - } -} - -void GruStepLayer::backward(const UpdateCallback& callback) { - REGISTER_TIMER_INFO("GruStepBwTime", getName().c_str()); - - const Argument& input = getInput(0); - const Argument& prevOutput = getInput(1); - int batchSize = input.getBatchSize(); - - hl_gru_value gruValue; - gruValue.gateWeight = weight_->getW()->getData(); - gruValue.stateWeight = weight_->getW()->getData() + getSize() * getSize() * 2; - gruValue.gateValue = gate_.value->getData(); - gruValue.resetOutputValue = resetOutput_.value->getData(); - gruValue.outputValue = output_.value->getData(); - gruValue.prevOutValue = prevOutput.value->getData(); - - hl_gru_grad gruGrad; - gruGrad.gateWeightGrad = - (weight_->getWGrad() ? weight_->getWGrad()->getData() : nullptr); - gruGrad.stateWeightGrad = - (weight_->getWGrad() - ? 
weight_->getWGrad()->getData() + getSize() * getSize() * 2 - : nullptr); - - gruGrad.gateGrad = gate_.grad->getData(); - gruGrad.resetOutputGrad = resetOutput_.grad->getData(); - gruGrad.outputGrad = output_.grad->getData(); - if (prevOutput.grad) { - gruGrad.prevOutGrad = prevOutput.grad->getData(); - } else { - gruGrad.prevOutGrad = nullptr; - } - - if (useGpu_) { - GruCompute::backward<1>(gruValue, gruGrad, getSize(), batchSize); - } else { - GruCompute::backward<0>(gruValue, gruGrad, getSize(), batchSize); - } - - if (input.grad) { - input.grad->add(*gate_.grad); - } - - if (bias_ && bias_->getWGrad()) { - bias_->getWGrad()->collectBias(*gate_.grad, 1); - } - - if (bias_) { - bias_->getParameterPtr()->incUpdate(callback); - } - weight_->getParameterPtr()->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.cpp b/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.cpp deleted file mode 100644 index 34495994096a87640bdeef777feb5cd783cd4598..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "HierarchicalSigmoidLayer.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -REGISTER_LAYER(hsigmoid, HierarchicalSigmoidLayer); - -bool HierarchicalSigmoidLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK(config_.has_num_classes()) << "num_classes must be specifed in config"; - numClasses_ = config_.num_classes(); - CHECK_GE(numClasses_, (size_t)2); - codeLength_ = findLastSet(numClasses_ - 1); - - size_t height = numClasses_ - 1; - - /* initialize the weightList */ - // The last input layer is for label - CHECK(!parameters_.back()); - for (size_t i = 0; i < inputLayers_.size() - 1; i++) { - size_t width = inputLayers_[i]->getSize(); - // create a new weight - CHECK_EQ(parameters_[i]->getSize(), width * height); - Weight* w = new Weight(height, width, parameters_[i]); - - // append the new weight to the list - weights_.emplace_back(w); - } - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - CHECK_EQ(biasParameter_->getSize(), numClasses_ - 1); - biases_.reset(new Weight(1, numClasses_ - 1, biasParameter_)); - } - - return true; -} - -void HierarchicalSigmoidLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInputValue(0)->getHeight(); - int size = getSize(); - reserveOutput(batchSize, size); - Matrix::resizeOrCreate(preOutput_.value, - batchSize, - codeLength_, - /* trans */ false, - false); - Matrix::resizeOrCreate(preOutput_.grad, - batchSize, - codeLength_, - /* trans */ false, - false); - IVectorPtr label = getInput(*getLabelLayer()).ids; - preOutput_.value->zeroMem(); - - if (useGpu_) { - Matrix::resizeOrCreate(cpuOutput_, - output_.value->getHeight(), - output_.value->getWidth(), - /* trans */ false, - false); - IVector::resizeOrCreate(cpuLabel_, label->getSize(), false); - cpuLabel_->copyFrom(*label); - 
cpuOutput_->copyFrom(*output_.value); - } else { - cpuOutput_ = output_.value; - cpuLabel_ = label; - } - /* add the bias-vector */ - if (biases_.get() != NULL) { - if (useGpu_) { - Matrix::resizeOrCreate(cpuBias_, - 1, - numClasses_ - 1, - /* trans */ false, - false); - cpuBias_->copyFrom(*biases_->getW()); - } else { - cpuBias_ = biases_->getW(); - } - preOutput_.value->addByBitCode(numClasses_, *cpuLabel_, *cpuBias_); - } - for (size_t i = 0; i < inputLayers_.size() - 1; ++i) { - MatrixPtr input = getInputValue(i); - if (useGpu_) { - Matrix::resizeOrCreate(cpuInput_, - input->getHeight(), - input->getWidth(), - /* trans */ false, - false); - Matrix::resizeOrCreate(cpuWeight_, - weights_[i]->getW()->getHeight(), - weights_[i]->getW()->getWidth(), - /* trans */ false, - false); - cpuInput_->copyFrom(*input); - cpuWeight_->copyFrom(*weights_[i]->getW()); - } else { - cpuInput_ = input; - cpuWeight_ = weights_[i]->getW(); - } - preOutput_.value->mulByBitCode( - numClasses_, *cpuLabel_, *cpuWeight_, *cpuInput_); - } - // keep consistent with the clipping in the following softrelu - preOutput_.value->clip(-40.0, 40.0); - preOutput_.value->sumByBitCode(numClasses_, - *cpuLabel_, - *cpuOutput_, - -1); // scaleSum - preOutput_.value->softrelu(*preOutput_.value); - MatrixPtr sum = Matrix::create(batchSize, 1, /* trans= */ false, false); - preOutput_.value->rowSum(*sum); - cpuOutput_->add(*sum); - if (useGpu_) { - output_.value->copyFrom(*cpuOutput_); - } else { - output_.value = cpuOutput_; - } -} - -void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { - IVectorPtr label = getInput(*getLabelLayer()).ids; - if (useGpu_) { - IVector::resizeOrCreate(cpuLabel_, label->getSize(), false); - cpuLabel_->copyFrom(*label); - } else { - cpuLabel_ = label; - } - preOutput_.grad->one(); - preOutput_.grad->softreluDerivative(*preOutput_.value); - preOutput_.grad->subByBitCode(numClasses_, *cpuLabel_); - - if (biases_ && biases_->getWGrad()) { - MatrixPtr 
biases_grad = biases_->getWGrad(); - if (useGpu_) { - Matrix::resizeOrCreate(cpuBias_, - 1, - numClasses_ - 1, - /* trans */ false, - false); - cpuBias_->copyFrom(*biases_grad); - } else { - cpuBias_ = biases_grad; - } - preOutput_.grad->addByBitCodeBackward(numClasses_, *cpuLabel_, *cpuBias_); - if (useGpu_) { - biases_grad->copyFrom(*cpuBias_); - } else { - biases_grad = cpuBias_; - } - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - for (size_t i = 0; i < inputLayers_.size() - 1; ++i) { - /* Calculate the W-gradient for the current layer */ - MatrixPtr input = getInputValue(i); - if (weights_[i]->getWGrad()) { - MatrixPtr weights_grad = weights_[i]->getWGrad(); - if (useGpu_) { - Matrix::resizeOrCreate(cpuInput_, - input->getHeight(), - input->getWidth(), - /* trans */ false, - false); - Matrix::resizeOrCreate(cpuWeightGrad_, - weights_grad->getHeight(), - weights_grad->getWidth(), - /* trans */ false, - false); - cpuInput_->copyFrom(*input); - cpuWeightGrad_->copyFrom(*weights_grad); - } else { - cpuInput_ = input; - cpuWeightGrad_ = weights_grad; - } - preOutput_.grad->mulByBitCodeBackwardWeight( - numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_); - if (useGpu_) { - weights_grad->copyFrom(*cpuWeightGrad_); - } else { - weights_grad = cpuWeightGrad_; - } - /* Increasing the number of gradient */ - weights_[i]->getParameterPtr()->incUpdate(callback); - } - - /* Calculate the input layers error */ - MatrixPtr inputGrad = getInputGrad(i); - if (inputGrad) { - if (useGpu_) { - Matrix::resizeOrCreate(cpuInputGrad_, - inputGrad->getHeight(), - inputGrad->getWidth(), - /* trans */ false, - false); - Matrix::resizeOrCreate(cpuWeight_, - weights_[i]->getW()->getHeight(), - weights_[i]->getW()->getWidth(), - /* trans */ false, - false); - cpuInputGrad_->copyFrom(*inputGrad); - cpuWeight_->copyFrom(*weights_[i]->getW()); - } else { - cpuInputGrad_ = inputGrad; - cpuWeight_ = weights_[i]->getW(); - } - 
preOutput_.grad->mulByBitCodeBackwardError( - numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_); - if (useGpu_) { - inputGrad->copyFrom(*cpuInputGrad_); - } else { - inputGrad = cpuInputGrad_; - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.h b/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.h deleted file mode 100644 index 73ef252fd5a5443fe065f3b7bd8c49951ae0b4bd..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.h +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" - -namespace paddle { - -/** - * Organize the classes into a binary tree. At each node, a sigmoid function - * is used to calculate the probability of belonging to the right branch. - * This idea is from "F. Morin, Y. Bengio (AISTATS 05): - * Hierarchical Probabilistic Neural Network Language Model." - * - * Here we uses a simple way of making the binary tree. - * Assuming the number of classes C = 6, - * The classes are organized as a binary tree in the following way: - * - * @code{.py} - * *-*-*- 2 - * | | |- 3 - * | | - * | |-*- 4 - * | |- 5 - * | - * |-*- 0 - * |- 1 - * @endcode - * - * where * indicates an internal node, and each leaf node represents a class. - * - Node 0 ... C-2 are internal nodes. - * - Node C-1 ... 2C-2 are leaf nodes. 
- * - Class c is represented by leaf node \f$c+C-1\f$. - * - * We assign an id for each node: - * - the id of root be 0. - * - the left child of a node i is 2*i+1. - * - the right child of a node i is 2*i+2. - * - * It's easy to see that: - * - the parent of node i is \f$\left\lfloor(i-1)/2\right\rfloor\f$. - * - the j-th level ancestor of node i is - * \f$\left\lfloor(i+1)/2^{j+1}\right\rfloor - 1\f$. - * - A node i is a left child of its parent if \f$(i-1)\%2==0\f$. - * - * The config file api is hsigmod_layer. - */ -class HierarchicalSigmoidLayer : public Layer { - public: - explicit HierarchicalSigmoidLayer(const LayerConfig& config) - : Layer(config) {} - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - - protected: - /** - * The last of inputs is label layer. - */ - LayerPtr getLabelLayer() { return inputLayers_.back(); } - - WeightList weights_; - std::unique_ptr biases_; - /// number of classes - size_t numClasses_; - /// codeLength_ = \f$1 + \left\lfloor log_{2}(numClasses-1)\right\rfloor\f$ - int codeLength_; - /// temporary result of output_ - Argument preOutput_; - - /// The temporary variables in CPU memory. - MatrixPtr cpuWeight_; - MatrixPtr cpuWeightGrad_; - MatrixPtr cpuInput_; - MatrixPtr cpuInputGrad_; - MatrixPtr cpuBias_; - MatrixPtr cpuOutput_; - IVectorPtr cpuLabel_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/IdentityProjection.cpp b/paddle/legacy/gserver/layers/IdentityProjection.cpp deleted file mode 100644 index f707642e09b86721a88142ab8b745bb3492e820c..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/IdentityProjection.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Projection.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * IdentityProjection performs addition: - * \f[ - * out.row[i] += in.row[i] - * \f] - * - * The config file api is identity_projection. - */ -class IdentityProjection : public Projection { - public: - IdentityProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu); - virtual void forward(); - virtual void backward(const UpdateCallback& callback); -}; - -REGISTER_PROJECTION(identity, IdentityProjection); - -/** - * Constructor. - * @note IdentityProjection should not have any parameter. - */ -IdentityProjection::IdentityProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - CHECK(!parameter) << "'identity' projection should not have any parameter"; -} - -void IdentityProjection::forward() { out_->value->add(*in_->value); } - -void IdentityProjection::backward(const UpdateCallback& callback) { - if (in_->grad) { - in_->grad->add(*out_->grad); - } -} - -/** - * IdentityOffsetProjection is like IdentityProjection, but layer size may be - * smaller - * than input size. It selects dimensions [offset, offset+layer_size) from input - * to - * perform addition: - * \f[ - * out.row[i] += in.row[i + \textrm{offset}] - * \f] - * - * The config file api is identity_projection.
- */ -class IdentityOffsetProjection : public Projection { - public: - IdentityOffsetProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu); - virtual void forward(); - virtual void backward(const UpdateCallback& callback); -}; - -REGISTER_PROJECTION(identity_offset, IdentityOffsetProjection); - -/** - * Constructed function. - * @note IdentityOffsetProjection should not have any parameter. - */ -IdentityOffsetProjection::IdentityOffsetProjection( - const ProjectionConfig& config, const ParameterPtr& parameter, bool useGpu) - : Projection(config, parameter, useGpu) { - CHECK(!parameter) << "'identity_offset' projection " - "should not have any parameter"; - CHECK_LE(config.output_size() + config.offset(), config.input_size()); -} - -void IdentityOffsetProjection::forward() { - out_->value->addAtOffset(*in_->value, config_.offset()); -} - -void IdentityOffsetProjection::backward(const UpdateCallback& callback) { - if (in_->grad) { - in_->grad->addAtOffset(*out_->grad, config_.offset()); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/InterpolationLayer.cpp b/paddle/legacy/gserver/layers/InterpolationLayer.cpp deleted file mode 100644 index ed2294e8a397edfee6ad3c1f52235970d6ad48a9..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/InterpolationLayer.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * A layer for linear interpolation with two inputs, - * which is used in NEURAL TURING MACHINE. - * \f[ - * y.row[i] = w[i] * x_1.row[i] + (1 - w[i]) * x_2.row[i] - * \f] - * where \f$x_1\f$ and \f$x_2\f$ are two (batchSize x dataDim) inputs, - * \f$w\f$ is (batchSize x 1) weight vector, - * and \f$y\f$ is (batchSize x dataDim) output. - * - * The config file api is interpolation_layer. - */ - -class InterpolationLayer : public Layer { - protected: - /// weightLast = 1 - weight - MatrixPtr weightLast_; - MatrixPtr tmpMatrix; - - public: - explicit InterpolationLayer(const LayerConfig& config) : Layer(config) {} - - ~InterpolationLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(interpolation, InterpolationLayer); - -bool InterpolationLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(3U, inputLayers_.size()); - - return true; -} - -void InterpolationLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr weightV = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr inV2 = getInputValue(2); - - size_t batchSize = inV1->getHeight(); - size_t dataDim = inV1->getWidth(); - - CHECK_EQ(dataDim, getSize()); - CHECK_EQ(dataDim, inV2->getWidth()); - CHECK_EQ(batchSize, inV1->getHeight()); - CHECK_EQ(batchSize, inV2->getHeight()); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - resetOutput(batchSize, dataDim); - } - - MatrixPtr outV = getOutputValue(); - - Matrix::resizeOrCreate(weightLast_, batchSize, 1, false, useGpu_); - weightLast_->one(); - 
weightLast_->sub(*weightV); - - REGISTER_TIMER_INFO("FwInterpTimer", getName().c_str()); - // outV = inV1 * weight + inV2 * weightLast - outV->addRowScale(0, *inV1, *weightV); - outV->addRowScale(0, *inV2, *weightLast_); -} - -void InterpolationLayer::backward(const UpdateCallback& callback) { - MatrixPtr outG = getOutputGrad(); - MatrixPtr weightV = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr inV2 = getInputValue(2); - MatrixPtr inG0 = getInputGrad(0); - MatrixPtr inG1 = getInputGrad(1); - MatrixPtr inG2 = getInputGrad(2); - - size_t batchSize = inV1->getHeight(); - size_t dataDim = inV1->getWidth(); - - REGISTER_TIMER_INFO("BwInterpTimer", getName().c_str()); - - if (inG0) { - Matrix::resizeOrCreate(tmpMatrix, batchSize, dataDim, false, useGpu_); - - // inG0 += outG .* (inV1 - inV2) - tmpMatrix->sub(*inV1, *inV2); - inG0->rowDotMul(0, *outG, *tmpMatrix); - } - - if (inG1) { - // inG1 += outG * weight - inG1->addRowScale(0, *outG, *weightV); - } - - if (inG2) { - // inG2 += outG * weightLast - inG2->addRowScale(0, *outG, *weightLast_); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/legacy/gserver/layers/KmaxSeqScoreLayer.cpp deleted file mode 100644 index 7fd25954efeb9d9e672040f9909198f2ae3c0449..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/KmaxSeqScoreLayer.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" - -namespace paddle { - -class KmaxSeqScoreLayer : public Layer { - private: - MatrixPtr scores_; - size_t beamSize_; - void kmaxScorePerSeq(const real* score, - real* sortedRes, - const ICpuGpuVectorPtr seqStartPos); - - public: - explicit KmaxSeqScoreLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer); - -bool KmaxSeqScoreLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - bool ret = Layer::init(layerMap, parameterMap); - CHECK_EQ(1U, inputLayers_.size()); - - beamSize_ = config_.beam_size(); - CHECK_GE(beamSize_, 1U); - - setNeedSequenceInfo(false); - setNeedGradient(false); - return ret; -} - -void KmaxSeqScoreLayer::kmaxScorePerSeq(const real* scores, - real* sortedIds, - const ICpuGpuVectorPtr seqStartPos) { - int* starts = seqStartPos->getMutableData(false); - std::vector indices; - for (size_t i = 0; i < seqStartPos->getSize() - 1; ++i) { - int seqLen = starts[i + 1] - starts[i]; - int k = std::min(static_cast(beamSize_), seqLen); - - indices.resize(seqLen, 0); - std::iota(begin(indices), end(indices), 0.); - std::vector tmpScore(scores + starts[i], scores + starts[i + 1]); - std::partial_sort( - begin(indices), - begin(indices) + k, - end(indices), - [&](size_t a, size_t b) { return tmpScore[a] > tmpScore[b]; }); - memcpy(sortedIds + (i * beamSize_), indices.data(), k * sizeof(real)); - } -} - -void KmaxSeqScoreLayer::forward(PassType passType) { - Layer::forward(passType); - - const Argument& input = getInput(0); - const MatrixPtr inputScore = getInputValue(0); - - CHECK(input.hasSeq() || input.hasSubseq()) - << "input of " << getName() - << " must be a 
sequence or a nested sequence."; - CHECK_EQ(input.value->getWidth(), 1UL) - << "input of " << getName() << " are scores over a sequence or " - << "a nested sequence, so its width must be 1."; - - if (useGpu_) { - /* - * Currently, this layer only runs on CPU. If the other part of the model is - * running on GPU, then copy the input to this layer from GPU to CPU. - */ - Matrix::resizeOrCreate(scores_, - inputScore->getHeight(), - 1, - false /* trans */, - false /* useGpu */); - scores_->copyFrom(*inputScore); - } else { - scores_ = inputScore; - } - - /* - * TODO(caoying) - * In PaddlePaddle, currently all matrices are real number types, - * but the output of this layer, which is some selected indices of the given - * sequence, is actually filled with int types, so storing int type - * information in a real number matrix is dangerous, since real numbers will - * be converted to int types. - */ - Matrix::resizeOrCreate( - output_.value, - input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), - beamSize_, - false, - false); - output_.value->one(); - output_.value->mulScalar(-1.); - - kmaxScorePerSeq(scores_->getData(), - output_.value->getData(), - input.hasSubseq() ? input.subSequenceStartPositions - : input.sequenceStartPositions); -} - -void KmaxSeqScoreLayer::backward(const UpdateCallback& callback) {} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/L2DistanceLayer.cpp b/paddle/legacy/gserver/layers/L2DistanceLayer.cpp deleted file mode 100644 index a3e627e57047b790b4f74089a352f06b55e48664..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/L2DistanceLayer.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License.
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "L2DistanceLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(l2_distance, L2DistanceLayer); - -bool L2DistanceLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2UL) << "The L2DistanceLayer accepts two and " - << "only two inputs."; - CHECK_EQ(getSize(), 1UL) << "The output dimensionality of L2DistanceLayer " - << "is fixed to be 1."; - - return true; -} - -void L2DistanceLayer::forward(PassType passType) { - Layer::forward(passType); - - const auto inV1 = getInputValue(0); - const auto inV2 = getInputValue(1); - - CHECK(inV1 && inV2); - CHECK_EQ(inV1->getHeight(), inV2->getHeight()) - << "The height of two inputs of this layer must be the same."; - CHECK_EQ(inV1->getWidth(), inV2->getWidth()) - << "The width of two inputs of this layer must be the same."; - - int batchSize = inV1->getHeight(); - int output_dim = getSize(); - { - REGISTER_TIMER_INFO("L2DistanceBpAtvTimer", getName().c_str()); - reserveOutput(batchSize, output_dim); - auto outV = getOutputValue(); - CHECK(outV) << "The output matrix should not be null."; - - Matrix::resizeOrCreate( - inputSub_, inV1->getHeight(), inV1->getWidth(), false, useGpu_); - - inputSub_->assign(*inV1); - inputSub_->sub(*inV2); - outV->sumOfProducts(*inputSub_, *inputSub_, 1, 0); - outV->sqrt2(*outV); - } -} - -void L2DistanceLayer::backward(const UpdateCallback& callback) { - const auto outG = 
getOutputGrad(); - const auto outV = getOutputValue(); - CHECK(outG && outV); - - auto inGrad1 = getInputGrad(0); - auto inGrad2 = getInputGrad(1); - - { - REGISTER_TIMER_INFO("L2DistanceBpAtvTimer", getName().c_str()); - - if (inGrad1 || inGrad2) { - outV->scalarDiv(*outV, 1.); - outV->dotMul(*outG, *outV); - } - - if (inGrad1) inGrad1->addRowScale(0, *inputSub_, *outV); - - if (inGrad2) { - inputSub_->mulScalar(-1.); - inGrad2->addRowScale(0, *inputSub_, *outV); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/L2DistanceLayer.h b/paddle/legacy/gserver/layers/L2DistanceLayer.h deleted file mode 100644 index aa8aabd9ca5702e3ebdccbe7bb4f98fa087dd238..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/L2DistanceLayer.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief The layer calculates the l2 distance between two input vectors. - * \f[ - * f(\bf{x}, \bf{y}) = \sqrt{\sum_{i=1}^D(x_i - y_i)^2} - * \f] - * - * - Input1: A vector (batchSize * dataDim) - * - Input2: A vector (batchSize * dataDim) - * - Output: A vector (batchSize * 1) - * - * The configuration api is: l2_distance_layer.
- */ - -class L2DistanceLayer : public Layer { - public: - explicit L2DistanceLayer(const LayerConfig& config) : Layer(config) {} - ~L2DistanceLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - private: - // Store the result of subtracting Input2 from Input1 in forward computation, - // which will be reused in backward computation. - MatrixPtr inputSub_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Layer.cpp b/paddle/legacy/gserver/layers/Layer.cpp deleted file mode 100644 index 890d33552dd31a8fd348a36d44fb0824ac9b32b5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Layer.cpp +++ /dev/null @@ -1,410 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/Util.h" - -#include "CostLayer.h" -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/utils/Error.h" -#include "paddle/legacy/utils/Logging.h" - -#ifndef PADDLE_MOBILE_INFERENCE -#include "ValidationLayer.h" -#endif - -DEFINE_bool(log_error_clipping, false, "enable log error clipping or not"); - -namespace paddle { - -Layer::Layer(const LayerConfig& config, bool useGpu) - : config_(config), - useGpu_(useGpu), - deviceId_(CPU_DEVICE), - needSequenceInfo_(true) {} - -bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - if (useGpu_ && FLAGS_parallel_nn) { - /* gpu environment is specified by device property */ - deviceId_ = config_.device(); - if (deviceId_ < 0) { - useGpu_ = false; - } - } - - output_.deviceId = deviceId_; - - for (auto& inputConfig : config_.inputs()) { - std::string inputName = inputConfig.input_layer_name(); - LayerPtr inputLayer; - CHECK(mapGet(inputName, layerMap, &inputLayer)) - << "Cannot find input layer " << inputName << " for layer " - << getName(); - this->addPrev(inputLayer); - - inputLayer->addOutputArgument(deviceId_); - - if (inputConfig.has_input_parameter_name()) { - ParameterPtr parameter; - CHECK( - mapGet(inputConfig.input_parameter_name(), parameterMap, ¶meter)) - << "Cannot find input parameter " - << inputConfig.input_parameter_name() << " for layer " << getName(); - parameter->incShared(); - CHECK_EQ(parameter->getDeviceId(), getDeviceId()); - parameters_.push_back(parameter); - } else { - parameters_.push_back(nullptr); - } - - if (inputConfig.has_input_layer_argument()) { - inputArgument_.push_back(inputConfig.input_layer_argument()); - } else { - inputArgument_.push_back(""); - } - } - - if (config_.has_bias_parameter_name()) { - CHECK(mapGet(config_.bias_parameter_name(), parameterMap, &biasParameter_)) - << "Cannot find bias parameter " << config_.bias_parameter_name() - << " for layer " << getName(); - biasParameter_->incShared(); - 
CHECK_EQ(biasParameter_->getDeviceId(), getDeviceId()); - } - - /* specify the activation function according to the configuration */ - std::string action_type = config_.active_type(); - activation_.reset(ActivationFunction::create(action_type)); - CHECK(activation_); - - initNeedFlags(); - markInBackward_.assign(inputLayers_.size(), false); - - return true; -} - -ClassRegistrar Layer::registrar_; - -LayerPtr Layer::create(const LayerConfig& config) { - std::string type = config.type(); - -#ifndef PADDLE_MOBILE_INFERENCE - // NOTE: As following types have illegal character '-', - // they can not use REGISTER_LAYER to registrar. - // Besides, to fit with old training models, - // they can not use '_' instead. - if (type == "multi-class-cross-entropy") - return LayerPtr(new MultiClassCrossEntropy(config)); - else if (type == "rank-cost") - return LayerPtr(new RankingCost(config)); - else if (type == "auc-validation") - return LayerPtr(new AucValidation(config)); - else if (type == "pnpair-validation") - return LayerPtr(new PnpairValidation(config)); -#endif - - return LayerPtr(registrar_.createByType(config.type(), config)); -} - -void Layer::resetSpecifyOutput(Argument& output, - size_t height, - size_t width, - bool isValueClean, - bool isGradClean) { - SetDevice device(output.deviceId); - - Matrix::resizeOrCreate( - output.value, height, width, /* trans */ false, useGpu(output.deviceId)); - if (isValueClean) { - output.value->zeroMem(); - } - - if (passType_ != PASS_TEST && needGradient()) { - Matrix::resizeOrCreate( - output.grad, height, width, /* trans */ false, useGpu(output.deviceId)); - if (isGradClean) { - output.grad->zeroMem(); - } - } -} - -void Layer::resizeOutput(size_t height, size_t width) { - resetSpecifyOutput(output_, height, width, false, false); - - for (size_t i = 0; i != outputOtherDevice_.size(); i++) { - resetSpecifyOutput(outputOtherDevice_[i], height, width, false, false); - } -} - -void Layer::reserveOutput(size_t height, size_t width) { - 
resetSpecifyOutput(output_, height, width, false, true); - - for (size_t i = 0; i != outputOtherDevice_.size(); i++) { - resetSpecifyOutput(outputOtherDevice_[i], height, width, false, true); - } -} - -void Layer::resetOutput(size_t height, size_t width) { - resetSpecifyOutput(output_, height, width, true, true); - - for (size_t i = 0; i != outputOtherDevice_.size(); i++) { - resetSpecifyOutput(outputOtherDevice_[i], height, width, true, true); - } -} - -void Layer::addOutputArgument(int deviceId) { - if (deviceId == deviceId_) { - output_.countIncrement(); - return; - } else { - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].deviceId == deviceId) { - outputOtherDevice_[i].countIncrement(); - return; - } - } - } - - Argument argu; - argu.deviceId = deviceId; - outputOtherDevice_.push_back(argu); - outputOtherDevice_.back().countIncrement(); -} - -void Layer::copyOutputToOtherDevice() { - for (size_t i = 0; i != outputOtherDevice_.size(); i++) { - SetDevice device(outputOtherDevice_[i].deviceId); - // If outputOtherDevice_[i].value is a CpuMatrix, - // the copyFrom is a synchronous interface. - // If outputOtherDevice_[i].value is a GpuMatrix, since subsequent - // calculations are all on HPPL_STREAM_DEFAULT, - // copyFrom can be an asynchronous interface. 
- outputOtherDevice_[i].value->copyFrom(*getOutputValue(), - HPPL_STREAM_DEFAULT); - outputOtherDevice_[i].sequenceStartPositions = - output_.sequenceStartPositions; - outputOtherDevice_[i].subSequenceStartPositions = - output_.subSequenceStartPositions; - outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; - - outputOtherDevice_[i].notifyValueReady(); - } -} - -void Layer::waitInputValue() { - for (size_t i = 0; i != inputLayers_.size(); i++) { - if (inputLayers_[i]->getDeviceId() != deviceId_) { - getInput(i).waitValueReady(); - } - } -} - -void Layer::waitAndMergeOutputGrad() { - if (!output_.grad || !outputOtherDevice_.size()) { - return; - } - - for (size_t i = 0; i != outputOtherDevice_.size(); i++) { - outputOtherDevice_[i].waitGradReady(); - } - - /* merge output grad */ - size_t i = 0; - if (!output_.getAllCount()) { - output_.grad->copyFrom(*outputOtherDevice_[0].grad, HPPL_STREAM_1); - hl_stream_synchronize(HPPL_STREAM_1); - - i++; - if (outputOtherDevice_.size() == 1) return; - } - - Matrix::resizeOrCreate(tmpGrad_, - output_.grad->getHeight(), - output_.grad->getWidth(), - /* trans */ false, - useGpu(output_.deviceId)); - - for (; i != outputOtherDevice_.size(); i++) { - tmpGrad_->copyFrom(*outputOtherDevice_[i].grad, HPPL_STREAM_1); - hl_stream_synchronize(HPPL_STREAM_1); - output_.grad->add(*tmpGrad_); - } -} - -void Layer::markAllInputGrad() { - for (size_t i = 0; i != inputLayers_.size(); ++i) { - if (!markInBackward_[i]) { - inputLayers_[i]->getOutput(deviceId_).notifyGradReady(); - } - markInBackward_[i] = false; - } -} - -void Layer::markInputGrad(int inputIndex) { - inputLayers_[inputIndex]->getOutput(deviceId_).notifyGradReady(); - markInBackward_[inputIndex] = true; -} - -void Layer::zeroGrad() { - CHECK(output_.grad.get() != NULL); - output_.grad->zeroMem(); -} - -void Layer::initNeedFlags() { - auto initFlag = [this]( - bool& flag, bool (Layer::*flagQueryFunc)() const, ParameterType type) { - flag = false; - if (biasParameter_ 
&& biasParameter_->hasType(type)) { - flag = true; - } - if (!flag) { - for (auto& para : parameters_) { - if (para && para->hasType(type)) { - flag = true; - break; - } - } - } - if (!flag) { - for (auto& layer : inputLayers_) { - if ((layer.get()->*flagQueryFunc)()) { - flag = true; - } - } - } - }; - initFlag(needGradient_, &Layer::needGradient, PARAMETER_GRADIENT); -} - -void Layer::showOutputStats() { - MatrixPtr out = getOutputValue(); - if (!out) return; - if (!out->getElementCnt()) { - LOG(INFO) << "The number of output of " << config_.name() - << " is 0, skip to show the statistics"; - return; - } - MatrixPtr outSquare; - if (dynamic_cast(out.get())) { - GpuSparseMatrix* tmp = dynamic_cast(out.get()); - outSquare = std::make_shared(tmp->getHeight(), - tmp->getWidth(), - tmp->getElementCnt(), - tmp->getValueType(), - tmp->getFormat()); - } else { - outSquare = out->clone(); - } - outSquare->copyFrom(*out, HPPL_STREAM_DEFAULT); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - - real mean = outSquare->getSum() / out->getElementCnt(); - real min; - real max; - if (dynamic_cast(outSquare.get())) { - auto tmpMat = dynamic_cast(outSquare.get()); - min = tmpMat->getMin(); - max = tmpMat->getMax(); - tmpMat->square2(); - LOG(INFO) << "show statistics of [none zero values] in sparse matrix"; - } else { - min = outSquare->getMin(); - max = outSquare->getMax(); - outSquare->square2(); - } - real std = (outSquare->getSum() / outSquare->getElementCnt()) - mean * mean; - std = std > 0 ? 
std : 0; - LOG(INFO) << "The output state of " << config_.name() << ": mean=" << mean - << ", " - << "std=" << std << ", " - << "min=" << min << ", " - << "max=" << max; -} - -void Layer::forwardActivation() { - /* activation */ - auto status = activation_->forward(output_); - status.check(); - - /* dropout */ - if (config_.drop_rate() > 0) { - forwardDropOut(); - CHECK_NE(activation_->getName(), "softmax") - << "Softmax activation cannot be used with Dropout"; - } - - if (FLAGS_show_layer_stat) { - showOutputStats(); - } -} - -void Layer::backwardActivation() { - /* Do error clipping */ - if (config_.error_clipping_threshold() > 0.0f) { - if (FLAGS_log_error_clipping) { - VectorPtr outGradVec = Vector::create( - output_.grad->getData(), output_.grad->getElementCnt(), useGpu_); - real maxAbsGrad = outGradVec->getAbsMax(); - if (maxAbsGrad > config_.error_clipping_threshold()) { - real avgAbsGrad = outGradVec->getAbsSum() / outGradVec->getSize(); - LOG(INFO) << " layer=" << config_.name() << " need clipping," - << " max error=" << maxAbsGrad << " avg error=" << avgAbsGrad; - } - } - output_.grad->clip(-config_.error_clipping_threshold(), - config_.error_clipping_threshold()); - } - - /* Do dropout for delta*/ - if (config_.drop_rate() > 0 && passType_ != PASS_TEST) { - MatrixPtr oGrad = getOutputGrad(); - oGrad->dotMul(*oGrad, *dropOutMask_); - } - - auto status = activation_->backward(output_); - status.check(); -} - -void Layer::forwardDropOut() { - auto& outV = getOutputValue(); - - if (passType_ == PASS_TRAIN) { - // new dropOutMask_ if dropOutMask_ is null ptr - Matrix::resizeOrCreate(dropOutMask_, - outV->getHeight(), - outV->getWidth(), - false, - useGpu(deviceId_)); - dropOutMask_->randomizeUniform(); // generate a uniform random matrix - dropOutMask_->biggerThanScalar(config_.drop_rate()); // random mask - outV->dotMul(*outV, *dropOutMask_); // dropout - } else if (passType_ == PASS_GC) { - // only initialize once - if (!dropOutMask_) { - dropOutMask_ = 
Matrix::create( - outV->getHeight(), outV->getWidth(), false, useGpu(deviceId_)); - // We use cpu matrix to generate mask so that the mask - // will be same for both gpu version and cpu version. - // This will help unittest to make sure they have same result. - MatrixPtr tmpMask = Matrix::create(outV->getHeight(), outV->getWidth()); - tmpMask->randomizeUniform(); // generate a uniform random matrix - tmpMask->biggerThanScalar(config_.drop_rate()); // random mask - dropOutMask_->copyFrom(*tmpMask); - } - outV->dotMul(*outV, *dropOutMask_); - } else { // passType == PASS_TEST - outV->mulScalar(1.0 - config_.drop_rate()); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Layer.h b/paddle/legacy/gserver/layers/Layer.h deleted file mode 100644 index a7ff76decea9a448acfcdef1c81a68b5a823cc56..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Layer.h +++ /dev/null @@ -1,512 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/function/Function.h" -#include "paddle/legacy/gserver/activations/ActivationFunction.h" -#include "paddle/legacy/math/CpuSparseMatrix.h" -#include "paddle/legacy/parameter/Argument.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/parameter/Weight.h" -#include "paddle/legacy/utils/ClassRegistrar.h" -#include "paddle/legacy/utils/Util.h" - -/// Macro for registering a layer type. -/// Example: REGISTER_LAYER(crf_error, CRFDecodingErrorLayer); -#define REGISTER_LAYER(__type_name, __class_name) \ - static InitFunction __reg_type_##__type_name( \ - []() { Layer::registrar_.registerClass<__class_name>(#__type_name); }) - -#define REGISTER_LAYER_CREATE_FUNC(__type_name, createFunction) \ - static InitFunction __reg_type_##__type_name( \ - []() { Layer::registrar_.registerClass(#__type_name, createFunction); }) - -namespace paddle { - -class Layer; -typedef std::shared_ptr LayerPtr; -typedef std::map LayerMap; -class NeuralNetwork; - -/// layer state, used for RNN and LSTM layers -struct LayerState { - std::vector value; -}; -typedef std::shared_ptr LayerStatePtr; - -/// Paddle device ID, MKLDNN is -2, CPU is -1 -enum PADDLE_DEVICE_ID { - MKLDNN_DEVICE = -2, - CPU_DEVICE = -1, -}; - -/** - * @brief Base class for layer. - * Define necessary variables and functions for every layer. - */ -class Layer { - protected: - /// Layer config - LayerConfig config_; - /// whether to use GPU - bool useGpu_; - /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ... - int deviceId_; - /// Input layers - std::vector inputLayers_; - /// Argument of input layers - std::vector inputArgument_; - - /// Parameter for each input layer. - /// Parameters_[i] is nullptr if inputLayers_[i] does not need parameter. - std::vector parameters_; - - /// nullptr if bias is not needed. 
- ParameterPtr biasParameter_; - - /// Output - Argument output_; - /// Several outputs stored on different devices, used in 'parallel_nn' case, - /// and record them by deviceId_. - /// Also used in 'use_mkldnn' case. - std::vector outputOtherDevice_; - /// If there are several outputs, map them by each name. - /// MKLDNNLayer use it only to merge output grad - std::map outputMap_; - /// Used to merge grad on different devices. - MatrixPtr tmpGrad_; - - std::unique_ptr activation_; - - /// Current passType, PASS_TRAIN or PASS_TEST - PassType passType_; - - /// Random 0-1 matrix for dropOut - MatrixPtr dropOutMask_; - - /// Whether the layer need to compute gradient - bool needGradient_; - /// Whether the layer need to compute re-sequence information - bool needSequenceInfo_; - - /// Mark input grad in(true) or out(false) of backward function. - std::vector markInBackward_; - - /// Layer forward function - std::vector> forward_; - /// Layer backward function - std::vector> backward_; - - public: - /** - * Wait until all input value ready. - * Called before Layer::forward() function. - */ - virtual void waitInputValue(); - - /** - * Copy layer's output_ to other device. - * If output layer is in other device, called after Layer::forward() function. - */ - virtual void copyOutputToOtherDevice(); - - /** - * Wait until all output grad ready and merge them to output_.grad. - * Called before Layer::backward() function. - */ - virtual void waitAndMergeOutputGrad(); - - /** - * Notify previous layer the output grad ready. - * Called after Layer::backward() function. - */ - virtual void markAllInputGrad(); - - protected: - /** - * Create layer function. Function is called in forward or backward. 
- * \param function, Layer::forward_ or Layer::backward_ - * \param name, function name - * \param config, initialization configuration for the function - */ - void createFunction(std::vector>& function, - const std::string& name, - const FuncConfig& config) { - if (useGpu_) { - function.emplace_back( - FunctionBase::funcRegistrar_.createByType(name + "-GPU")); - } else { - function.emplace_back( - FunctionBase::funcRegistrar_.createByType(name + "-CPU")); - } - auto& func = function.back(); - func->init(config); - } - - /** - * Notify specified layer the output grad ready. - * Called in the backward function. - * If do mark input grad in the backward function, you should to ensure - * that all input grad will be marked in the backward function. - */ - void markInputGrad(int inputIndex); - - /** - * Get the argument of input layer. - */ - const Argument& getInput(size_t inputIndex) const { - return inputLayers_[inputIndex]->getOutput(deviceId_); - } - - /** - * Get the argument of input layer. - */ - const Argument& getInput(const Layer& inputLayer) const { - return inputLayer.getOutput(deviceId_); - } - - /** - * Get the argument of input layer with deviceId. - */ - const Argument& getInput(size_t inputIndex, int deviceId) const { - return inputLayers_[inputIndex]->getOutput(deviceId); - } - - /** - * Get the forward-input value. - */ - const MatrixPtr& getInputValue(int inputIndex) { - return inputLayers_[inputIndex]->getOutput(deviceId_).value; - } - - /** - * Get the forward-input value. - */ - const MatrixPtr& getInputValue(const Layer& inputLayer) { - return inputLayer.getOutput(deviceId_).value; - } - - /** - * Get the forward-input value with deviceId. - */ - const MatrixPtr& getInputValue(int inputIndex, int deviceId) { - return inputLayers_[inputIndex]->getOutput(deviceId).value; - } - - /** - * Get the forward-input grad. 
- */ - const MatrixPtr& getInputGrad(int inputIndex) { - return inputLayers_[inputIndex]->getOutput(deviceId_).grad; - } - - /** - * Get the forward-input grad. - */ - const MatrixPtr& getInputGrad(const Layer& inputLayer) { - return inputLayer.getOutput(deviceId_).grad; - } - - /** - * Get the forward-input grad. - */ - const MatrixPtr& getInputGrad(int inputIndex, int deviceId) { - return inputLayers_[inputIndex]->getOutput(deviceId).grad; - } - - /** - * Get the forward-input label. - */ - const IVectorPtr& getInputLabel(const Layer& inputLayer) { - return inputLayer.getOutput(deviceId_).ids; - } - - /** - * Change the size of output (value, grad). - * Reset to value zero if isValueClean = true, - * Reset to grad zero if isGradClean = true. - */ - void resetSpecifyOutput(Argument& output, - size_t height, - size_t width, - bool isValueClean, - bool isGradClean); - - /** - * Add output argument to other devices. - */ - void addOutputArgument(int deviceId); - - public: - explicit Layer(const LayerConfig& config, bool useGpu = FLAGS_use_gpu); - virtual ~Layer() {} - - /// Register a Layer - static ClassRegistrar registrar_; - - /** - * Get the flag whether layer need to compute gradient. - */ - bool needGradient() const { return needGradient_; } - - /** - * Set the flag whether layer need to compute gradient. - */ - void setNeedGradient(bool need) { needGradient_ = need; } - - /** - * Set the flag whether layer need to re-compute sequence information, - * which includes sequenceStartPositions or subSequenceStartPositions. - */ - void setNeedSequenceInfo(bool need) { needSequenceInfo_ = need; } - - /** - * Get layer's name. - */ - const std::string& getName() const { return config_.name(); } - - /** - * Get layer's type. - */ - const std::string& getType() const { return config_.type(); } - - /** - * Get layer's size. - */ - size_t getSize() const { return config_.size(); } - - /** - * Get layer's deviceId. 
- */ - int getDeviceId() const { return deviceId_; } - - /** - * Add the inputLayer. - */ - void addPrev(LayerPtr l) { inputLayers_.push_back(l); } - - /** - * Get the size of inputLayer[i]. - */ - const LayerPtr& getPrev(size_t i) { return inputLayers_[i]; } - - /** - * Get the forward-output value. - */ - const MatrixPtr& getOutputValue() { return output_.value; } - - /** - * Get the forward-output label. - */ - const IVectorPtr& getOutputLabel() { return output_.ids; } - - /** - * Get the backward-Loss value. - */ - const MatrixPtr& getOutputGrad() { return output_.grad; } - /** - * If layer has multi-output, set output into outputMap_. - */ - void setOutput(const std::string& name, Argument* output) { - outputMap_[name] = output; - } - - /** - * Get the output map size, if layer has multi-output. - */ - size_t getOutputMapSize() { return outputMap_.size(); } - - /** - * Get the output based on layer's name. - */ - Argument& getOutput(const std::string& str = "") { - if (str == "") { - return output_; - } else { - auto output = outputMap_.find(str); - if (output != outputMap_.end()) { - return *output->second; - } else { - LOG(FATAL) << "No specific output " << str; - return *((Argument*)nullptr); - } - } - } - - /** - * Get the output based on deviceId. - */ - const Argument& getOutput(int deviceId) const { - if (deviceId == getDeviceId()) { - return output_; - } else { - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].deviceId == deviceId) { - return outputOtherDevice_[i]; - } - } - - LOG(FATAL) << "No specific device output "; - return *((Argument*)nullptr); - } - } - - /** - * Get layer's parameters. - */ - const std::vector& getParameters() { return parameters_; } - - /** - * Get layer's bias-parameters. - */ - const ParameterPtr& getBiasParameter() { return biasParameter_; } - - /** - * Create pointer of layer. - */ - static LayerPtr create(const LayerConfig& config); - - /** - * Resize the output matrix size. 
- */ - void resizeOutput(size_t height, size_t width); - - /** - * Resize the output matrix size, - * and reset value to zero. - */ - void reserveOutput(size_t height, size_t width); - - /** - * Resize the output matrix size, - * and reset value and grad to zero. - */ - void resetOutput(size_t height, size_t width); - - /** - * Clear the gradient of output. - */ - void zeroGrad(); - - /** - * Intialization. - * For example, adding input layers from layerMap and parameterMap. - */ - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - /** - * Intialization for sub network if there has sub network. - * @param rootNetwork root network - * @param config model config - * @param parameterTypes parameter's type - * @param useGpu whether to use gpu or not - */ - virtual void initSubNetwork(NeuralNetwork* rootNetwork, - const ModelConfig& config, - const std::vector& parameterTypes, - bool useGpu) {} - - /** - * @brief Access SubNetwork Object. - * If subnetwork exists, then invoke callback with subnetwrk. - * @param callback if sub-network is exist, the callback is invoked. - */ - virtual void accessSubNetwork( - const std::function& callback) {} - - /** - * If use sparse row matrix as parameter, - * prefetch feature ids in input label. - */ - virtual void prefetch() {} - - /** - * Forward propagation. - * All inherited implementation should call Layer::foward() function. - */ - virtual void forward(PassType passType) { - passType_ = passType; - if (!inputLayers_.empty() && needSequenceInfo_) { - const Argument& input = getInput(0); - output_.sequenceStartPositions = input.sequenceStartPositions; - output_.subSequenceStartPositions = input.subSequenceStartPositions; - output_.cpuSequenceDims = input.cpuSequenceDims; - } - } - - /** - * Reset the internal state variables. - * Allocate them if they have not been allocated. - * This function need to called before Layer::forward() for generating - * sequence. 
- * - * This is used for sequence generation. When generating sequence, the - * calculation at current timestamp depends on the state from previous - * timestamp. The model needs to keep the information about the previous - * timestamp in the state variables. Layers such as RecurrentLayer, - * LstmLayer and ContextLayer have state variables. - */ - virtual void resetState() {} - - /** - * Set layer state. - */ - virtual void setState(LayerStatePtr state) {} - - /** - * Get layer state. - * @return A copy of internal state. - */ - virtual LayerStatePtr getState() { return nullptr; } - - /** - * Show output state. - */ - void showOutputStats(); - - /** - * Backward propagation. - * Should only be called after Layer::forward() function. - */ - virtual void backward(const UpdateCallback& callback = nullptr) = 0; - - /** - * One pass is finished. - */ - virtual void onPassEnd() {} - - protected: - /** - * Forward of activation function. - */ - void forwardActivation(); - /** - * Backward of activation function. - */ - void backwardActivation(); - /** - * Forward of dropOut. - */ - void forwardDropOut(); - /** - * Initilize the needGradient_ flag. - */ - void initNeedFlags(); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LinearChainCRF.cpp b/paddle/legacy/gserver/layers/LinearChainCRF.cpp deleted file mode 100644 index 315fc25fab30d80bd24a72a9a3ed5c3c6ba33629..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LinearChainCRF.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "LinearChainCRF.h" -#include - -namespace paddle { - -LinearChainCRF::LinearChainCRF(int numClasses, real* para) - : numClasses_(numClasses) { - a_ = Matrix::create(para, 1, numClasses_); - b_ = Matrix::create(para + numClasses_, 1, numClasses_); - w_ = Matrix::create(para + 2 * numClasses_, numClasses_, numClasses_); - - ones_ = Matrix::create(1, numClasses_); - ones_->one(); - - expW_ = Matrix::create(numClasses_, numClasses_); -} - -// normalize x so that its sum is 1 and return the original sum; -static real normalizeL1(real* x, int n) { - real sum = 0; - for (int i = 0; i < n; ++i) { - sum += x[i]; - } - // Right now, we just bet that sum won't be zero. If this really happens, - // we will figure out what should be done then. 
- CHECK_GT(sum, 0); - real s = 1 / sum; - for (int i = 0; i < n; ++i) { - x[i] *= s; - } - return sum; -} - -real LinearChainCRF::forward(real* x, int* s, int length) { - Matrix::resizeOrCreate(maxX_, length, 1); - Matrix::resizeOrCreate(expX_, length, numClasses_); - Matrix::resizeOrCreate(alpha_, length, numClasses_); - MatrixPtr matX = Matrix::create(x, length, numClasses_); - matX->rowMax(*maxX_); - expX_->assign(*matX); - // subtract max to avoid overflow or underflow - expX_->mul(*maxX_, *ones_, (real)-1, (real)1); - expX_->exp2(); - - real* a = a_->getData(); - real* b = b_->getData(); - real* w = w_->getData(); - real* alpha = alpha_->getData(); - real* expX = expX_->getData(); - real* maxX = maxX_->getData(); - - expW_->exp2(*w_); - real* expW = expW_->getData(); - - for (int i = 0; i < numClasses_; ++i) { - alpha[i] = exp(a[i]) * expX[i]; - } - real ll = -maxX[0] - log(normalizeL1(alpha, numClasses_)); - - for (int k = 1; k < length; ++k) { - for (int i = 0; i < numClasses_; ++i) { - real sum = 0; - for (int j = 0; j < numClasses_; ++j) { - sum += alpha[(k - 1) * numClasses_ + j] // (*) - * expW[j * numClasses_ + i]; - } - alpha[k * numClasses_ + i] = expX[k * numClasses_ + i] * sum; - } - // normalizeL1 is to avoid underflow or overflow at (*) - ll -= maxX[k] + log(normalizeL1(alpha + k * numClasses_, numClasses_)); - } - real sum = 0; - for (int i = 0; i < numClasses_; ++i) { - sum += alpha[(length - 1) * numClasses_ + i] * exp(b[i]); - } - ll -= log(sum); - // Now ll is equal to -log(Z) - - CHECK_LT(*std::max_element(s, s + length), numClasses_); - // Calculate the nominator part, which depends on s - ll += a[s[0]] + x[s[0]] + b[s[length - 1]]; - for (int k = 1; k < length; ++k) { - ll += x[k * numClasses_ + s[k]] + w[s[k - 1] * numClasses_ + s[k]]; - } - - VLOG(1) << "ll=" << ll; - return -ll; -} - -void LinearChainCRF::backward(real* x, int* s, int length, bool needWGrad) { - Matrix::resizeOrCreate(matGrad_, length, numClasses_); - 
Matrix::resizeOrCreate(beta_, length, numClasses_); - real* b = b_->getData(); - if (needWGrad) { - Matrix::resizeOrCreate(matWGrad_, numClasses_ + 2, numClasses_); - matWGrad_->zeroMem(); - da_ = matWGrad_->subRowMatrix(0, 1); - db_ = matWGrad_->subRowMatrix(1, 2); - dw_ = matWGrad_->subRowMatrix(2, numClasses_ + 2); - } - - real* alpha = alpha_->getData(); - real* beta = beta_->getData(); - real* expW = expW_->getData(); - real* expX = expX_->getData(); - real* grad = matGrad_->getData(); - - for (int i = 0; i < numClasses_; ++i) { - beta[(length - 1) * numClasses_ + i] = exp(b[i]); - } - normalizeL1(beta + (length - 1) * numClasses_, numClasses_); - - for (int k = length - 2; k >= 0; --k) { - for (int i = 0; i < numClasses_; ++i) { - real sum = 0; - for (int j = 0; j < numClasses_; ++j) { - sum += expW[i * numClasses_ + j] // (**) - * beta[(k + 1) * numClasses_ + j] * - expX[(k + 1) * numClasses_ + j]; - } - beta[k * numClasses_ + i] = sum; - } - // normalizeL1 is to avoid underflow or overflow at (**) - normalizeL1(beta + k * numClasses_, numClasses_); - } - - matGrad_->dotMul(*alpha_, *beta_); - matGrad_->rowNormalizeL1(*matGrad_); - for (int k = 0; k < length; ++k) { - grad[k * numClasses_ + s[k]] -= (real)1; - } - - if (needWGrad) { - da_->add(*matGrad_->subMatrix(/* startRow= */ 0, /* numRows= */ 1)); - db_->add(*matGrad_->subMatrix(/* startRow= */ length - 1, 1)); - - beta_->dotMul(*beta_, *expX_); - beta_->rowNormalizeL1(*beta_); - - real* dw = dw_->getData(); - for (int k = 1; k < length; ++k) { - real sum = 0; - for (int i = 0; i < numClasses_; ++i) { - for (int j = 0; j < numClasses_; ++j) { - sum += expW[i * numClasses_ + j] * alpha[(k - 1) * numClasses_ + i] * - beta[k * numClasses_ + j]; - } - } - sum = 1 / sum; - for (int i = 0; i < numClasses_; ++i) { - for (int j = 0; j < numClasses_; ++j) { - dw[i * numClasses_ + j] += sum * expW[i * numClasses_ + j] * - alpha[(k - 1) * numClasses_ + i] * - beta[k * numClasses_ + j]; - } - } - dw[s[k - 1] * 
numClasses_ + s[k]] -= (real)1; - } - } -} - -void LinearChainCRF::decode(real* x, int* s, int length) { - Matrix::resizeOrCreate(alpha_, length, numClasses_); - real* a = a_->getData(); - real* b = b_->getData(); - real* w = w_->getData(); - IVector::resizeOrCreate(track_, numClasses_ * length, /* useGpu= */ false); - int* track = track_->getData(); - real* alpha = alpha_->getData(); - - for (int i = 0; i < numClasses_; ++i) { - alpha[i] = a[i] + x[i]; - } - for (int k = 1; k < length; ++k) { - for (int i = 0; i < numClasses_; ++i) { - real maxScore = -std::numeric_limits::max(); - int maxJ = 0; - for (int j = 0; j < numClasses_; ++j) { - real score = alpha[(k - 1) * numClasses_ + j] + w[j * numClasses_ + i]; - if (score > maxScore) { - maxScore = score; - maxJ = j; - } - } - alpha[k * numClasses_ + i] = maxScore + x[k * numClasses_ + i]; - track[k * numClasses_ + i] = maxJ; - } - } - real maxScore = -std::numeric_limits::max(); - int maxI = 0; - for (int i = 0; i < numClasses_; ++i) { - real score = alpha[(length - 1) * numClasses_ + i] + b[i]; - if (score > maxScore) { - maxScore = score; - maxI = i; - } - } - s[length - 1] = maxI; - for (int k = length - 1; k >= 1; --k) { - s[k - 1] = maxI = track[k * numClasses_ + maxI]; - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LinearChainCRF.h b/paddle/legacy/gserver/layers/LinearChainCRF.h deleted file mode 100644 index 65e23905435da24a1a7554c30e33d303b05aef69..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LinearChainCRF.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -class LinearChainCRF { - public: - /** - * The size of para must be \f$(numClasses + 2) * numClasses\f$. - * The first numClasses values of para are for starting weights (\f$a\f$). - * The next numClasses values of para are for ending weights (\f$b\f$), - * The remaning values are for transition weights (\f$w\f$). - * - * The probability of a state sequence s of length \f$L\f$ is defined as: - * \f$P(s) = (1/Z) exp(a_{s_1} + b_{s_L} - * + \sum_{l=1}^L x_{s_l} - * + \sum_{l=2}^L w_{s_{l-1},s_l})\f$ - * where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over - * all possible - * sequences is \f$1\f$, and \f$x\f$ is the input feature to the CRF. - */ - LinearChainCRF(int numClasses, real* para); - - /** - * Calculate the negative log likelihood of s given x. - * The size of x must be length * numClasses. Each consecutive numClasses - * values are the features for one time step. - */ - real forward(real* x, int* s, int length); - - /** - * Calculate the gradient with respect to x, a, b, and w. - * backward() can only be called after a corresponding call to forward() with - * the same x, s and length. - * The gradient with respect to a, b, and w will not be calculated if - * needWGrad is false. - * @note Please call getWGrad() and getXGrad() to get the gradient with - * respect to (a, b, w) and x respectively. - */ - void backward(real* x, int* s, int length, bool needWGrad); - - /** - * Find the most probable sequence given x. The result will be stored in s. 
- */ - void decode(real* x, int* s, int length); - - /* - * Return the gradient with respect to (a, b, w). It can only be called after - * a corresponding call to backward(). - */ - MatrixPtr getWGrad() { return matWGrad_; } - - /* - * Return the gradient with respect to x. It can only be called after a - * corresponding call to backward(). - */ - MatrixPtr getXGrad() { return matGrad_; } - - protected: - int numClasses_; - MatrixPtr a_; - MatrixPtr b_; - MatrixPtr w_; - MatrixPtr matWGrad_; - MatrixPtr da_; - MatrixPtr db_; - MatrixPtr dw_; - MatrixPtr ones_; - - MatrixPtr expX_; - MatrixPtr matGrad_; - MatrixPtr alpha_; - MatrixPtr beta_; - MatrixPtr maxX_; - MatrixPtr expW_; - - // track_(k,i) = j means that the best sequence at time k for class i comes - // from the sequence at time k-1 for class j - IVectorPtr track_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LinearChainCTC.cpp b/paddle/legacy/gserver/layers/LinearChainCTC.cpp deleted file mode 100644 index 1fad545b7a56bf9c0f0da219b4ab2e5190d44d52..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LinearChainCTC.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "LinearChainCTC.h" -#include -#include - -namespace paddle { - -/* log scale */ -const real EXP_MAX = std::numeric_limits::max(); -const real EXP_MIN = std::numeric_limits::min(); -const real LOG_ZERO = std::log(EXP_MIN); -const real LOG_INFINITY = std::log(EXP_MAX); - -static inline real safeExp(real x) { - if (x <= LOG_ZERO) { - return 0; - } - if (x >= LOG_INFINITY) { - return EXP_MAX; - } - return std::exp(x); -} - -static inline real safeLog(real x) { - if (x <= EXP_MIN) { - return LOG_ZERO; - } - return std::log(x); -} - -// x=lna and y=lnb is log scale, ln(a/b)=lna-lnb -static inline real logDiv(real x, real y) { - if (x - y <= LOG_ZERO) { - return LOG_ZERO; - } - if (x - y >= LOG_INFINITY) { - return LOG_INFINITY; - } - return x - y; -} - -// x=lna and y=lnb is log scale, ln(a*b)=lna+lnb -static inline real logMul(real x, real y) { - if (x + y <= LOG_ZERO) { - return LOG_ZERO; - } - if (x + y >= LOG_INFINITY) { - return LOG_INFINITY; - } - return x + y; -} - -// x=lna and y=lnb is log scale, ln(a+b)=lna+ln(1+exp(lnb-lna)), where b > a -static inline real logAdd(real x, real y) { - if (x < y) { - real t = y; - y = x; - x = t; - } - return x + safeLog(1 + safeExp(y - x)); -} - -static void setLogZero(MatrixPtr mat) { - size_t size = mat->getElementCnt(); - real* data = mat->getData(); - for (size_t i = 0; i < size; i++) { - data[i] = LOG_ZERO; - } -} - -LinearChainCTC::LinearChainCTC(int numClasses, bool normByTimes) - : numClasses_(numClasses), normByTimes_(normByTimes), logProb_(0) { - // set the class label of blank as "numClasses-1" - blank_ = numClasses - 1; - - Matrix::resizeOrCreate(gradTerms_, 1, numClasses_); -} - -real LinearChainCTC::forward(real* softmaxSeq, - int softmaxSeqLen, - int* labelSeq, - int labelSeqLen) { - isInvalid_ = false; - totalTime_ = softmaxSeqLen; - totalSegments_ = labelSeqLen * 2 + 1; - - int requiredTime = labelSeqLen; - int oldLabel = -1; - - for (int i = 0; i < labelSeqLen; i++) { - if (labelSeq[i] == 
oldLabel) { - requiredTime++; - } - oldLabel = labelSeq[i]; - } - - if (totalTime_ < requiredTime) { - isInvalid_ = true; - return 0; - } - - /* calculate the forward and backward variables, - * reference Chapter 7.3 of "Alex Grave, Supervised Sequence - * Labelling with Recurrent Neural Networks" */ - Matrix::resizeOrCreate(logActs_, totalTime_, numClasses_, false, false); - real* logActsData = logActs_->getData(); - for (int i = 0; i < totalTime_ * numClasses_; i++) { - logActsData[i] = safeLog(softmaxSeq[i]); - } - - Matrix::resizeOrCreate(forwardVars_, totalTime_, totalSegments_); - Matrix::resizeOrCreate(backwardVars_, totalTime_, totalSegments_); - - /* calculate the forward variables */ - setLogZero(forwardVars_); - real* fwdVars = forwardVars_->getData(); - - /* dp initialization at t0 */ - fwdVars[0] = logActs_->getData()[blank_]; - if (totalSegments_ > 1) { - fwdVars[1] = logActs_->getData()[labelSeq[0]]; - } - /* dp from t1 */ - for (int i = 1; i < totalTime_; i++) { - real* dataPerStep = logActsData + i * numClasses_; - real* oldFvars = fwdVars + (i - 1) * totalSegments_; - real* fvars = fwdVars + i * totalSegments_; - int start, end; - segmentRange(start, end, i); - for (int j = start; j < end; j++) { - real fv; - if (j & 1) { - int labelIdx = j / 2; - int labelVal = labelSeq[labelIdx]; - fv = logAdd(oldFvars[j], oldFvars[j - 1]); - if (j > 1 && (labelVal != labelSeq[labelIdx - 1])) { - fv = logAdd(fv, oldFvars[j - 2]); - } - fv = logMul(fv, dataPerStep[labelVal]); - } else { - fv = oldFvars[j]; - if (j) { - fv = logAdd(fv, oldFvars[j - 1]); - } - fv = logMul(fv, dataPerStep[blank_]); - } - fvars[j] = fv; - } - } - - real* lastFvs = fwdVars + (totalTime_ - 1) * totalSegments_; - - /* sum the last two value as logprob */ - logProb_ = lastFvs[totalSegments_ - 1]; - if (totalSegments_ > 1) { - logProb_ = logAdd(logProb_, lastFvs[totalSegments_ - 2]); - } - - /* calculate the backward variables */ - setLogZero(backwardVars_); - real* bwdVars = 
backwardVars_->getData(); - real* lastBvs = bwdVars + (totalTime_ - 1) * totalSegments_; - - lastBvs[totalSegments_ - 1] = 0; - if (totalSegments_ > 1) { - lastBvs[totalSegments_ - 2] = 0; - } - - for (int i = totalTime_ - 2; i >= 0; i--) { - real* oldDataPerStep = logActsData + (i + 1) * numClasses_; - real* oldBvars = bwdVars + (i + 1) * totalSegments_; - real* bvars = bwdVars + i * totalSegments_; - int start, end; - segmentRange(start, end, i); - for (int j = start; j < end; j++) { - real bv; - if (j & 1) { - int labelIdx = j / 2; - int labelVal = labelSeq[labelIdx]; - - bv = logAdd(logMul(oldBvars[j], oldDataPerStep[labelVal]), - logMul(oldBvars[j + 1], oldDataPerStep[blank_])); - if (j < (totalSegments_ - 2)) { - int nextLabelVal = labelSeq[labelIdx + 1]; - if (labelVal != nextLabelVal) { - bv = logAdd(bv, - logMul(oldBvars[j + 2], oldDataPerStep[nextLabelVal])); - } - } - } else { - bv = logMul(oldBvars[j], oldDataPerStep[blank_]); - if (j < (totalSegments_ - 1)) { - bv = logAdd(bv, - logMul(oldBvars[j + 1], oldDataPerStep[labelSeq[j / 2]])); - } - } - bvars[j] = bv; - } - } - - VLOG(1) << "ctcLoss=" << -logProb_; - - return -logProb_; -} - -void LinearChainCTC::backward(real* softmaxSeq, - real* grad, - int* labelSeq, - int labelSeqLen) { - /* if not meet the conditions of CTC computing, then set the grads to zeros */ - if (isInvalid_) { - for (int i = 0; i < totalTime_ * numClasses_; i++) { - grad[i] += 0; - } - return; - } - - real* fwdVars = forwardVars_->getData(); - real* bwdVars = backwardVars_->getData(); - real* logActsData = logActs_->getData(); - - for (int i = 0; i < totalTime_; i++) { - setLogZero(gradTerms_); - real* gradTermsData = gradTerms_->getData(); - real* fvars = fwdVars + i * totalSegments_; - real* bvars = bwdVars + i * totalSegments_; - for (int j = 0; j < totalSegments_; j++) { - int k = (j & 1) ? 
labelSeq[j / 2] : blank_; - gradTermsData[k] = logAdd(gradTermsData[k], logMul(fvars[j], bvars[j])); - } - for (int j = 0; j < numClasses_; j++) { - if (normByTimes_) { - grad[i * numClasses_ + j] += - -safeExp( - logDiv(gradTermsData[j], - logMul(logProb_, logActsData[i * numClasses_ + j]))) / - totalTime_; - } else { - grad[i * numClasses_ + j] += -safeExp( - logDiv(gradTermsData[j], - logMul(logProb_, logActsData[i * numClasses_ + j]))); - } - } - } -} - -void LinearChainCTC::segmentRange(int& start, int& end, int time) { - start = std::max(0, totalSegments_ - (2 * (totalTime_ - time))); - end = std::min(totalSegments_, 2 * (time + 1)); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LinearChainCTC.h b/paddle/legacy/gserver/layers/LinearChainCTC.h deleted file mode 100644 index e6c4c7bfe0cdb1bbcafbf5b847ea592eef02794a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LinearChainCTC.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -class LinearChainCTC { - public: - LinearChainCTC(int numClasses, bool normByTimes); - - // Calculate the negative log probability as loss - real forward(real* softmaxSeq, - int softmaxSeqLen, - int* labelSeq, - int labelSeqLen); - - // calculate the gradient - void backward(real* softmaxSeq, - real* softmaxSeqGrad, - int* labelSeq, - int labelSeqLen); - - protected: - int numClasses_, blank_, totalSegments_, totalTime_; - bool normByTimes_; - bool isInvalid_; - - MatrixPtr logActs_, forwardVars_, backwardVars_, gradTerms_; - - real logProb_; - - void segmentRange(int& start, int& end, int time); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LstmCompute.cpp b/paddle/legacy/gserver/layers/LstmCompute.cpp deleted file mode 100644 index 70f08e1d4efd2223e7ddec1b104e4ee63fc34de5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LstmCompute.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "LstmCompute.h" -#include "hl_recurrent_apply.cuh" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -void LstmCompute::init(LayerConfig &config) { - activeNode_ = hlActiveType(config.active_type()); - activeGate_ = hlActiveType(config.active_gate_type()); - activeState_ = hlActiveType(config.active_state_type()); -} - -template <> -void LstmCompute::forwardOneSequence<0>(hl_lstm_value value, int frameSize) { - hl_cpu_lstm_forward(hppl::forward::lstm(), - value, - frameSize, - activeNode_, - activeGate_, - activeState_); -} - -template <> -void LstmCompute::backwardOneSequence<0>(hl_lstm_value value, - hl_lstm_grad grad, - int frameSize) { - hl_cpu_lstm_backward(hppl::backward::lstm(), - value, - grad, - frameSize, - activeNode_, - activeGate_, - activeState_); -} - -template <> -void LstmCompute::forwardBatch<0>(hl_lstm_value value, - int frameSize, - int batchSize) { - for (int b = 0; b < batchSize; b++) { - forwardOneSequence<0>(value, frameSize); - - value.gateValue += frameSize * 4; - value.stateValue += frameSize; - value.stateActiveValue += frameSize; - value.outputValue += frameSize; - if (value.prevStateValue) { - value.prevStateValue += frameSize; - } - } -} - -template <> -void LstmCompute::backwardBatch<0>(hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - int batchSize) { - for (int b = 0; b < batchSize; b++) { - backwardOneSequence<0>(value, grad, frameSize); - - value.gateValue += frameSize * 4; - value.stateValue += frameSize; - value.stateActiveValue += frameSize; - value.outputValue += frameSize; - if (value.prevStateValue) { - value.prevStateValue += frameSize; - } - - grad.gateGrad += frameSize * 4; - grad.stateGrad += frameSize; - grad.stateActiveGrad += frameSize; - grad.outputGrad += frameSize; - if (grad.prevStateGrad) { - grad.prevStateGrad += frameSize; - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LstmCompute.cu b/paddle/legacy/gserver/layers/LstmCompute.cu deleted file 
mode 100644 index 3f15edcacabdae42bb1871833430361ebeb22fc8..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LstmCompute.cu +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "LstmCompute.h" -#include "hl_recurrent_apply.cuh" - -namespace paddle { - -template <> -void LstmCompute::forwardBatch<1>(hl_lstm_value value, - int frameSize, - int batchSize) { - hl_gpu_lstm_forward(hppl::forward::lstm(), - value, - frameSize, - batchSize, - activeNode_, - activeGate_, - activeState_); -} - -template <> -void LstmCompute::backwardBatch<1>(hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - int batchSize) { - hl_gpu_lstm_backward(hppl::backward::lstm(), - value, - grad, - frameSize, - batchSize, - activeNode_, - activeGate_, - activeState_); -} - -template <> -void LstmCompute::forwardOneSequence<1>(hl_lstm_value value, int frameSize) { - hl_gpu_lstm_forward(hppl::forward::lstm(), - value, - frameSize, - /* batchSize */ 1, - activeNode_, - activeGate_, - activeState_); -} - -template <> -void LstmCompute::backwardOneSequence<1>(hl_lstm_value value, - hl_lstm_grad grad, - int frameSize) { - hl_gpu_lstm_backward(hppl::backward::lstm(), - value, - grad, - frameSize, - /* batchSize */ 1, - activeNode_, - activeGate_, - activeState_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LstmCompute.h b/paddle/legacy/gserver/layers/LstmCompute.h 
deleted file mode 100644 index ac40c35ef1b0a11e61b5d1b11476ffe7daff6d5e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LstmCompute.h +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "ModelConfig.pb.h" -#include "hl_gpu.h" -#include "paddle/legacy/utils/Common.h" - -namespace paddle { - -class LstmCompute { - public: - void init(LayerConfig &config); - - /** - * LstmLayer batch compute API (forwardBatch, backwardBatch). - * If use batch compute api, lstm value(and grad) need to be batch structure. - * Compute order: - * forwardBatch: for 0 <= id < numBatch - * backwardBatch: for numBatch > id >= 0 - */ - template - void forwardBatch(hl_lstm_value value, int frameSize, int batchSize); - - template - void backwardBatch(hl_lstm_value value, - hl_lstm_grad grad, - int frameSize, - int batchSize); - - /** - * LstmLayer sequence compute API (forwardOneSequence, backwardOneSequence). 
- * Compute order(for each sequence): - * forwardOneSequence: - * if (!reversed) for 0 <= seqId < seqLength - * if (reversed) for seqLength > seqId >= 0 - * backwardOneSequence: - * if (!reversed) for seqLength > seqId >= 0 - * if (reversed) for 0 <= seqId < seqLength - */ - template - void forwardOneSequence(hl_lstm_value value, int frameSize); - template - void backwardOneSequence(hl_lstm_value value, - hl_lstm_grad grad, - int frameSize); - - public: - hl_activation_mode_t activeNode_; - hl_activation_mode_t activeGate_; - hl_activation_mode_t activeState_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LstmLayer.cpp b/paddle/legacy/gserver/layers/LstmLayer.cpp deleted file mode 100644 index 43a55d8d490faf0049d47bbca6ae1947d13e6be8..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LstmLayer.cpp +++ /dev/null @@ -1,805 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "LstmLayer.h" -#include "paddle/legacy/math/BaseMatrix.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Stat.h" - -DECLARE_bool(prev_batch_state); - -namespace paddle { - -REGISTER_LAYER(lstmemory, LstmLayer); - -bool LstmLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { - if (!Layer::init(layerMap, parameterMap)) return false; - CHECK_EQ(1U, inputLayers_.size()); - CHECK_EQ(1U, parameters_.size()); - CHECK_EQ(getSize() * getSize() * 4, parameters_[0]->getSize()); - CHECK_EQ(getSize() * 7, biasParameter_->getSize()); - weight_.reset(new Weight(getSize(), getSize() * 4, parameters_[0])); - if (biasParameter_.get() != NULL) { - bias_.reset(new Weight(1, getSize() * 7, biasParameter_)); - if (bias_->getW()) { - localBias_ = Matrix::create(nullptr, - /* height= */ 1, - getSize() * 4, - /* trans= */ false, - useGpu_); - checkIg_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkFg_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkOg_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - - localBias_->setData(bias_->getW()->getData()); - checkIg_->setData(bias_->getW()->getData() + getSize() * 4); - checkFg_->setData(bias_->getW()->getData() + getSize() * 5); - checkOg_->setData(bias_->getW()->getData() + getSize() * 6); - } - - if (bias_->getWGrad()) { - localBiasGrad_ = Matrix::create(nullptr, - /* height= */ 1, - getSize() * 4, - /* trans= */ false, - useGpu_); - checkIgGrad_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkFgGrad_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkOgGrad_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - localBiasGrad_->setData(bias_->getWGrad()->getData()); - 
checkIgGrad_->setData(bias_->getWGrad()->getData() + getSize() * 4); - checkFgGrad_->setData(bias_->getWGrad()->getData() + getSize() * 5); - checkOgGrad_->setData(bias_->getWGrad()->getData() + getSize() * 6); - } - } else { - LOG(FATAL) << "Bias should be here."; - } - reversed_ = config_.reversed(); - - // create IdentityActivation for using drop_rate - activation_.reset(ActivationFunction::create("")); - - LstmCompute::init(config_); - useBatch_ = true; - useSeqParallel_ = false; - if (useGpu_ && (getSize() == 32 || getSize() == 64)) { - useSeqParallel_ = true; - } - - return true; -} - -void LstmLayer::resetState() { - CHECK(!reversed_) << "state is not allowed for reversed lstmemory layer"; - Matrix::resizeOrCreate( - prevOutput_, 1, getSize(), /* trans= */ false, useGpu_); - Matrix::resizeOrCreate(prevState_, 1, getSize(), /* trans= */ false, useGpu_); - prevOutput_->resize(0, getSize()); - prevState_->resize(0, getSize()); - if (FLAGS_prev_batch_state) { - useBatch_ = true; - } else { - useBatch_ = false; - } -} - -void LstmLayer::setState(LayerStatePtr state) { - CHECK(state->value.size() == 2) << "two matrices are expected for LSTM state"; - prevOutput_->resize(state->value[0]->getHeight(), - state->value[0]->getWidth()); - prevState_->resize(state->value[1]->getHeight(), state->value[1]->getWidth()); - prevOutput_->copyFrom(*(state->value[0])); - prevState_->copyFrom(*(state->value[1])); -} - -LayerStatePtr LstmLayer::getState() { - LayerStatePtr res = std::make_shared(); - if (prevOutput_->getHeight() && prevOutput_->getWidth()) { - res->value.push_back(prevOutput_->clone(0, 0, useGpu_)); - res->value[0]->copyFrom(*prevOutput_); - res->value.push_back(prevState_->clone(0, 0, useGpu_)); - res->value[1]->copyFrom(*prevState_); - } else { - MatrixPtr output = - Matrix::create(1, getSize(), /* trans= */ false, useGpu_); - MatrixPtr state = Matrix::create(1, getSize(), /* trans= */ false, useGpu_); - output->resize(0, getSize()); - state->resize(0, 
getSize()); - res->value.push_back(output); - res->value.push_back(state); - } - return res; -} - -void LstmLayer::forward(PassType passType) { - REGISTER_TIMER_INFO("LstmFwTimer", getName().c_str()); - Layer::forward(passType); - - const Argument &input = getInput(0); - CHECK(input.sequenceStartPositions); - int batchSize = input.getBatchSize(); - resetOutput(batchSize, getSize()); - CHECK_EQ(getSize() * 4, input.value->getWidth()); - size_t numSequences = input.getNumSequences(); - const int *starts = input.sequenceStartPositions->getData(false); - CHECK_EQ(starts[numSequences], batchSize); - - Matrix::resizeOrCreate(gate_.value, - /* height= */ batchSize, - getSize() * 4, - /* trans= */ false, - useGpu_); - if (prevOutput_) { - size_t prevNumSeq = useBatch_ ? numSequences : 1; - if (prevOutput_->getHeight() == 0) { - prevOutput_->resize(prevNumSeq, getSize()); - prevState_->resize(prevNumSeq, getSize()); - prevOutput_->zeroMem(); - prevState_->zeroMem(); - } else { - CHECK_EQ(prevOutput_->getHeight(), prevNumSeq) - << "the number of sequences must be the same"; - } - Matrix::resizeOrCreate(totalState_, - prevState_->getHeight() + batchSize, - getSize(), - /*trans*/ false, - useGpu_); - state_.value = Matrix::create(nullptr, - /* height= */ batchSize, - getSize(), - /* trans= */ false, - useGpu_); - state_.value->setData(totalState_->getData() + - prevState_->getHeight() * getSize()); - } else { - Matrix::resizeOrCreate(state_.value, - /* height= */ batchSize, - getSize(), - /* trans= */ false, - useGpu_); - } - Matrix::resizeOrCreate(preOutput_.value, - /* height= */ batchSize, - getSize(), - /* trans= */ false, - useGpu_); - - if (!useBatch_) { - forwardSequence(batchSize, numSequences, starts, input.value); - } else { - if (!useSeqParallel_) { - forwardBatch(batchSize, numSequences, starts, input.value); - } else { - const int *starts = input.sequenceStartPositions->getData(useGpu_); - forwardSeqParallel(batchSize, numSequences, starts, input.value); - } - } - 
/* activation */ { forwardActivation(); } -} - -void LstmLayer::backward(const UpdateCallback &callback) { - REGISTER_TIMER_INFO("LstmBwTimer", getName().c_str()); - /* Do derivation */ { backwardActivation(); } - - const Argument &input = getInput(0); - CHECK(input.sequenceStartPositions); - int batchSize = input.getBatchSize(); - size_t numSequences = input.getNumSequences(); - - Matrix::resizeOrCreate(gate_.grad, - /* height= */ batchSize, - getSize() * 4, - /* trans= */ false, - useGpu_); - Matrix::resizeOrCreate(state_.grad, - /* height= */ batchSize, - getSize(), - /* trans= */ false, - useGpu_); - Matrix::resizeOrCreate(preOutput_.grad, - /* height= */ batchSize, - getSize(), - /* trans= */ false, - useGpu_); - state_.grad->zero(); - - const int *starts = input.sequenceStartPositions->getData(false); - if (!useBatch_) { - backwardSequence(batchSize, numSequences, starts, input.grad); - } else { - if (!useSeqParallel_) { - backwardBatch(batchSize, numSequences, starts, input.grad); - } else { - const int *starts = input.sequenceStartPositions->getData(useGpu_); - backwardSeqParallel(batchSize, numSequences, starts, input.grad); - } - } - - if (bias_) { - bias_->getParameterPtr()->incUpdate(callback); - } - weight_->getParameterPtr()->incUpdate(callback); -} - -void LstmLayer::forwardSequence(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputValue) { - REGISTER_TIMER_INFO("LstmFwSequenceTime", getName().c_str()); - gate_.value->assign(*inputValue); - if (bias_) { - gate_.value->addBias(*localBias_, 1); - } - - hl_lstm_value lstmValue; - lstmValue.checkIg = checkIg_->getData(); - lstmValue.checkFg = checkFg_->getData(); - lstmValue.checkOg = checkOg_->getData(); - lstmValue.gateValue = gate_.value->getData(); - lstmValue.stateValue = state_.value->getData(); - lstmValue.stateActiveValue = preOutput_.value->getData(); - lstmValue.outputValue = output_.value->getData(); - lstmValue.prevStateValue = nullptr; - if (reversed_) { - 
lstmValue.gateValue += (batchSize - 1) * getSize() * 4; - lstmValue.stateValue += (batchSize - 1) * getSize(); - lstmValue.stateActiveValue += (batchSize - 1) * getSize(); - lstmValue.outputValue += (batchSize - 1) * getSize(); - } - - auto nextFrame = [&lstmValue](bool reversed, int frameSize) { - lstmValue.prevStateValue = lstmValue.stateValue; - if (!reversed) { - lstmValue.gateValue += frameSize * 4; - lstmValue.stateValue += frameSize; - lstmValue.stateActiveValue += frameSize; - lstmValue.outputValue += frameSize; - } else { - lstmValue.gateValue -= frameSize * 4; - lstmValue.stateValue -= frameSize; - lstmValue.stateActiveValue -= frameSize; - lstmValue.outputValue -= frameSize; - } - }; - - MatrixPtr frameGate = Matrix::create(nullptr, - /* height= */ 1, - getSize() * 4, - /* trans= */ false, - useGpu_); - MatrixPtr frameOutput = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - - if (!reversed_) { - if (prevState_) { - lstmValue.prevStateValue = prevState_->getData(); - } - if (prevOutput_) { - frameGate->setData(lstmValue.gateValue); - frameGate->mul(*prevOutput_, *weight_->getW(), 1, 1); - } - } - AsyncGpuBlock asyncGpuBlock; - for (size_t n = 0; n < numSequences; ++n) { - int length; - if (!reversed_) { - length = starts[n + 1] - starts[n]; - } else { - length = starts[numSequences - n] - starts[numSequences - n - 1]; - } - for (int l = 0; l < length; ++l) { - if (useGpu_) { - LstmCompute::forwardOneSequence<1>(lstmValue, getSize()); - } else { - LstmCompute::forwardOneSequence<0>(lstmValue, getSize()); - } - - if (l != length - 1) { - frameOutput->setData(lstmValue.outputValue); - nextFrame(reversed_, getSize()); - frameGate->setData(lstmValue.gateValue); - frameGate->mul(*frameOutput, *weight_->getW(), 1, 1); - } - } - if (n != numSequences - 1) { - frameOutput->setData(lstmValue.outputValue); - nextFrame(reversed_, getSize()); - frameGate->setData(lstmValue.gateValue); - if (!reversed_) { - if (!prevState_) 
lstmValue.prevStateValue = nullptr; - if (prevOutput_) { - frameGate->mul(*frameOutput, *weight_->getW(), 1, 1); - } - } else { - lstmValue.prevStateValue = nullptr; - } - } - } - - if (!reversed_) { - if (prevState_) { - prevState_->assign(*state_.value->subMatrix(batchSize - 1, 1)); - } - if (prevOutput_) { - prevOutput_->assign(*output_.value->subMatrix(batchSize - 1, 1)); - } - } -} - -void LstmLayer::backwardSequence(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputGrad) { - REGISTER_TIMER_INFO("LstmBwSequenceTime", getName().c_str()); - MatrixPtr weightT = weight_->getW()->getTranspose(); - - hl_lstm_value lstmValue; - hl_lstm_grad lstmGrad; - lstmValue.checkIg = checkIg_->getData(); - lstmValue.checkFg = checkFg_->getData(); - lstmValue.checkOg = checkOg_->getData(); - lstmValue.gateValue = gate_.value->getData(); - lstmValue.stateValue = state_.value->getData(); - lstmValue.stateActiveValue = preOutput_.value->getData(); - lstmValue.outputValue = nullptr; - - if (bias_->getWGrad()) { - lstmGrad.checkIgGrad = checkIgGrad_->getData(); - lstmGrad.checkFgGrad = checkFgGrad_->getData(); - lstmGrad.checkOgGrad = checkOgGrad_->getData(); - } else { - lstmGrad.checkIgGrad = nullptr; - lstmGrad.checkFgGrad = nullptr; - lstmGrad.checkOgGrad = nullptr; - } - lstmGrad.gateGrad = gate_.grad->getData(); - lstmGrad.stateGrad = state_.grad->getData(); - lstmGrad.stateActiveGrad = nullptr; - lstmGrad.outputGrad = output_.grad->getData(); - - if (!reversed_) { - lstmValue.gateValue += (batchSize - 1) * getSize() * 4; - lstmGrad.gateGrad += (batchSize - 1) * getSize() * 4; - lstmValue.stateValue += (batchSize - 1) * getSize(); - lstmGrad.stateGrad += (batchSize - 1) * getSize(); - lstmValue.stateActiveValue += (batchSize - 1) * getSize(); - lstmGrad.outputGrad += (batchSize - 1) * getSize(); - lstmValue.prevStateValue = lstmValue.stateValue - getSize(); - lstmGrad.prevStateGrad = lstmGrad.stateGrad - getSize(); - } else { - lstmValue.prevStateValue 
= lstmValue.stateValue + getSize(); - lstmGrad.prevStateGrad = lstmGrad.stateGrad + getSize(); - } - - auto nextFrame = [&lstmValue, &lstmGrad](bool reversed, int frameSize) { - if (reversed) { - lstmValue.gateValue += frameSize * 4; - lstmGrad.gateGrad += frameSize * 4; - lstmValue.stateValue += frameSize; - lstmGrad.stateGrad += frameSize; - lstmValue.stateActiveValue += frameSize; - lstmGrad.outputGrad += frameSize; - lstmValue.prevStateValue = lstmValue.stateValue + frameSize; - lstmGrad.prevStateGrad = lstmGrad.stateGrad + frameSize; - } else { - lstmValue.gateValue -= frameSize * 4; - lstmGrad.gateGrad -= frameSize * 4; - lstmValue.stateValue -= frameSize; - lstmGrad.stateGrad -= frameSize; - lstmValue.stateActiveValue -= frameSize; - lstmGrad.outputGrad -= frameSize; - lstmValue.prevStateValue = lstmValue.stateValue - frameSize; - lstmGrad.prevStateGrad = lstmGrad.stateGrad - frameSize; - } - }; - - MatrixPtr frameGate = Matrix::create(nullptr, - /* height= */ 1, - getSize() * 4, - /* trans= */ false, - useGpu_); - MatrixPtr frameOutput = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - - { - AsyncGpuBlock asyncGpuBlock; - for (size_t n = 0; n < numSequences; ++n) { - int length; - int start; - if (reversed_) { - length = starts[n + 1] - starts[n]; - start = starts[n]; - } else { - length = starts[numSequences - n] - starts[numSequences - n - 1]; - start = starts[numSequences - n - 1]; - } - for (int l = 0; l < length; ++l) { - if (l == length - 1) { - lstmValue.prevStateValue = nullptr; - lstmGrad.prevStateGrad = nullptr; - } - if (useGpu_) { - LstmCompute::backwardOneSequence<1>(lstmValue, lstmGrad, getSize()); - } else { - LstmCompute::backwardOneSequence<0>(lstmValue, lstmGrad, getSize()); - } - if (l != length - 1) { - frameGate->setData(lstmGrad.gateGrad); - nextFrame(reversed_, getSize()); - frameOutput->setData(lstmGrad.outputGrad); - frameOutput->mul(*frameGate, *weightT, 1, 1); - } else { - 
nextFrame(reversed_, getSize()); - } - } - - if (weight_->getWGrad()) { - if (!reversed_) { - weight_->getWGrad()->mul( - *output_.value->subMatrix(start, length - 1)->getTranspose(), - *gate_.grad->subMatrix(start + 1, length - 1), - 1, - 1); - } else { - weight_->getWGrad()->mul( - *output_.value->subMatrix(start + 1, length - 1)->getTranspose(), - *gate_.grad->subMatrix(start, length - 1), - 1, - 1); - } - } - } - } - - if (inputGrad) { - inputGrad->add(*gate_.grad); - } - if (bias_ && bias_->getWGrad()) { - localBiasGrad_->collectBias(*gate_.grad, 1); - } -} - -void LstmLayer::forwardBatch(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputValue) { - REGISTER_TIMER_INFO("LstmFwBatchTime", getName().c_str()); - - hl_lstm_value lstmValue; - lstmValue.checkIg = checkIg_->getData(); - lstmValue.checkFg = checkFg_->getData(); - lstmValue.checkOg = checkOg_->getData(); - - if (!batchValue_) { - batchValue_.reset(new SequenceToBatch(useGpu_)); - } - batchValue_->resizeOrCreateBatch( - batchSize, numSequences, starts, reversed_, prevOutput_ ? 
true : false); - - batchValue_->resizeOrCreate(*output_.value); - batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */ true); - if (bias_) { - gate_.value->addBias(*localBias_, 1); - } - - { - int numBatch = batchValue_->getNumBatch(); - int batchSize = 0; - AsyncGpuBlock asyncGpuBlock; - if (prevState_) { - lstmValue.prevStateValue = totalState_->getData(); - } else { - lstmValue.prevStateValue = nullptr; - } - for (int n = 0; n < numBatch; n++) { - MatrixPtr outputValue = batchValue_->getBatchValue(n); - MatrixPtr gateValue = batchValue_->getBatchValue(*gate_.value, n); - batchSize = outputValue->getHeight(); - - if (n != 0) { - MatrixPtr batch1 = batchValue_->getBatchValue(n - 1, batchSize); - gateValue->mul(*batch1, *weight_->getW(), 1, 1); - } else if (prevOutput_) { - Matrix::resizeOrCreate(prevBatchOutput2_, - gateValue->getHeight(), - getSize(), - false, - useGpu_); - batchValue_->prevOutput2Batch(*prevOutput_, *prevBatchOutput2_); - gateValue->mul(*prevBatchOutput2_, *weight_->getW(), 1, 1); - - batchValue_->prevOutput2Batch(*prevState_, - *totalState_->subMatrix(0, numSequences)); - } - - lstmValue.gateValue = gateValue->getData(); - lstmValue.outputValue = outputValue->getData(); - lstmValue.stateValue = - batchValue_->getBatchValue(*state_.value, n)->getData(); - lstmValue.stateActiveValue = - batchValue_->getBatchValue(*preOutput_.value, n)->getData(); - { - if (useGpu_) { - LstmCompute::forwardBatch<1>(lstmValue, getSize(), batchSize); - } else { - LstmCompute::forwardBatch<0>(lstmValue, getSize(), batchSize); - } - } - lstmValue.prevStateValue = lstmValue.stateValue; - } - } - { - REGISTER_TIMER_INFO("batchToSeq", getName().c_str()); - batchValue_->copyBackSeq(*output_.value); - } - if (prevOutput_) { - getPrevBatchOutput(numSequences); - getPrevBatchState(numSequences); - } -} - -void LstmLayer::getPrevBatchOutput(size_t numSequences) { - prevOutput_->resize(numSequences, getSize()); - batchValue_->getSeqOutputFromBatch(*prevOutput_, - 
*batchValue_->getBatchValue()); -} - -void LstmLayer::getPrevBatchState(size_t numSequences) { - prevState_->resize(numSequences, getSize()); - batchValue_->getSeqOutputFromBatch(*prevState_, *state_.value); -} - -void LstmLayer::backwardBatch(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputGrad) { - REGISTER_TIMER_INFO("LstmBwBatchTime", getName().c_str()); - - hl_lstm_value lstmValue; - lstmValue.checkIg = checkIg_->getData(); - lstmValue.checkFg = checkFg_->getData(); - lstmValue.checkOg = checkOg_->getData(); - - hl_lstm_grad lstmGrad; - lstmGrad.stateActiveGrad = preOutput_.grad->getData(); - - if (bias_->getWGrad()) { - lstmGrad.checkIgGrad = checkIgGrad_->getData(); - lstmGrad.checkFgGrad = checkFgGrad_->getData(); - lstmGrad.checkOgGrad = checkOgGrad_->getData(); - } else { - lstmGrad.checkIgGrad = nullptr; - lstmGrad.checkFgGrad = nullptr; - lstmGrad.checkOgGrad = nullptr; - } - - if (!batchGrad_) { - batchGrad_.reset(new SequenceToBatch(useGpu_)); - } - batchGrad_->shareIndexWith(*batchValue_); - - { - REGISTER_TIMER_INFO("seqToBatch", getName().c_str()); - batchGrad_->copyFromSeq(*output_.grad); - } - - { - MatrixPtr weightT = weight_->getW()->getTranspose(); - int numBatch = batchGrad_->getNumBatch(); - int batchSize = 0; - AsyncGpuBlock asyncGpuBlock; - for (int n = (int)numBatch - 1; n >= 0; n--) { - MatrixPtr outputGrad = batchGrad_->getBatchValue(n); - MatrixPtr gateGrad = batchGrad_->getBatchValue(*gate_.grad, n); - - lstmValue.gateValue = - batchGrad_->getBatchValue(*gate_.value, n)->getData(); - lstmValue.stateValue = - batchGrad_->getBatchValue(*state_.value, n)->getData(); - lstmValue.stateActiveValue = - batchGrad_->getBatchValue(*preOutput_.value, n)->getData(); - lstmGrad.stateGrad = - batchGrad_->getBatchValue(*state_.grad, n)->getData(); - lstmGrad.gateGrad = gateGrad->getData(); - lstmGrad.outputGrad = outputGrad->getData(); - { - batchSize = outputGrad->getHeight(); - if (n != 0) { - lstmValue.prevStateValue = 
- batchGrad_->getBatchValue(*state_.value, n - 1)->getData(); - lstmGrad.prevStateGrad = - batchGrad_->getBatchValue(*state_.grad, n - 1)->getData(); - } else { - if (prevState_) { - lstmValue.prevStateValue = totalState_->getData(); - lstmGrad.prevStateGrad = nullptr; - } else { - lstmValue.prevStateValue = nullptr; - lstmGrad.prevStateGrad = nullptr; - } - } - if (useGpu_) { - LstmCompute::backwardBatch<1>( - lstmValue, lstmGrad, getSize(), batchSize); - } else { - LstmCompute::backwardBatch<0>( - lstmValue, lstmGrad, getSize(), batchSize); - } - } - - if (n != 0) { - MatrixPtr tmp = batchGrad_->getBatchValue(n - 1, batchSize); - tmp->mul(*gateGrad, *weightT, 1, 1); - } - - if (n != 0 && weight_->getWGrad()) { - /* backward weight */ - MatrixPtr outputValue = batchValue_->getBatchValue(n - 1, batchSize); - weight_->getWGrad()->mul(*outputValue->getTranspose(), *gateGrad, 1, 1); - } else if (prevOutput_ && weight_->getWGrad()) { - weight_->getWGrad()->mul( - *prevBatchOutput2_->getTranspose(), *gateGrad, 1, 1); - } - } - } - - if (inputGrad) { - batchGrad_->add(*inputGrad, *gate_.grad, /* seq2batch */ false); - } - if (bias_ && bias_->getWGrad()) { - localBiasGrad_->collectBias(*gate_.grad, /* scale */ 1); - } -} - -void LstmLayer::forwardSeqParallel(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputValue) { - REGISTER_TIMER_INFO("LstmFwSeqParallelTime", getName().c_str()); - gate_.value->assign(*inputValue); - if (bias_) { - gate_.value->addBias(*localBias_, /* scale */ 1); - } - - real *gateValue = gate_.value->getData(); - real *stateValue = state_.value->getData(); - real *outputValue = output_.value->getData(); - real *preOutputValue = preOutput_.value->getData(); - real *checkIg = checkIg_->getData(); - real *checkFg = checkFg_->getData(); - real *checkOg = checkOg_->getData(); - real *weight = weight_->getW()->getData(); - hl_lstm_parallel_forward(gateValue, - stateValue, - preOutputValue, - outputValue, - checkIg, - checkFg, - 
checkOg, - weight, - starts, - getSize(), - numSequences, - reversed_, - activeNode_, - activeGate_, - activeState_); -} - -void LstmLayer::backwardSeqParallel(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputGrad) { - REGISTER_TIMER_INFO("LstmBwSeqParallelTime", getName().c_str()); - real *gateValue = gate_.value->getData(); - real *gateGrad = gate_.grad->getData(); - real *stateValue = state_.value->getData(); - real *stateGrad = state_.grad->getData(); - real *preOutputValue = preOutput_.value->getData(); - real *preOutputGrad = preOutput_.grad->getData(); - real *checkIg = checkIg_->getData(); - real *checkFg = checkFg_->getData(); - real *checkOg = checkOg_->getData(); - real *outputGrad = output_.grad->getData(); - real *weight = weight_->getW()->getData(); - - real *checkIgGrad; - real *checkFgGrad; - real *checkOgGrad; - if (bias_->getWGrad()) { - checkIgGrad = checkIgGrad_->getData(); - checkFgGrad = checkFgGrad_->getData(); - checkOgGrad = checkOgGrad_->getData(); - } else { - checkIgGrad = nullptr; - checkFgGrad = nullptr; - checkOgGrad = nullptr; - } - - hl_lstm_parallel_backward_data(gateValue, - gateGrad, - stateValue, - stateGrad, - preOutputValue, - preOutputGrad, - outputGrad, - checkIg, - checkIgGrad, - checkFg, - checkFgGrad, - checkOg, - checkOgGrad, - weight, - starts, - getSize(), - numSequences, - reversed_, - activeNode_, - activeGate_, - activeState_); - - if (inputGrad) { - inputGrad->add(*gate_.grad); - } - if (bias_ && bias_->getWGrad()) { - localBiasGrad_->collectBias(*gate_.grad, 1); - } - - real *outputValue = output_.value->getData(); - if (weight_->getWGrad()) { - real *weightGrad = weight_->getWGrad()->getData(); - hl_lstm_parallel_backward_weight(weightGrad, - outputValue, - gateGrad, - starts, - getSize(), - batchSize, - numSequences, - reversed_); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LstmLayer.h b/paddle/legacy/gserver/layers/LstmLayer.h deleted file mode 100644 
index 8c8b382f505d791fb1ef4265dcfe95046aa832fb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LstmLayer.h +++ /dev/null @@ -1,221 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "LstmCompute.h" -#include "SequenceToBatch.h" -#include "paddle/legacy/math/BaseMatrix.h" -#include "paddle/legacy/math/Matrix.h" -namespace paddle { - -/** - * @brief LstmLayer takes 1 input layer with size * 4. - * Input layer is diveded into 4 equal parts: - * (input_s, input_ig, input_fg, input_og) - * - * For each sequence [start, end] it performs the following computation: - * @code - * output_{i} = actState(state_{i}) * actGate(outputGate_{i}) - * state_{i} = actInput(input_s_{i} + bias_s + - * output_{i-1} * recurrIW) * actGate(inputGate_{i}) + - * actGate(forgetGate_{i}) * state_{i-1} - * inputGate = input_ig_{i} + bias_ig + output_{i-1} * recurrIGW + - * state_{i-1} * inputCheck - * ouputGate = input_og_{i} + bias_og + output_{i-1} * recurrOGW + - * state_{i} * outputCheck - * forgetGate = input_fg_{i} + bias_fg + output_{i-1} * recurrFGW + - * state_{i-1} * forgetCheck - * @endcode - * - * - parameter[0] consists of (recurrIW, recurrIGW, recurrFGW, recurrOGW) - * - baisParameter consists of - * (bias_s, bias_ig, bias_og, bias_fg, inputCheck, forgetCheck, outputCheck) - * - * - actInput is defined by config active_type. 
- * - actState is defined by config active_state_type. - * - actGate is defined by config actvie_gate_type. - * - * There are two ways to compute, namely one sequence by one sequence or - * one batch by one batch. By default and no setting pre_batch_state true, - * it will compute batch by batch. - * - * The formula in the paper is as follows: - * \f[ - * i_t = \sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i) \\ - * f_t = \sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f) \\ - * \tilde{c_t} = tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c) \\ - * o_t = \sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o) \\ - * c_t = f_t * c_{t-1} + i_t * \tilde{c_t} \\ - * h_t = o_t tanh(c_t) - * \f] - * - * @note These \f$W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}\f$ - * operations on the input sequence were NOT included in LstmLayer. So - * users should use fc_layer or mixed_layer before lstm_later. - * - * The weight ([size, 4*size]) contains \f$W_{hi}, W_{hf}, W_{hc}, W_{ho}\f$. - * The bias contains \f$b_i, b_f, b_c, b_o\f$ and \f$W_{ci}, W_{cf}, W_{co}\f$. - */ - -class LstmLayer : public Layer, public LstmCompute { - public: - explicit LstmLayer(const LayerConfig &config) : Layer(config) {} - - bool init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) override; - - void forward(PassType passType) override; - - void backward(const UpdateCallback &callback) override; - - void resetState() override; - - void setState(LayerStatePtr state) override; - - LayerStatePtr getState() override; - - protected: - /** - * @brief Compute lstm forward one sequence by one sequence. - * @param batchSize The batchSize is not equal to the batch_size in - * the config file. It is the total words number of all samples - * in this forward batch. - * @param numSequences The sample number. It is equal to the batch_size - * in the config file. - * @param starts Each start position of each samples. - * @param inputValue The input values. 
- */ - void forwardSequence(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputValue); - /** - * Compute lstm backward one sequence by one sequence. - */ - void backwardSequence(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputGrad); - - /** - * Compute lstm forward one batch by one batch. The batch value is - * reorganized by SequenceToBatch class. The batch output value will - * be convert into sequence value after finishing forward. Here, one - * batch contains one word of each sample. If the length of each sample - * is not equality, the batch will not pads zero and contains less words. - * The total batch numbers are the max length of the sequence. The details - * can refer to SequenceToBatch class. On GPU mode, it will launch GPU - * kernel for loop. - * - * @code - * for (int i = 0; i < numBatch(max_sequence_length); ++i) { - * compute one batch. - * } - * @endcode - */ - void forwardBatch(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputValue); - /** - * Compute lstm backward one batch by one batch. - */ - void backwardBatch(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputGrad); - - /** - * This function only supports GPU. It not need to reorganize input into - * batch value. It will launch one kernel to parallelly compute forward - * propagation in sequence level. - */ - void forwardSeqParallel(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputValue); - /** - * Backward propagation corresponding to forwardSeqParallel. - */ - void backwardSeqParallel(int batchSize, - size_t numSequences, - const int *starts, - MatrixPtr inputGrad); - /** - * This function is used for sequence generation and get output after - * forwardBatch. - */ - void getPrevBatchOutput(size_t numSequences); - /** - * This function is used for sequence generation and get state after - * forwardBatch. 
- */ - void getPrevBatchState(size_t numSequences); - - protected: - /// Learned parameters, shape: (size, 4*size). - /// The weight ([size, 4*size]) contains \f$W_{hi}, W_{hf}, W_{hc}, W_{ho}\f$. - std::unique_ptr weight_; - /// Learned bias parameter, shape: (1, 7 * size). - /// The bias contains \f$b_i, b_f, b_c, b_o\f$ and \f$W_{ci}, W_{cf}, - /// W_{co}\f$. - std::unique_ptr bias_; - /// The reeal bias, point to \f$b_i, b_f, b_c, b_o\f$. - MatrixPtr localBias_; - /// The peephole connection for input gate. - MatrixPtr checkIg_; - /// The peephole connection for forget gate. - MatrixPtr checkFg_; - /// The peephole connection for output gate. - MatrixPtr checkOg_; - /// The gradient of real bias - MatrixPtr localBiasGrad_; - /// The gradient of peephole connection for input gates. - MatrixPtr checkIgGrad_; - /// The gradient of peephole connection for forget gates. - MatrixPtr checkFgGrad_; - /// The gradient of peephole connection for output gates. - MatrixPtr checkOgGrad_; - - /// Stores the cell state of previous time step, namely \f$c_{t-1}\f$. - Argument state_; - /// Stores the hidden of previous time step, namely \f$h_{t-1}\f$. - Argument preOutput_; - /// Stores the value and gradient of four gates, namely - /// \f$i_t, f_t, o_t, c_t\f$. - Argument gate_; - /// Whether it is reversed lstm. - bool reversed_; - /// Whether to use batch method to compute. - bool useBatch_; - /// Whether to use sequence parallell method to compute. - bool useSeqParallel_; - /// batchValue_ is used in method of batch calculation. It stores the - /// batch value after reorganized input. - std::unique_ptr batchValue_; - /// The gradient of batchValue_. - std::unique_ptr batchGrad_; - - /// Used in generation and stores the state of previous time step. - MatrixPtr prevState_; - /// Used in generation and stores the output of previous time step. - MatrixPtr prevOutput_; - MatrixPtr prevBatchOutput2_; - /// The total state. 
- MatrixPtr totalState_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/LstmStepLayer.cpp b/paddle/legacy/gserver/layers/LstmStepLayer.cpp deleted file mode 100644 index f02f8ad62fe4d4cb4bb580923200b398c8483a99..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/LstmStepLayer.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "LstmCompute.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/* - * LstmStepLayer used in recurrent layer group. 
- */ -class LstmStepLayer : public Layer, public LstmCompute { - protected: - Argument state_; - Argument gate_; - Argument stateActive_; - MatrixPtr checkIg_, checkFg_, checkOg_; - MatrixPtr checkIgGrad_, checkFgGrad_, checkOgGrad_; - std::unique_ptr weight_; - - public: - explicit LstmStepLayer(const LayerConfig& config) : Layer(config) {} - - ~LstmStepLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(lstm_step, LstmStepLayer); - -bool LstmStepLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!Layer::init(layerMap, parameterMap)) return false; - CHECK_EQ(2U, inputLayers_.size()); - - checkIg_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkFg_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkOg_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkIgGrad_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkFgGrad_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - checkOgGrad_ = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - - if (biasParameter_.get() != NULL) { - CHECK_EQ(getSize() * 3, biasParameter_->getSize()); - weight_.reset(new Weight(1, getSize() * 3, biasParameter_)); - if (weight_->getW()) { - real* data = weight_->getW()->getData(); - checkIg_->setData(data); - checkFg_->setData(data + getSize()); - checkOg_->setData(data + getSize() * 2); - } - - if (weight_->getWGrad()) { - real* data = weight_->getWGrad()->getData(); - checkIgGrad_->setData(data); - checkFgGrad_->setData(data + getSize()); - checkOgGrad_->setData(data + getSize() * 2); - } - } - - 
setOutput("state", &state_); - LstmCompute::init(config_); - return true; -} - -void LstmStepLayer::forward(PassType passType) { - REGISTER_TIMER_INFO("LstmRecurrentFwTime", getName().c_str()); - Layer::forward(passType); - - const Argument& input = getInput(0); - const Argument& prevState = getInput(1); - CHECK_EQ(getSize() * 4, input.value->getWidth()); - CHECK_EQ(getSize(), prevState.value->getWidth()); - int batchSize = input.getBatchSize(); - reserveOutput(batchSize, getSize()); - resetSpecifyOutput(state_, - batchSize, - getSize(), - /* isValueClean */ false, - /* isGradClean */ true); - resetSpecifyOutput(gate_, - batchSize, - getSize() * 4, - /* isValueClean */ false, - /* isGradClean */ false); - resetSpecifyOutput(stateActive_, - batchSize, - getSize(), - /* isValueClean */ false, - /* isGradClean */ false); - gate_.value->assign(*input.value); - - hl_lstm_value lstmValue; - lstmValue.checkIg = checkIg_->getData(); - lstmValue.checkFg = checkFg_->getData(); - lstmValue.checkOg = checkOg_->getData(); - lstmValue.gateValue = gate_.value->getData(); - lstmValue.stateValue = state_.value->getData(); - lstmValue.prevStateValue = prevState.value->getData(); - lstmValue.stateActiveValue = stateActive_.value->getData(); - lstmValue.outputValue = output_.value->getData(); - - if (useGpu_) { - LstmCompute::forwardBatch<1>(lstmValue, getSize(), batchSize); - } else { - LstmCompute::forwardBatch<0>(lstmValue, getSize(), batchSize); - } -} - -void LstmStepLayer::backward(const UpdateCallback& callback) { - REGISTER_TIMER_INFO("LstmRecurrentBwTime", getName().c_str()); - const Argument& input = getInput(0); - const Argument& prevState = getInput(1); - int batchSize = input.getBatchSize(); - - hl_lstm_value lstmValue; - hl_lstm_grad lstmGrad; - lstmValue.checkIg = checkIg_->getData(); - lstmValue.checkFg = checkFg_->getData(); - lstmValue.checkOg = checkOg_->getData(); - lstmValue.gateValue = gate_.value->getData(); - lstmValue.prevStateValue = 
prevState.value->getData(); - lstmValue.stateValue = state_.value->getData(); - lstmValue.stateActiveValue = stateActive_.value->getData(); - - lstmGrad.gateGrad = gate_.grad->getData(); - if (prevState.grad) { - lstmGrad.prevStateGrad = prevState.grad->getData(); - } else { - lstmGrad.prevStateGrad = nullptr; - } - lstmGrad.stateGrad = state_.grad->getData(); - lstmGrad.stateActiveGrad = stateActive_.grad->getData(); - lstmGrad.outputGrad = output_.grad->getData(); - lstmGrad.checkIgGrad = checkIgGrad_->getData(); - lstmGrad.checkFgGrad = checkFgGrad_->getData(); - lstmGrad.checkOgGrad = checkOgGrad_->getData(); - - if (useGpu_) { - LstmCompute::backwardBatch<1>(lstmValue, lstmGrad, getSize(), batchSize); - } else { - LstmCompute::backwardBatch<0>(lstmValue, lstmGrad, getSize(), batchSize); - } - - if (input.grad) { - input.grad->add(*gate_.grad); - } - - if (weight_) { - weight_->getParameterPtr()->incUpdate(callback); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MDLstmLayer.cpp b/paddle/legacy/gserver/layers/MDLstmLayer.cpp deleted file mode 100644 index 4838183e8ccb213aa249fddf5102026198e98d3c..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MDLstmLayer.cpp +++ /dev/null @@ -1,769 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "LstmLayer.h" -#include "paddle/legacy/math/BaseMatrix.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -class CoordIterator { - public: - std::vector dims_; - std::vector directions_; - std::vector curPos_; - bool end_; - - void step(size_t d, bool reversed) { - if (directions_[d] ^ reversed) { - if (curPos_[d] == dims_[d] - 1) { - curPos_[d] = 0; - if (d) { - step(d - 1, reversed); - } else { - end_ = true; - } - } else { - curPos_[d]++; - } - } else { - if (curPos_[d] == 0) { - curPos_[d] = dims_[d] - 1; - if (d) { - step(d - 1, reversed); - } else { - end_ = true; - } - } else { - curPos_[d]--; - } - } - } - - public: - CoordIterator(std::vector dim, std::vector directions) - : dims_(dim), directions_(directions), end_(false) { - CHECK_EQ(dims_.size(), directions_.size()); - for (size_t i = 0; i < dims_.size(); i++) { - curPos_.push_back(-1); - } - } - CoordIterator& operator++() { - step(dims_.size() - 1, false); - return *this; - } - - CoordIterator& operator--() { - step(dims_.size() - 1, true); - return *this; - } - - std::vector& curPos() { return curPos_; } - - int offset() { - int offset = curPos_[0]; - for (size_t i = 1; i < dims_.size(); i++) { - offset = offset * dims_[i] + curPos_[i]; - } - return offset; - } - - int offset(const std::vector& pos) { - int offset = pos[0]; - for (size_t i = 1; i < dims_.size(); i++) { - offset = offset * dims_[i] + pos[i]; - } - return offset; - } - - std::vector& begin() { - for (size_t i = 0; i < dims_.size(); i++) { - curPos_[i] = directions_[i] ? 0 : dims_[i] - 1; - } - end_ = false; - return curPos_; - } - - std::vector& rbegin() { - for (size_t i = 0; i < dims_.size(); i++) { - curPos_[i] = directions_[i] ? 
dims_[i] - 1 : 0; - } - end_ = false; - return curPos_; - } - - bool end() { return end_; } - - bool getPrePos(const std::vector& delays, - int idx, - std::vector& prePos) { - bool isAvial = true; - prePos.clear(); - prePos.reserve(directions_.size()); - for (size_t i = 0; i < directions_.size(); i++) { - if (int(i) == idx) { - prePos.push_back(curPos_[i] + delays[i] * (directions_[i] ? 1 : -1)); - if (prePos[i] < 0) { - prePos[i] = 0; - isAvial = false; - } - if (prePos[i] >= dims_[i]) { - prePos[i] = dims_[i] - 1; - isAvial = false; - } - } else { - prePos.push_back(curPos_[i]); - } - } - return isAvial; - } - - bool getNextPos(const std::vector& delays, - int idx, - std::vector& nextPos) { - bool isAvial = true; - nextPos.clear(); - nextPos.reserve(directions_.size()); - for (size_t i = 0; i < directions_.size(); i++) { - if (int(i) == idx) { - nextPos.push_back(curPos_[i] - delays[i] * (directions_[i] ? 1 : -1)); - if (nextPos[i] < 0) { - nextPos[i] = 0; - isAvial = false; - } - if (nextPos[i] >= dims_[i]) { - nextPos[i] = dims_[i] - 1; - isAvial = false; - } - } else { - nextPos.push_back(curPos_[i]); - } - } - return isAvial; - } -}; -/* - * MDLstmLayer takes 1 input layer with size * (3+numDims). - * For each sequence [start, end] it performs the following computation: - * out_i = actState(state_i) * actGate(outputGate_i) - * - * For example the image with 2 dims, we take the scanning order from left-top - * to right-bottom, then the 2 previous states of the current pixels are the - * ones located at left and top. And each of them has a independent forget gate. 
- * - * state_i = actInput(input_i) * actGate(inputGate_i) + - * \sum{j}(actGate(forgetGate_i_j) * state_prev_i_j) - * - * inputGate = input_i * inputW + \sum{j}(output_prev_i_j * recurrInputW_j) + - * \sum{j}(state_prev_i_j * inputCheck_j) - * - * ouputGate = input_i * outputW + \sum{j}(output_prev_i_j * recurrOutputW_j) + - * state_i * outputCheck - * - * forgetGate_j = input_i * forgetW_j + \sum{j}(output_prev_i_j * - * recurrForgetW_j) + \sum{j}(state_prev_i_j * forgetCheck_j) - * - * IG Layer: (Input, InputGate, ForgetGates, OutputGate) * OutputSize - * */ - -class MDLstmLayer : public LstmLayer { - public: - explicit MDLstmLayer(const LayerConfig& config) : LstmLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback) override; - - protected: - void forwardOneSequence(int start, CoordIterator& coordIter); - void backwardOneSequence(int start, CoordIterator& coordIter); - void forwardGate2OutputSequence(int start, CoordIterator& coordIter); - void backwardGate2OutputSequence(int start, CoordIterator& coordIter); - - protected: - std::vector frameInputGate_; - std::vector frameForgetGate_; - std::vector frameOutputGate_; - std::vector frameInputNode_; - std::vector frameGate_; - std::vector frameState_; - std::vector framePreOutput_; - std::vector frameOutput_; - - // Activation - std::unique_ptr activationGate_; - std::unique_ptr activationState_; - - int numDims_; - size_t numBlocks_; - std::vector directions_; - std::vector delays_; - std::vector> dimsV_; -}; - -REGISTER_LAYER(mdlstmemory, MDLstmLayer); - -bool MDLstmLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!Layer::init(layerMap, parameterMap)) return false; - CHECK_EQ(1U, inputLayers_.size()); - CHECK_EQ(1U, parameters_.size()); - - numBlocks_ = getSize(); - numDims_ = config_.directions_size(); - CHECK_EQ(numBlocks_ * 
numBlocks_ * (3 + numDims_), parameters_[0]->getSize()); - - // inode(1), ig(1), fg(numDims_), og(1), peepIg(1), peepFg(numDims_), - // peepOg(1), then size of localBias_ is 3+numDims_ - CHECK_EQ(numBlocks_ * (5 + 2 * numDims_), biasParameter_->getSize()); - weight_.reset( - new Weight(numBlocks_, numBlocks_ * (3 + numDims_), parameters_[0])); - if (biasParameter_.get() != NULL) { - bias_.reset(new Weight(1, numBlocks_ * (5 + 2 * numDims_), biasParameter_)); - localBias_ = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_ * (3 + numDims_), - /* trans= */ false, - useGpu_); - checkIg_ = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - checkFg_ = Matrix::create(nullptr, - /* height= */ numDims_, - numBlocks_, - /* trans= */ false, - useGpu_); - checkOg_ = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - localBiasGrad_ = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_ * (3 + numDims_), - /* trans= */ false, - useGpu_); - checkIgGrad_ = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - checkFgGrad_ = Matrix::create(nullptr, - /* height= */ numDims_, - numBlocks_, - /* trans= */ false, - useGpu_); - checkOgGrad_ = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - - localBias_->setData(bias_->getW()->getData()); - checkIg_->setData(bias_->getW()->getData() + numBlocks_ * (3 + numDims_)); - checkFg_->setData(bias_->getW()->getData() + numBlocks_ * (4 + numDims_)); - checkOg_->setData(bias_->getW()->getData() + - numBlocks_ * (4 + 2 * numDims_)); - - if (bias_->getWGrad()) { - localBiasGrad_->setData(bias_->getWGrad()->getData()); - checkIgGrad_->setData(bias_->getWGrad()->getData() + - numBlocks_ * (3 + numDims_)); - checkFgGrad_->setData(bias_->getWGrad()->getData() + - numBlocks_ * (4 + numDims_)); - checkOgGrad_->setData(bias_->getWGrad()->getData() + - numBlocks_ * (4 + 2 * 
numDims_)); - } - } else { - LOG(FATAL) << "Bias should be here."; - } - for (int i = 0; i < numDims_; i++) { - directions_.push_back(config_.directions(i)); - } - for (int i = 0; i < numDims_; i++) { - delays_.push_back(-1); - } - activationGate_.reset(ActivationFunction::create(config_.active_gate_type())); - activationState_.reset( - ActivationFunction::create(config_.active_state_type())); - - return true; -} - -void MDLstmLayer::forward(PassType passType) { - Layer::forward(passType); - - const Argument& input = getInput(0); - CHECK(input.sequenceStartPositions); - int batchSize = input.getBatchSize(); - int numSequences = input.getNumSequences(); - resetOutput(batchSize, numBlocks_); - CHECK_EQ(numBlocks_ * (3 + numDims_), input.value->getWidth()); - const int* starts = input.sequenceStartPositions->getData(false); - CHECK_EQ(starts[numSequences], batchSize); - - int* dimsData = input.cpuSequenceDims->getData(); - CHECK_EQ(int(input.cpuSequenceDims->getSize()), numDims_* numSequences); - - for (int i = 0; i < numSequences; i++) { - std::vector dims; - for (int j = 0; j < numDims_; j++) { - dims.push_back(dimsData[i * numDims_ + j]); - } - dimsV_.push_back(dims); - } - - frameInputGate_.reserve(batchSize); - frameForgetGate_.reserve(batchSize); - frameOutputGate_.reserve(batchSize); - frameInputNode_.reserve(batchSize); - frameGate_.reserve(batchSize); - frameState_.reserve(batchSize); - framePreOutput_.reserve(batchSize); - frameOutput_.reserve(batchSize); - - Matrix::resizeOrCreate(gate_.value, - /* height= */ batchSize, - numBlocks_ * (3 + numDims_), - /* trans= */ false, - useGpu_); - - for (int i = frameGate_.size(); i < batchSize; i++) { - Argument arg; - arg.value = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_ * (3 + numDims_), - /* trans= */ false, - useGpu_); - arg.grad = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_ * (3 + numDims_), - /* trans= */ false, - useGpu_); - frameGate_.push_back(arg); - } - for (int i = 
frameInputGate_.size(); i < batchSize; i++) { - Argument arg; - arg.value = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - arg.grad = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - frameInputGate_.push_back(arg); - } - for (int i = frameForgetGate_.size(); i < batchSize; i++) { - Argument arg; - arg.value = Matrix::create(nullptr, - /* height= */ numDims_, - numBlocks_, - /* trans= */ false, - useGpu_); - arg.grad = Matrix::create(nullptr, - /* height= */ numDims_, - numBlocks_, - /* trans= */ false, - useGpu_); - frameForgetGate_.push_back(arg); - } - for (int i = frameOutputGate_.size(); i < batchSize; i++) { - Argument arg; - arg.value = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - arg.grad = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - frameOutputGate_.push_back(arg); - } - for (int i = frameInputNode_.size(); i < batchSize; i++) { - Argument arg; - arg.value = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - arg.grad = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - frameInputNode_.push_back(arg); - } - for (int i = frameState_.size(); i < batchSize; i++) { - Argument arg; - arg.value = Matrix::create( - /* height= */ 1, numBlocks_, /* trans= */ false, useGpu_); - frameState_.push_back(arg); - } - for (int i = framePreOutput_.size(); i < batchSize; i++) { - Argument arg; - arg.value = Matrix::create( - /* height= */ 1, numBlocks_, /* trans= */ false, useGpu_); - framePreOutput_.push_back(arg); - } - for (int i = frameOutput_.size(); i < batchSize; i++) { - Argument arg; - arg.value = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - arg.grad = Matrix::create(nullptr, - /* height= */ 1, - numBlocks_, - /* trans= */ false, - useGpu_); - 
frameOutput_.push_back(arg); - } - - for (int i = 0; i < batchSize; i++) { - frameOutput_[i].value->setData(output_.value->getData() + i * numBlocks_); - frameGate_[i].value->setData(gate_.value->getData() + - i * numBlocks_ * (3 + numDims_)); - frameInputNode_[i].value->setData(gate_.value->getData() + - i * numBlocks_ * (3 + numDims_) + - numBlocks_ * 0); - frameInputGate_[i].value->setData(gate_.value->getData() + - i * numBlocks_ * (3 + numDims_) + - numBlocks_ * 1); - frameForgetGate_[i].value->setData(gate_.value->getData() + - i * numBlocks_ * (3 + numDims_) + - numBlocks_ * 2); - frameOutputGate_[i].value->setData(gate_.value->getData() + - i * numBlocks_ * (3 + numDims_) + - numBlocks_ * (2 + numDims_)); - } - - AsyncGpuBlock asyncGpuBlock; - gate_.value->assign(*input.value); - - if (bias_) { - gate_.value->addBias(*localBias_, 1); - } - - for (int i = 0; i < numSequences; i++) { - CoordIterator coordIter(dimsV_[i], directions_); - forwardOneSequence(starts[i], coordIter); - } -} - -void MDLstmLayer::forwardGate2OutputSequence(int start, - CoordIterator& coordIter) { - int idxCurr = start + coordIter.offset(); - std::vector preOffsetV; - preOffsetV.reserve(numDims_); - for (int i = 0; i < numDims_; i++) { - std::vector prePos; - if (coordIter.getPrePos(delays_, i, prePos)) { - preOffsetV[i] = coordIter.offset(prePos); - } else { - preOffsetV[i] = -1; - } - } - - for (int i = 0; i < numDims_; i++) { - if (preOffsetV[i] >= 0) { - frameInputGate_[idxCurr].value->addDotMul( - *frameState_[start + preOffsetV[i]].value, *checkIg_, 1.0, 1.0); - - MatrixPtr fgGateOneDim = Matrix::create( - frameForgetGate_[idxCurr].value->getData() + i * numBlocks_, - 1, - numBlocks_, - false, - useGpu_); - MatrixPtr checkFgOneDim = - Matrix::create(checkFg_->getData() + i * numBlocks_, - 1.0, - numBlocks_, - false, - useGpu_); - fgGateOneDim->addDotMul( - *frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0); - } - } - auto status = 
activationGate_->forward(frameInputGate_[idxCurr]); - status.check(); - status = activationGate_->forward(frameForgetGate_[idxCurr]); - status.check(); - status = activation_->forward(frameInputNode_[idxCurr]); - status.check(); - - frameState_[idxCurr].value->zeroMem(); - for (int i = 0; i < numDims_; i++) { - if (preOffsetV[i] >= 0) { - MatrixPtr fgGateOneDim = Matrix::create( - frameForgetGate_[idxCurr].value->getData() + i * numBlocks_, - 1, - numBlocks_, - false, - useGpu_); - frameState_[idxCurr].value->addDotMul( - *frameState_[start + preOffsetV[i]].value, *fgGateOneDim, 1.0, 1.0); - } - } - frameState_[idxCurr].value->addDotMul(*frameInputNode_[idxCurr].value, - *frameInputGate_[idxCurr].value, - 1.0, - 1.0); - - frameOutputGate_[idxCurr].value->addDotMul( - *frameState_[idxCurr].value, *checkOg_, 1.0, 1.0); - status = activationGate_->forward(frameOutputGate_[idxCurr]); - status.check(); - - framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value)); - status = activationState_->forward(framePreOutput_[idxCurr]); - status.check(); - - frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value, - *frameOutputGate_[idxCurr].value); -} - -void MDLstmLayer::forwardOneSequence(int start, CoordIterator& coordIter) { - for (coordIter.begin(); !coordIter.end(); ++coordIter) { - int offset = coordIter.offset(); - for (int i = 0; i < numDims_; i++) { - std::vector prePos; - if (coordIter.getPrePos(delays_, i, prePos)) { - int preOffset = coordIter.offset(prePos); - frameGate_[start + offset].value->mul( - *frameOutput_[start + preOffset].value, *weight_->getW(), 1.0, 1.0); - } - } - forwardGate2OutputSequence(start, coordIter); - } -} - -void MDLstmLayer::backward(const UpdateCallback& callback) { - const Argument& input = getInput(0); - CHECK(input.sequenceStartPositions); - int batchSize = input.getBatchSize(); - const int* starts = input.sequenceStartPositions->getData(false); - size_t numSequences = input.getNumSequences(); - - 
Matrix::resizeOrCreate(gate_.grad, - /* height= */ batchSize, - numBlocks_ * (3 + numDims_), - /* trans= */ false, - useGpu_); - - for (int i = 0; i < batchSize; i++) { - if (frameState_[i].grad == NULL) - frameState_[i].grad = Matrix::create( - /* height= */ 1, numBlocks_, /* trans= */ false, useGpu_); - } - for (int i = 0; i < batchSize; i++) { - if (framePreOutput_[i].grad == NULL) - framePreOutput_[i].grad = Matrix::create( - /* height= */ 1, numBlocks_, /* trans= */ false, useGpu_); - } - - for (int i = 0; i < batchSize; i++) { - frameOutput_[i].grad->setData(output_.grad->getData() + i * numBlocks_); - frameGate_[i].grad->setData(gate_.grad->getData() + - i * numBlocks_ * (3 + numDims_)); - frameInputNode_[i].grad->setData(gate_.grad->getData() + - i * numBlocks_ * (3 + numDims_) + - numBlocks_ * 0); - frameInputGate_[i].grad->setData(gate_.grad->getData() + - i * numBlocks_ * (3 + numDims_) + - numBlocks_ * 1); - frameForgetGate_[i].grad->setData(gate_.grad->getData() + - i * numBlocks_ * (3 + numDims_) + - numBlocks_ * 2); - frameOutputGate_[i].grad->setData(gate_.grad->getData() + - i * numBlocks_ * (3 + numDims_) + - numBlocks_ * (2 + numDims_)); - } - - { - AsyncGpuBlock asyncGpuBlock; - - for (size_t i = 0; i < numSequences; i++) { - CoordIterator coordIter(dimsV_[i], directions_); - backwardOneSequence(starts[i], coordIter); - } - } - - if (input.grad) { - input.grad->add(*gate_.grad); - } - if (bias_ && bias_->getWGrad()) { - localBiasGrad_->collectBias(*gate_.grad, 1); - bias_->getParameterPtr()->incUpdate(callback); - } - - weight_->getParameterPtr()->incUpdate(callback); -} - -void MDLstmLayer::backwardGate2OutputSequence(int start, - CoordIterator& coordIter) { - int idxCurr = start + coordIter.offset(); - std::vector preOffsetV; - std::vector nextOffsetV; - preOffsetV.reserve(numDims_); - nextOffsetV.reserve(numDims_); - for (int i = 0; i < numDims_; i++) { - std::vector prePos; - if (coordIter.getPrePos(delays_, i, prePos)) { - preOffsetV[i] = 
coordIter.offset(prePos); - } else { - preOffsetV[i] = -1; - } - std::vector nextPos; - if (coordIter.getNextPos(delays_, i, nextPos)) { - nextOffsetV[i] = coordIter.offset(nextPos); - } else { - nextOffsetV[i] = -1; - } - } - - framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad, - *frameOutputGate_[idxCurr].value); - activationState_->backward(framePreOutput_[idxCurr]).check(); - frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad)); - - frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad, - *framePreOutput_[idxCurr].value); - activationGate_->backward(frameOutputGate_[idxCurr]).check(); - - frameState_[idxCurr].grad->addDotMul( - *frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0); - for (int i = 0; i < numDims_; i++) { - if (nextOffsetV[i] >= 0) { - frameState_[idxCurr].grad->addDotMul( - *frameInputGate_[start + nextOffsetV[i]].grad, *checkIg_, 1.0, 1.0); - - MatrixPtr fgGateOneDimGrad = Matrix::create( - frameForgetGate_[start + nextOffsetV[i]].grad->getData() + - i * numBlocks_, - 1, - numBlocks_, - false, - useGpu_); - MatrixPtr fgGateOneDimVal = Matrix::create( - frameForgetGate_[start + nextOffsetV[i]].value->getData() + - i * numBlocks_, - 1, - numBlocks_, - false, - useGpu_); - MatrixPtr checkFgOneDim = Matrix::create( - checkFg_->getData() + i * numBlocks_, 1, numBlocks_, false, useGpu_); - - frameState_[idxCurr].grad->addDotMul( - *fgGateOneDimGrad, *checkFgOneDim, 1.0, 1.0); - frameState_[idxCurr].grad->addDotMul( - *frameState_[start + nextOffsetV[i]].grad, - *fgGateOneDimVal, - 1.0, - 1.0); - } - } - - frameInputNode_[idxCurr].grad->dotMul(*frameState_[idxCurr].grad, - *frameInputGate_[idxCurr].value); - frameInputGate_[idxCurr].grad->dotMul(*frameState_[idxCurr].grad, - *frameInputNode_[idxCurr].value); - - frameForgetGate_[idxCurr].grad->zeroMem(); - for (int i = 0; i < numDims_; i++) { - if (preOffsetV[i] >= 0) { - MatrixPtr fgGateOneDimGrad = Matrix::create( - 
frameForgetGate_[idxCurr].grad->getData() + i * numBlocks_, - 1, - numBlocks_, - false, - useGpu_); - fgGateOneDimGrad->addDotMul(*frameState_[idxCurr].grad, - *frameState_[start + preOffsetV[i]].value, - 1.0, - 1.0); - } - } - - activationGate_->backward(frameInputGate_[idxCurr]).check(); - activationGate_->backward(frameForgetGate_[idxCurr]).check(); - activation_->backward(frameInputNode_[idxCurr]).check(); - - if (bias_->getWGrad()) { - for (int i = 0; i < numDims_; i++) { - if (preOffsetV[i] >= 0) { - checkIgGrad_->addDotMul(*frameInputGate_[idxCurr].grad, - *frameState_[start + preOffsetV[i]].value, - 1.0, - 1.0); - - MatrixPtr fgGateOneDimGrad = Matrix::create( - frameForgetGate_[idxCurr].grad->getData() + i * numBlocks_, - 1, - numBlocks_, - false, - useGpu_); - MatrixPtr checkFgOneDimGrad = - Matrix::create(checkFgGrad_->getData() + i * numBlocks_, - 1, - numBlocks_, - false, - useGpu_); - checkFgOneDimGrad->addDotMul(*fgGateOneDimGrad, - *frameState_[start + preOffsetV[i]].value, - 1.0, - 1.0); - } - } - checkOgGrad_->addDotMul( - *frameOutputGate_[idxCurr].grad, *frameState_[idxCurr].value, 1.0, 1.0); - } -} - -void MDLstmLayer::backwardOneSequence(int start, CoordIterator& coordIter) { - MatrixPtr weightT = weight_->getW()->getTranspose(); - for (coordIter.rbegin(); !coordIter.end(); --coordIter) { - int offset = coordIter.offset(); - backwardGate2OutputSequence(start, coordIter); - for (int i = 0; i < numDims_; i++) { - std::vector prePos; - if (coordIter.getPrePos(delays_, i, prePos)) { - int preOffset = coordIter.offset(prePos); - frameOutput_[start + preOffset].grad->mul( - *frameGate_[start + offset].grad, *weightT, 1.0, 1.0); - if (weight_->getWGrad()) { - weight_->getWGrad()->mul( - *frameOutput_[start + preOffset].value->getTranspose(), - *frameGate_[start + offset].grad, - 1.0, - 1.0); - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNAddtoLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNAddtoLayer.cpp 
deleted file mode 100644 index 544b4082fa0a396af61b54fce2cb672243e59afb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNAddtoLayer.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MKLDNNAddtoLayer.h" - -using namespace mkldnn; // NOLINT - -namespace paddle { - -REGISTER_LAYER(mkldnn_addto, MKLDNNAddtoLayer); - -bool MKLDNNAddtoLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!MKLDNNLayer::init(layerMap, parameterMap)) { - return false; - } - - layerSize_ = getSize(); - for (size_t i = 0; i < inputLayers_.size(); i++) { - CHECK_EQ(layerSize_, inputLayers_[i]->getSize()) << "input size must equal"; - } - if (biasParameter_.get() != NULL) { - biases_ = - std::unique_ptr(new Weight(1, layerSize_, biasParameter_, 0)); - } - return true; -} - -void MKLDNNAddtoLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { - CHECK_EQ(layerSize_, getSize()) << "this layer size can not be changed"; - reshapeInput(bs, ih, iw); - ic = inputLayers_[0]->getSize() / ih / iw; - CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); - CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(), - (size_t)bs * ic * ih * iw); - for (size_t i = 0; i < inputLayers_.size(); i++) { - CHECK_EQ(int64_t(bs), inputLayers_[i]->getOutput().getBatchSize()); - CHECK_EQ(layerSize_, inputLayers_[i]->getSize()); - } 
- - oc = ic; - oh = ih; - ow = iw; - reshapeOutput(oh, ow); - resizeOutput(bs, oc * oh * ow); -} - -void MKLDNNAddtoLayer::resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - resetFwdBuffers(inputs, biasVal_, out); - - std::shared_ptr fwdPD; - std::shared_ptr biasPD; - resetFwdPD(fwdPD, biasPD, inputs, biasVal_, out); - - resetFwdPipeline(pipeline, fwdPD, biasPD, inputs, biasVal_, out); -} - -void MKLDNNAddtoLayer::resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - resetBwdBuffers(inputs, biasGrad_, out); - - // backward only need share output grad to input grad - for (size_t i = 0; i < inputs.size(); i++) { - if (inputs[i] != nullptr) { - inputs[i] = out; - inputLayers_[i]->getOutputGrad()->setData(inputs[i]->getData()); - } - } - - // backward bias - bwdBias_ = nullptr; - if (biasGrad_) { - std::vector scales(bs_, 1.0); - std::vector srcPDs(bs_, - biasGrad_->getPrimitiveDesc()); - auto biasPD = - sum::primitive_desc(biasGrad_->getMemoryDesc(), scales, srcPDs); - std::vector srcs; - for (size_t i = 0; i < grads_.size(); ++i) { - srcs.push_back(*(grads_[i])); - } - bwdBias_.reset(new sum(biasPD, srcs, *biasGrad_)); - pipeline.push_back(*bwdBias_); - } -} - -void MKLDNNAddtoLayer::updateWeights(const UpdateCallback& callback) { - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); - } -} - -void MKLDNNAddtoLayer::prepareBias(MKLDNNMatrixPtr& bias, - const MatrixPtr& biasMat, - const MKLDNNMatrixPtr& out, - std::vector& outs) { - auto pd = MKLDNNMatrix::createPrimitiveDesc( - {(int)layerSize_}, memory::format::x, engine_); - bias = MKLDNNMatrix::create(pd, biasMat); - outs.clear(); - real* data = out->getData(); - CHECK_EQ(bs_ * layerSize_, out->getElementCnt()); - for (int i = 0; i < bs_; ++i) { - MatrixPtr tmp = - Matrix::create(data + i * layerSize_, 1, layerSize_, false, false); - outs.push_back(MKLDNNMatrix::create(bias->getPrimitiveDesc(), tmp)); - } -} - -void 
MKLDNNAddtoLayer::resetFwdBuffers(std::vector& inputs, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - inputs.resize(inputLayers_.size()); - for (size_t i = 0; i < inputs.size(); i++) { - resetInValue(inputs[i], nullptr, i); - CHECK(inputs[i]); - inputs[i]->downSpatial(); - } - for (size_t i = 1; i < inputs.size(); i++) { - CHECK_PRIMITIVE_DESC_EQ(inputs[i], inputs[0]->getPrimitiveDesc()); - } - - resetOutValue(out, inputs[0]->getPrimitiveDesc()); - - if (biases_ && biases_->getW()) { - prepareBias(bias, biases_->getW(), out, vals_); - } else { - bias = nullptr; - } -} - -void MKLDNNAddtoLayer::resetFwdPD(std::shared_ptr& pd, - std::shared_ptr& biasPD, - std::vector& inputs, - MKLDNNMatrixPtr bias, - MKLDNNMatrixPtr out) { - std::vector scales(inputs.size(), 1.0); - std::vector srcPDs; - for (size_t i = 0; i < inputs.size(); i++) { - srcPDs.push_back(inputs[i]->getPrimitiveDesc()); - } - CHECK(out); - pd.reset(new sum::primitive_desc(out->getMemoryDesc(), scales, srcPDs)); - CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); - - biasPD = nullptr; - if (bias) { - std::vector scales(2, 1.0); - std::vector srcPDs(2, bias->getPrimitiveDesc()); - biasPD.reset( - new sum::primitive_desc(bias->getMemoryDesc(), scales, srcPDs)); - CHECK_PRIMITIVE_DESC_EQ(bias, biasPD->dst_primitive_desc()); - } -} - -void MKLDNNAddtoLayer::resetFwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - std::shared_ptr& biasPD, - std::vector& inputs, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - std::vector srcs; - for (size_t i = 0; i < inputs.size(); i++) { - srcs.push_back(*(inputs[i])); - } - fwd_.reset(new sum(*pd, srcs, *out)); - pipeline.push_back(*fwd_); - - fwdBias_.clear(); - if (biasPD == nullptr || bias == nullptr) { - return; - } - fwdBias_.resize(vals_.size()); - for (size_t i = 0; i < vals_.size(); ++i) { - std::vector srcs; - srcs.push_back(*(vals_[i])); - srcs.push_back(*bias); - fwdBias_[i].reset(new sum(*biasPD, srcs, *vals_[i])); - 
pipeline.push_back(*fwdBias_[i]); - } -} - -void MKLDNNAddtoLayer::resetBwdBuffers(std::vector& inputs, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - CHECK(outVal_); - resetOutGrad(out, outVal_->getPrimitiveDesc()); - CHECK(out); - - inputs.resize(inputLayers_.size()); - for (size_t i = 0; i < inputs.size(); i++) { - resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); - CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc()); - } - - if (biases_ && biases_->getWGrad()) { - prepareBias(bias, biases_->getWGrad(), out, grads_); - } else { - bias = nullptr; - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNAddtoLayer.h b/paddle/legacy/gserver/layers/MKLDNNAddtoLayer.h deleted file mode 100644 index 0b385e804fdbc74c8612031cf415d06f15ce311a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNAddtoLayer.h +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "MKLDNNLayer.h" -#include "mkldnn.hpp" - -namespace paddle { - -/** - * @brief A subclass of MKLDNNLayer Addto layer. 
- * - * The config file api is mkldnn_addto - */ -class MKLDNNAddtoLayer : public MKLDNNLayer { - protected: - // layer size == ic * ih * iw == oc * oh *ow, and can not be changed - size_t layerSize_; - - std::unique_ptr biases_; - - // buffers for adding bias - std::vector vals_; - std::vector grads_; - // primitives for adding bias - std::vector> fwdBias_; - std::shared_ptr bwdBias_; - - public: - explicit MKLDNNAddtoLayer(const LayerConfig& config) : MKLDNNLayer(config) {} - - ~MKLDNNAddtoLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; - - void resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void updateWeights(const UpdateCallback& callback) override; - - protected: - void resetFwdBuffers(std::vector& inputs, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void resetFwdPD(std::shared_ptr& pd, - std::shared_ptr& biasPD, - std::vector& inputs, - MKLDNNMatrixPtr bias, - MKLDNNMatrixPtr out); - void resetFwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - std::shared_ptr& biasPD, - std::vector& inputs, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void resetBwdBuffers(std::vector& inputs, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - - void prepareBias(MKLDNNMatrixPtr& bias, - const MatrixPtr& biasMat, - const MKLDNNMatrixPtr& out, - std::vector& outs); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNBase.h b/paddle/legacy/gserver/layers/MKLDNNBase.h deleted file mode 100644 index 786ceaf86086d7c04331641693181809ac019597..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNBase.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "mkldnn.hpp" - -namespace paddle { - -typedef enum { - MKLDNN_BASE = 1, // basical info of MKLDNN - MKLDNN_TESTS = 1, // gtest info of MKLDNN - MKLDNN_FMTS = 2, // format info of MKLDNN - MKLDNN_SIZES = 3, // size info of MKLDNN - MKLDNN_ALL = 4, // show all info of MKLDNN -} MKLDNN_LOG_LEVEL; - -/** - * @brief MKLDNN CPU engine. - * - */ -class CPUEngine { - public: - static CPUEngine& Instance() { - // Thread-safe in C++11. - static CPUEngine myInstance; - return myInstance; - } - - // Disallow copy or move - CPUEngine(const CPUEngine&) = delete; // Copy constructor - CPUEngine(CPUEngine&&) = delete; // Move constructor - CPUEngine& operator=(const CPUEngine&) = delete; // Copy assignment - CPUEngine& operator=(CPUEngine&&) = delete; // Move assignment - - mkldnn::engine& getEngine() { return cpuEngine_; } - - protected: - CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} - // CPUEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} - ~CPUEngine() {} - - private: - mkldnn::engine cpuEngine_; -}; - -/** - * @brief MKLDNN Stream. 
- * - */ -class MKLDNNStream { - public: - MKLDNNStream() : ready_(false) { resetState(); } - - virtual ~MKLDNNStream() {} - - /** - * @brief Submit stream - * @param prims The primitives vector - * @param block Waiting for the stream to complete - */ - void submit(std::vector& prims, bool block = true) { - resetState(); - stream_->submit(prims).wait(block); - ready_ = false; - } - - /** - * @brief Reset the mkldnn stream - */ - void resetState() { - if (ready_) { - return; - } - // TODO(TJ): change me when mkldnn have method to reset this state - // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy)); - stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); - ready_ = true; - } - - private: - bool ready_; - std::shared_ptr stream_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.cpp deleted file mode 100644 index dbdfaff32f729a0654eec8e2189d4ae23dfca1cb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.cpp +++ /dev/null @@ -1,306 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MKLDNNBatchNormLayer.h" - -using namespace mkldnn; // NOLINT -typedef memory::format format; - -namespace paddle { - -REGISTER_LAYER(mkldnn_batch_norm, MKLDNNBatchNormLayer); - -bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!MKLDNNLayer::init(layerMap, parameterMap)) { - return false; - } - - // first one is input layer - // the other two are created in config_parser.py saving moving mean and var - CHECK_EQ(inputLayers_.size(), 3U); - CHECK_EQ(inputLayers_.size(), parameters_.size()); - CHECK_EQ(inputLayers_.size(), size_t(config_.inputs_size())); - - const ImageConfig& conf = config_.inputs(0).image_conf(); - ic_ = conf.channels(); - ih_ = inputLayers_[0]->getOutput().getFrameHeight(); - iw_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (iw_ == 0 && ih_ == 0) { - iw_ = conf.img_size(); - ih_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - } - oc_ = ic_; - oh_ = ih_; - ow_ = iw_; - if (config_.has_use_global_stats()) { - useGlobalStats_ = config_.use_global_stats(); - } - movingAvgFraction_ = config_.moving_average_fraction(); - epsilon_ = config_.epsilon(); - - VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? 
"use" : "do not use") - << " --- global stats"; - VLOG(MKLDNN_BASE) << "Moving average fraction: " << movingAvgFraction_; - - initWeight(); - movingMean_.reset(new Weight(oc_, 1, parameters_[1], 0)); - movingVar_.reset(new Weight(oc_, 1, parameters_[2], 0)); - return true; -} - -void MKLDNNBatchNormLayer::initWeight() { - weight_.reset(new Weight(1, oc_, parameters_[0])); - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); - } - CHECK_EQ(weight_ != nullptr, biases_ != nullptr) - << "only support have both weight and bias, or neither"; - if (weight_ && weight_->getW()) { - CHECK(biases_ && biases_->getW()); - valueScaleShift_ = Matrix::create(2, oc_, false, false); - valueScaleShift_->zeroMem(); - VectorPtr scale(new CpuVector(oc_, valueScaleShift_->getMemoryHandle(), 0)); - VectorPtr shift( - new CpuVector(oc_, valueScaleShift_->getMemoryHandle(), oc_)); - const VectorPtr& wgt = parameters_[0]->getBuf(PARAMETER_VALUE); - const VectorPtr& bias = biasParameter_->getBuf(PARAMETER_VALUE); - scale->copyFrom(*wgt); - shift->copyFrom(*bias); - wgt->setData(valueScaleShift_->getData()); - bias->setData(valueScaleShift_->getData() + oc_); - } - if (weight_ && weight_->getWGrad()) { - CHECK(biases_ && biases_->getWGrad()); - gradScaleShift_ = Matrix::create(2, oc_, false, false); - gradScaleShift_->zeroMem(); - const VectorPtr& wgt = parameters_[0]->getBuf(PARAMETER_GRADIENT); - const VectorPtr& bias = biasParameter_->getBuf(PARAMETER_GRADIENT); - wgt->setData(gradScaleShift_->getData()); - bias->setData(gradScaleShift_->getData() + oc_); - } -} - -void MKLDNNBatchNormLayer::convertWeightsFromPaddle() { - if (hasInitedWgt_) { - return; - } - // prepare mean and var if necessary - if (useGlobalStats_) { - CHECK(mean_); - CHECK(var_); - mean_->copyFrom(*(movingMean_->getW())); - var_->copyFrom(*(movingVar_->getW())); - } - hasInitedWgt_ = true; -} - -void MKLDNNBatchNormLayer::calMovingMeanAndVar() { - // calculating and 
saving moving mean and variance - CHECK_EQ(useGlobalStats_, false); - movingMean_->getW()->add( - *mean_, movingAvgFraction_, 1.0 - movingAvgFraction_); - // here var is v^2 - movingVar_->getW()->add(*var_, movingAvgFraction_, 1.0 - movingAvgFraction_); -} - -void MKLDNNBatchNormLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { - reshapeInput(bs, ih, iw); - oh = ih; - ow = iw; - // ic_ and oc can not be changed - CHECK_EQ((size_t)ic, - inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) - << "Input channel can not be changed"; - reshapeOutput(oh, ow); - resizeOutput(bs, oc * oh * ow); -} - -void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - // In training phase, it will always calculate mean and var, - // so useGlobalStats must be false. - // In scoring phase, it depends on useGlobalStats choice. - if (passType_ != PASS_TEST && useGlobalStats_ == true) { - LOG(WARNING) << "use_global_stats is invalid setting in training phase"; - useGlobalStats_ = false; - } - - resetFwdBuffers(inputs[0], wgtVal_, out); - - resetFwdPD(fwdPD_, inputs[0], wgtVal_, out); - - resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, out); -} - -void MKLDNNBatchNormLayer::resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - std::shared_ptr pd; - - resetBwdBuffers(inputs[0], wgtGrad_, out); - - resetBwdPD(pd, inputs[0], wgtGrad_, out); - - resetBwdPipeline(pipeline, pd, inputs[0], wgtGrad_, out); -} - -void MKLDNNBatchNormLayer::forward(PassType passType) { - MKLDNNLayer::forward(passType); - - // calculate and save moving mean and variance - if (passType_ != PASS_TEST) { - calMovingMeanAndVar(); - } -} - -void MKLDNNBatchNormLayer::updateWeights(const UpdateCallback& callback) { - weight_->getParameterPtr()->incUpdate(callback); - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); - } -} - -void 
MKLDNNBatchNormLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out) { - resetInValue(in); - - memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; - CHECK(in); - auto outPD = - MKLDNNMatrix::createPrimitiveDesc(outDims, in->getFormat(), engine_); - resetOutValue(out, outPD); - - if (valueScaleShift_) { - auto pd = MKLDNNMatrix::createPrimitiveDesc({2, oc_}, format::nc, engine_); - resetWithMatrix(wgt, valueScaleShift_, pd); - } - if (passType_ != PASS_TEST || useGlobalStats_) { - auto pd = MKLDNNMatrix::createPrimitiveDesc({oc_}, format::x, engine_); - mean_ = MKLDNNMatrix::create(pd); - var_ = MKLDNNMatrix::create(pd); - } -} - -void MKLDNNBatchNormLayer::resetFwdPD( - std::shared_ptr& pd, - MKLDNNMatrixPtr in, - MKLDNNMatrixPtr wgt, - MKLDNNMatrixPtr out) { - flags_ = 0u; - prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring - : prop_kind::forward_training; - if (useGlobalStats_) { - flags_ = (flags_ | batch_normalization_flag::use_global_stats); - } - if (wgt) { - flags_ = (flags_ | batch_normalization_flag::use_scale_shift); - } - auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), epsilon_, flags_); - pd.reset(new bn_fwd::primitive_desc(fwdDesc, engine_)); - CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); - if (wgt) { - CHECK_PRIMITIVE_DESC_EQ(wgt, pd->weights_primitive_desc()); - } - if (passType_ != PASS_TEST || useGlobalStats_) { - CHECK_PRIMITIVE_DESC_EQ(mean_, pd->mean_primitive_desc()); - CHECK_PRIMITIVE_DESC_EQ(var_, pd->variance_primitive_desc()); - } -} - -void MKLDNNBatchNormLayer::resetFwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out) { - if (passType_ == PASS_TEST) { - if (useGlobalStats_) { - fwd_.reset(wgt != nullptr ? 
new bn_fwd(*pd, - *in, - (const primitive::at)(*mean_), - (const primitive::at)(*var_), - *wgt, - *out) - : new bn_fwd(*pd, - *in, - (const primitive::at)(*mean_), - (const primitive::at)(*var_), - *out)); - } else { - fwd_.reset(wgt != nullptr ? new bn_fwd(*pd, *in, *wgt, *out) - : new bn_fwd(*pd, *in, *out)); - } - } else { - CHECK_EQ(useGlobalStats_, false) - << "useGlobalStats should be false in training"; - fwd_.reset(wgt != nullptr ? new bn_fwd(*pd, *in, *wgt, *out, *mean_, *var_) - : new bn_fwd(*pd, *in, *out, *mean_, *var_)); - } - pipeline.push_back(*fwd_); -} - -void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out) { - CHECK(inVals_[0] && outVal_); - resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVals_[0]->getPrimitiveDesc()); - if (gradScaleShift_) { - CHECK(wgtVal_); - resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc()); - } -} - -void MKLDNNBatchNormLayer::resetBwdPD( - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out) { - pd = nullptr; - if (in == nullptr) { - return; - } - CHECK_PRIMITIVE_DESC_EQ(out, in->getPrimitiveDesc()); - auto md = in->getMemoryDesc(); - auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, epsilon_, flags_); - pd.reset(new bn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_)); - CHECK(pd->weights_primitive_desc() == fwdPD_->weights_primitive_desc()); - CHECK_PRIMITIVE_DESC_EQ(wgt, pd->diff_weights_primitive_desc()); - CHECK_PRIMITIVE_DESC_EQ(mean_, pd->mean_primitive_desc()); - CHECK_PRIMITIVE_DESC_EQ(var_, pd->variance_primitive_desc()); -} - -void MKLDNNBatchNormLayer::resetBwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out) { - if (pd == nullptr) { - return; - } - CHECK(inVals_[0]); - bwdData_.reset( - wgt && wgtVal_ - ? 
new bn_bwd( - *pd, *inVals_[0], *mean_, *var_, *out, *wgtVal_, *in, *wgt) - : new bn_bwd(*pd, *inVals_[0], *mean_, *var_, *out, *in)); - pipeline.push_back(*bwdData_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.h deleted file mode 100644 index 9aa20df98f30837e1b80b4269d05d85b7d99ba76..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.h +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "MKLDNNLayer.h" -#include "mkldnn.hpp" - -namespace paddle { -typedef mkldnn::batch_normalization_forward bn_fwd; -typedef mkldnn::batch_normalization_backward bn_bwd; - -/** - * @brief A subclass of MKLDNNLayer BatchNorm layer. - * - * The config file api is mkldnn_batch_norm - */ -class MKLDNNBatchNormLayer : public MKLDNNLayer { - protected: - // save forward primitive_desc, which can be used backward - std::shared_ptr fwdPD_; - - // Epsilon value used in the batch normalization formula. - real epsilon_; - - // weight and bias in paddle - std::unique_ptr weight_; - std::unique_ptr biases_; - // mkldnn use a large buffer store both scale and shift - // which are weight and bias in paddle corresponding. - MatrixPtr valueScaleShift_; - MatrixPtr gradScaleShift_; - // Moving average of mean. 
- std::unique_ptr movingMean_; - // Moving average of variance. - std::unique_ptr movingVar_; - - // if useGlobalStats_ is true, will use the loaded mean and variance. - // otherwise, calculate mean and variance in every mini-batch. - bool useGlobalStats_; - // used in MKLDNN primitive desc - unsigned flags_; - // use to compute moving mean and variance. - real movingAvgFraction_; - // whether the weight has been init - bool hasInitedWgt_; - - // local mean and variance - // when useGlobalStats_ they are loaded from moving mean and variance - // when do not useGlobalStats_ they are calculated from this mini-batch - MKLDNNMatrixPtr mean_; - MKLDNNMatrixPtr var_; - - public: - explicit MKLDNNBatchNormLayer(const LayerConfig& config) - : MKLDNNLayer(config), useGlobalStats_(true), hasInitedWgt_(false) {} - - ~MKLDNNBatchNormLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - - void reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; - - void resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void updateWeights(const UpdateCallback& callback) override; - - void convertWeightsFromPaddle() override; - - protected: - void initWeight(); - /** - * cal moving mean and variance. 
- * moving = moving * AvgFraction + local * (1 - AvgFraction) - */ - void calMovingMeanAndVar(); - - void resetFwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out); - void resetFwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr in, - MKLDNNMatrixPtr wgt, - MKLDNNMatrixPtr out); - void resetFwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out); - void resetBwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out); - void resetBwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out); - void resetBwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& out); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNConcatLayer.cpp deleted file mode 100644 index beed6176e118f3b12a4d02a0ec717792bc93364d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNConcatLayer.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MKLDNNConcatLayer.h" - -using namespace mkldnn; // NOLINT -typedef memory::format format; - -namespace paddle { - -REGISTER_LAYER(mkldnn_concat, MKLDNNConcatLayer); - -bool MKLDNNConcatLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!MKLDNNLayer::init(layerMap, parameterMap)) { - return false; - } - CHECK_GT(inputLayers_.size(), 1UL); - CHECK(!biasParameter_); - return true; -} - -void MKLDNNConcatLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { - reshapeInput(bs, ih, iw); - ic = inputLayers_[0]->getSize() / ih / iw; - CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); - CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(), - (size_t)bs * ic * ih * iw); - CHECK_GT(inputLayers_.size(), 1UL); - channels_.resize(inputLayers_.size()); - channels_[0] = ic; - oc = ic; - for (size_t i = 1; i < inputLayers_.size(); i++) { - int batchsize = 0, height = 0, witdh = 0; - reshapeInput(batchsize, height, witdh, i); - CHECK_EQ(bs, batchsize); - CHECK_EQ(ih, height); - CHECK_EQ(iw, witdh); - - channels_[i] = inputLayers_[i]->getSize() / height / witdh; - CHECK_EQ((size_t)channels_[i] * height * witdh, inputLayers_[i]->getSize()); - oc += channels_[i]; - } - oh = ih; - ow = iw; - reshapeOutput(oh, ow); - resizeOutput(bs, oc * oh * ow); -} - -void MKLDNNConcatLayer::resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - resetFwdBuffers(inputs, out); - - std::shared_ptr fwdPD; - resetFwdPD(fwdPD, inputs, out); - - resetFwdPipeline(pipeline, fwdPD, inputs, out); -} - -void MKLDNNConcatLayer::resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - resetBwdBuffers(inputs, out); - - resetBwdPipeline(pipeline, bwds_, inputs, out); -} - -void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, - MKLDNNMatrixPtr& out) { - inputs.resize(inputLayers_.size()); - bool has8c = false, has16c = false, hasnc = false; - for (size_t i = 0; i < 
inputs.size(); i++) { - resetInValue(inputs[i], nullptr, i, channels_[i]); - inputs[i]->downSpatial(); - CHECK(inputs[i]); - auto dm = inputs[i]->getDims(); - // inputs format can be different, but ndims must equal - CHECK(i == 0 || dm.size() == inputs[0]->getDims().size()); - CHECK_EQ(bs_, dm[0]); - CHECK_EQ(channels_[i], dm[1]); - if (dm.size() > 2) { - CHECK_EQ(ih_, dm[2]); - CHECK_EQ(iw_, dm[3]); - } - if (inputs[i]->getFormat() == format::nc) { - hasnc = true; - } - if (inputs[i]->getFormat() == format::nChw8c) { - has8c = true; - } - if (inputs[i]->getFormat() == format::nChw16c) { - has16c = true; - } - } - - format outFmt; - if (has16c && oc_ % 16 == 0) { - outFmt = format::nChw16c; - } else if (has8c && oc_ % 8 == 0) { - outFmt = format::nChw8c; - } else if (hasnc) { - CHECK(oh_ == 1 && ow_ == 1); - outFmt = format::nc; - } else { - outFmt = format::nchw; - } - memory::dims outDims = - hasnc ? memory::dims{bs_, oc_} : memory::dims{bs_, oc_, oh_, ow_}; - auto outPD = MKLDNNMatrix::createPrimitiveDesc(outDims, outFmt, engine_); - resetOutValue(out, outPD); -} - -void MKLDNNConcatLayer::resetFwdPD(std::shared_ptr& pd, - std::vector& inputs, - MKLDNNMatrixPtr out) { - std::vector srcPDs; - for (size_t i = 0; i < inputs.size(); i++) { - srcPDs.push_back(inputs[i]->getPrimitiveDesc()); - } - CHECK(out); - pd.reset(new concat::primitive_desc(out->getMemoryDesc(), axis_, srcPDs)); - CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); -} - -void MKLDNNConcatLayer::resetFwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - std::vector srcs; - for (size_t i = 0; i < inputs.size(); i++) { - srcs.push_back(*(inputs[i])); - } - fwd_.reset(new concat(*pd, srcs, *out)); - pipeline.push_back(*fwd_); -} - -void MKLDNNConcatLayer::resetBwdBuffers(std::vector& inputs, - MKLDNNMatrixPtr& out) { - CHECK(outVal_); - resetOutGrad(out, outVal_->getPrimitiveDesc()); - CHECK(out); - - inputs.resize(inputLayers_.size()); - 
for (size_t i = 0; i < inputs.size(); i++) { - CHECK(inVals_[i]); - resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); - CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc()); - } -} - -void MKLDNNConcatLayer::resetBwdPipeline( - std::vector& pipeline, - std::vector>& prims, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - // reset the backward primitives - memory::dims offsets = {0, 0, 0, 0}; - prims.resize(inputs.size()); - CHECK_EQ(inputs.size(), channels_.size()); - for (size_t i = 0; i < inputs.size(); i++) { - auto viewPD = view::primitive_desc( - out->getPrimitiveDesc(), inputs[i]->getDims(), offsets); - auto bwdPD = reorder::primitive_desc(viewPD.dst_primitive_desc(), - inputs[i]->getPrimitiveDesc()); - prims[i].reset(new reorder(bwdPD, *out, *(inputs[i]))); - offsets[axis_] += channels_[i]; - // push to pipeline - pipeline.push_back(*prims[i]); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNConcatLayer.h b/paddle/legacy/gserver/layers/MKLDNNConcatLayer.h deleted file mode 100644 index d7738df6c106c68f55b313f2d119e31c6e444cbf..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNConcatLayer.h +++ /dev/null @@ -1,96 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "MKLDNNLayer.h" -#include "mkldnn.hpp" - -namespace paddle { - -/** - * @brief A subclass of MKLDNNLayer Concatenate layer. 
- * - * The config file api is mkldnn_concat - */ -class MKLDNNConcatLayer : public MKLDNNLayer { - protected: - std::vector> bwds_; - // input channel numbers - std::vector channels_; - - // concat_dimension in MKLDNN - // if axis_ == 0, concat batchsize - // if axis_ == 1, concat channel (default) - int axis_; - - public: - explicit MKLDNNConcatLayer(const LayerConfig& config) - : MKLDNNLayer(config), axis_(1) {} - - ~MKLDNNConcatLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; - - void resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void printSizeInfo() override { - CHECK_EQ(channels_.size(), inputLayers_.size()); - for (size_t i = 0; i < channels_.size(); ++i) { - VLOG(MKLDNN_SIZES) << "Input " << i << ", " << inputLayers_[i]->getName() - << ": " << bs_ << ", " << channels_[i] << ", " << ih_ - << ", " << iw_; - } - VLOG(MKLDNN_SIZES) << "Output: " << bs_ << ", " << oc_ << ", " << oh_ - << ", " << ow_; - } - - size_t keepCondition() { - // reset when the total element size of all inputs changed - size_t totalSize = inputLayers_[0]->getOutputValue()->getElementCnt(); - for (size_t i = 1; i < inputLayers_.size(); ++i) { - totalSize += inputLayers_[i]->getOutputValue()->getElementCnt(); - } - return totalSize; - } - - protected: - void resetFwdBuffers(std::vector& inputs, - MKLDNNMatrixPtr& out); - void resetFwdPD(std::shared_ptr& pd, - std::vector& inputs, - MKLDNNMatrixPtr out); - void resetFwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - std::vector& inputs, - MKLDNNMatrixPtr& out); - void resetBwdBuffers(std::vector& inputs, - MKLDNNMatrixPtr& out); - void resetBwdPipeline(std::vector& pipeline, - std::vector>& prims, - std::vector& inputs, - MKLDNNMatrixPtr& out); -}; - 
-} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNConvLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNConvLayer.cpp deleted file mode 100644 index b47bf14821fed4057227c80bb77e584649ab3145..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNConvLayer.cpp +++ /dev/null @@ -1,388 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MKLDNNConvLayer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/utils/Logging.h" - -using namespace mkldnn; // NOLINT -typedef memory::format format; - -namespace paddle { - -REGISTER_LAYER(mkldnn_conv, MKLDNNConvLayer); - -bool MKLDNNConvLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!MKLDNNLayer::init(layerMap, parameterMap)) { - return false; - } - CHECK_EQ(inputLayers_.size(), 1UL) << "Only support one input layer yet"; - CHECK_EQ(inputLayers_.size(), parameters_.size()); - CHECK(config_.shared_biases()) << "Only support shared biases yet"; - - oc_ = config_.num_filters(); - const ConvConfig& conf = config_.inputs(0).conv_conf(); - ic_ = conf.channels(); - fw_ = conf.filter_size(); - fh_ = conf.filter_size_y(); - pw_ = conf.padding(); - ph_ = conf.padding_y(); - dw_ = conf.dilation(); - dh_ = conf.dilation_y(); - sw_ = conf.stride(); - sh_ = conf.stride_y(); - gp_ = conf.groups(); - oh_ = conf.output_y(); - ow_ = conf.output_x(); - ih_ = conf.img_size_y(); - iw_ = 
conf.img_size(); - caffeMode_ = conf.caffe_mode(); - CHECK(caffeMode_) << "Only support caffe mode yet"; - CHECK(dh_ == 1 && dw_ == 1) << "Only support dilation 1 yet"; - // check group setting - CHECK_EQ((oc_ / gp_) * gp_, oc_) << "group is indivisible for oc"; - CHECK_EQ((ic_ / gp_) * gp_, ic_) << "group is indivisible for ic"; - - // create weight - size_t height = oc_ / gp_; - size_t width = ic_ * fh_ * fw_; - CHECK_EQ(parameters_[0]->getSize(), height * width); - weight_ = - std::unique_ptr(new Weight(height, width, parameters_[0], 0)); - - // create biases - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_, 0)); - } - return true; -} - -void MKLDNNConvLayer::convertWeightsFromPaddle() { - if (hasInitedWgt_) { - return; - } - - CHECK(wgtVal_) << "should have been initialized"; - // the paddle weight format is oihw or goihw - auto targetDim = wgtVal_->getDims(); - auto srcFmt = (gp_ == 1) ? memory::format::oihw : memory::format::goihw; - wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); - hasInitedWgt_ = true; -} - -void MKLDNNConvLayer::convertWeightsToPaddle() { - CHECK(wgtVal_) << "should have been initialized"; - auto targetDim = wgtVal_->getDims(); - auto dstFmt = (gp_ == 1) ? 
memory::format::oihw : memory::format::goihw; - wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); -} - -void MKLDNNConvLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { - reshapeInput(bs, ih, iw); - - // cal output sizes - // oc can not be changed - int fh = (fh_ - 1) * dh_ + 1; - int fw = (fw_ - 1) * dw_ + 1; - oh = outputSize(ih, fh, ph_, sh_, caffeMode_); - ow = outputSize(iw, fw, pw_, sw_, caffeMode_); - - reshapeOutput(oh, ow); - resizeOutput(bs, oc * oh * ow); -} - -void MKLDNNConvLayer::resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - resetFwdPD(fwdPD_); - - resetFwdBuffers(fwdPD_, inputs[0], wgtVal_, biasVal_, out); - - resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out); -} - -void MKLDNNConvLayer::resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - std::shared_ptr bwdWgtPD; - std::shared_ptr bwdDataPD; - - resetBwdWgtPD(bwdWgtPD); - - resetBwdDataPD(bwdDataPD); - - resetBwdBuffers(bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); - - resetBwdPipeline( - pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); -} - -void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) { - weight_->getParameterPtr()->incUpdate(callback); - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); - } -} - -void MKLDNNConvLayer::loadConvSettings(memory::dims& wgt, - memory::dims& bias, - memory::dims& stride, - memory::dims& dilation, - memory::dims& padL, - memory::dims& padR) { - wgt = (gp_ == 1) ? 
memory::dims{oc_, ic_, fh_, fw_} - : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_}; - bias = memory::dims{oc_}; - stride = memory::dims{sh_, sw_}; - padL = memory::dims{ph_, pw_}; - padR = getPaddingR(); - // note: mkldnn dilation start from 0 - dilation = memory::dims{dh_ - 1, dw_ - 1}; -} - -void MKLDNNConvLayer::resetFwdPD( - std::shared_ptr& pd) { - // dims for conv - memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; - memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; - memory::dims wgtDims, biasDims, strides, dilations, padL, padR; - loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); - - prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring - : prop_kind::forward_training; - algorithm algo = algorithm::convolution_direct; - padding_kind padKind = padding_kind::zero; - conv_fwd::desc fwdDesc = - biases_ && biases_->getW() - ? conv_fwd::desc(pk, - algo, - MKLDNNMatrix::createMemoryDesc(inDims), - MKLDNNMatrix::createMemoryDesc(wgtDims), - MKLDNNMatrix::createMemoryDesc(biasDims), - MKLDNNMatrix::createMemoryDesc(outDims), - strides, - dilations, - padL, - padR, - padKind) - : conv_fwd::desc(pk, - algo, - MKLDNNMatrix::createMemoryDesc(inDims), - MKLDNNMatrix::createMemoryDesc(wgtDims), - MKLDNNMatrix::createMemoryDesc(outDims), - strides, - dilations, - padL, - padR, - padKind); - pd.reset(new conv_fwd::primitive_desc(fwdDesc, engine_)); -} - -void MKLDNNConvLayer::resetFwdBuffers( - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - CHECK(pd); - resetInValue( - in, std::make_shared(pd->src_primitive_desc())); - - resetOutValue(out, pd->dst_primitive_desc()); - - resetWithMatrix(wgt, weight_->getW(), pd->weights_primitive_desc()); - - if (biases_ && biases_->getW()) { - resetWithMatrix(bias, biases_->getW(), pd->bias_primitive_desc()); - } else { - bias = nullptr; - } -} - -void MKLDNNConvLayer::resetFwdPipeline( - std::vector& pipeline, - 
std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - if (bias) { - fwd_.reset(new conv_fwd(*pd, *in, *wgt, *bias, *out)); - } else { - fwd_.reset(new conv_fwd(*pd, *in, *wgt, *out)); - } - pipeline.push_back(*fwd_); -} - -void MKLDNNConvLayer::resetBwdWgtPD( - std::shared_ptr& pd) { - memory::dims wgtDims, biasDims, strides, dilations, padL, padR; - loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); - - // create backward weight using input, output and weight value memory desc - CHECK(inVals_[0]) << "Should have internal input value"; - CHECK(outVal_) << "Should have internal output value"; - CHECK(wgtVal_) << "Should have weight value"; - algorithm algo = algorithm::convolution_direct; - padding_kind padKind = padding_kind::zero; - auto bwdWgtDesc = biasVal_ != nullptr - ? conv_bwdWgt::desc(algo, - inVals_[0]->getMemoryDesc(), - wgtVal_->getMemoryDesc(), - biasVal_->getMemoryDesc(), - outVal_->getMemoryDesc(), - strides, - padL, - padR, - padKind) - : conv_bwdWgt::desc(algo, - inVals_[0]->getMemoryDesc(), - wgtVal_->getMemoryDesc(), - outVal_->getMemoryDesc(), - strides, - padL, - padR, - padKind); - pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); - CHECK_PRIMITIVE_DESC_EQ(inVals_[0], pd->src_primitive_desc()); - CHECK_PRIMITIVE_DESC_EQ( - outVal_, - pd->diff_dst_primitive_desc(), - "primitive desc of out value and grad should be equal"); - CHECK_PRIMITIVE_DESC_EQ( - wgtVal_, - pd->diff_weights_primitive_desc(), - "primitive desc of weight value and grad should be equal"); -} - -void MKLDNNConvLayer::resetBwdDataPD( - std::shared_ptr& pd) { - pd = nullptr; - if (inputLayers_[0]->getOutput().grad == nullptr) { - return; - } - - memory::dims wgtDims, biasDims, strides, dilations, padL, padR; - loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); - CHECK(inVals_[0]) << "Should have internal input value"; - CHECK(outVal_) << "Should have 
internal output value"; - // create backward data using input and output value memory desc - // but using weight memory desc with any format - auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct, - inVals_[0]->getMemoryDesc(), - MKLDNNMatrix::createMemoryDesc(wgtDims), - outVal_->getMemoryDesc(), - strides, - padL, - padR, - padding_kind::zero); - pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); - CHECK_PRIMITIVE_DESC_EQ( - inVals_[0], - pd->diff_src_primitive_desc(), - "primitive desc of in value and grad should be equal"); - CHECK_PRIMITIVE_DESC_EQ( - outVal_, - pd->diff_dst_primitive_desc(), - "primitive desc of out value and grad should be equal"); -} - -void MKLDNNConvLayer::resetBwdBuffers( - std::shared_ptr& wgtPD, - std::shared_ptr& dataPD, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - CHECK(wgtPD); - resetOutGrad(out, wgtPD->diff_dst_primitive_desc()); - - resetWithMatrix( - wgt, weight_->getWGrad(), wgtPD->diff_weights_primitive_desc()); - CHECK_PRIMITIVE_DESC_EQ( - wgtVal_, - wgt->getPrimitiveDesc(), - "primitive desc of weight grad and value should be equal"); - - bias = nullptr; - if (biases_ && biases_->getWGrad()) { - resetWithMatrix( - bias, biases_->getWGrad(), wgtPD->diff_bias_primitive_desc()); - CHECK(bias); - CHECK_PRIMITIVE_DESC_EQ( - biasVal_, - bias->getPrimitiveDesc(), - "primitive desc of bias grad and value should be equal"); - } - - if (dataPD == nullptr) { - return; - } - resetInGrad(in, dataPD->diff_src_primitive_desc()); - resetWgtValBwdData(dataPD, wgtValBwdData_); -} - -void MKLDNNConvLayer::resetBwdPipeline( - std::vector& pipeline, - std::shared_ptr& wgtPD, - std::shared_ptr& dataPD, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - CHECK(inVals_[0]); - // add bwdWgt handle - if (bias) { - bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt, *bias)); - } else { - 
bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt)); - } - pipeline.push_back(*bwdWgt_); - - if (dataPD == nullptr) { - return; - } - if (cvtWgtVal_) { - pipeline.push_back(*cvtWgtVal_); - } - // add bwdData handle - CHECK(wgtValBwdData_) << "Should have weight memory"; - bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in)); - pipeline.push_back(*bwdData_); -} - -void MKLDNNConvLayer::resetWgtValBwdData( - std::shared_ptr& dataPD, - MKLDNNMatrixPtr& wgt) { - if (dataPD == nullptr) { - return; - } - - // create new weight value for backward data, and create reorder if necessary - // since the primitive_desc would be different with wgtVal_ - CHECK(wgtVal_) << "should have weight value"; - if (dataPD->weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) { - wgtValBwdData_ = MKLDNNMatrix::create(dataPD->weights_primitive_desc()); - cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_); - CHECK(cvtWgtVal_); - } else { - wgtValBwdData_ = wgtVal_; - } - VLOG(MKLDNN_FMTS) << "weight value format for backward data: " - << wgtValBwdData_->getFormat(); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNConvLayer.h b/paddle/legacy/gserver/layers/MKLDNNConvLayer.h deleted file mode 100644 index d399035ed3ae2f411587c1fcf1799bb71c8de63e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNConvLayer.h +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "MKLDNNLayer.h" -#include "mkldnn.hpp" - -namespace paddle { -typedef mkldnn::convolution_forward conv_fwd; -typedef mkldnn::convolution_backward_weights conv_bwdWgt; -typedef mkldnn::convolution_backward_data conv_bwdData; - -/** - * @brief A subclass of MKLDNNLayer conv layer. - * - * The config file api is mkldnn_conv - */ -class MKLDNNConvLayer : public MKLDNNLayer { - protected: - // padding height and width - int ph_, pw_; - // stride height and width - int sh_, sw_; - // dilation height and width - int dh_, dw_; - // filter(kenerl) height and width - int fh_, fw_; - // group number - int gp_; - - // in resetBwdData, the format of wgtValBwdData_ is different with wgtVal_ - MKLDNNMatrixPtr wgtValBwdData_; - // convert handle from wgtVal_ to wgtValBwdData_ - std::shared_ptr cvtWgtVal_; - - // save forward primitive_desc, which can be used backward - std::shared_ptr fwdPD_; - - // whether the weight has been init - bool hasInitedWgt_; - - // true by default, which impact the calculation of output image size. 
- // details can refer to mathUtil.h - bool caffeMode_; - - // weight and bias - std::unique_ptr weight_; - std::unique_ptr biases_; - - public: - explicit MKLDNNConvLayer(const LayerConfig& config) - : MKLDNNLayer(config), hasInitedWgt_(false), caffeMode_(true) {} - - ~MKLDNNConvLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; - - void resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void updateWeights(const UpdateCallback& callback) override; - - void convertWeightsFromPaddle() override; - - void convertWeightsToPaddle() override; - - void printSizeInfo() override { - MKLDNNLayer::printSizeInfo(); - VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_ - << ", ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_ - << ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_; - } - - protected: - /** - * load the dims settings of this conv - */ - void loadConvSettings(mkldnn::memory::dims& wgt, - mkldnn::memory::dims& bias, - mkldnn::memory::dims& stride, - mkldnn::memory::dims& dilation, - mkldnn::memory::dims& padL, - mkldnn::memory::dims& padR); - - void resetFwdPD(std::shared_ptr& pd); - void resetFwdBuffers(std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void resetFwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void resetBwdWgtPD(std::shared_ptr& pd); - void resetBwdDataPD(std::shared_ptr& pd); - void resetBwdBuffers(std::shared_ptr& wgtPD, - std::shared_ptr& dataPD, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void 
resetBwdPipeline(std::vector& pipeline, - std::shared_ptr& wgtPD, - std::shared_ptr& dataPD, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - - /** - * reset MKLDNNMatrix of weight value for backward data - * since the primitive_desc would be different with wgtVal_ - */ - void resetWgtValBwdData(std::shared_ptr& dataPD, - MKLDNNMatrixPtr& wgt); - - /** - * get padding_r according to - * https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ - * test_convolution_forward_common.hpp - * @note: mkldnn dilation start from 0 while paddle start from 1 - */ - mkldnn::memory::dims getPaddingR() const { - mkldnn::memory::dims padR = {ph_, pw_}; - for (int i = 0; i < 2; ++i) { - if ((ih_ - ((fh_ - 1) * dh_ + 1) + ph_ + padR[0]) / sh_ + 1 != oh_) { - ++padR[0]; - } - if ((iw_ - ((fw_ - 1) * dw_ + 1) + pw_ + padR[1]) / sw_ + 1 != ow_) { - ++padR[1]; - } - } - return padR; - } -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNFcLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNFcLayer.cpp deleted file mode 100644 index f3747c7db84ef53fdcfa3741525a754fab63bca5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNFcLayer.cpp +++ /dev/null @@ -1,262 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MKLDNNFcLayer.h" -#include "paddle/legacy/utils/Logging.h" - -using namespace mkldnn; // NOLINT -typedef memory::format format; - -namespace paddle { - -REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer); - -bool MKLDNNFcLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!MKLDNNLayer::init(layerMap, parameterMap)) { - return false; - } - - CHECK_EQ(inputLayers_.size(), 1UL) << "Only support one input layer yet"; - CHECK_EQ(inputLayers_.size(), parameters_.size()); - CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet"; - - // output size, cat not be changed - oc_ = getSize(); - oh_ = 1; - ow_ = 1; - ih_ = 1; - iw_ = 1; - - // input size can not change in FC - iLayerSize_ = inputLayers_[0]->getSize(); - CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_); - - // create weight - weight_ = - std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); - - // create biases - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_, 0)); - } - return true; -} - -void MKLDNNFcLayer::convertWeightsFromPaddle() { - if (hasInitedWgt_) { - return; - } - - CHECK(wgtVal_) << "should have been initialized"; - auto targetDim = wgtVal_->getDims(); - auto srcFmt = targetDim.size() == 2 ? format::io : format::ihwo; - wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); - hasInitedWgt_ = true; -} - -void MKLDNNFcLayer::convertWeightsToPaddle() { - CHECK(wgtVal_) << "should have been initialized"; - auto targetDim = wgtVal_->getDims(); - auto dstFmt = targetDim.size() == 2 ? 
format::io : format::ihwo; - wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); -} - -void MKLDNNFcLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { - reshapeInput(bs, ih, iw); - - CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); - ic = iLayerSize_ / (ih * iw); - CHECK_EQ(size_t(ic * ih * iw), iLayerSize_) << "not divisible"; - CHECK_EQ(size_t(oc), getSize()); - - reshapeOutput(oh, ow); - resizeOutput(bs, oc); -} - -void MKLDNNFcLayer::resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - resetFwdBuffers(inputs[0], wgtVal_, biasVal_, out); - - resetFwdPD(fwdPD_, inputs[0], wgtVal_, biasVal_, out); - - resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out); -} - -void MKLDNNFcLayer::resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - std::shared_ptr bwdWgtPD; - std::shared_ptr bwdDataPD; - - resetBwdBuffers(inputs[0], wgtGrad_, biasGrad_, out); - - resetBwdWgtPD(bwdWgtPD, wgtGrad_, biasGrad_, out); - - resetBwdDataPD(bwdDataPD, inputs[0], out); - - resetBwdPipeline( - pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); -} - -void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { - weight_->getParameterPtr()->incUpdate(callback); - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); - } -} - -void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - resetInValue(in); - CHECK(in); - in->downSpatial(); - - auto outPD = - MKLDNNMatrix::createPrimitiveDesc({bs_, oc_}, format::nc, engine_); - resetOutValue(out, outPD); - - format wgtFmt = format::oihw; - if (in->getFormat() == format::nChw8c) { - wgtFmt = format::oIhw8i; - } else if (in->getFormat() == format::nChw16c) { - wgtFmt = format::oIhw16i; - } - auto wgtPD = - MKLDNNMatrix::createPrimitiveDesc({oc_, ic_, ih_, iw_}, wgtFmt, engine_); - resetWithMatrix(wgt, 
weight_->getW(), wgtPD); - wgt->downSpatial(); - - if (biases_ && biases_->getW()) { - auto biasPD = MKLDNNMatrix::createPrimitiveDesc({oc_}, format::x, engine_); - resetWithMatrix(bias, biases_->getW(), biasPD); - } else { - bias = nullptr; - } -} - -void MKLDNNFcLayer::resetFwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr in, - MKLDNNMatrixPtr wgt, - MKLDNNMatrixPtr bias, - MKLDNNMatrixPtr out) { - CHECK(in); - CHECK(wgt); - CHECK(out); - prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = bias != nullptr ? fc_fwd::desc(pk, - in->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_fwd::desc(pk, - in->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - pd.reset(new fc_fwd::primitive_desc(fwdDesc, engine_)); -} - -void MKLDNNFcLayer::resetFwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - if (bias) { - fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out)); - } else { - fwd_.reset(new fc_fwd(*pd, *in, *wgt, *out)); - } - pipeline.push_back(*fwd_); -} - -void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - CHECK(inVals_[0] && outVal_); - resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVals_[0]->getPrimitiveDesc()); - - CHECK(wgtVal_); - resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc()); - - if (biasVal_) { - resetWithMatrix(bias, biases_->getWGrad(), biasVal_->getPrimitiveDesc()); - } else { - bias = nullptr; - } -} - -void MKLDNNFcLayer::resetBwdWgtPD( - std::shared_ptr& pd, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - CHECK(inVals_[0]); - fc_bwdWgt::desc bwdWgtDesc = - bias ? 
fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); -} - -void MKLDNNFcLayer::resetBwdDataPD( - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - pd = nullptr; - if (in == nullptr) { - return; - } - CHECK(wgtVal_); - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( - in->getMemoryDesc(), wgtVal_->getMemoryDesc(), out->getMemoryDesc()); - pd.reset(new fc_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); -} - -void MKLDNNFcLayer::resetBwdPipeline( - std::vector& pipeline, - std::shared_ptr& bwdWgtPD, - std::shared_ptr& bwdDataPD, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - CHECK(inVals_[0]); - if (bias) { - bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt, *bias)); - } else { - bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt)); - } - pipeline.push_back(*bwdWgt_); - - if (bwdDataPD == nullptr) { - return; - } - CHECK(wgtVal_) << "Should have weight memory"; - bwdData_.reset(new fc_bwdData(*bwdDataPD, *out, *wgtVal_, *in)); - pipeline.push_back(*bwdData_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNFcLayer.h b/paddle/legacy/gserver/layers/MKLDNNFcLayer.h deleted file mode 100644 index a704066cc818a6b33bd0eed4612d62b674fa72ca..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNFcLayer.h +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "MKLDNNLayer.h" -#include "mkldnn.hpp" - -namespace paddle { -typedef mkldnn::inner_product_forward fc_fwd; -typedef mkldnn::inner_product_backward_weights fc_bwdWgt; -typedef mkldnn::inner_product_backward_data fc_bwdData; - -/** - * @brief A subclass of MKLDNNLayer fc layer. - * - * The config file api is mkldnn_fc - */ -class MKLDNNFcLayer : public MKLDNNLayer { - protected: - // input layer size, can not be change after init - size_t iLayerSize_; // == ic * ih * iw - - // if has already init the weight - bool hasInitedWgt_; - - // save forward primitive_desc, which can be used backward - std::shared_ptr fwdPD_; - - // fc weight and bias - std::unique_ptr weight_; - std::unique_ptr biases_; - - public: - explicit MKLDNNFcLayer(const LayerConfig& config) - : MKLDNNLayer(config), hasInitedWgt_(false) {} - - ~MKLDNNFcLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; - - void resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void updateWeights(const UpdateCallback& callback) override; - - void convertWeightsFromPaddle() override; - - void convertWeightsToPaddle() override; - - protected: - void resetFwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void resetFwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr in, - 
MKLDNNMatrixPtr wgt, - MKLDNNMatrixPtr bias, - MKLDNNMatrixPtr out); - void resetFwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void resetBwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void resetBwdWgtPD(std::shared_ptr& pd, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); - void resetBwdDataPD(std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out); - void resetBwdPipeline(std::vector& pipeline, - std::shared_ptr& bwdWgtPD, - std::shared_ptr& bwdDataPD, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNLRNLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNLRNLayer.cpp deleted file mode 100644 index 739482348f71bf144551cd1d881f1f1d7d69201f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNLRNLayer.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MKLDNNLRNLayer.h" -#include "paddle/legacy/utils/Logging.h" - -using namespace mkldnn; // NOLINT -typedef memory::format format; - -namespace paddle { - -REGISTER_LAYER(mkldnn_lrn, MKLDNNLRNLayer); - -bool MKLDNNLRNLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!MKLDNNLayer::init(layerMap, parameterMap)) { - return false; - } - - /* the size of inputs for norm-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1); - const NormConfig& conf = config_.inputs(0).norm_conf(); - localSize_ = conf.size(); - alpha_ = conf.scale(); - beta_ = conf.pow(); - - ic_ = conf.channels(); - oc_ = ic_; - iw_ = conf.img_size(); - ow_ = conf.output_x(); - ih_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - oh_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - CHECK_EQ(iw_, ow_); - CHECK_EQ(ih_, oh_); - return true; -} - -void MKLDNNLRNLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { - CHECK_EQ(inputLayers_.size(), 1UL); - reshapeInput(bs, ih, iw); - // ic_ and oc can not be changed - CHECK_EQ((size_t)ic, - inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) - << "Input channel can not be changed"; - oh = ih; - ow = iw; - reshapeOutput(oh, ow); - resizeOutput(bs, oc * oh * ow); -} - -void MKLDNNLRNLayer::resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - resetFwdBuffers(inputs[0], out); - - resetFwdPD(fwdPD_, inputs[0], out); - - resetFwdPipeline(pipeline, fwdPD_, inputs[0], out); -} - -void MKLDNNLRNLayer::resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - std::shared_ptr pd; - - resetBwdBuffers(inputs[0], out); - - resetBwdPD(pd, inputs[0], out); - - resetBwdPipeline(pipeline, pd, inputs[0], out); -} - -void MKLDNNLRNLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - resetInValue(in); - CHECK(in); - resetOutValue(out, in->getPrimitiveDesc()); -} - -void 
MKLDNNLRNLayer::resetFwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr in, - MKLDNNMatrixPtr out) { - prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring - : prop_kind::forward_training; - auto fwdDesc = lrn_fwd::desc(pk, - algorithm::lrn_across_channels, - in->getMemoryDesc(), - localSize_, - alpha_, - beta_, - 1.0f); - pd.reset(new lrn_fwd::primitive_desc(fwdDesc, engine_)); - // prepare workspace if necessary - workspace_ = - passType_ != PASS_TEST - ? std::make_shared(memory(pd->workspace_primitive_desc())) - : nullptr; -} - -void MKLDNNLRNLayer::resetFwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - fwd_ = workspace_ - ? std::make_shared(lrn_fwd(*pd, *in, *workspace_, *out)) - : std::make_shared(lrn_fwd(*pd, *in, *out)); - pipeline.push_back(*fwd_); -} - -void MKLDNNLRNLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - CHECK(inVals_[0] && outVal_); - resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVals_[0]->getPrimitiveDesc()); -} - -void MKLDNNLRNLayer::resetBwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - pd = nullptr; - if (in == nullptr) { - return; - } - CHECK(out); - auto bwdDesc = lrn_bwd::desc(algorithm::lrn_across_channels, - in->getMemoryDesc(), - out->getMemoryDesc(), - localSize_, - alpha_, - beta_, - 1.0f); - pd.reset(new lrn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_)); -} - -void MKLDNNLRNLayer::resetBwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - if (pd == nullptr) { - return; - } - CHECK(inVals_[0]); - CHECK(workspace_); - bwdData_ = std::make_shared( - lrn_bwd(*pd, *inVals_[0], *out, *workspace_, *in)); - pipeline.push_back(*bwdData_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNLRNLayer.h b/paddle/legacy/gserver/layers/MKLDNNLRNLayer.h deleted file mode 100644 index 
028438f2c93b2182318c53cd348351376d491e79..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNLRNLayer.h +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "MKLDNNLayer.h" -#include "mkldnn.hpp" - -namespace paddle { -typedef mkldnn::lrn_forward lrn_fwd; -typedef mkldnn::lrn_backward lrn_bwd; - -/** - * @brief A subclass of MKLDNNLayer LRN(Local Response Norm) layer. 
- * - * The config file api is mkldnn_lrn - */ -class MKLDNNLRNLayer : public MKLDNNLayer { - protected: - // save forward primitive_desc, which can be used in backward - std::shared_ptr fwdPD_; - // according to https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ - // test_lrn_backward.cpp, lrn need workspace for backward - std::shared_ptr workspace_; - - int localSize_; - float alpha_, beta_; // scale and pow in paddle - - public: - explicit MKLDNNLRNLayer(const LayerConfig& config) : MKLDNNLayer(config) {} - - ~MKLDNNLRNLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; - - void resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - protected: - void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); - void resetFwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr in, - MKLDNNMatrixPtr out); - void resetFwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out); - void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); - void resetBwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out); - void resetBwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNLayer.cpp deleted file mode 100644 index f0acffe871626c992bf28351c72710168e3d0426..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNLayer.cpp +++ /dev/null @@ -1,304 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MKLDNNLayer.h" - -using namespace mkldnn; // NOLINT -typedef memory::format format; - -namespace paddle { - -bool MKLDNNLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - CHECK(FLAGS_use_mkldnn) << "MKLDNNLayers only support use_mkldnn." - << "Please set WITH_MKL=ON " - << "and set use_mkldnn=True"; - CHECK(!useGpu_) << "Do not support GPU yet"; - - // set device id before Layer::init - setDevice(MKLDNN_DEVICE); - // change param device to MKLDNN device - setParamsDevice(MKLDNN_DEVICE, parameterMap); - if (!Layer::init(layerMap, parameterMap)) { - return false; - } - setOutputMap(); - checkCPUOutputsNumber(); - - stream_.reset(new MKLDNNStream()); - engine_ = CPUEngine::Instance().getEngine(); - return true; -} - -void MKLDNNLayer::forward(PassType passType) { - passType_ = passType; - - { - REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - CHECK(!inputLayers_.empty()); - copySeqInfoToOutputs(); - if (condition_ != keepCondition()) { - VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; - condition_ = keepCondition(); - reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); - printSizeInfo(); - // the output_.value and output_.grad are shared with CPU device - shareCPUDevice(); - pipelineFwd_.clear(); - inVals_.resize(inputLayers_.size(), nullptr); - extInVals_.resize(inputLayers_.size(), nullptr); - cvtInVals_.resize(inputLayers_.size(), nullptr); - resetFwd(pipelineFwd_, inVals_, outVal_); 
- prepareValueConversions(pipelineFwd_); - convertWeightsFromPaddle(); - printValueFormat(); - needResetBwd_ = true; - } - - if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) { - // Update input value data when input layer is "data" type, - // since the input value data address might be changed. - CHECK(extInVals_[0]); - extInVals_[0]->setData(getInputValue(0, CPU_DEVICE)->getData()); - } - - if (!outputOnlyMKLDNN_) { - clearGrads(); - } - stream_->submit(pipelineFwd_); - } - { - REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); - forwardActivation(); - } -} - -void MKLDNNLayer::backward(const UpdateCallback& callback) { - if (needResetBwd_) { - VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; - pipelineBwd_.clear(); - inGrads_.resize(inputLayers_.size(), nullptr); - extInGrads_.resize(inputLayers_.size(), nullptr); - cvtInGrads_.resize(inputLayers_.size(), nullptr); - pipelineMergeGrad_.clear(); - mergeGrad_ = nullptr; - resetBwd(pipelineBwd_, inGrads_, outGrad_); - prepareGradConversions(pipelineBwd_); - printGradFormat(); - needResetBwd_ = false; - } - - // merge grad must before backward activation - if (mergeGrad_) { - REGISTER_TIMER_INFO("MergeBpGrad", getName().c_str()); - stream_->submit(pipelineMergeGrad_); - } - { - REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); - backwardActivation(); - } - { - REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); - stream_->submit(pipelineBwd_); - } - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - updateWeights(callback); - } -} - -void MKLDNNLayer::reshapeInput(int& batchsize, - int& height, - int& width, - size_t idx) { - const Argument& input = inputLayers_[idx]->getOutput(); - batchsize = input.getBatchSize(); - int h = input.getFrameHeight(); - int w = input.getFrameWidth(); - if (h != 0) { - height = h; - } - if (w != 0) { - width = w; - } - height = height != 0 ? height : 1; - width = width != 0 ? 
width : 1; -} - -void MKLDNNLayer::reshapeOutput(size_t height, size_t width) { - output_.setFrameHeight(height); - output_.setFrameWidth(width); - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - outputOtherDevice_[i].setFrameHeight(height); - outputOtherDevice_[i].setFrameWidth(width); - } -} - -void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn, - const MatrixPtr& mat, - memory::primitive_desc pd) { - dnn = nullptr; - if (mat == nullptr) { - return; - } - dnn = MKLDNNMatrix::create(pd, mat); -} - -void MKLDNNLayer::resetInValue( - MKLDNNMatrixPtr& in, - const std::shared_ptr& intPD, - size_t idx, - int inputChannel) { - cvtInVals_[idx] = nullptr; - extInVals_[idx] = nullptr; - in = nullptr; - inputChannel = inputChannel == 0 ? ic_ : inputChannel; - CHECK_GT(bs_ * inputChannel * ih_ * iw_, 0); - auto extPD = MKLDNNMatrix::createPrimitiveDesc( - {bs_, inputChannel, ih_, iw_}, format::nchw, engine_); - const MatrixPtr& inMat = inputLayers_[idx]->getOutputValue(); - extInVals_[idx] = std::dynamic_pointer_cast(inMat); - CHECK_EQ(inputIsOnlyMKLDNN(), extInVals_[idx] != nullptr); - if (extInVals_[idx] == nullptr || - extInVals_[idx]->getFormat() == format::nc) { - extInVals_[idx] = MKLDNNMatrix::create(extPD, inMat); - } - in = extInVals_[idx]; - if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) { - return; - } - // need create reorder - in = MKLDNNMatrix::create(*intPD); - cvtInVals_[idx] = MKLDNNMatrix::createReorder(extInVals_[idx], in); - CHECK(cvtInVals_[idx]) << "should not be emptry"; -} - -void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out, - memory::primitive_desc intPD) { - cvtOutVal_ = nullptr; - out = MKLDNNMatrix::create(intPD, output_.value); - extOutVal_ = out; - if (outputIsOnlyMKLDNN() || isPaddleFormat(extOutVal_->getFormat())) { - return; - } - // need create reorder - CHECK_GT(bs_ * oc_ * oh_ * ow_, 0); - extOutVal_ = MKLDNNMatrix::create( - memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_, output_.value); - out = 
MKLDNNMatrix::create(intPD); - cvtOutVal_ = MKLDNNMatrix::createReorder(out, extOutVal_); - CHECK(cvtOutVal_) << "should not be empty"; -} - -void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, - memory::primitive_desc intPD, - size_t idx) { - cvtInGrads_[idx] = nullptr; - extInGrads_[idx] = nullptr; - in = nullptr; - LayerPtr& input = inputLayers_[idx]; - if (input->getOutputGrad() == nullptr) { - // no need input grad - return; - } - CHECK(inputIsOnlyMKLDNN() || input->getOutputMapSize() <= 1) - << "only support input is MKLDNN layer or only have one output layer"; - // when input is a mkldnn branch node, - // this layer will save input grad to a internal buffer, - // and the mkldnn input layer will merge them to actual prev->output_.grad - const MatrixPtr& inMat = - input->getOutputMapSize() <= 1 ? input->getOutputGrad() : nullptr; - in = MKLDNNMatrix::create(intPD, inMat); - Argument& arg = input->getOutput(this->getName()); - arg.grad = std::dynamic_pointer_cast(in); - CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD); - if (inputIsOnlyMKLDNN()) { - return; - } - - extInGrads_[idx] = in; - if (isPaddleFormat(extInGrads_[idx]->getFormat())) { - return; - } - // need create reorder - CHECK(extInVals_[idx] != nullptr && - isPaddleFormat(extInVals_[idx]->getFormat())) - << "should have external input value and the format must be nchw(nc)"; - extInGrads_[idx] = - MKLDNNMatrix::create(extInVals_[idx]->getPrimitiveDesc(), inMat); - CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD); - in = MKLDNNMatrix::create(intPD); - cvtInGrads_[idx] = MKLDNNMatrix::createReorder(in, extInGrads_[idx]); - CHECK(cvtInGrads_[idx]); -} - -void MKLDNNLayer::resetOutGrad(MKLDNNMatrixPtr& out, - memory::primitive_desc intPD) { - cvtOutGrad_ = nullptr; - extOutGrad_ = nullptr; - out = nullptr; - MatrixPtr& outMat = output_.grad; - out = MKLDNNMatrix::create(intPD, outMat); - resetMergeGrad(out); - if (outputIsOnlyMKLDNN()) { - return; - } - CHECK_LE(outputMap_.size(), 1U) << "do not support mixed 
with cpu device"; - extOutGrad_ = out; - if (isPaddleFormat(extOutGrad_->getFormat())) { - return; - } - // need create reorder - CHECK(extOutVal_ != nullptr && isPaddleFormat(extOutVal_->getFormat())) - << "should have external output value and the format must be nchw(nc)"; - extOutGrad_ = MKLDNNMatrix::create(extOutVal_->getPrimitiveDesc(), outMat); - CHECK_PRIMITIVE_DESC_EQ(outVal_, intPD); - out = MKLDNNMatrix::create(intPD); - cvtOutGrad_ = MKLDNNMatrix::createReorder(extOutGrad_, out); - CHECK(cvtOutGrad_); -} - -void MKLDNNLayer::resetMergeGrad(MKLDNNMatrixPtr& out) { - mergeGrad_ = nullptr; - pipelineMergeGrad_.clear(); - if (outputMap_.size() <= 1 || !outputIsOnlyMKLDNN()) { - // do not merge when output is not all MKLDNN or only one output - return; - } - CHECK(out) << "should have reset internal ouput grad"; - std::vector scales(outputMap_.size(), 1.0); - std::vector srcPDs; - std::vector srcs; - for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) { - MKLDNNMatrixPtr src = - std::dynamic_pointer_cast(it->second->grad); - CHECK(src) << "should be MKLDNNMatrix"; - auto srcDims = src->getDims(); - auto dstDims = out->getDims(); - CHECK_EQ(srcDims.size(), dstDims.size()); - for (size_t i = 0; i < srcDims.size(); ++i) { - CHECK_EQ(srcDims[i], dstDims[i]); - } - VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first - << ", format " << src->getFormat(); - srcPDs.push_back(src->getPrimitiveDesc()); - srcs.push_back(*src); - } - - auto sumPD = sum::primitive_desc(out->getMemoryDesc(), scales, srcPDs); - mergeGrad_.reset(new sum(sumPD, srcs, *out)); - pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNLayer.h b/paddle/legacy/gserver/layers/MKLDNNLayer.h deleted file mode 100644 index 94dc8625f68985a16bd68a6e36a1ad607d77a7cb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNLayer.h +++ /dev/null @@ -1,477 +0,0 @@ 
-/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "Layer.h" -#include "MKLDNNBase.h" -#include "mkldnn.hpp" -#include "paddle/legacy/math/MKLDNNMatrix.h" -#include "paddle/legacy/utils/Stat.h" - -DECLARE_bool(use_mkldnn); - -namespace paddle { - -class MKLDNNLayer; -typedef std::shared_ptr MKLDNNLayerPtr; - -/** - * @brief Base class of MKLDNNlayer. - * - */ -class MKLDNNLayer : public Layer { - protected: - // batch size - int bs_; - // their sizes are always from the first input layer - // input image channel, height and width - int ic_, ih_, iw_; - // output image channel, height and width - int oc_, oh_, ow_; - - // the condition that forward need be reset - size_t condition_; - // backward also need reset after reset forward handle - bool needResetBwd_; - - // is output only mkldnn - bool outputOnlyMKLDNN_; - - // mkldnn engine, stream and primivtives - mkldnn::engine engine_; - std::shared_ptr stream_; - std::shared_ptr fwd_; - std::shared_ptr bwdWgt_; - std::shared_ptr bwdData_; - std::vector pipelineFwd_; - std::vector pipelineBwd_; - - /* Value and grad are seperated as internal and external buffers. - * Each MKLDNNLayer must init or reset internal buffer at least, - * and the external buffer format is always nchw of nc(when h==w==1), - * which is the same format as paddle. - * The output_.value and output_.grad always save the external data, - * when mixed with cpu device. 
- * When all layers are mkldnn layers, they could save internal data. - */ - // below MKLDNNMatrix buffers are all internal buffers - std::vector inVals_; - std::vector inGrads_; - MKLDNNMatrixPtr outVal_; - MKLDNNMatrixPtr outGrad_; - // below are external value and grad - std::vector extInVals_; - std::vector extInGrads_; - MKLDNNMatrixPtr extOutVal_; - MKLDNNMatrixPtr extOutGrad_; - // convert handle between external and internal buffers - std::vector> cvtInVals_; - std::vector> cvtInGrads_; - std::shared_ptr cvtOutVal_; - std::shared_ptr cvtOutGrad_; - - // weight and bias are always internal buffers - MKLDNNMatrixPtr wgtVal_; - MKLDNNMatrixPtr wgtGrad_; - MKLDNNMatrixPtr biasVal_; - MKLDNNMatrixPtr biasGrad_; - - // merge grad primitive - std::shared_ptr mergeGrad_; - std::vector pipelineMergeGrad_; - // tmp input argument to save input grad, only used to merge grad - Argument tmpInArg_; - - public: - explicit MKLDNNLayer(const LayerConfig& config) - : Layer(config), - ih_(0), - iw_(0), - condition_(0), - needResetBwd_(true), - outputOnlyMKLDNN_(false), - engine_(mkldnn::engine::cpu, 0), - stream_(nullptr), - fwd_(nullptr), - bwdWgt_(nullptr), - bwdData_(nullptr) {} - - ~MKLDNNLayer() {} - - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); - - /** - * reshape the input and output channels and image sizes - * and reset output buffer size - */ - virtual void reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) = 0; - - /** - * reset the mkldnn forward primitve and memories - * only would be called when input size changes - * weight and bias buffers should be coverd by child class itself - */ - virtual void resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) = 0; - - /** - * reset the mkldnn backward primitve and memories - * only would be called when needed - * weight and bias buffers 
should be coverd by child class itself - */ - virtual void resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) = 0; - - /** - * Update weights and biases if necessary. - */ - virtual void updateWeights(const UpdateCallback& callback) {} - - /** - * convert weight from paddle format to mkldnn format - * weight_ will be override - */ - virtual void convertWeightsFromPaddle() {} - - /** - * convert mkldnn weight to paddle format - * weight_ will be override - */ - virtual void convertWeightsToPaddle() {} - - /** - * add this interface as public for unit test - */ - void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); } - - protected: - /** - * Some layers may have different condition to reset the forward. - * The function returns the condition that do not need reset forward. - */ - inline virtual size_t keepCondition() { - // reset when the first input element size changed, not only the batchsize - return inputLayers_[0]->getOutputValue()->getElementCnt(); - } - - /** - * reshape the input image sizes and input batchsize - */ - void reshapeInput(int& batchsize, int& height, int& width, size_t idx = 0); - - /** - * reshape output image sizes - */ - void reshapeOutput(size_t height, size_t width); - - /** - * reset MKLDNNMatrix from Matrix and internal primitive desc. - * reset nullptr if matrix or primitive desc is empty - */ - void resetWithMatrix(MKLDNNMatrixPtr& dnn, - const MatrixPtr& mat, - mkldnn::memory::primitive_desc pd); - - /** - * reset input value from input MKLDNNMatrix and internal primitive desc. - * reset both internal and external buffer and create reorder if necessary. - * input channel may be different in concat. - */ - void resetInValue( - MKLDNNMatrixPtr& in, - const std::shared_ptr& intPD = nullptr, - size_t idx = 0, - int inputChannel = 0); - - /** - * reset output value from internal primitive desc. - * reset both internal and external buffer and create reorder if necessary. 
- */ - void resetOutValue(MKLDNNMatrixPtr& out, - mkldnn::memory::primitive_desc intPD); - - /** - * reset input grad from internal primitive desc. - * reset both internal and external buffer and create reorder if necessary. - */ - void resetInGrad(MKLDNNMatrixPtr& in, - mkldnn::memory::primitive_desc intPD, - size_t idx = 0); - - /** - * reset output grad from internal primitive desc. - * merge grad if necessary. - * reset both internal and external buffer and create reorder if necessary. - * note: about merge grad, when this layer has several outputs, - * it could not be mixed with cpu device, - * since it can not get memory desc from cpu device. - */ - void resetOutGrad(MKLDNNMatrixPtr& out, mkldnn::memory::primitive_desc intPD); - - /** - * reset the merge grad primitive if necessary. - * note: do not support the grads mixed with cpu device, - * since it can not get memory desc from cpu device. - */ - void resetMergeGrad(MKLDNNMatrixPtr& out); - - protected: - /** - * Set deviceId of this layer. - */ - void setDevice(int id) { deviceId_ = id; } - - /** - * check the format is nchw or nc, - * which is supported by Paddle default memory layout - */ - bool isPaddleFormat(mkldnn::memory::format fmt) { - if (fmt == mkldnn::memory::format::nchw || - fmt == mkldnn::memory::format::nc) { - return true; - } else { - return false; - } - } - - /** - * If input only has MKLDNN device. - * Otherwise, only support the previous layer using CPU device. - */ - bool inputIsOnlyMKLDNN(int index = 0) { - int prevDevice = getPrev(index)->getDeviceId(); - if (prevDevice == MKLDNN_DEVICE) { - return true; - } else { - CHECK_EQ(prevDevice, CPU_DEVICE) << "Only support CPU yet"; - return false; - } - } - - /** - * If output only has MKLDNN device. - * Otherwise, other devices should only using CPU device. 
- */ - bool outputIsOnlyMKLDNN() { - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE) - << "Only support other device is CPU yet"; - } - outputOnlyMKLDNN_ = outputOtherDevice_.size() == 0; - return outputOnlyMKLDNN_; - } - - /** - * print info about sizes - */ - virtual void printSizeInfo() { - VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ - << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ - << ", oh: " << oh_ << ", ow: " << ow_; - } - - /** - * print the mkldnn memory format of value - */ - virtual void printValueFormat() { - for (size_t i = 0; i < inVals_.size(); ++i) { - if (!inVals_[i]) { - continue; - } - VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() - << ": " << (extInVals_[i] ? extInVals_[i]->getFormat() - : inVals_[i]->getFormat()) - << " >>> " << inVals_[i]->getFormat() << " >>>"; - } - if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> " - << (extOutVal_ ? extOutVal_->getFormat() - : outVal_->getFormat()); - } - if (wgtVal_) { - VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat(); - } - if (biasVal_) { - VLOG(MKLDNN_FMTS) << "Bias value format: " << biasVal_->getFormat(); - } - } - - /** - * print the mkldnn memory format of grad - */ - virtual void printGradFormat() { - if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< " - << (extOutGrad_ ? extOutGrad_->getFormat() - : outGrad_->getFormat()); - } - for (size_t i = 0; i < inGrads_.size(); ++i) { - if (!inGrads_[i]) { - continue; - } - VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() - << ": " << (extInGrads_[i] ? 
extInGrads_[i]->getFormat() - : inGrads_[i]->getFormat()) - << " <<< " << inGrads_[i]->getFormat() << " <<<"; - } - if (wgtGrad_) { - VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat(); - } - if (biasGrad_) { - VLOG(MKLDNN_FMTS) << "Bias grad format: " << biasGrad_->getFormat(); - } - } - - private: - /** - * clear all grad - */ - void clearGrads() { - if (output_.grad) { - output_.grad->zeroMem(); - } - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].grad) { - outputOtherDevice_[i].grad->zeroMem(); - } - } - } - - /** - * Set deviceId of the params used in this layer. - */ - void setParamsDevice(int id, const ParameterMap& parameterMap) { - for (auto& inputConfig : config_.inputs()) { - if (inputConfig.has_input_parameter_name()) { - ParameterPtr parameter; - std::string name = inputConfig.input_parameter_name(); - CHECK(mapGet(name, parameterMap, ¶meter)) - << "Cannot find input parameter " << name << " for layer " - << getName(); - parameter->setDevice(id); - } - } - if (config_.has_bias_parameter_name()) { - ParameterPtr parameter; - std::string name = config_.bias_parameter_name(); - CHECK(mapGet(name, parameterMap, ¶meter)) - << "Cannot find bias parameter " << name << " for layer " - << getName(); - parameter->setDevice(id); - } - } - - /** - * Set output map of prev layers. - */ - void setOutputMap() { - outputMap_.clear(); - for (size_t i = 0; i < inputLayers_.size(); ++i) { - inputLayers_[i]->setOutput(getName(), &tmpInArg_); - } - } - - /** - * if have cpu device, share value and grad data with output_ - */ - void shareCPUDevice() { - if (outputIsOnlyMKLDNN()) { - return; - } - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - outputOtherDevice_[i].value = output_.value; - outputOtherDevice_[i].grad = output_.grad; - } - } - - /** - * Check the cpu device number of outputOtherDevice_. - * should have only one at most. 
- */ - void checkCPUOutputsNumber(int max = 1) { - int cnt = 0; - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { - ++cnt; - } - } - CHECK_LE(cnt, max) << "too much CPU devies"; - } - - /** - * copy SeqInfo from input layer to this output and other output devices. - * @note: do not use getInput(0) since it used this deviceId_, - * use "inputLayers_[0]->getOutput()" instead. - */ - void copySeqInfoToOutputs() { - if (inputLayers_.empty() || !needSequenceInfo_) { - return; - } - const Argument& input = inputLayers_[0]->getOutput(); - output_.sequenceStartPositions = input.sequenceStartPositions; - output_.subSequenceStartPositions = input.subSequenceStartPositions; - output_.cpuSequenceDims = input.cpuSequenceDims; - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - outputOtherDevice_[i].sequenceStartPositions = - output_.sequenceStartPositions; - outputOtherDevice_[i].subSequenceStartPositions = - output_.subSequenceStartPositions; - outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; - } - } - - void prepareValueConversions(std::vector& pipeline) { - // MKLDNNLayer output value should be MKLDNNMatrix - // so external output value is necessary. - // Then external input value is not necessary, - // since input may be mkldnn internal buffer. - CHECK(extOutVal_) << "external output value is necessary"; - output_.value = std::dynamic_pointer_cast(extOutVal_); - CHECK(inVals_[0] && outVal_) << "internal memories are necessary"; - for (size_t i = 0; i < cvtInVals_.size(); ++i) { - if (cvtInVals_[i]) { - pipeline.insert(pipeline.begin(), *cvtInVals_[i]); - } - } - if (cvtOutVal_) { - pipeline.push_back(*cvtOutVal_); - } - } - void prepareGradConversions(std::vector& pipeline) { - // external output grad is not necessary - // since output may be mkldnn internal buffer or merge them directly. 
- CHECK(outGrad_) << "internal output grad is necessary"; - if (extOutGrad_) { - CHECK_EQ(extOutGrad_->getData(), output_.grad->getData()) - << "the external buffer should share the same data with output_.grad"; - } - if (cvtOutGrad_) { - pipeline.insert(pipeline.begin(), *cvtOutGrad_); - } - for (size_t i = 0; i < cvtInGrads_.size(); ++i) { - if (cvtInGrads_[i]) { - pipeline.push_back(*cvtInGrads_[i]); - } - } - } -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNPoolLayer.cpp deleted file mode 100644 index 83d980538d2b1b7351bf858ab391c14f6e7170bd..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNPoolLayer.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MKLDNNPoolLayer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/utils/Logging.h" - -using namespace mkldnn; // NOLINT -typedef memory::format format; - -namespace paddle { - -REGISTER_LAYER(mkldnn_pool, MKLDNNPoolLayer); - -bool MKLDNNPoolLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!MKLDNNLayer::init(layerMap, parameterMap)) { - return false; - } - - /* the size of inputs for pool-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1); - const PoolConfig& conf = config_.inputs(0).pool_conf(); - ic_ = conf.channels(); - ih_ = conf.img_size_y(); - iw_ = conf.img_size(); - oc_ = ic_; - oh_ = conf.output_y(); - ow_ = conf.output_x(); - fh_ = conf.size_y(); - fw_ = conf.size_x(); - ph_ = conf.padding_y(); - pw_ = conf.padding(); - sh_ = conf.stride_y(); - sw_ = conf.stride(); - - const std::string& type = conf.pool_type(); - if (type == "max-projection") { - poolAlgo_ = algorithm::pooling_max; - } else if (type == "avg-projection") { - // paddle only use exclude_padding - poolAlgo_ = algorithm::pooling_avg_exclude_padding; - } else { - LOG(FATAL) << "unknow pooling type!"; - } - return true; -} - -void MKLDNNPoolLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { - reshapeInput(bs, ih, iw); - // ic_ and oc can not be changed - CHECK_EQ((size_t)ic, - inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) - << "Input channel can not be changed"; - - // cal output sizes - // paddle used false caffeMode for pooling - oh = outputSize(ih, fh_, ph_, sh_, false); - ow = outputSize(iw, fw_, pw_, sw_, false); - reshapeOutput(oh, ow); - - resizeOutput(bs, oc * oh * ow); -} - -void MKLDNNPoolLayer::resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) { - resetFwdBuffers(inputs[0], out); - - resetFwdPD(fwdPD_, inputs[0], out); - - resetFwdPipeline(pipeline, fwdPD_, inputs[0], out); -} - -void MKLDNNPoolLayer::resetBwd(std::vector& pipeline, - 
std::vector& inputs, - MKLDNNMatrixPtr& out) { - std::shared_ptr pd; - - resetBwdBuffers(inputs[0], out); - - resetBwdPD(pd, inputs[0], out); - - resetBwdPipeline(pipeline, pd, inputs[0], out); -} - -void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - resetInValue(in); - - memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; - CHECK(in); - auto outPD = - MKLDNNMatrix::createPrimitiveDesc(outDims, in->getFormat(), engine_); - resetOutValue(out, outPD); -} - -void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr in, - MKLDNNMatrixPtr out) { - memory::dims kernels = memory::dims{fh_, fw_}; - memory::dims strides = memory::dims{sh_, sw_}; - memory::dims padL = memory::dims{ph_, pw_}; - memory::dims padR = getPaddingR(); - padding_kind padKind = padding_kind::zero; - prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring - : prop_kind::forward_training; - auto fwdDesc = pool_fwd::desc(pk, - poolAlgo_, - in->getMemoryDesc(), - out->getMemoryDesc(), - strides, - kernels, - padL, - padR, - padKind); - pd.reset(new pool_fwd::primitive_desc(fwdDesc, engine_)); - - // prepare workspace if necessary - workspace_ = - (passType_ != PASS_TEST && poolAlgo_ == algorithm::pooling_max) - ? std::make_shared(memory(pd->workspace_primitive_desc())) - : nullptr; -} - -void MKLDNNPoolLayer::resetFwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - fwd_ = workspace_ - ? 
std::make_shared(pool_fwd(*pd, *in, *out, *workspace_)) - : std::make_shared(pool_fwd(*pd, *in, *out)); - pipeline.push_back(*fwd_); -} - -void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - CHECK(inVals_[0] && outVal_); - resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVals_[0]->getPrimitiveDesc()); -} - -void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - pd = nullptr; - if (in == nullptr) { - return; - } - memory::dims kernels = memory::dims{fh_, fw_}; - memory::dims strides = memory::dims{sh_, sw_}; - memory::dims padL = memory::dims{ph_, pw_}; - memory::dims padR = getPaddingR(); - CHECK(out); - auto bwdDesc = pool_bwd::desc(poolAlgo_, - in->getMemoryDesc(), - out->getMemoryDesc(), - strides, - kernels, - padL, - padR, - padding_kind::zero); - pd.reset(new pool_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_)); -} - -void MKLDNNPoolLayer::resetBwdPipeline( - std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out) { - if (pd == nullptr) { - return; - } - - bwdData_ = - workspace_ - ? std::make_shared(pool_bwd(*pd, *out, *workspace_, *in)) - : std::make_shared(pool_bwd(*pd, *out, *in)); - pipeline.push_back(*bwdData_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLDNNPoolLayer.h b/paddle/legacy/gserver/layers/MKLDNNPoolLayer.h deleted file mode 100644 index 1eb0ee4ad946f61e32b7d4f4fd376dda89d6acf7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLDNNPoolLayer.h +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "MKLDNNLayer.h" -#include "mkldnn.hpp" - -namespace paddle { -typedef mkldnn::pooling_forward pool_fwd; -typedef mkldnn::pooling_backward pool_bwd; - -/** - * @brief A subclass of MKLDNNLayer pool layer. - * - * The config file api is mkldnn_pool - */ -class MKLDNNPoolLayer : public MKLDNNLayer { - protected: - // padding height and width - int ph_, pw_; - // stride height and width - int sh_, sw_; - // filter(kenerl) height and width - int fh_, fw_; - - // pooling_avg or pooling_max - mkldnn::algorithm poolAlgo_; - - // save forward primitive_desc, which can be used backward - std::shared_ptr fwdPD_; - // according to https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ - // test_pooling_forward.cpp, pool need workspace for backward - std::shared_ptr workspace_; - - public: - explicit MKLDNNPoolLayer(const LayerConfig& config) : MKLDNNLayer(config) {} - - ~MKLDNNPoolLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void reshape( - int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; - - void resetFwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void resetBwd(std::vector& pipeline, - std::vector& inputs, - MKLDNNMatrixPtr& out) override; - - void printSizeInfo() override { - MKLDNNLayer::printSizeInfo(); - VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_ - << ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_ - << ", sw: " << sw_; - } - - protected: - void resetFwdBuffers(MKLDNNMatrixPtr& in, 
MKLDNNMatrixPtr& out); - void resetFwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr in, - MKLDNNMatrixPtr out); - void resetFwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out); - void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); - void resetBwdPD(std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out); - void resetBwdPipeline(std::vector& pipeline, - std::shared_ptr& pd, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& out); - - /** - * get padding_r according to - * https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ - * test_pooling_forward.cpp - */ - mkldnn::memory::dims getPaddingR() const { - mkldnn::memory::dims padR = {ph_, pw_}; - for (int i = 0; i < 2; ++i) { - if ((ih_ + ph_ + padR[0] - fh_) / sh_ + 1 < oh_) { - ++padR[0]; - } - if ((iw_ + pw_ + padR[1] - fw_) / sw_ + 1 < ow_) { - ++padR[1]; - } - } - return padR; - } -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.cpp b/paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.cpp deleted file mode 100644 index d928ebc3248f57b13f4380ddcdfec767bbd083ff..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MKLPackedRecurrentLayer.h" - -namespace paddle { - -REGISTER_LAYER(mkl_packed_recurrent, MKLPackedRecurrentLayer); - -bool MKLPackedRecurrentLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!RecurrentLayer::init(layerMap, parameterMap)) return false; - packed_weight_.reset(new MKLPackedWeight(weight_->getW())); - packed_weight_->pack(); - if (needGradient_) { - packed_weightT_.reset(new MKLPackedWeight(weight_->getW(), true)); - packed_weightT_->pack(); - } - return true; -} - -void MKLPackedRecurrentLayer::backward(const UpdateCallback& callback) { - RecurrentLayer::backward(callback); - packed_weight_->pack(); - if (needGradient_) { - packed_weightT_->pack(); - } -} - -void MKLPackedRecurrentLayer::forwardBatch(int batchSize, - size_t numSequences, - const int* starts) { - if (!batchValue_) { - batchValue_.reset(new SequenceToBatch(useGpu_)); - } - - batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts, reversed_); - - batchValue_->copyFromSeq(*output_.value); - - { - REGISTER_TIMER_INFO("RecurrentFwBatch", getName().c_str()); - /* forward one batch */ - for (size_t n = 0; n < batchValue_->getNumBatch(); n++) { - MatrixPtr batchValue = batchValue_->getBatchValue(n); - - if (n != 0) { - MatrixPtr preBatchValue = - batchValue_->getBatchValue(n - 1, batchValue->getHeight()); - - packed_weight_->gemm_compute(preBatchValue, batchValue); - } - Argument arg; - arg.value = batchValue; - activation_->forward(arg).check(); - } - } - batchValue_->copyBackSeq(*output_.value); -} - -void MKLPackedRecurrentLayer::backwardBatch(int batchSize, - size_t numSequences, - const int* starts) { - if (!batchGrad_) { - batchGrad_.reset(new SequenceToBatch(useGpu_)); - } - batchGrad_->shareIndexWith(*batchValue_); - - size_t numBatch = batchGrad_->getNumBatch(); - bool backwardByBatch = numBatch < numSequences; - - batchGrad_->copyFromSeq(*output_.grad); - { - REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str()); - /* 
backward one batch */ - for (int n = (int)numBatch - 1; n >= 0; n--) { - MatrixPtr batchGrad = batchGrad_->getBatchValue(n); - MatrixPtr batchValue = - batchValue_->getBatchValue(n, batchGrad->getHeight()); - - Argument arg; - arg.value = batchValue; - arg.grad = batchGrad; - activation_->backward(arg).check(); - - if (n != 0) { - batchValue = batchGrad_->getBatchValue(n - 1, batchGrad->getHeight()); - packed_weightT_->gemm_compute(batchGrad, batchValue); - } - - if (backwardByBatch && weight_->getWGrad()) { - if (n != 0) { - /* backward weight */ - batchValue = - batchValue_->getBatchValue(n - 1, batchGrad->getHeight()); - weight_->getWGrad()->mul( - *batchValue->getTranspose(), *batchGrad, 1, 1); - } - } - } - } - - batchGrad_->copyBackSeq(*output_.grad); - - if (!backwardByBatch && weight_->getWGrad()) { - REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str()); - for (size_t seq = 0; seq < numSequences; ++seq) { - int len = starts[seq + 1] - starts[seq]; - weight_->getWGrad()->mul( - *output_.value - ->subMatrix(reversed_ ? starts[seq] + 1 : starts[seq], len - 1) - ->getTranspose(), - *output_.grad->subMatrix(reversed_ ? starts[seq] : starts[seq] + 1, - len - 1), - 1, - 1); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.h b/paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.h deleted file mode 100644 index 441025a9c9d75786b17db84c74995a96b6a06ea8..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.h +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "MKLPackedWeight.h" -#include "RecurrentLayer.h" - -DECLARE_bool(rnn_use_batch); - -namespace paddle { - -/** - * @brief MKLPackedRecurrentLayer is almost the same with RecurrentLayer - * but is optimized with MKL cblas packed gemm. - * More details: - * https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/mkl/mkl_packed.md - */ - -class MKLPackedRecurrentLayer : public RecurrentLayer { - public: - explicit MKLPackedRecurrentLayer(const LayerConfig& config) - : RecurrentLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void backward(const UpdateCallback& callback) override; - - protected: - void forwardBatch(int batchSize, - size_t numSequences, - const int* starts) override; - - void backwardBatch(int batchSize, - size_t numSequences, - const int* starts) override; - - protected: - /// packed_weight_ contains same data with - /// RecurrentLayer::weight_ but is packed - std::unique_ptr packed_weight_; - /// packed_weightT_ is the transposition matrix of packed_weight_ - std::unique_ptr packed_weightT_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MKLPackedWeight.h b/paddle/legacy/gserver/layers/MKLPackedWeight.h deleted file mode 100644 index 47f225bd03c3ccb594db952483d3b8397b61e1ec..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MKLPackedWeight.h +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/legacy/math/MathFunctions.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/parameter/Weight.h" - -namespace paddle { - -class MKLPackedWeight { - protected: - /// The pointer of weight - real *weight_; - /// The pointer of cblas packed gemm to weight - real *packedWeight_; - size_t height_; - size_t width_; - bool transW_; - - public: - explicit MKLPackedWeight(MatrixPtr weight, bool transW = false) { - packedWeight_ = nullptr; - weight_ = weight->getData(); - height_ = weight->getHeight(); - width_ = weight->getWidth(); - transW_ = transW; - } - - ~MKLPackedWeight() { free_(); } - - void pack() { pack_(weight_); } - - void gemm_compute(const MatrixPtr src, MatrixPtr dst) { - cblas_sgemm_compute(CblasRowMajor, - CblasNoTrans, - CblasPacked, - src->getHeight(), - transW_ ? height_ : width_, - transW_ ? width_ : height_, - src->getData(), - src->getWidth(), - packedWeight_, - width_, - 1.0, - dst->getData(), - dst->getWidth()); - } - - protected: - void pack_(real *src) { - if (!packedWeight_) { - packedWeight_ = cblas_sgemm_alloc(CblasBMatrix, 1, width_, height_); - } - cblas_sgemm_pack(CblasRowMajor, - CblasBMatrix, - transW_ ? CblasTrans : CblasNoTrans, - 1, - transW_ ? height_ : width_, - transW_ ? 
width_ : height_, - 1.0, - src, - width_, - packedWeight_); - } - - void free_() { - if (packedWeight_) { - cblas_sgemm_free(packedWeight_); - } - } -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MaxIdLayer.cpp b/paddle/legacy/gserver/layers/MaxIdLayer.cpp deleted file mode 100644 index eecd4996e962857b09001a1bb36bc027cbaa4308..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MaxIdLayer.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" - -namespace paddle { - -/** - * A layer for finding the id which has the maximal value for each sample. - * The result is stored in output_.ids. - * - * The config file api is maxid_layer. - */ -class MaxIdLayer : public Layer { - private: - /// a predetermined number of best states at each level - size_t beamSize_; - - public: - explicit MaxIdLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - bool ret = Layer::init(layerMap, parameterMap); - CHECK_EQ(1UL, inputLayers_.size()); - - beamSize_ = config_.has_beam_size() ? 
config_.beam_size() : FLAGS_beam_size; - CHECK_GE(beamSize_, 1LU); - return ret; - } - - void forward(PassType passType) override { - Layer::forward(passType); - const Argument& input = getInput(0); - size_t batchSize = input.getBatchSize(); - IVector::resizeOrCreate(output_.ids, batchSize * beamSize_, useGpu_); - Matrix::resizeOrCreate(output_.in, - batchSize, - beamSize_, - false, - /* useGpu */ useGpu_); - output_.value = nullptr; - input.value->rowMax(*output_.ids, *output_.in); - } - - void backward(const UpdateCallback& callback) override {} -}; - -REGISTER_LAYER(maxid, MaxIdLayer); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MaxLayer.cpp b/paddle/legacy/gserver/layers/MaxLayer.cpp deleted file mode 100644 index b51251b663cf818fbe662a96b7c0d55a615640d4..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MaxLayer.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MaxLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(max, MaxLayer); - -void MaxLayer::forward(PassType passType) { - SequencePoolLayer::forward(passType); - - IVector::resizeOrCreate( - maxIndex_, newBatchSize_ * getSize(), useGpu(deviceId_)); - maxIndex_->zeroMem(); - - MatrixPtr inputValue = getInputValue(0); - MatrixPtr outputValue = getOutputValue(); - - { - REGISTER_TIMER_INFO("MaxLayerForward", getName().c_str()); - outputValue->maxSequenceForward( - *inputValue, *startPositions_->getVector(useGpu_), *maxIndex_); - } - - if (config_.output_max_index()) { - // copy maxIndex_ to output - outputValue->copyFrom(*maxIndex_); - } else { - /* add the bias-vector AFTER max operation */ - if (biases_.get() != NULL) { - outputValue->addBias(*(biases_->getW()), 1); - } - /* activation */ { forwardActivation(); } - } -} - -void MaxLayer::backward(const UpdateCallback& callback) { - CHECK(!config_.output_max_index()) - << "backward is not available when output_max_index is set"; - SequencePoolLayer::backward(callback); - - MatrixPtr inputGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - if (inputGrad) { - REGISTER_TIMER_INFO("MaxLayerBackward", getName().c_str()); - inputGrad->maxSequenceBackward( - *outputGrad, *(startPositions_->getVector(useGpu_)), *maxIndex_); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MaxLayer.h b/paddle/legacy/gserver/layers/MaxLayer.h deleted file mode 100644 index 12d0128e39f2113d0e156813f9b3657cae145eed..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MaxLayer.h +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "SequencePoolLayer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -/** - * A layer for "internal max" for sequence input. - * Input: one or more sequences. Each sequence contains some instances. - * If SequenceLevel = kNonSeq: - * Output: output size is the number of input sequences (NOT input instances) - * output[i] = max_{for each instance in this sequence}{input[i]} - * If stride_ > 0: - * Output: a shorten sequence. Stride is the step size by which we slide a - * window upon the input sequence, and the max pooling operation is - * then applied to each interval independently. - * If SequenceLevel = kSeq: - * Check input sequence must has sub-sequence - * Output: output size is the number of input sub-sequences - * output[i] = max_{for each instance in this sub-sequence}{input[i]} - * - * The config file api is pooling_layer. - */ - -class MaxLayer : public SequencePoolLayer { - protected: - // maxIndex_[i][j] = k : the value at (i, j) is from input[k]. 
- IVectorPtr maxIndex_; - - public: - explicit MaxLayer(const LayerConfig& config) : SequencePoolLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - return SequencePoolLayer::init(layerMap, parameterMap); - } - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MaxOutLayer.cpp b/paddle/legacy/gserver/layers/MaxOutLayer.cpp deleted file mode 100644 index 919f62a45ba0729827b50e09479b4f0153a061a5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MaxOutLayer.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MaxOutLayer.h" -#include "hl_cnn.h" -#include "hl_gpu.h" - -namespace paddle { - -REGISTER_LAYER(maxout, MaxOutLayer); - -size_t MaxOutLayer::getSize() { - const MaxOutConfig& maxoutConf = config_.inputs(0).maxout_conf(); - imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); - imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (imgSizeH_ == 0) { - imgSizeH_ = maxoutConf.image_conf().img_size_y(); - } - if (imgSizeW_ == 0) { - imgSizeW_ = maxoutConf.image_conf().img_size(); - } - - featLen_ = imgSizeH_ * imgSizeW_; - size_t layerSize = featLen_ * outputChannels_; - - getOutput().setFrameHeight(imgSizeH_); - getOutput().setFrameWidth(imgSizeW_); - - return layerSize; -} - -bool MaxOutLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* the size of inputs for maxout-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1); - - const MaxOutConfig& conf = config_.inputs(0).maxout_conf(); - groups_ = conf.groups(); - channels_ = conf.image_conf().channels(); - CHECK_EQ(channels_ % groups_, 0UL); - outputChannels_ = channels_ / groups_; - - return true; -} - -void MaxOutLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - /* note: one sample correspond to one column */ - size_t batchSize = getInput(0).getBatchSize(); - size_t size = getSize(); - resetOutput(batchSize, size); - MatrixPtr inputV = getInputValue(0); - MatrixPtr outV = getOutputValue(); - - IVector::resizeOrCreate(maxoutId_, size * batchSize, useGpu_); - outV->maxoutForward(*inputV, *maxoutId_, outputChannels_, groups_); -} - -void MaxOutLayer::backward(const UpdateCallback& callback) { - (void)callback; - - /* Do derivation */ - MatrixPtr inputG = getInputGrad(0); - MatrixPtr outG = getOutputGrad(); - - if (inputG) { - inputG->maxoutBackward(*outG, *maxoutId_, outputChannels_, groups_); - } -} - -} // 
namespace paddle diff --git a/paddle/legacy/gserver/layers/MaxOutLayer.h b/paddle/legacy/gserver/layers/MaxOutLayer.h deleted file mode 100644 index e56f34b8e02bf1dd48c6b5b6ea135cc1009c25b5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MaxOutLayer.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * A layer to do max out on conv layer output. - * Input: output of a conv layer. - * Output: feature map size same as input. Channel is (input channel) / groups. - * So the num of channels should be able to devided by groups. - * - * The config file api is maxout_layer. 
- */ - -class MaxOutLayer : public Layer { - protected: - size_t groups_; - size_t imgSizeH_, imgSizeW_; - /// outputChannels_ = channels_ / groups_ - size_t channels_, outputChannels_; - /// feature length = imgSizeH_ * imgSizeW_ - size_t featLen_; - IVectorPtr maxoutId_; - - public: - /// return imgSizeH_ * imgSizeW_ * outputChannels_; - size_t getSize(); - - explicit MaxOutLayer(const LayerConfig& config) : Layer(config) {} - virtual ~MaxOutLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.cpp b/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.cpp deleted file mode 100644 index a1cc59a719e43453a8919a5827369982ac355480..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MaxPoolWithMaskLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -bool MaxPoolWithMaskLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - PoolLayer::init(layerMap, parameterMap); - setOutput("mask", &mask_); - return true; -} - -size_t MaxPoolWithMaskLayer::getSize() { - CHECK_EQ(inputLayers_.size(), 1UL); - size_t layerSize = 0; - - outputY_ = outputSize(imgSizeY_, - sizeY_, - confPaddingY_, - strideY_, - /* caffeMode */ false); - outputX_ = outputSize(imgSize_, - sizeX_, - confPadding_, - stride_, - /* caffeMode */ false); - - layerSize = outputX_ * outputY_ * channels_; - getOutput().setFrameHeight(outputY_); - getOutput().setFrameWidth(outputX_); - - return layerSize; -} - -void MaxPoolWithMaskLayer::forward(PassType passType) { - size_t size = getSize(); - MatrixPtr inputV = inputLayers_[0]->getOutputValue(); - int batchSize = inputV->getHeight(); - resetOutput(batchSize, size); - - MatrixPtr outV = getOutputValue(); - CHECK_EQ(size, outV->getWidth()); - - resetSpecifyOutput(mask_, - batchSize, - size, - /* isValueClean */ false, - /* isGradClean */ true); - - MatrixPtr maskV = mask_.value; - outV->maxPoolForward(*inputV, - imgSizeY_, - imgSize_, - channels_, - sizeX_, - sizeY_, - strideY_, - stride_, - outputY_, - outputX_, - confPaddingY_, - confPadding_, - maskV); -} - -void MaxPoolWithMaskLayer::backward(const UpdateCallback& callback) { - (void)callback; - if (NULL == getInputGrad(0)) { - return; - } - - MatrixPtr outGrad = getOutputGrad(); - MatrixPtr inputV = inputLayers_[0]->getOutputValue(); - MatrixPtr outV = getOutputValue(); - MatrixPtr inputGrad = inputLayers_[0]->getOutputGrad(); - - inputGrad->maxPoolBackward(*inputV, - imgSizeY_, - imgSize_, - *outGrad, - *outV, - sizeX_, - sizeY_, - strideY_, - stride_, - outputY_, - outputX_, - 1, - 1, - confPaddingY_, - confPadding_); -} - -} // namespace paddle diff --git 
a/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.h b/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.h deleted file mode 100644 index fcd5388abe3f8229dfa418e6917a8a73c93900a7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "PoolLayer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { -/** - * @brief Basic parent layer of different kinds of pooling - */ -class MaxPoolWithMaskLayer : public PoolLayer { - protected: - Argument mask_; - - public: - explicit MaxPoolWithMaskLayer(const LayerConfig& config) - : PoolLayer(config) {} - - size_t getSize(); - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MixedLayer.cpp b/paddle/legacy/gserver/layers/MixedLayer.cpp deleted file mode 100644 index 63e658c09c2b3bae30c8b2890e4d67f72266dd4d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MixedLayer.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MixedLayer.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(mixed, MixedLayer); - -bool MixedLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - if (!Layer::init(layerMap, parameterMap)) return false; - - CHECK_EQ(inputLayers_.size(), parameters_.size()); - projections_.resize(inputLayers_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - if (config_.inputs(i).has_proj_conf()) { - projections_[i].reset(Projection::create( - config_.inputs(i).proj_conf(), parameters_[i], useGpu_)); - } else { - CHECK(!parameters_[i]) << "should no parameters for operators"; - } - } - for (auto& operator_conf : config_.operator_confs()) { - for (auto& input_index : operator_conf.input_indices()) { - CHECK(!config_.inputs(input_index).has_proj_conf()); - } - operators_.emplace_back(Operator::create(operator_conf, useGpu_)); - } - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - sharedBias_ = config_.shared_biases(); - size_t psize = config_.bias_size(); - biases_ = std::unique_ptr(new Weight(1, psize, biasParameter_)); - } - - return true; -} - -void MixedLayer::prefetch() { - for (size_t i = 0; i != inputLayers_.size(); ++i) { - if (projections_[i]) { - projections_[i]->prefetch(&getInput(i)); - } - } -} - -void MixedLayer::resetState() { - for (auto& proj : projections_) { - if (proj) { - proj->resetState(); - } 
- } -} - -void MixedLayer::setState(LayerStatePtr state) { - CHECK(projectionStateMatrixSize_.size() == projections_.size()) - << "projection size mis-match"; - - int start = 0; - LayerStatePtr statePtr = std::make_shared(); - for (int i = 0; i < (int)projectionStateMatrixSize_.size(); i++) { - if (projectionStateMatrixSize_[i] > 0) { - statePtr->value.clear(); - for (int j = start; j < start + projectionStateMatrixSize_[i]; j++) { - statePtr->value.push_back(state->value[j]); - } - projections_[i]->setState(statePtr); - start += projectionStateMatrixSize_[i]; - } - } - CHECK((int)state->value.size() == start) << "state matrix size mis-match"; -} - -// Return state which consists of all projections states -LayerStatePtr MixedLayer::getState() { - bool init = projectionStateMatrixSize_.size() == 0; - LayerStatePtr res = std::make_shared(); - for (int i = 0; i < (int)projections_.size(); i++) { - LayerStatePtr statePtr = - projections_[i] ? projections_[i]->getState() : nullptr; - int stateSize = statePtr == nullptr ? 
0 : statePtr->value.size(); - if (init) { - projectionStateMatrixSize_.push_back(stateSize); - } else { - CHECK(projectionStateMatrixSize_[i] == stateSize) - << "state matrix size mis-match"; - } - if (statePtr != nullptr) { - for (auto& matrixPtr : statePtr->value) { - res->value.push_back(matrixPtr); - } - } - } - return res; -} - -void MixedLayer::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = getInput(0).getBatchSize(); - int size = getSize(); - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - resetOutput(batchSize, size); - } - - MatrixPtr outV = getOutputValue(); - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - if (projections_[i]) { - projections_[i]->forward(&getInput(i), &output_, passType); - } - } - - std::vector ins; - for (auto& op : operators_) { - ins.clear(); - for (auto& input_index : op->getConfig().input_indices()) { - ins.push_back(&getInput(input_index)); - } - op->forward(ins, &output_, passType); - } - - /* add the bias-vector */ - if (biases_.get() != NULL) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); - outV->addBias(*(biases_->getW()), 1, sharedBias_); - } - - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void MixedLayer::backward(const UpdateCallback& callback) { - /* Do activation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - - if (biases_ && biases_->getWGrad()) { - REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str()); - biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_); - - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - if (projections_[i]) { - projections_[i]->backward(callback); - } - } - - for (auto& op : operators_) { - op->backward(); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MixedLayer.h 
b/paddle/legacy/gserver/layers/MixedLayer.h deleted file mode 100644 index 43ee2bd81854f2dea837734f556c197613f6fdaf..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MixedLayer.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "Operator.h" -#include "Projection.h" - -namespace paddle { - -/** - * A mixed layer has multiple input layers. - * Each input layer was processed by a Projection or Operator. - * The results of all projections or Operators are summed together with bias - * (if configured), and then go through an activation function and dropout - * (if configured). - * - * The config file api is mixed_layer. - */ -class MixedLayer : public Layer { - public: - explicit MixedLayer(const LayerConfig& config) : Layer(config) {} - - ~MixedLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void prefetch() override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - void resetState() override; - /** - * setState() should be called after getState(). - * Argument state consists of all projections states. - */ - void setState(LayerStatePtr state) override; - /** - * Return state which consists of all projections states. 
- */ - LayerStatePtr getState() override; - - protected: - std::vector> projections_; - std::vector> operators_; - /// the matrix size of projection state - std::vector projectionStateMatrixSize_; - std::unique_ptr biases_; - bool sharedBias_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MultiBoxLossLayer.cpp b/paddle/legacy/gserver/layers/MultiBoxLossLayer.cpp deleted file mode 100644 index 335e9a6ac4786188903aab0d793fb71623734f57..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MultiBoxLossLayer.cpp +++ /dev/null @@ -1,376 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MultiBoxLossLayer.h" -#include -#include -#include "DataLayer.h" - -namespace paddle { - -REGISTER_LAYER(multibox_loss, MultiBoxLossLayer); - -bool MultiBoxLossLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - auto layerConf = config_.inputs(0).multibox_loss_conf(); - numClasses_ = layerConf.num_classes(); - inputNum_ = layerConf.input_num(); - overlapThreshold_ = layerConf.overlap_threshold(); - negPosRatio_ = layerConf.neg_pos_ratio(); - negOverlap_ = layerConf.neg_overlap(); - backgroundId_ = layerConf.background_id(); - return true; -} - -void MultiBoxLossLayer::forward(PassType passType) { - Layer::forward(passType); - size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); - resetOutput(batchSize, 1); - - // all location data and confidence score data - locSizeSum_ = 0; - confSizeSum_ = 0; - for (size_t n = 0; n < inputNum_; ++n) { - const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); - const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); - locSizeSum_ += inLoc->getElementCnt(); - confSizeSum_ += inConf->getElementCnt(); - } - - // locBuffer layout: - // | xmin1 | ymin1 | xmax1 | ymax1 | xmin2 ...... - Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_); - locBuffer_ = locTmpBuffer_; - - // confBuffer layout: - // | class1 score | class2 score | ... |classN score | class1 score | ...... 
- Matrix::resizeOrCreate(confTmpBuffer_, 1, confSizeSum_, false, useGpu_); - confBuffer_ = confTmpBuffer_; - - // concate location data and confidence score data - size_t locOffset = 0; - size_t confOffset = 0; - auto& layerConf = config_.inputs(0).multibox_loss_conf(); - for (size_t n = 0; n < inputNum_; ++n) { - const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); - const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); - size_t height = getInput(*getLocInputLayer(n)).getFrameHeight(); - if (!height) height = layerConf.height(); - size_t width = getInput(*getLocInputLayer(n)).getFrameWidth(); - if (!width) width = layerConf.width(); - locOffset += appendWithPermute(*inLoc, - height, - width, - locSizeSum_, - locOffset, - batchSize, - *locBuffer_, - kNCHWToNHWC); - confOffset += appendWithPermute(*inConf, - height, - width, - confSizeSum_, - confOffset, - batchSize, - *confBuffer_, - kNCHWToNHWC); - } - CHECK_EQ(locOffset, locSizeSum_ / batchSize); - CHECK_EQ(confOffset, confSizeSum_ / batchSize); - - // priorValue layout: - // | xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var - // | xmin2 | ...... - MatrixPtr priorValue; - - // labelValue layout: - // | class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ...... 
- MatrixPtr labelValue; - - // Copy data from GPU to CPU if use GPU - if (useGpu_) { - Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false); - Matrix::resizeOrCreate(confCpuBuffer_, 1, confSizeSum_, false, false); - MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer()); - Matrix::resizeOrCreate( - priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false); - MatrixPtr labelTmpValue = getInputValue(*getLabelLayer()); - Matrix::resizeOrCreate(labelCpuValue_, - labelTmpValue->getHeight(), - labelTmpValue->getWidth(), - false, - false); - - locCpuBuffer_->copyFrom(*locTmpBuffer_); - confCpuBuffer_->copyFrom(*confTmpBuffer_); - priorCpuValue_->copyFrom(*priorTmpValue); - labelCpuValue_->copyFrom(*labelTmpValue); - - locBuffer_ = locCpuBuffer_; - confBuffer_ = confCpuBuffer_; - priorValue = priorCpuValue_; - labelValue = labelCpuValue_; - } else { - priorValue = getInputValue(*getPriorBoxLayer()); - labelValue = getInputValue(*getLabelLayer()); - } - - // Get max scores for each prior bbox. 
Used in negative mining - std::vector> allMaxConfScore; - numPriors_ = priorValue->getElementCnt() / 8; - getMaxConfidenceScores(confBuffer_->getData(), - batchSize, - numPriors_, - numClasses_, - backgroundId_, - &allMaxConfScore); - - // Match prior bbox to groundtruth bbox - Argument label = getInput(*getLabelLayer()); - const int* labelIndex = label.sequenceStartPositions->getData(false); - size_t seqNum = label.getNumSequences(); - numMatches_ = 0; - numNegs_ = 0; - allMatchIndices_.clear(); - allNegIndices_.clear(); - - std::pair retPair = generateMatchIndices(*priorValue, - numPriors_, - *labelValue, - labelIndex, - seqNum, - allMaxConfScore, - batchSize, - overlapThreshold_, - negOverlap_, - negPosRatio_, - &allMatchIndices_, - &allNegIndices_); - numMatches_ = retPair.first; - numNegs_ = retPair.second; - - // BBox location L1 smooth loss - locLoss_ = 0.0; - if (numMatches_ >= 1) { - size_t count = 0; - MatrixPtr locLossOutput; - Matrix::resizeOrCreate(locLossOutput, numMatches_ * 4, 1, false, false); - Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false); - Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false); - locDiff_->zeroMem(); - std::vector locGTData; - - real* locDiffData = locDiff_->getData(); - const real* locBufferData = locBuffer_->getData(); - for (size_t n = 0; n < batchSize; ++n) { - for (size_t i = 0; i < numPriors_; ++i) { - if (allMatchIndices_[n][i] == -1) continue; // match none - size_t locOffset = - n * (locBuffer_->getElementCnt() / batchSize) + i * 4; - std::copy(locBufferData + locOffset, - locBufferData + locOffset + 4, - locDiffData + count); - count += 4; - const int gtIdx = allMatchIndices_[n][i]; - size_t priorOffset = i * 8; - std::vector priorBBoxVec; - getBBoxFromPriorData( - priorValue->getData() + priorOffset, 1, priorBBoxVec); - std::vector> priorBBoxVar; - getBBoxVarFromPriorData( - priorValue->getData() + priorOffset, 1, priorBBoxVar); - size_t labelOffset = (labelIndex[n] + gtIdx) * 6; - 
std::vector gtBBoxVec; - getBBoxFromLabelData(labelValue->getData() + labelOffset, 1, gtBBoxVec); - std::vector gtEncode; - encodeBBoxWithVar( - priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode); - locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end()); - } - } - locGTData_->copyFrom(&locGTData[0], numMatches_ * 4); - locLossOutput->smoothL1(*locDiff_, *locGTData_, 0.0); - locLoss_ = locLossOutput->getSum() / numMatches_; - } - - // BBox confidence softmax loss - confLoss_ = 0; - numConf_ = numMatches_ + numNegs_; - if (numConf_ >= 1) { - Matrix::resizeOrCreate(confProb_, numConf_, numClasses_, false, false); - IVector::resizeOrCreate(confGTData_, numConf_, false); - confProb_->zeroMem(); - size_t count = 0; - - std::vector confPredData; - real* confProbData = confProb_->getData(); - const real* confBufferData = confBuffer_->getData(); - for (size_t n = 0; n < batchSize; ++n) { - for (size_t i = 0; i < numPriors_; ++i) { - if (allMatchIndices_[n][i] == -1) continue; - size_t labelOffset = (labelIndex[n] + allMatchIndices_[n][i]) * 6; - const int gtLabel = (labelValue->getData() + labelOffset)[0]; - confGTData_->getData()[count] = gtLabel; - size_t confOffset = n * numPriors_ * numClasses_ + i * numClasses_; - std::copy(confBufferData + confOffset, - confBufferData + confOffset + numClasses_, - confProbData + count * numClasses_); - confPredData.reserve(confPredData.size() + numClasses_); - confPredData.insert(confPredData.end(), - confBufferData + confOffset, - confBufferData + confOffset + numClasses_); - ++count; - } - // Negative mining samples - for (size_t i = 0; i < allNegIndices_[n].size(); ++i) { - confGTData_->getData()[count] = backgroundId_; - size_t confOffset = - n * numPriors_ * numClasses_ + allNegIndices_[n][i] * numClasses_; - std::copy(confBufferData + confOffset, - confBufferData + confOffset + numClasses_, - confProbData + count * numClasses_); - confPredData.reserve(confPredData.size() + numClasses_); - 
confPredData.insert(confPredData.end(), - confBufferData + confOffset, - confBufferData + confOffset + numClasses_); - ++count; - } - } - CHECK_EQ(numConf_, count); - confProb_->softmax(*confProb_); - MatrixPtr confLossOutput; - Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false); - confLossOutput->oneHotCrossEntropy(*confProb_, *confGTData_); - confLoss_ = confLossOutput->getSum() / numMatches_; - } - real loss = locLoss_ + confLoss_; - MatrixPtr outV = getOutputValue(); - outV->assign(loss); -} - -void MultiBoxLossLayer::backward(const UpdateCallback& callback) { - size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); - locBuffer_->zeroMem(); - confBuffer_->zeroMem(); - - // Back propagate on location prediction - if (numMatches_ >= 1) { - MatrixPtr locDiffBuffer; - Matrix::resizeOrCreate(locDiffBuffer, numMatches_ * 4, 1, false, false); - locDiffBuffer->smoothL1Bp(*locDiff_, *locGTData_, 0.0); - locDiff_->copyFrom(*locDiffBuffer); - // scale gradient - for (size_t i = 0; i < numMatches_ * 4; ++i) - locDiff_->getData()[i] *= (1. / numMatches_); - // Copy gradient back - size_t count = 0; - const real* locDiffData = locDiff_->getData(); - for (size_t n = 0; n < batchSize; ++n) { - for (size_t i = 0; i < numPriors_; ++i) { - if (allMatchIndices_[n][i] == -1) continue; - real* locBufferData = - locBuffer_->getData() + n * numPriors_ * 4 + i * 4; - std::copy(locDiffData + count * 4, - locDiffData + (count + 1) * 4, - locBufferData); - ++count; - } - } - CHECK_EQ(count, numMatches_); - } - - if (numConf_ >= 1) { - for (size_t i = 0; i < numConf_; ++i) - confProb_->getData()[i * numClasses_ + confGTData_->getData()[i]] -= 1; - for (size_t i = 0; i < numConf_ * numClasses_; ++i) - confProb_->getData()[i] *= (1. 
/ numMatches_); - size_t count = 0; - const real* confProbData = confProb_->getData(); - for (size_t n = 0; n < batchSize; ++n) { - for (size_t i = 0; i < numPriors_; ++i) { - if (allMatchIndices_[n][i] == -1) continue; - real* confDiffData = confBuffer_->getData() + - n * numPriors_ * numClasses_ + i * numClasses_; - std::copy(confProbData + count * numClasses_, - confProbData + (count + 1) * numClasses_, - confDiffData); - ++count; - } - for (size_t i = 0; i < allNegIndices_[n].size(); ++i) { - int idx = allNegIndices_[n][i]; - real* confDiffData = confBuffer_->getData() + - n * numPriors_ * numClasses_ + idx * numClasses_; - std::copy(confProbData + count * numClasses_, - confProbData + (count + 1) * numClasses_, - confDiffData); - ++count; - } - } - CHECK_EQ(count, numConf_); - } - if (useGpu_) { - locTmpBuffer_->copyFrom(*locCpuBuffer_); - confTmpBuffer_->copyFrom(*confCpuBuffer_); - locBuffer_ = locTmpBuffer_; - confBuffer_ = confTmpBuffer_; - } - // copy back - size_t locOffset = 0; - size_t confOffset = 0; - auto layerConf = config_.inputs(0).multibox_loss_conf(); - for (size_t n = 0; n < inputNum_; ++n) { - const MatrixPtr inLocG = getInputGrad(*getLocInputLayer(n)); - const MatrixPtr inConfG = getInputGrad(*getConfInputLayer(n)); - size_t height = getInput(*getLocInputLayer(n)).getFrameHeight(); - // only for unittest, there are no width and height information - // when constructing matrix in unittest, so we should - // set the shape in configuration - if (!height) height = layerConf.height(); - size_t width = getInput(*getLocInputLayer(n)).getFrameWidth(); - if (!width) width = layerConf.width(); - - // NHWC to NCHW - MatrixPtr locGBuffer; - Matrix::resizeOrCreate( - locGBuffer, inLocG->getHeight(), inLocG->getWidth(), false, useGpu_); - MatrixPtr confGBuffer; - Matrix::resizeOrCreate( - confGBuffer, inConfG->getHeight(), inConfG->getWidth(), false, useGpu_); - - locOffset += decomposeWithPermute(*locBuffer_, - height, - width, - locSizeSum_, - 
locOffset, - batchSize, - *locGBuffer, - kNHWCToNCHW); - inLocG->add(*locGBuffer); - confOffset += decomposeWithPermute(*confBuffer_, - height, - width, - confSizeSum_, - confOffset, - batchSize, - *confGBuffer, - kNHWCToNCHW); - inConfG->add(*confGBuffer); - } - CHECK_EQ(locOffset, locSizeSum_ / batchSize); - CHECK_EQ(confOffset, confSizeSum_ / batchSize); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MultiBoxLossLayer.h b/paddle/legacy/gserver/layers/MultiBoxLossLayer.h deleted file mode 100644 index a358cded00bb01bfe5d02f9a6d8a24e4b2e51b74..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MultiBoxLossLayer.h +++ /dev/null @@ -1,103 +0,0 @@ -/* copyright (c) 2016 paddlepaddle authors. all rights reserve. - -licensed under the apache license, version 2.0 (the "license"); -you may not use this file except in compliance with the license. -you may obtain a copy of the license at - - http://www.apache.org/licenses/license-2.0 - -unless required by applicable law or agreed to in writing, software -distributed under the license is distributed on an "as is" basis, -without warranties or conditions of any kind, either express or implied. -see the license for the specific language governing permissions and -limitations under the license. */ - -#pragma once - -#include -#include "CostLayer.h" -#include "DataLayer.h" -#include "DetectionUtil.h" -#include "Layer.h" - -using std::vector; -using std::pair; - -namespace paddle { - -/** - * The multibox loss layer for a SSD detection task. - * The loss is composed by the location loss and the confidence loss. - * The location loss is a smooth L1 loss and the confidence loss is - * a softmax loss. - * - Input: This layer needs four input layers: The first input layer - * is the priorbox layer and the second layer is a label layer. - * The rest two input layers are convolution layers for generating - * bbox location offset and the classification confidence. 
- * - Output: The Single Shot Multibox Detection loss value. - * Reference: - * Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, - * Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector - */ - -class MultiBoxLossLayer : public CostLayer { - public: - explicit MultiBoxLossLayer(const LayerConfig& config) : CostLayer(config) {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - - void backward(const UpdateCallback& callback = nullptr); - - void forwardImp(Matrix& output, Argument& label, Matrix& cost) {} - - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} - - protected: - inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; } - inline LayerPtr getLabelLayer() { return inputLayers_[1]; } - inline LayerPtr getLocInputLayer(size_t index) { - return inputLayers_[2 + index]; - } - inline LayerPtr getConfInputLayer(size_t index) { - return inputLayers_[2 + inputNum_ + index]; - } - - protected: - size_t numClasses_; - real overlapThreshold_; - real negPosRatio_; - real negOverlap_; - size_t inputNum_; - size_t backgroundId_; - - real locLoss_; - real confLoss_; - - size_t numPriors_; - size_t numMatches_; - size_t numNegs_; - size_t numConf_; - size_t locSizeSum_; - size_t confSizeSum_; - - vector> allMatchIndices_; - vector> allNegIndices_; - MatrixPtr locGTData_; - IVectorPtr confGTData_; - - MatrixPtr locBuffer_; - MatrixPtr confBuffer_; - MatrixPtr locDiff_; - MatrixPtr confProb_; - - MatrixPtr labelCpuValue_; - MatrixPtr priorCpuValue_; - MatrixPtr locCpuBuffer_; - MatrixPtr confCpuBuffer_; - MatrixPtr locTmpBuffer_; - MatrixPtr confTmpBuffer_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MultinomialSampler.cpp b/paddle/legacy/gserver/layers/MultinomialSampler.cpp deleted file mode 100644 index e74ed795a1532eab93cfe0e4b1312ef99ec1662f..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/gserver/layers/MultinomialSampler.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MultinomialSampler.h" - -namespace paddle { - -MultinomialSampler::MultinomialSampler(const real* prob, int size) - : rand_(0.0, size) { - intervals_.resize(size + 1); - double sum = 0; - for (int i = 0; i < size; ++i) { - sum += prob[i]; - } - - double intervalLength = sum / size; - double s = 1 / intervalLength; - for (int i = 0; i < size; ++i) { - intervals_[i] = {i, (real)(prob[i] * s)}; - } - - auto nextSmallPos = [&](int pos) { - while (pos < size && - (pos != intervals_[pos].otherId || intervals_[pos].thresh >= 1)) { - ++pos; - } - return pos; - }; - - auto nextBigPos = [&](int pos) { - while (pos < size && intervals_[pos].thresh < 1) { - ++pos; - } - return pos; - }; - - int smallPos = nextSmallPos(0); - int bigPos = nextBigPos(0); - - auto fillIntervals = [&]() { - while (bigPos < size) { - while (intervals_[bigPos].thresh > 1 && smallPos < size) { - intervals_[smallPos].otherId = bigPos; - intervals_[bigPos].thresh -= 1 - intervals_[smallPos].thresh; - smallPos = nextSmallPos(smallPos + 1); - } - if (smallPos >= size) break; - bigPos = nextBigPos(bigPos + 1); - // If intervals_[bigPos].thresh < 1, it becomes a small interval - } - }; - - fillIntervals(); - - smallPos = nextSmallPos(0); - - // At this point there is no small intervals after bigPos. 
And this condition - // will remain true during the next fillIntervals() - - fillIntervals(); - - // Handle the inaccuracy caused by finite-precision arithmetic which - // may results in some unprocessed small or big intervals at this point. - for (int i = 0; i < size; ++i) { - if (intervals_[i].otherId == i) { - intervals_[i].thresh = 1; - } - } - - // The last one is to safeguard the case that the random number is equal - // to size - intervals_[size] = {size - 1, 1}; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MultinomialSampler.h b/paddle/legacy/gserver/layers/MultinomialSampler.h deleted file mode 100644 index ed445352418f8504e52a6139492e3577a95eecb1..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MultinomialSampler.h +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "paddle/legacy/utils/Common.h" - -namespace paddle { - -/** - * @brief Given the probability of N objects, the sampler random select - * one of the object. - * @note: prob does not have to be unnormalized. - * - * The space requirement is O(N)=O(N * sizeof(Interval)). - * The computational complexity of generate one sample is O(1). - */ -class MultinomialSampler { - public: - MultinomialSampler(const real* prob, int size); - - //! protobuf always using double. 
- static MultinomialSampler* create(const double* prob, int size) { -#ifdef PADDLE_TYPE_DOUBLE - return new MultinomialSampler(prob, size); -#else - std::unique_ptr tmp(new real[size]); - std::copy(prob, prob + size, tmp.get()); - return new MultinomialSampler(tmp.get(), size); -#endif - } - - /** - * @brief Generate a random sample. - * @param g is a random number engine. See . - * @return Random integer. - */ - template - int gen(URNG& g) { - return gen1([&g, this]() { return rand_(g); }); - } - - protected: - /** - * @brief Generation - * @param[in] rand rand is a real random number distribution - * for the range [0, size). - * @return random int number or intervals_[random_int_number].otherId. - */ - template - int gen1(Rand rand) { - double r = rand(); // NOLINT - int i = (int)r; - r -= i; - return r < intervals_[i].thresh ? i : intervals_[i].otherId; - } - - struct Interval { - int otherId; - real thresh; - }; - - /// The probability of each interval will be 1./size - std::vector intervals_; - std::uniform_real_distribution rand_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/MultiplexLayer.cpp b/paddle/legacy/gserver/layers/MultiplexLayer.cpp deleted file mode 100644 index 9ca2b2417596e7978ea6b84ec76bcb8a305a4f5d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/MultiplexLayer.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - *@brief This layer multiplex multiple layers according to the index, - * which is provided by the first input layer. - * - Input[0]: the index of the layer to output of size batchSize. - * - Input[1:N]; the candidate output data. - * For each index i from 0 to batchSize -1, the output is the i-th row of the - * (index[i] + 1)-th layer. - * - * For each i-th row of output: - * - * \f[ - * y[i][j] = x_{x_{0}[i] + 1}[i][j], j = 0,1, ... , (x_{1}.width - 1) - * \f] - * where, y is output. \f$x_{k}\f$ is the k-th input layer and - * \f$k = x_{0}[i] + 1\f$. - */ - -class MultiplexLayer : public Layer { - protected: - /** - * @brief A struct is used to save the copy information, includes input - * layer index and copy size. - */ - struct CopyInfo { - CopyInfo(int inStartIdx, int inLength, int inCopyIdx) - : startIdx(inStartIdx), length(inLength), copyIdx(inCopyIdx) {} - - /// The start row of input. - int startIdx; - /// Number of rows. If the layer index in Input[0] is not consecutive, - /// the length is one. Otherwise, the length is > 1 and copy multi rows - /// once. - int length; - /// The copied layer index, which needs to add 1. - int copyIdx; - }; - - /// A list of CopyInfo used to save copy information. - std::vector copySchedule_; - - /// Temporary matrix pointer to point to input data. - MatrixPtr tmpSrc_; - /// Temporary matrix pointer to point to output data. - MatrixPtr tmpDest_; - - public: - explicit MultiplexLayer(const LayerConfig& config) : Layer(config) {} - - ~MultiplexLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - private: - /** - * @brief Calculate copy info for input layers. 
- */ - void calculateCopySchedule(const IVectorPtr& copyIds, size_t numIns); -}; - -REGISTER_LAYER(multiplex, MultiplexLayer); - -void MultiplexLayer::calculateCopySchedule(const IVectorPtr& copyIds, - size_t numIns) { - copySchedule_.clear(); - CopyInfo prevCopyInfo(0, 0, -1); - for (size_t i = 0; i < copyIds->getSize(); i++) { - int copyId = copyIds->getElement(i); - CHECK_GE(copyId, 0); - CHECK_LT(copyId, int(numIns)); - // copy same input layer with prevous and will copy consecutive. - if (copyId == prevCopyInfo.copyIdx) { - ++prevCopyInfo.length; - } else { - if (prevCopyInfo.copyIdx != -1) { - copySchedule_.emplace_back(prevCopyInfo); - } - prevCopyInfo.startIdx = i; - prevCopyInfo.length = 1; - prevCopyInfo.copyIdx = copyId; - } - } - if (prevCopyInfo.copyIdx != -1) { - copySchedule_.emplace_back(prevCopyInfo); - } -} - -bool MultiplexLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_GE(inputLayers_.size(), 2U); - - tmpSrc_ = - Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_); - tmpDest_ = - Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_); - return true; -} - -void MultiplexLayer::forward(PassType passType) { - Layer::forward(passType); - - IVectorPtr copyIds = getInput(0).ids; - MatrixPtr inV1 = getInputValue(1); - CHECK_EQ(copyIds->getSize(), inV1->getHeight()); - for (size_t i = 2; i < inputLayers_.size(); i++) { - CHECK_EQ(inV1->getHeight(), getInputValue(i)->getHeight()); - CHECK_EQ(inV1->getWidth(), getInputValue(i)->getWidth()); - } - - calculateCopySchedule(copyIds, inputLayers_.size() - 1); - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - reserveOutput(inV1->getHeight(), inV1->getWidth()); - } - - MatrixPtr outV = getOutputValue(); - { - REGISTER_TIMER_INFO("FwLMultplexingTimer", getName().c_str()); - AsyncGpuBlock block; - for (const CopyInfo& info : copySchedule_) { - outV->subMatrix(info.startIdx, info.length, 
tmpDest_) - ->copyFrom(*getInputValue(info.copyIdx + 1) - ->subMatrix(info.startIdx, info.length, tmpSrc_)); - } - } - - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void MultiplexLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - - MatrixPtr outG = getOutputGrad(); - - { - REGISTER_TIMER_INFO("BwLMultiplexTimer", getName().c_str()); - AsyncGpuBlock block; - for (const CopyInfo& info : copySchedule_) { - if (getInputGrad(info.copyIdx + 1)) { - getInputGrad(info.copyIdx + 1) - ->subMatrix(info.startIdx, info.length, tmpDest_) - ->add(*outG->subMatrix(info.startIdx, info.length, tmpSrc_)); - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/NCELayer.cpp b/paddle/legacy/gserver/layers/NCELayer.cpp deleted file mode 100644 index ae4d6408168d1597760fe0094bc04f9cef657da4..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/NCELayer.cpp +++ /dev/null @@ -1,323 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "Layer.h" -#include "MultinomialSampler.h" -#include "paddle/legacy/math/MathFunctions.h" - -namespace paddle { - -/** - * Noise-contrastive estimation. 
- * Implements the method in the following paper: - * A fast and simple algorithm for training neural probabilistic language - * models. - * - * The config file api is nce_layer. - */ -class NCELayer : public Layer { - int numClasses_; - /// number of input layer besides labelLayer and weightLayer - int numInputs_; - LayerPtr labelLayer_; - /// weight layer, can be None - LayerPtr weightLayer_; - WeightList weights_; - std::unique_ptr biases_; - std::unique_ptr sampler_; - - std::uniform_int_distribution rand_; - - struct Sample { - int sampleId; - int labelId; - bool target; - real weight; - }; - std::vector samples_; - /// whether samples_ is prepared - bool prepared_; - Argument sampleOut_; - - IVectorPtr labelIds_; - - public: - explicit NCELayer(const LayerConfig& config) - : Layer(config), - numClasses_(config.num_classes()), - rand_(0, config.num_classes() - 1), - prepared_(false) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* initialize the weightList */ - size_t i; - for (i = 0; i < inputLayers_.size(); i++) { - if (!parameters_[i]) break; - size_t width = inputLayers_[i]->getSize(); - // create a new weight - CHECK_EQ(parameters_[i]->getSize(), width * numClasses_); - Weight* w = new Weight(numClasses_, width, parameters_[i]); - - // append the new weight to the list - weights_.emplace_back(w); - } - - CHECK_EQ(1U, getSize()); - - numInputs_ = i; - CHECK_GE(numInputs_, 1) - << "Must have at least one input besides label and weight"; - CHECK_LT(i, inputLayers_.size()) << "Missing label layer"; - labelLayer_ = inputLayers_[i]; - if (++i < inputLayers_.size()) { - weightLayer_ = inputLayers_[i]; - ++i; - } - CHECK_EQ(i, inputLayers_.size()); - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - CHECK_EQ(biasParameter_->getSize(), (size_t)numClasses_); - biases_.reset(new Weight(1, numClasses_, biasParameter_)); - } 
- - if (config_.neg_sampling_dist_size()) { - CHECK_EQ(numClasses_, config_.neg_sampling_dist_size()); - sampler_.reset(MultinomialSampler::create( - config_.neg_sampling_dist().data(), numClasses_)); - } - - return true; - } - - void prepareSamples() { - CHECK(!useGpu_) << "GPU is not supported"; - - int batchSize = getInput(*labelLayer_).getBatchSize(); - IVectorPtr label = getInput(*labelLayer_).ids; - - CpuSparseMatrixPtr multiLabel = std::dynamic_pointer_cast( - getInput(*labelLayer_).value); - - CHECK(label || multiLabel) - << "The label layer must have ids or NonValueSparseMatrix value"; - - auto& randEngine = ThreadLocalRandomEngine::get(); - - samples_.clear(); - samples_.reserve(batchSize * (1 + config_.num_neg_samples())); - - real* weight = - weightLayer_ ? getInputValue(*weightLayer_)->getData() : nullptr; - - for (int i = 0; i < batchSize; ++i) { - real w = weight ? weight[i] : 1; - if (label) { - int* ids = label->getData(); - samples_.push_back({i, ids[i], true, w}); - } else { - const int* cols = multiLabel->getRowCols(i); - int n = multiLabel->getColNum(i); - for (int j = 0; j < n; ++j) { - samples_.push_back({i, cols[j], true, w}); - } - } - for (int j = 0; j < config_.num_neg_samples(); ++j) { - int id = sampler_ ? 
sampler_->gen(randEngine) : rand_(randEngine); - samples_.push_back({i, id, false, w}); - } - } - prepared_ = true; - } - - void prefetch() override { - prepareSamples(); - IVector::resizeOrCreate(labelIds_, samples_.size(), useGpu_); - int* ids = labelIds_->getData(); - for (size_t i = 0; i < samples_.size(); ++i) { - ids[i] = samples_[i].labelId; - } - - for (int i = 0; i < numInputs_; ++i) { - auto sparseParam = - dynamic_cast(weights_[i]->getW().get()); - if (sparseParam) { - sparseParam->addRows(labelIds_); - } - } - } - - void forward(PassType passType) override { - Layer::forward(passType); - - CHECK(!useGpu_) << "GPU is not supported"; - - if (!prepared_) { - if (passType == PASS_GC) { - ThreadLocalRandomEngine::get().seed(ThreadLocalRand::getDefaultSeed()); - } - prepareSamples(); - } - prepared_ = false; - - /* malloc memory for the output_ if necessary */ - int batchSize = getInputValue(0)->getHeight(); - int size = getSize(); - resetOutput(batchSize, size); - - Matrix::resizeOrCreate(sampleOut_.value, - 1, - samples_.size(), - /* trans= */ false, - useGpu_); - - forwardBias(); - - for (int l = 0; l < numInputs_; ++l) { - forwardOneInput(l); - } - - auto status = activation_->forward(sampleOut_); - status.check(); - - forwardCost(); - } - - void backward(const UpdateCallback& callback) override { - Matrix::resizeOrCreate(sampleOut_.grad, - 1, - samples_.size(), - /* trans= */ false, - useGpu_); - - backwardCost(); - - auto status = activation_->backward(sampleOut_); - status.check(); - - if (biases_->getWGrad()) { - backwardBias(callback); - } - - for (int l = 0; l < numInputs_; ++l) { - backwardOneInput(l, callback); - } - } - - void forwardBias() { - if (!biases_) { - sampleOut_.value->zeroMem(); - } else { - real* bias = biases_->getW()->getData(); - real* sampleOut = sampleOut_.value->getData(); - for (size_t i = 0; i < samples_.size(); ++i) { - sampleOut[i] = bias[samples_[i].labelId]; - } - } - } - - void backwardBias(const UpdateCallback& 
callback) { - if (!biases_) return; - real* bias = biases_->getWGrad()->getData(); - real* sampleOut = sampleOut_.grad->getData(); - for (size_t i = 0; i < samples_.size(); ++i) { - bias[samples_[i].labelId] += sampleOut[i]; - } - biases_->incUpdate(callback); - } - - void forwardOneInput(int layerId) { - const MatrixPtr& inputMat = getInputValue(layerId); - const MatrixPtr& weightMat = weights_[layerId]->getW(); - - int dim = inputMat->getWidth(); - real* sampleOut = sampleOut_.value->getData(); - - for (size_t i = 0; i < samples_.size(); ++i) { - sampleOut[i] += dotProduct(dim, - inputMat->getRowBuf(samples_[i].sampleId), - weightMat->getRowBuf(samples_[i].labelId)); - } - } - - void backwardOneInput(int layerId, const UpdateCallback& callback) { - const MatrixPtr& inputMat = getInputValue(layerId); - const MatrixPtr& inputGradMat = getInputGrad(layerId); - const MatrixPtr& weightMat = weights_[layerId]->getW(); - const MatrixPtr& weightGradMat = weights_[layerId]->getWGrad(); - - int dim = inputMat->getWidth(); - real* sampleGrad = sampleOut_.grad->getData(); - - if (weightGradMat) { - for (size_t i = 0; i < samples_.size(); ++i) { - axpy(dim, - sampleGrad[i], - inputMat->getRowBuf(samples_[i].sampleId), - weightGradMat->getRowBuf(samples_[i].labelId)); - } - weights_[layerId]->incUpdate(callback); - } - - if (inputGradMat) { - for (size_t i = 0; i < samples_.size(); ++i) { - axpy(dim, - sampleGrad[i], - weightMat->getRowBuf(samples_[i].labelId), - inputGradMat->getRowBuf(samples_[i].sampleId)); - } - } - } - - void forwardCost() { - real* out = output_.value->getData(); - real* sampleOut = sampleOut_.value->getData(); - real b = 1. / numClasses_ * config_.num_neg_samples(); - for (size_t i = 0; i < samples_.size(); ++i) { - real o = sampleOut[i]; - if (sampler_) { - b = config_.num_neg_samples() * - config_.neg_sampling_dist(samples_[i].labelId); - } - real cost = samples_[i].target ? 
-log(o / (o + b)) : -log(b / (o + b)); - out[samples_[i].sampleId] += samples_[i].weight * cost; - } - } - - void backwardCost() { - real* sampleOut = sampleOut_.value->getData(); - real* sampleGrad = sampleOut_.grad->getData(); - - real b = 1. / numClasses_ * config_.num_neg_samples(); - for (size_t i = 0; i < samples_.size(); ++i) { - real o = sampleOut[i]; - if (sampler_) { - b = config_.num_neg_samples() * - config_.neg_sampling_dist(samples_[i].labelId); - } - real w = samples_[i].weight; - sampleGrad[i] = samples_[i].target ? -w * b / (o * (o + b)) : w / (o + b); - } - } -}; - -REGISTER_LAYER(nce, NCELayer); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/NormLayer.cpp b/paddle/legacy/gserver/layers/NormLayer.cpp deleted file mode 100644 index 443e26dbc859b1c51c5fb93077178ac45bdeaff3..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/NormLayer.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "NormLayer.h" -#include "NormProjectionLayer.h" -#include "paddle/legacy/utils/Logging.h" -namespace paddle { - -REGISTER_LAYER_CREATE_FUNC(norm, &NormLayer::create); - -Layer* NormLayer::create(const LayerConfig& config) { - CHECK_EQ(config.inputs_size(), 1); - const std::string& norm = config.inputs(0).norm_conf().norm_type(); - if (norm == "rnorm") { - return new ResponseNormLayer(config); - } else if (norm == "cmrnorm-projection") { - return new CMRProjectionNormLayer(config); - } else if (norm == "cross-channel-norm") { - return new CrossChannelNormLayer(config); - } else { - LOG(FATAL) << "Unknown norm type: " << norm; - return nullptr; - } -} - -bool ResponseNormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - NormLayer::init(layerMap, parameterMap); - - /* the size of inputs for norm-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1); - - const NormConfig& conf = config_.inputs(0).norm_conf(); - channels_ = conf.channels(); - size_ = conf.size(); - scale_ = conf.scale(); - pow_ = conf.pow(); - outputX_ = conf.output_x(); - imgSize_ = conf.img_size(); - denoms_ = NULL; - - outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - return true; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/NormLayer.h b/paddle/legacy/gserver/layers/NormLayer.h deleted file mode 100644 index 5ac00034d086a5952b30576268c72af326e3ebf9..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/NormLayer.h +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "Layer.h" -#include "NormLayer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief Basic parent layer of normalization - * - * @note Normalize the input in local region - */ -class NormLayer : public Layer { - public: - explicit NormLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - Layer::init(layerMap, parameterMap); - return true; - } - - /** - * @brief create norm layer by norm_type - */ - static Layer* create(const LayerConfig& config); -}; - -/** - * @brief response normalization within feature maps - * namely normalize in independent channel - * When code refactoring, we delete the original implementation. - * Need to implement in the futrue. - */ -class ResponseNormLayer : public NormLayer { - protected: - size_t channels_, size_, outputX_, imgSize_, outputY_, imgSizeY_; - real scale_, pow_; - MatrixPtr denoms_; - - public: - explicit ResponseNormLayer(const LayerConfig& config) : NormLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override { LOG(FATAL) << "Not implemented"; } - void backward(const UpdateCallback& callback = nullptr) override { - LOG(FATAL) << "Not implemented"; - } -}; - -/** - * This layer applys normalization across the channels of each sample to a - * conv layer's output, and scales the output by a group of trainable factors - * whose dimensions equal to the number of channels. 
- * - Input: One and only one input layer are accepted. - * - Output: The normalized data of the input data. - * Reference: - * Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, - * Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector - */ -class CrossChannelNormLayer : public NormLayer { - public: - explicit CrossChannelNormLayer(const LayerConfig& config) - : NormLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback); - MatrixPtr createSampleMatrix(MatrixPtr data, size_t iter, size_t spatialDim); - MatrixPtr createSpatialMatrix(MatrixPtr data, size_t iter, size_t spatialDim); - - protected: - size_t channels_; - std::unique_ptr scale_; - MatrixPtr scaleDiff_; - MatrixPtr normBuffer_; - MatrixPtr dataBuffer_; - MatrixPtr channelBuffer_; - MatrixPtr spatialBuffer_; - MatrixPtr sampleBuffer_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/NormProjectionLayer.cpp b/paddle/legacy/gserver/layers/NormProjectionLayer.cpp deleted file mode 100644 index 72affaa1ce618a841f8040c84467a46b77531958..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/NormProjectionLayer.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "NormProjectionLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { -size_t CMRProjectionNormLayer::getSize() { - CHECK_EQ(inputLayers_.size(), 1UL); - size_t layerSize = 0; - imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); - imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (imgSizeH_ == 0) { - imgSizeH_ = imgSizeY_; - } - if (imgSizeW_ == 0) { - imgSizeW_ = imgSize_; - } - outputH_ = imgSizeH_; - outputW_ = imgSizeW_; - layerSize = outputH_ * outputW_ * channels_; - - getOutput().setFrameHeight(outputH_); - getOutput().setFrameWidth(outputW_); - return layerSize; -} - -bool CMRProjectionNormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - ResponseNormLayer::init(layerMap, parameterMap); - - /* the size of inputs for norm-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1); - - createFunction( - forward_, - "CrossMapNormal", - FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_)); - createFunction( - backward_, - "CrossMapNormalGrad", - FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_)); - - return true; -} - -void CMRProjectionNormLayer::forward(PassType passType) { - Layer::forward(passType); - /* malloc memory for the output_ if necessary */ - /* note: one sample correspond to one row */ - MatrixPtr input = inputLayers_[0]->getOutputValue(); - size_t batchSize = input->getHeight(); - int size = getSize(); - resetOutput(batchSize, size); - - Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_); - - shape_ = TensorShape({batchSize, channels_, imgSizeH_, imgSizeW_}); - - // prepare forward arguments - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(0), shape_); - outputs.addArg(*getOutputValue(), shape_, ASSIGN_TO); - outputs.addArg(*denoms_, shape_, ASSIGN_TO); - - forward_[0]->calc(inputs, outputs); -} - -void 
CMRProjectionNormLayer::backward(const UpdateCallback& callback) { - (void)callback; - - if (NULL == getInputGrad(0)) { - return; - } - - // prepare backward arguments - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(0), shape_); - inputs.addArg(*getOutputValue(), shape_); - inputs.addArg(*getOutputGrad(), shape_); - inputs.addArg(*denoms_, shape_); - outputs.addArg(*getInputGrad(0), shape_, ADD_TO); - - backward_[0]->calc(inputs, outputs); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/NormProjectionLayer.h b/paddle/legacy/gserver/layers/NormProjectionLayer.h deleted file mode 100644 index 492d1fcb72343a54577a459aaa5de53596f43f42..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/NormProjectionLayer.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "NormLayer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief response normalization across feature maps - * namely normalize in number of size_ channels - */ -class CMRProjectionNormLayer : public ResponseNormLayer { - size_t imgSizeH_, imgSizeW_; - size_t outputH_, outputW_; - - public: - explicit CMRProjectionNormLayer(const LayerConfig& config) - : ResponseNormLayer(config) {} - - ~CMRProjectionNormLayer() {} - - size_t getSize(); - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - TensorShape shape_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Operator.cpp b/paddle/legacy/gserver/layers/Operator.cpp deleted file mode 100644 index 5b9cf8d15d6f585fda35011cae504a36514e445d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Operator.cpp +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Operator.h" - -namespace paddle { - -ClassRegistrar Operator::registrar_; - -Operator* Operator::create(const OperatorConfig& config, bool useGpu) { - return registrar_.createByType(config.type(), config, useGpu); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Operator.h b/paddle/legacy/gserver/layers/Operator.h deleted file mode 100644 index 20a248985eb6b3aba016b28bca4c0eea44baa868..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Operator.h +++ /dev/null @@ -1,96 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "ModelConfig.pb.h" -#include "paddle/legacy/parameter/Parameter.h" - -#include "Layer.h" -#include "paddle/legacy/parameter/Argument.h" - -namespace paddle { - -// Macro for registering a operator type -// Example: REGISTER_OPERATOR(dot_mul, DotMulOperator); -#define REGISTER_OPERATOR(__type_name, __class_name) \ - static InitFunction __reg_type_##__type_name([]() { \ - Operator::registrar_.registerClass<__class_name>(#__type_name); \ - }) - -/** - * Operator like Projection, but takes more than one Arguments as input. - * @note: Operator can't have parameters. 
- */ -class Operator { - public: - static Operator* create(const OperatorConfig& config, bool useGpu); - - Operator(const OperatorConfig& config, bool useGpu) - : config_(config), useGpu_(useGpu) {} - - virtual ~Operator() {} - - const OperatorConfig& getConfig() const { return config_; } - - static ClassRegistrar registrar_; - - /** - * Forward propagation. If backward() will be called, in and out must be kept - * valid until then. - * @param ins inputs of operator - * @param out output of operator - * @param passType PASS_TRAIN of PASS_TEST - */ - void forward(std::vector ins, - Argument* out, - PassType passType) { - ins_ = ins; - out_ = out; - passType_ = passType; - forward(); - } - - virtual void prefetch(const Argument* in) {} - virtual void forward() = 0; - virtual void backward() = 0; - - /** - * See comment in Layer.h for the function with the same name. - */ - virtual void resetState() {} - - /** - * Set layer state. - */ - virtual void setState(LayerStatePtr state) {} - - /** - * Set layer state. - */ - virtual LayerStatePtr getState() { return nullptr; } - - protected: - /// Config of operator - OperatorConfig config_; - bool useGpu_; - - /// Store `ins` passed to forward() - std::vector ins_; - /// Store `out` passed to forward() - Argument* out_; - /// Store `passType` passed to forward() - PassType passType_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/OuterProdLayer.cpp b/paddle/legacy/gserver/layers/OuterProdLayer.cpp deleted file mode 100644 index d0928be9d4d52532503987af8e29fdf5c7fb16a5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/OuterProdLayer.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief A layer for computing the outer product of two vectors - * @note used in NEURAL TURING MACHINE - * Input1: vector (batchSize * dim1) - * Input2: vector (batchSize * dim2) - * Output: a matrix: (batchSize * (dim1*dim2)) - */ - -class OuterProdLayer : public Layer { - protected: - MatrixPtr tmpMtx0; - MatrixPtr tmpRow0; - MatrixPtr tmpRow1; - - public: - explicit OuterProdLayer(const LayerConfig& config) : Layer(config) {} - - ~OuterProdLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(out_prod, OuterProdLayer); - -bool OuterProdLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2U); - - size_t dim0 = inputLayers_[0]->getSize(); - size_t dim1 = inputLayers_[1]->getSize(); - - CHECK_EQ(dim0 * dim1, getSize()) << "Dimension mismatch"; - - tmpRow0 = Matrix::create( - nullptr, /* height= */ 1, dim0, /* trans= */ false, useGpu_); - tmpRow1 = Matrix::create( - nullptr, /* height= */ 1, dim1, /* trans= */ false, useGpu_); - tmpMtx0 = Matrix::create(nullptr, - /* height= */ dim0, - dim1, - /* trans= */ false, - useGpu_); - return true; -} - -void OuterProdLayer::forward(PassType passType) { - Layer::forward(passType); - - 
MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - - size_t batchSize = inV0->getHeight(); - size_t dim0 = inV0->getWidth(); - size_t dim1 = inV1->getWidth(); - - CHECK_EQ(dim0 * dim1, getSize()); - CHECK_EQ(inV1->getHeight(), batchSize); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - reserveOutput(batchSize, dim0 * dim1); - } - - MatrixPtr outV = getOutputValue(); - - { - REGISTER_TIMER_INFO("FwOutProdTimer", getName().c_str()); - for (size_t i = 0; i < batchSize; i++) { - tmpMtx0->setData(outV->getData() + i * dim0 * dim1); - tmpRow0->setData(inV0->getData() + i * dim0); - tmpRow1->setData(inV1->getData() + i * dim1); - - tmpMtx0->mul(*tmpRow0->getTranspose(), *tmpRow1); - } - } -} - -void OuterProdLayer::backward(const UpdateCallback& callback) { - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr outG = getOutputGrad(); - MatrixPtr inG0 = getInputGrad(0); - MatrixPtr inG1 = getInputGrad(1); - - size_t batchSize = inV0->getHeight(); - size_t dim0 = inV0->getWidth(); - size_t dim1 = inV1->getWidth(); - - { - REGISTER_TIMER_INFO("BwOutProdTimer", getName().c_str()); - - if (inG0) { - for (size_t i = 0; i < batchSize; i++) { - tmpMtx0->setData(outG->getData() + i * dim0 * dim1); - tmpRow0->setData(inG0->getData() + i * dim0); - tmpRow1->setData(inV1->getData() + i * dim1); - - tmpRow0->mul(*tmpRow1, *tmpMtx0->getTranspose(), 1, 1); - } - } - - if (inG1) { - for (size_t i = 0; i < batchSize; i++) { - tmpMtx0->setData(outG->getData() + i * dim0 * dim1); - tmpRow0->setData(inV0->getData() + i * dim0); - tmpRow1->setData(inG1->getData() + i * dim1); - - tmpRow1->mul(*tmpRow0, *tmpMtx0, 1, 1); - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PadLayer.cpp b/paddle/legacy/gserver/layers/PadLayer.cpp deleted file mode 100644 index 7b92b3de2d839f240ec8cbe07ed7685295568809..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PadLayer.cpp +++ 
/dev/null @@ -1,106 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PadLayer.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(pad, PadLayer); - -bool PadLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - auto& pad_conf = config_.inputs(0).pad_conf(); - auto& img_conf = pad_conf.image_conf(); - CHECK_EQ(config_.inputs_size(), 1); - inDims_ = TensorShape( - {0, - img_conf.channels(), - img_conf.has_img_size_y() ? 
img_conf.img_size_y() : img_conf.img_size(), - img_conf.img_size()}); - - CHECK_EQ(2, pad_conf.pad_c_size()); - CHECK_EQ(2, pad_conf.pad_h_size()); - CHECK_EQ(2, pad_conf.pad_w_size()); - padc_ = {pad_conf.pad_c(0), pad_conf.pad_c(1)}; - padh_ = {pad_conf.pad_h(0), pad_conf.pad_h(1)}; - padw_ = {pad_conf.pad_w(0), pad_conf.pad_w(1)}; - - outDims_ = TensorShape(4); - setOutDims(0); - - createFunction(forward_, - "Pad", - FuncConfig() - .set("channel", padc_) - .set("height", padh_) - .set("width", padw_)); - createFunction(backward_, - "PadGrad", - FuncConfig() - .set("channel", padc_) - .set("height", padh_) - .set("width", padw_)); - - return true; -} - -void PadLayer::setOutDims(const size_t batchSize) { - outDims_.reshape({batchSize, - inDims_[1] + padc_[0] + padc_[1], - inDims_[2] + padh_[0] + padh_[1], - inDims_[3] + padw_[0] + padw_[1]}); -} - -void PadLayer::setTensorDim(const size_t batchSize) { - CHECK_EQ(static_cast(inputLayers_.size()), 1); - inDims_.setDim(0, batchSize); - int h = inputLayers_[0]->getOutput().getFrameHeight(); - if (h != 0) inDims_.setDim(2, h); - int w = inputLayers_[0]->getOutput().getFrameWidth(); - if (w != 0) inDims_.setDim(3, w); - setOutDims(batchSize); -} - -void PadLayer::forward(PassType passType) { - Layer::forward(passType); - MatrixPtr input = inputLayers_[0]->getOutputValue(); - size_t batchSize = input->getHeight(); - setTensorDim(batchSize); - int size = outDims_[1] * outDims_[2] * outDims_[3]; - resetOutput(batchSize, size); - MatrixPtr outV = getOutputValue(); - REGISTER_TIMER_INFO("PadForward", getName().c_str()); - - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(0), inDims_); - outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO); - forward_[0]->calc(inputs, outputs); -} - -void PadLayer::backward(const UpdateCallback& callback) { - (void)callback; - REGISTER_TIMER_INFO("PadBackward", getName().c_str()); - - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), 
outDims_); - outputs.addArg(*getInputGrad(0), inDims_, ADD_TO); - backward_[0]->calc(inputs, outputs); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PadLayer.h b/paddle/legacy/gserver/layers/PadLayer.h deleted file mode 100644 index 46b8a595978489c630b3ff2429ecb19d7c12521a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PadLayer.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" - -namespace paddle { - -/** - * \brief This layer pads zeros to inputs according to the specify dimension. - * The input and output is a 4D tensor. Padding zeros from the 2nd to - * the 4th dimenstion according padc_, padh_ and padw_. 
- */ -class PadLayer : public Layer { - public: - explicit PadLayer(const LayerConfig& config) : Layer(config) {} - - ~PadLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - void setOutDims(const size_t batchSize); - void setTensorDim(const size_t batchSize); - - std::vector padc_; - std::vector padh_; - std::vector padw_; - TensorShape inDims_; - TensorShape outDims_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ParameterReluLayer.cpp b/paddle/legacy/gserver/layers/ParameterReluLayer.cpp deleted file mode 100644 index 23715d1975d7a3606a9418d54bc69ae6f036a93a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ParameterReluLayer.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ParameterReluLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(prelu, ParameterReluLayer); - -bool ParameterReluLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - CHECK_EQ(inputLayers_.size(), 1UL); - CHECK_EQ(inputLayers_.size(), parameters_.size()); - partialSum_ = config_.partial_sum(); - CHECK_GT(partialSum_, 0UL) << "partial_sum must be larger than zero."; - CHECK(!(inputLayers_[0]->getSize() % partialSum_)) - << "Incorrect value for partialSum: " << partialSum_ - << " must divide input size: " << inputLayers_[0]->getSize(); - CHECK_EQ(getSize() / partialSum_, parameters_[0]->getSize()); - weight_ = std::unique_ptr(new Weight( - 1UL, inputLayers_[0]->getSize() / partialSum_, parameters_[0])); - return true; -} - -void ParameterReluLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInput(0).getBatchSize(); - int size = getSize(); - reserveOutput(batchSize, size); - MatrixPtr outV = getOutputValue(); - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - outV->paramReluForward(*(getInput(0).value), *(weight_->getW())); - } -} - -void ParameterReluLayer::backward(const UpdateCallback& callback) { - if (weight_->getWGrad()) { - weight_->getWGrad()->paramReluBackwardW(*getOutputGrad(), - *(getInputValue(0))); - } - - MatrixPtr preGrad = getInputGrad(0); - preGrad->paramReluBackwardDiff( - *getOutputGrad(), *(getInputValue(0)), *(weight_->getW())); - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weight_->getParameterPtr()->incUpdate(callback); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ParameterReluLayer.h b/paddle/legacy/gserver/layers/ParameterReluLayer.h deleted file mode 100644 index 
3aac4b42f60531b5856ddef208b8356898e42859..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ParameterReluLayer.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -/** - * @brief ParameterReluLayer active inputs with learnable parameter weight_. - * forward: - * \f[ - * y = x > 0 ? x : w .* x - * \f] - * backward: - * \f[ - * dx = x > 0 ? dy : w .* dy \\ - * dw = x > 0 ? 0 : dy.*x - * \f] - * Here, x is the input, w is the weight, y is the output. - * dx, dw, dy is the gradient. 
- */ - -class ParameterReluLayer : public Layer { - protected: - std::unique_ptr weight_; - - /** - * @brief partialSum_ makes a group of inputs share same weights, - * - partialSum_ = 1: - * element wise activation: each element has a weight_, - * - partialSum_ = number of elements in one channel, - * channels wise parameter activation, elements in a channel - * share same weight_, - * - partialSum_ = number of outputs - * all elements share same weight_, - */ - size_t partialSum_; - - public: - explicit ParameterReluLayer(const LayerConfig& config) : Layer(config) {} - - ~ParameterReluLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Pool3DLayer.cpp b/paddle/legacy/gserver/layers/Pool3DLayer.cpp deleted file mode 100644 index ae3f55c27f2d7bd3ab47d834d5b6f274ff558310..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Pool3DLayer.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Pool3DLayer.h" -#include "PoolProjectionLayer.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -REGISTER_LAYER(pool3d, Pool3DLayer); - -bool Pool3DLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - /* the size of inputs for pool-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1); - - const PoolConfig& conf = config_.inputs(0).pool_conf(); - poolType_ = conf.pool_type(); - channels_ = conf.channels(); - - sizeX_ = conf.size_x(); - sizeY_ = conf.size_y(); - sizeZ_ = conf.size_z(); - - strideW_ = conf.stride(); - strideH_ = conf.stride_y(); - strideD_ = conf.stride_z(); - - imgSizeW_ = conf.img_size(); - imgSizeH_ = conf.img_size_y(); - imgSizeD_ = conf.img_size_z(); - - paddingW_ = conf.padding(); - paddingH_ = conf.padding_y(); - paddingD_ = conf.padding_z(); - - outputW_ = conf.output_x(); - outputH_ = conf.output_y(); - outputD_ = conf.output_z(); - - return true; -} - -size_t Pool3DLayer::getSize() { - CHECK_EQ(inputLayers_.size(), 1UL); - - size_t layerSize = 0; - outputD_ = outputSize(imgSizeD_, sizeZ_, paddingD_, strideD_, false); - outputH_ = outputSize(imgSizeH_, sizeY_, paddingH_, strideH_, false); - outputW_ = outputSize(imgSizeW_, sizeX_, paddingW_, strideW_, false); - - layerSize = outputD_ * outputH_ * outputW_ * channels_; - getOutput().setFrameHeight(outputH_); - getOutput().setFrameWidth(outputW_); - getOutput().setFrameDepth(outputD_); - return layerSize; -} - -void Pool3DLayer::forward(PassType passType) { - Layer::forward(passType); - const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); - size_t batchSize = inMat->getHeight(); - size_t outWidth = getSize(); - resetOutput(batchSize, outWidth); - Matrix::resizeOrCreate(maxPoolIdx_, batchSize, outWidth, false, useGpu_); - const MatrixPtr outMat = getOutputValue(); - - if (poolType_ == "avg") { - outMat->avgPool3DForward(*inMat, - channels_, - imgSizeD_, - imgSizeH_, - imgSizeW_, - outputD_, 
- outputH_, - outputW_, - sizeZ_, - sizeY_, - sizeX_, - strideD_, - strideH_, - strideW_, - paddingD_, - paddingH_, - paddingW_); - } else if (poolType_ == "max") { - outMat->maxPool3DForward(*inMat, - *maxPoolIdx_, - channels_, - imgSizeD_, - imgSizeH_, - imgSizeW_, - outputD_, - outputH_, - outputW_, - sizeZ_, - sizeY_, - sizeX_, - strideD_, - strideH_, - strideW_, - paddingD_, - paddingH_, - paddingW_); - } else { - LOG(FATAL) << "Unknown pool type: " << poolType_; - } - forwardActivation(); -} - -void Pool3DLayer::backward(const UpdateCallback& callback) { - backwardActivation(); - - (void)callback; - if (NULL == getInputGrad(0)) return; - MatrixPtr inMat = inputLayers_[0]->getOutputValue(); - MatrixPtr inGradMat = inputLayers_[0]->getOutputGrad(); - MatrixPtr outMat = getOutputValue(); - MatrixPtr outGradMat = getOutputGrad(); - - if (poolType_ == "avg") { - inGradMat->avgPool3DBackward(*outGradMat, - imgSizeD_, - imgSizeH_, - imgSizeW_, - outputD_, - outputH_, - outputW_, - sizeZ_, - sizeY_, - sizeZ_, - strideD_, - strideH_, - strideW_, - paddingD_, - paddingH_, - paddingW_, - 1.0, - 1.0); - } else if (poolType_ == "max") { - inGradMat->maxPool3DBackward(*outGradMat, - *maxPoolIdx_, - imgSizeD_, - imgSizeH_, - imgSizeW_, - outputD_, - outputH_, - outputW_, - sizeZ_, - sizeY_, - sizeZ_, - strideD_, - strideH_, - strideW_, - paddingD_, - paddingH_, - paddingW_, - 1.0, - 1.0); - } else { - LOG(FATAL) << "Unknown pool type: " << poolType_; - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Pool3DLayer.h b/paddle/legacy/gserver/layers/Pool3DLayer.h deleted file mode 100644 index 6851c44ab22a39bebe3592b8e5f6384a393947f2..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Pool3DLayer.h +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "Layer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief Basic parent layer of pooling - * Pools the input within regions - */ -class Pool3DLayer : public Layer { - public: - explicit Pool3DLayer(const LayerConfig& config) : Layer(config) {} - ~Pool3DLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - size_t getSize(); - - protected: - int channels_; - int sizeX_, sizeY_, sizeZ_; - int strideW_, strideH_, strideD_; - int paddingW_, paddingH_, paddingD_; - int imgSizeW_, imgSizeH_, imgSizeD_; - int outputW_, outputH_, outputD_; - std::string poolType_; - MatrixPtr maxPoolIdx_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PoolLayer.cpp b/paddle/legacy/gserver/layers/PoolLayer.cpp deleted file mode 100644 index df172d95757e0842328caa508042f3613bc72232..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PoolLayer.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PoolLayer.h" -#include "MaxPoolWithMaskLayer.h" -#include "PoolProjectionLayer.h" -#include "paddle/legacy/utils/Logging.h" -#ifdef PADDLE_WITH_CUDA -#include "CudnnPoolLayer.h" -#endif -namespace paddle { - -REGISTER_LAYER_CREATE_FUNC(pool, &PoolLayer::create); - -bool PoolLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* the size of inputs for pool-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1); - - const PoolConfig& conf = config_.inputs(0).pool_conf(); - poolType_ = conf.pool_type(); - channels_ = conf.channels(); - sizeX_ = conf.size_x(); - stride_ = conf.stride(); - outputX_ = conf.output_x(); - imgSize_ = conf.img_size(); - confPadding_ = conf.padding(); - - sizeY_ = conf.has_size_y() ? conf.size_y() : conf.size_x(); - imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride(); - confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); - outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - - excludeMode_ = conf.has_exclude_mode() ? 
conf.exclude_mode() : true; - return true; -} - -Layer* PoolLayer::create(const LayerConfig& config) { - CHECK_EQ(config.inputs_size(), 1); - const std::string& pool = config.inputs(0).pool_conf().pool_type(); - if (pool == "max-projection" || pool == "avg-projection") { - return new PoolProjectionLayer(config); -#ifdef PADDLE_WITH_CUDA - } else if (CudnnPoolLayer::typeCheck(pool)) { - return new CudnnPoolLayer(config); -#endif - } else if (pool == "max-pool-with-mask") { - return new MaxPoolWithMaskLayer(config); - } else { - LOG(FATAL) << "Unknown pool type: " << pool; - return nullptr; - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PoolLayer.h b/paddle/legacy/gserver/layers/PoolLayer.h deleted file mode 100644 index 0808dfae8497008f974730b65977c85e914a7a27..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PoolLayer.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "Layer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { - -/** - * @brief Basic parent layer of pooling - * Pools the input within regions - */ -class PoolLayer : public Layer { - protected: - size_t channels_, sizeX_, stride_, outputX_, imgSize_; - int confPadding_; - - size_t sizeY_; - size_t imgSizeY_; - size_t strideY_; - size_t outputY_; - int confPaddingY_; - - std::string poolType_; - - bool excludeMode_; - - public: - explicit PoolLayer(const LayerConfig& config) : Layer(config) {} - - /** - * @brief create pooling layer by pool_type - */ - static Layer* create(const LayerConfig& config); - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PoolProjection.cpp b/paddle/legacy/gserver/layers/PoolProjection.cpp deleted file mode 100644 index 73ce88adf25b16e0ae8c416ddccd969336f5fdeb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PoolProjection.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "PoolProjection.h" - -namespace paddle { - -REGISTER_PROJECTION_CREATE_FUNC(pool, &PoolProjection::create); - -PoolProjection::PoolProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - const PoolConfig& conf = config_.pool_conf(); - poolType_ = conf.pool_type(); - channels_ = conf.channels(); - sizeX_ = conf.size_x(); - stride_ = conf.stride(); - outputX_ = conf.output_x(); - imgSize_ = conf.img_size(); - confPadding_ = conf.padding(); - - sizeY_ = conf.has_size_y() ? conf.size_y() : conf.size_x(); - imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride(); - confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); - outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - - excludeMode_ = conf.has_exclude_mode() ? conf.exclude_mode() : true; -} - -size_t PoolProjection::getSize() { - imgSizeY_ = in_->getFrameHeight(); - imgSize_ = in_->getFrameWidth(); - const PoolConfig& conf = config_.pool_conf(); - if (imgSizeY_ == 0) { - imgSizeY_ = conf.has_img_size_y() ? 
conf.img_size_y() : conf.img_size(); - } - if (imgSize_ == 0) { - imgSize_ = conf.img_size(); - } - outputY_ = outputSize(imgSizeY_, - sizeY_, - confPaddingY_, - strideY_, - /* caffeMode */ false); - outputX_ = outputSize(imgSize_, - sizeX_, - confPadding_, - stride_, - /* caffeMode */ false); - - const_cast(out_)->setFrameHeight(outputY_); - const_cast(out_)->setFrameWidth(outputX_); - - return outputY_ * outputX_ * channels_; -} - -PoolProjection* PoolProjection::create(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) { - const std::string& pool = config.pool_conf().pool_type(); - if (pool == "max-projection") { - return new MaxPoolProjection(config, parameter, useGpu); - } else if (pool == "avg-projection") { - return new AvgPoolProjection(config, parameter, useGpu); - } else { - LOG(FATAL) << "Unknown pool type: " << pool; - return nullptr; - } -} - -void MaxPoolProjection::forward() { - size_t width = getSize(); - CHECK_EQ(width, out_->value->getWidth()); - MatrixPtr inputV = in_->value; - MatrixPtr outV = out_->value; - outV->maxPoolForward(*inputV, - imgSizeY_, - imgSize_, - channels_, - sizeX_, - sizeY_, - strideY_, - stride_, - outputY_, - outputX_, - confPaddingY_, - confPadding_); -} - -void MaxPoolProjection::backward(const UpdateCallback& callback) { - (void)callback; - MatrixPtr outGrad = out_->grad; - MatrixPtr inputV = in_->value; - MatrixPtr outV = out_->value; - MatrixPtr inputGrad = in_->grad; - - if (NULL == inputGrad) { - return; - } - inputGrad->maxPoolBackward(*inputV, - imgSizeY_, - imgSize_, - *outGrad, - *outV, - sizeX_, - sizeY_, - strideY_, - stride_, - outputY_, - outputX_, - 1, - 1, - confPaddingY_, - confPadding_); -} - -void AvgPoolProjection::forward() { - size_t width = getSize(); - CHECK_EQ(width, out_->value->getWidth()); - MatrixPtr inputV = in_->value; - MatrixPtr outV = out_->value; - outV->avgPoolForward(*inputV, - imgSizeY_, - imgSize_, - channels_, - sizeX_, - sizeY_, - strideY_, - stride_, - 
outputY_, - outputX_, - confPaddingY_, - confPadding_, - excludeMode_); -} - -void AvgPoolProjection::backward(const UpdateCallback& callback) { - (void)callback; - - MatrixPtr outputGrad = out_->grad; - MatrixPtr inputGrad = in_->grad; - - if (NULL == inputGrad) { - return; - } - - inputGrad->avgPoolBackward(*outputGrad, - imgSizeY_, - imgSize_, - sizeX_, - sizeY_, - strideY_, - stride_, - outputY_, - outputX_, - 1, - 1, - confPaddingY_, - confPadding_, - excludeMode_); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PoolProjection.h b/paddle/legacy/gserver/layers/PoolProjection.h deleted file mode 100644 index d01b6a13f0a5fd2283f1f216ef419b9ccc7308f9..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PoolProjection.h +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "Projection.h" -#include "paddle/legacy/math/MathUtils.h" - -namespace paddle { - -class PoolProjection : public Projection { - protected: - size_t imgSizeY_, imgSize_; - size_t outputY_, outputX_; - size_t strideY_, stride_; - size_t sizeY_, sizeX_; - int confPaddingY_, confPadding_; - size_t channels_; - std::string poolType_; - bool excludeMode_; - - public: - PoolProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu); - - static PoolProjection* create(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu); - - const std::string& getPoolType() const { return poolType_; } - - size_t getSize(); -}; - -class MaxPoolProjection : public PoolProjection { - public: - MaxPoolProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) - : PoolProjection(config, parameter, useGpu) {} - - virtual void forward(); - virtual void backward(const UpdateCallback& callback = nullptr); -}; - -class AvgPoolProjection : public PoolProjection { - public: - AvgPoolProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) - : PoolProjection(config, parameter, useGpu) {} - - virtual void forward(); - virtual void backward(const UpdateCallback& callback = nullptr); -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PoolProjectionLayer.cpp b/paddle/legacy/gserver/layers/PoolProjectionLayer.cpp deleted file mode 100644 index e44b1d7ba1494e43db81f998c2818bbbf7779d6f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PoolProjectionLayer.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PoolProjectionLayer.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -size_t PoolProjectionLayer::getSize() { - CHECK_EQ(inputLayers_.size(), 1UL); - size_t layerSize = 0; - imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); - imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (imgSizeH_ == 0) { - imgSizeH_ = imgSizeY_; - } - if (imgSizeW_ == 0) { - imgSizeW_ = imgSize_; - } - - outputH_ = outputSize(imgSizeH_, - sizeY_, - confPaddingY_, - strideY_, - /* caffeMode */ false); - outputW_ = outputSize(imgSizeW_, - sizeX_, - confPadding_, - stride_, - /* caffeMode */ false); - - layerSize = outputH_ * outputW_ * channels_; - - return layerSize; -} - -void PoolProjectionLayer::forward(PassType passType) { - Layer::forward(passType); - const Argument& in = getInput(0); - int batchSize = in.value->getHeight(); - int size = getSize(); - resetOutput(batchSize, size); - poolProjection_->forward(&in, &output_, passType); -} - -void PoolProjectionLayer::backward(const UpdateCallback& callback) { - (void)callback; - if (NULL == getInputGrad(0)) { - return; - } - poolProjection_->backward(callback); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PoolProjectionLayer.h b/paddle/legacy/gserver/layers/PoolProjectionLayer.h deleted file mode 100644 index fcd35bbba4dff612fba827cdf545de71127c560e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PoolProjectionLayer.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "PoolLayer.h" -#include "PoolProjection.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { -/** - * @brief Basic parent layer of different kinds of pooling - */ -class PoolProjectionLayer : public PoolLayer { - protected: - size_t imgSizeH_, imgSizeW_; - size_t outputH_, outputW_; - std::unique_ptr poolProjection_; - ProjectionConfig projectionConfig_; - - public: - explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) { - PoolConfig* conf = projectionConfig_.mutable_pool_conf(); - *conf = config_.inputs(0).pool_conf(); - poolProjection_.reset( - PoolProjection::create(projectionConfig_, nullptr, useGpu_)); - } - - size_t getSize(); - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PowerLayer.cpp b/paddle/legacy/gserver/layers/PowerLayer.cpp deleted file mode 100644 index 5e94c64db6098dbc1ed13bdcbd573f95024713bc..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PowerLayer.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * This layer applys a power function to a vector element-wise, - * which is used in NEURAL TURING MACHINE. - * \f[ - * y = x^w - * \f] - * where \f$x\f$ is a input vector, \f$w\f$ is scalar weight, - * and output \f$y\f$ is a vector. - * - * The config file api is power_layer. - */ - -class PowerLayer : public Layer { - protected: - MatrixPtr tmpMtx; - - public: - explicit PowerLayer(const LayerConfig& config) : Layer(config) {} - - ~PowerLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(power, PowerLayer); - -bool PowerLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2U); - - return true; -} - -void PowerLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - - size_t batchSize = inV1->getHeight(); - size_t dataDim = inV1->getWidth(); - - CHECK_EQ(getSize(), dataDim); - CHECK_EQ(1U, inV0->getWidth()); - CHECK_EQ(batchSize, inV0->getHeight()); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - reserveOutput(batchSize, dataDim); - } - - MatrixPtr outV = getOutputValue(); - - { - REGISTER_TIMER_INFO("FwPowerTimer", 
getName().c_str()); - outV->rowPow(0, *inV1, *inV0); - } -} - -void PowerLayer::backward(const UpdateCallback& callback) { - MatrixPtr inV0 = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr inG0 = getInputGrad(0); - MatrixPtr inG1 = getInputGrad(1); - MatrixPtr outV = getOutputValue(); - MatrixPtr outG = getOutputGrad(); - - size_t batchSize = inV1->getHeight(); - size_t dataDim = inV1->getWidth(); - - { - REGISTER_TIMER_INFO("BwPowerTimer", getName().c_str()); - Matrix::resizeOrCreate(tmpMtx, batchSize, dataDim, false, useGpu_); - - if (inG0) { - tmpMtx->log2(*inV1); - tmpMtx->dotMul(*tmpMtx, *outV); - - // inG0 += outG .* (log(inV1) * outV) - inG0->rowDotMul(0, *outG, *tmpMtx); - } - - if (inG1) { - // tmp = (outV / inV1) * inV0 - tmpMtx->dotDiv(*outV, *inV1); - tmpMtx->rowScale(0, *tmpMtx, *inV0); - - inG1->addDotMul(*outG, *tmpMtx, 1, 1); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PrintLayer.cpp b/paddle/legacy/gserver/layers/PrintLayer.cpp deleted file mode 100644 index 6fbcc447f92208439bddd14d421d62cab30d81f4..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PrintLayer.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Layer.h" - -namespace paddle { - -class PrintLayer : public Layer { - public: - explicit PrintLayer(const LayerConfig& config) : Layer(config) {} - - void forward(PassType passType) override { - Layer::forward(passType); - std::vector vals; - for (size_t i = 0; i != inputLayers_.size(); ++i) { - std::ostringstream s; - getInput(i).printValueString(s, ""); - vals.push_back(s.str()); - } - size_t pos = 0; - size_t i = 0; - std::ostringstream s; - const std::string& format = config_.user_arg(); - while (true) { - size_t pos1 = format.find("%s", pos); - if (pos1 == std::string::npos) break; - if (i >= vals.size()) { - break; - } - s << format.substr(pos, pos1 - pos) << vals[i]; - pos = pos1 + 2; - ++i; - } - if (i != inputLayers_.size()) { - LOG(ERROR) << "Number of value in the format (" << format - << ") is not same as the number of inputs (" - << inputLayers_.size() << ") at " << getName(); - } - s << format.substr(pos); - - const std::string delimiter("\n"); - std::string content = s.str(); - std::string::size_type foundPos = 0; - std::string::size_type prevPos = 0; - while ((foundPos = content.find(delimiter, prevPos)) != std::string::npos) { - LOG(INFO) << content.substr(prevPos, foundPos - prevPos); - prevPos = foundPos + delimiter.size(); - } - LOG(INFO) << content.substr(prevPos); - } - - void backward(const UpdateCallback& callback) override {} -}; - -REGISTER_LAYER(print, PrintLayer); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/PriorBox.cpp b/paddle/legacy/gserver/layers/PriorBox.cpp deleted file mode 100644 index 83aab6e36662855a5867463757bc5a92e6e83e07..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/PriorBox.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/BaseMatrix.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { -/** - * @brief A layer for generating priorbox locations and variances. - * - Input: Two and only two input layer are accepted. The input layer must be - * be a data output layer and a convolution output layer. - * - Output: The priorbox locations and variances of the input data. - * Reference: - * Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, - * Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector - */ - -class PriorBoxLayer : public Layer { - public: // NOLINT - explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override {} - - protected: // NOLINT - int numPriors_; - std::vector minSize_; - std::vector maxSize_; - std::vector aspectRatio_; - std::vector variance_; - MatrixPtr buffer_; -}; - -REGISTER_LAYER(priorbox, PriorBoxLayer); - -bool PriorBoxLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - auto pbConf = config_.inputs(0).priorbox_conf(); - std::vector tmp; - aspectRatio_.push_back(1.); - std::copy(pbConf.min_size().begin(), - pbConf.min_size().end(), - std::back_inserter(minSize_)); - std::copy(pbConf.max_size().begin(), - pbConf.max_size().end(), - std::back_inserter(maxSize_)); - std::copy(pbConf.variance().begin(), - 
pbConf.variance().end(), - std::back_inserter(variance_)); - std::copy(pbConf.aspect_ratio().begin(), - pbConf.aspect_ratio().end(), - std::back_inserter(tmp)); - - if (maxSize_.size() > 0) CHECK_EQ(minSize_.size(), maxSize_.size()); - - // flip aspect ratios - for (unsigned index = 0; index < tmp.size(); index++) { - real ar = tmp[index]; - if (fabs(ar - 1.) < 1e-6) continue; - aspectRatio_.push_back(ar); - aspectRatio_.push_back(1. / ar); - } - - numPriors_ = aspectRatio_.size() * minSize_.size() + maxSize_.size(); - - return true; -} - -void PriorBoxLayer::forward(PassType passType) { - Layer::forward(passType); - auto input = getInput(0); - int layerWidth = input.getFrameWidth(); - int layerHeight = input.getFrameHeight(); - - auto image = getInput(1); - int imageWidth = image.getFrameWidth(); - int imageHeight = image.getFrameHeight(); - - real stepW = static_cast(imageWidth) / layerWidth; - real stepH = static_cast(imageHeight) / layerHeight; - int dim = layerHeight * layerWidth * numPriors_ * 4; - reserveOutput(1, dim * 2); - // use a cpu buffer to compute - Matrix::resizeOrCreate(buffer_, 1, dim * 2, false, false); - auto* tmpPtr = buffer_->getData(); - - int idx = 0; - for (int h = 0; h < layerHeight; ++h) { - for (int w = 0; w < layerWidth; ++w) { - real centerX = (w + 0.5) * stepW; - real centerY = (h + 0.5) * stepH; - for (size_t s = 0; s < minSize_.size(); s++) { - real minSize = minSize_[s]; - real boxWidth = minSize; - real boxHeight = minSize; - - // first prior: aspect_ratio == 1.0, compatible to old logic - tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; - tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; - tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth; - tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight; - // set the variance. 
- for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t]; - - if (maxSize_.size() > 0) { - // square prior with size sqrt(minSize * maxSize) - real maxSize = maxSize_[s]; - boxWidth = boxHeight = sqrt(minSize * maxSize); - tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; - tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; - tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth; - tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight; - // set the variance. - for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t]; - } - - // priors with different aspect ratios - for (size_t r = 0; r < aspectRatio_.size(); r++) { - real ar = aspectRatio_[r]; - if (fabs(ar - 1.0) < 1e-6) { - continue; - } - boxWidth = minSize * sqrt(ar); - boxHeight = minSize / sqrt(ar); - tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; - tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; - tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth; - tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight; - // set the variance. - for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t]; - } - } - } - } - - // clip the prior's coordidate such that it is within [0, 1] - for (int d = 0; d < dim * 2; ++d) - if ((d % 8) < 4) - tmpPtr[d] = std::min(std::max(tmpPtr[d], (real)0.), (real)1.); - MatrixPtr outV = getOutputValue(); - outV->copyFrom(buffer_->data_, dim * 2); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Projection.cpp b/paddle/legacy/gserver/layers/Projection.cpp deleted file mode 100644 index 96d61e7f67be294a05a84db75bb7e700e4303b7a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Projection.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Projection.h" - -#include "ContextProjection.h" -#include "FullMatrixProjection.h" -#include "TableProjection.h" - -namespace paddle { - -ClassRegistrar - Projection::registrar_; - -Projection* Projection::create(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) { - return registrar_.createByType(config.type(), config, parameter, useGpu); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/Projection.h b/paddle/legacy/gserver/layers/Projection.h deleted file mode 100644 index 974f5a2cacd10a965adcb4accf6ca00c26044b64..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/Projection.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "Layer.h" -#include "ModelConfig.pb.h" -#include "paddle/legacy/parameter/Parameter.h" - -namespace paddle { - -// Macro for registering a projection type -// Example: REGISTER_LAYER(fc, FullMatrixProjection); -#define REGISTER_PROJECTION(__type_name, __class_name) \ - static InitFunction __reg_type_##__type_name([]() { \ - Projection::registrar_.registerClass<__class_name>(#__type_name); \ - }) - -#define REGISTER_PROJECTION_CREATE_FUNC(__type_name, createFunction) \ - static InitFunction __reg_type_##__type_name([]() { \ - Projection::registrar_.registerClass(#__type_name, createFunction); \ - }) - -/** - * A projection takes one Argument as input, calculate the result and add it - * to output Argument. - */ -class Projection { - public: - static Projection* create(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu); - - Projection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGpu) - : config_(config), parameter_(parameter), useGpu_(useGpu) {} - - virtual ~Projection() {} - - const std::string& getName() const { return config_.name(); } - - /// Register a projection - static ClassRegistrar - registrar_; - - /** - * Forward propagation. If backward() will be called, in and out must be kept - * valid until then. - * @param in input of projection - * @param out output of projection - * @param passType PASS_TRAIN of PASS_TEST - */ - void forward(const Argument* in, const Argument* out, PassType passType) { - in_ = in; - out_ = out; - passType_ = passType; - forward(); - } - - virtual void prefetch(const Argument* in) {} - virtual void forward() = 0; - virtual void backward(const UpdateCallback& callback) = 0; - - /** - * See comment in Layer.h for the function with the same name. - */ - virtual void resetState() {} - - /** - * Set layer state. - */ - virtual void setState(LayerStatePtr state) {} - - /** - * Get layer state. A copy of internal state is returned. 
- */ - virtual LayerStatePtr getState() { return nullptr; } - - /** - * init forward_ and backward_ functions - */ - virtual bool init() { return true; } - - /** - * Get output size of projection. - */ - size_t getOutputSize() const { return config_.output_size(); } - - protected: - /** - * Create layer function. Function is called in forward or backward. - * \param function, Layer::forward_ or Layer::backward_ - * \param name, function name - * \param config, initialization configuration for the function - */ - void createFunction(std::vector>& function, - const std::string& name, - const FuncConfig& config) { - if (useGpu_) { - function.emplace_back( - FunctionBase::funcRegistrar_.createByType(name + "-GPU")); - } else { - function.emplace_back( - FunctionBase::funcRegistrar_.createByType(name + "-CPU")); - } - auto& func = function.back(); - func->init(config); - } - - protected: - /// Config of projection - ProjectionConfig config_; - /// Parameter of projection - ParameterPtr parameter_; - bool useGpu_; - - /// Store `in` passed to forward() - const Argument* in_; - /// Store `out` passed to forward() - const Argument* out_; - /// Store `passType` passed to forward() - PassType passType_; - /// Layer forward function - std::vector> forward_; - /// Layer backward function - std::vector> backward_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ROIPoolLayer.cpp b/paddle/legacy/gserver/layers/ROIPoolLayer.cpp deleted file mode 100644 index b5cbc0c704a1a87ecfe6d7d980aff30238cd6aad..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ROIPoolLayer.cpp +++ /dev/null @@ -1,233 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ROIPoolLayer.h" -#include - -namespace paddle { - -REGISTER_LAYER(roi_pool, ROIPoolLayer); - -bool ROIPoolLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); - pooledWidth_ = layerConf.pooled_width(); - pooledHeight_ = layerConf.pooled_height(); - spatialScale_ = layerConf.spatial_scale(); - - return true; -} - -void ROIPoolLayer::forward(PassType passType) { - Layer::forward(passType); - - const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); - height_ = getInput(0).getFrameHeight(); - if (!height_) height_ = layerConf.height(); - width_ = getInput(0).getFrameWidth(); - if (!width_) width_ = layerConf.width(); - channels_ = getInputValue(0)->getWidth() / width_ / height_; - - size_t batchSize = getInput(0).getBatchSize(); - size_t numROIs = getInput(1).getBatchSize(); - - MatrixPtr dataValue = getInputValue(0); - MatrixPtr roiValue = getInputValue(1); - resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); - MatrixPtr outputValue = getOutputValue(); - - if (useGpu_) { // TODO(guosheng): implement on GPU later - MatrixPtr dataCpuBuffer; - Matrix::resizeOrCreate(dataCpuBuffer, - dataValue->getHeight(), - dataValue->getWidth(), - false, - false); - MatrixPtr roiCpuBuffer; - Matrix::resizeOrCreate(roiCpuBuffer, - roiValue->getHeight(), - roiValue->getWidth(), - false, - false); - dataCpuBuffer->copyFrom(*dataValue); - roiCpuBuffer->copyFrom(*roiValue); - dataValue = dataCpuBuffer; - roiValue = 
roiCpuBuffer; - MatrixPtr outputCpuBuffer; - Matrix::resizeOrCreate(outputCpuBuffer, - outputValue->getHeight(), - outputValue->getWidth(), - false, - false); - outputCpuBuffer->copyFrom(*outputValue); - outputValue = outputCpuBuffer; - } - - real* bottomData = dataValue->getData(); - size_t batchOffset = dataValue->getWidth(); - size_t channelOffset = height_ * width_; - real* bottomROIs = roiValue->getData(); - size_t roiOffset = roiValue->getWidth(); - size_t poolChannelOffset = pooledHeight_ * pooledWidth_; - - real* outputData = outputValue->getData(); - real* argmaxData = nullptr; - if (passType != PASS_TEST) { - Matrix::resizeOrCreate(maxIdxs_, - numROIs, - channels_ * pooledHeight_ * pooledWidth_, - false, - false); - argmaxData = maxIdxs_->getData(); - } - - for (size_t n = 0; n < numROIs; ++n) { - // the first five elememts of each RoI should be: - // batch_idx, roi_x_start, roi_y_start, roi_x_end, roi_y_end - size_t roiBatchIdx = bottomROIs[0]; - size_t roiStartW = round(bottomROIs[1] * spatialScale_); - size_t roiStartH = round(bottomROIs[2] * spatialScale_); - size_t roiEndW = round(bottomROIs[3] * spatialScale_); - size_t roiEndH = round(bottomROIs[4] * spatialScale_); - CHECK_GE(roiBatchIdx, 0UL); - CHECK_LT(roiBatchIdx, batchSize); - size_t roiHeight = - std::max(roiEndH - roiStartH + 1, static_cast(1)); - size_t roiWidth = std::max(roiEndW - roiStartW + 1, static_cast(1)); - real binSizeH = - static_cast(roiHeight) / static_cast(pooledHeight_); - real binSizeW = - static_cast(roiWidth) / static_cast(pooledWidth_); - real* batchData = bottomData + batchOffset * roiBatchIdx; - for (size_t c = 0; c < channels_; ++c) { - for (size_t ph = 0; ph < pooledHeight_; ++ph) { - for (size_t pw = 0; pw < pooledWidth_; ++pw) { - size_t hstart = static_cast(std::floor(ph * binSizeH)); - size_t wstart = static_cast(std::floor(pw * binSizeW)); - size_t hend = static_cast(std::ceil((ph + 1) * binSizeH)); - size_t wend = static_cast(std::ceil((pw + 1) * binSizeW)); - 
hstart = std::min( - std::max(hstart + roiStartH, static_cast(0)), height_); - wstart = std::min( - std::max(wstart + roiStartW, static_cast(0)), width_); - hend = std::min(std::max(hend + roiStartH, static_cast(0)), - height_); - wend = std::min(std::max(wend + roiStartW, static_cast(0)), - width_); - - bool isEmpty = (hend <= hstart) || (wend <= wstart); - size_t poolIndex = ph * pooledWidth_ + pw; - outputData[poolIndex] = isEmpty ? 0 : -FLT_MAX; - if (argmaxData) { - argmaxData[poolIndex] = -1; - } - - for (size_t h = hstart; h < hend; ++h) { - for (size_t w = wstart; w < wend; ++w) { - size_t index = h * width_ + w; - if (batchData[index] > outputData[poolIndex]) { - outputData[poolIndex] = batchData[index]; - if (argmaxData) { - argmaxData[poolIndex] = index; - } - } - } - } - } - } - batchData += channelOffset; - outputData += poolChannelOffset; - if (argmaxData) { - argmaxData += poolChannelOffset; - } - } - bottomROIs += roiOffset; - } - if (useGpu_) { - getOutputValue()->copyFrom(*outputValue); - } -} - -void ROIPoolLayer::backward(const UpdateCallback& callback) { - MatrixPtr inGradValue = getInputGrad(0); - MatrixPtr outGradValue = getOutputGrad(); - MatrixPtr roiValue = getInputValue(1); - - if (useGpu_) { - MatrixPtr inGradCpuBuffer; - Matrix::resizeOrCreate(inGradCpuBuffer, - inGradValue->getHeight(), - inGradValue->getWidth(), - false, - false); - MatrixPtr outGradCpuBuffer; - Matrix::resizeOrCreate(outGradCpuBuffer, - outGradValue->getHeight(), - outGradValue->getWidth(), - false, - false); - MatrixPtr roiCpuBuffer; - Matrix::resizeOrCreate(roiCpuBuffer, - roiValue->getHeight(), - roiValue->getWidth(), - false, - false); - inGradCpuBuffer->copyFrom(*inGradValue); - outGradCpuBuffer->copyFrom(*outGradValue); - roiCpuBuffer->copyFrom(*roiValue); - inGradValue = inGradCpuBuffer; - outGradValue = outGradCpuBuffer; - roiValue = roiCpuBuffer; - } - - real* bottomROIs = roiValue->getData(); - size_t numROIs = getInput(1).getBatchSize(); - size_t roiOffset 
= getInputValue(1)->getWidth(); - - real* inDiffData = inGradValue->getData(); - size_t batchOffset = getInputValue(0)->getWidth(); - size_t channelOffset = height_ * width_; - - real* outDiffData = outGradValue->getData(); - size_t poolChannelOffset = pooledHeight_ * pooledWidth_; - real* argmaxData = maxIdxs_->getData(); - - for (size_t n = 0; n < numROIs; ++n) { - size_t roiBatchIdx = bottomROIs[0]; - real* batchDiffData = inDiffData + batchOffset * roiBatchIdx; - for (size_t c = 0; c < channels_; ++c) { - for (size_t ph = 0; ph < pooledHeight_; ++ph) { - for (size_t pw = 0; pw < pooledWidth_; ++pw) { - size_t poolIndex = ph * pooledWidth_ + pw; - if (argmaxData[poolIndex] > 0) { - size_t index = static_cast(argmaxData[poolIndex]); - batchDiffData[index] += outDiffData[poolIndex]; - } - } - } - batchDiffData += channelOffset; - outDiffData += poolChannelOffset; - argmaxData += poolChannelOffset; - } - bottomROIs += roiOffset; - } - - if (useGpu_) { - getInputGrad(0)->copyFrom(*inGradValue); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ROIPoolLayer.h b/paddle/legacy/gserver/layers/ROIPoolLayer.h deleted file mode 100644 index 801a9b3aebe6d718ea38b76246a6056891d0b1f6..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ROIPoolLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "Layer.h" - -namespace paddle { - -/** - * A layer used by Fast R-CNN to extract feature maps of ROIs from the last - * feature map. - * - Input: This layer needs two input layers: The first input layer is a - * convolution layer; The second input layer contains the ROI data - * which is the output of ProposalLayer in Faster R-CNN. layers for - * generating bbox location offset and the classification confidence. - * - Output: The ROIs' feature map. - * Reference: - * Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. - * Faster R-CNN: Towards Real-Time Object Detection with Region Proposal - * Networks - */ - -class ROIPoolLayer : public Layer { - protected: - size_t channels_; - size_t width_; - size_t height_; - size_t pooledWidth_; - size_t pooledHeight_; - real spatialScale_; - - // Since there is no int matrix, use real maxtrix instead. - MatrixPtr maxIdxs_; - - public: - explicit ROIPoolLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/RecurrentLayer.cpp b/paddle/legacy/gserver/layers/RecurrentLayer.cpp deleted file mode 100644 index 3fc5bd15edd3180a301723bb6cb115b207684b61..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/RecurrentLayer.cpp +++ /dev/null @@ -1,301 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "RecurrentLayer.h" - -DEFINE_bool(rnn_use_batch, false, "Using the batch method for calculation."); - -namespace paddle { - -REGISTER_LAYER(recurrent, RecurrentLayer); - -bool RecurrentLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!Layer::init(layerMap, parameterMap)) return false; - CHECK_EQ(1U, inputLayers_.size()); - CHECK_EQ(1U, parameters_.size()); - CHECK_EQ(getSize() * getSize(), parameters_[0]->getSize()); - weight_.reset(new Weight(getSize(), getSize(), parameters_[0])); - if (biasParameter_.get() != NULL) { - bias_.reset(new Weight(1, getSize(), biasParameter_)); - } - reversed_ = config_.reversed(); - return true; -} - -void RecurrentLayer::resetState() { - CHECK(!reversed_) << "state is not allowed for reversed recurrent layer"; - Matrix::resizeOrCreate( - prevOutput_, 1, getSize(), /* trans= */ false, useGpu_); - prevOutput_->zeroMem(); -} - -void RecurrentLayer::setState(LayerStatePtr state) { - CHECK(state->value.size() == 1) << "one matrix is expected for RNN state"; - prevOutput_->copyFrom(*(state->value[0])); -} - -LayerStatePtr RecurrentLayer::getState() { - LayerStatePtr res = std::make_shared(); - res->value.push_back(prevOutput_->clone(0, 0, useGpu_)); - res->value[0]->copyFrom(*prevOutput_); - return res; -} - -void RecurrentLayer::forward(PassType passType) { - REGISTER_TIMER_INFO("RecurrentFwTimer", getName().c_str()); - Layer::forward(passType); - const Argument& input = getInput(0); - CHECK(input.sequenceStartPositions); - int batchSize = input.getBatchSize(); - size_t numSequences = 
input.getNumSequences(); - resetOutput(batchSize, getSize()); - CHECK_EQ(getSize(), input.value->getWidth()); - const int* starts = input.sequenceStartPositions->getData(false); - CHECK_EQ(starts[numSequences], batchSize); - - output_.value->assign(*input.value); - if (bias_) { - output_.value->addBias(*bias_->getW(), 1); - } - if (!FLAGS_rnn_use_batch) { - forwardSequence(batchSize, numSequences, starts); - } else { - forwardBatch(batchSize, numSequences, starts); - } -} - -void RecurrentLayer::forwardSequence(int batchSize, - size_t numSequences, - const int* starts) { - REGISTER_TIMER_INFO("RecurrentFwSequence", getName().c_str()); - frameOutput_.reserve(batchSize); - for (int i = frameOutput_.size(); i < batchSize; ++i) { - Argument arg; - arg.value = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - arg.grad = Matrix::create(nullptr, - /* height= */ 1, - getSize(), - /* trans= */ false, - useGpu_); - frameOutput_.push_back(arg); - } - - for (int i = 0; i < batchSize; ++i) { - frameOutput_[i].value->setData(output_.value->getData() + i * getSize()); - } - - AsyncGpuBlock asyncGpuBlock; - for (size_t i = 0; i < numSequences; ++i) { - forwardOneSequence(starts[i], starts[i + 1] - starts[i]); - } -} - -void RecurrentLayer::forwardOneSequence(int start, int length) { - if (!reversed_) { - if (prevOutput_) { - frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1); - } - activation_->forward(frameOutput_[start]).check(); - - for (int i = 1; i < length; ++i) { - frameOutput_[start + i].value->mul( - *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1); - activation_->forward(frameOutput_[start + i]).check(); - } - if (prevOutput_) { - prevOutput_->assign(*frameOutput_[start + length - 1].value); - } - } else { - activation_->forward(frameOutput_[start + length - 1]).check(); - for (int i = length - 2; i >= 0; --i) { - frameOutput_[start + i].value->mul( - *frameOutput_[start + i + 1].value, 
*weight_->getW(), 1, 1); - activation_->forward(frameOutput_[start + i]).check(); - } - } -} - -void RecurrentLayer::backward(const UpdateCallback& callback) { - REGISTER_TIMER_INFO("RecurrentBwTimer", getName().c_str()); - const Argument& input = getInput(0); - CHECK(input.sequenceStartPositions); - int batchSize = input.getBatchSize(); - const int* starts = input.sequenceStartPositions->getData(false); - size_t numSequences = input.getNumSequences(); - - if (!FLAGS_rnn_use_batch) { - backwardSequence(batchSize, numSequences, starts); - } else { - backwardBatch(batchSize, numSequences, starts); - } - - if (input.grad) { - input.grad->add(*output_.grad); - } - - if (bias_ && bias_->getWGrad()) { - bias_->getWGrad()->collectBias(*output_.grad, 1); - bias_->getParameterPtr()->incUpdate(callback); - } - weight_->getParameterPtr()->incUpdate(callback); -} - -void RecurrentLayer::backwardSequence(int batchSize, - size_t numSequences, - const int* starts) { - REGISTER_TIMER_INFO("RecurrentBwSequence", getName().c_str()); - for (int i = 0; i < batchSize; ++i) { - frameOutput_[i].grad->setData(output_.grad->getData() + i * getSize()); - } - - AsyncGpuBlock asyncGpuBlock; - for (size_t i = 0; i < numSequences; ++i) { - backwardOneSequence(starts[i], starts[i + 1] - starts[i]); - } -} - -void RecurrentLayer::backwardOneSequence(int start, int length) { - MatrixPtr weightT = weight_->getW()->getTranspose(); - if (!reversed_) { - for (int i = length - 1; i > 0; --i) { - activation_->backward(frameOutput_[start + i]).check(); - frameOutput_[start + i - 1].grad->mul( - *frameOutput_[start + i].grad, *weightT, 1, 1); - } - activation_->backward(frameOutput_[start]).check(); - if (weight_->getWGrad()) { - weight_->getWGrad()->mul( - *output_.value->subMatrix(start, length - 1)->getTranspose(), - *output_.grad->subMatrix(start + 1, length - 1), - 1, - 1); - } - } else { - for (int i = 0; i < length - 1; ++i) { - activation_->backward(frameOutput_[start + i]).check(); - 
frameOutput_[start + i + 1].grad->mul( - *frameOutput_[start + i].grad, *weightT, 1, 1); - } - activation_->backward(frameOutput_[start + length - 1]).check(); - if (weight_->getWGrad()) { - weight_->getWGrad()->mul( - *output_.value->subMatrix(start + 1, length - 1)->getTranspose(), - *output_.grad->subMatrix(start, length - 1), - 1, - 1); - } - } -} - -void RecurrentLayer::forwardBatch(int batchSize, - size_t numSequences, - const int* starts) { - if (!batchValue_) { - batchValue_.reset(new SequenceToBatch(useGpu_)); - } - - batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts, reversed_); - - batchValue_->copyFromSeq(*output_.value); - { - REGISTER_TIMER_INFO("RecurrentFwBatch", getName().c_str()); - AsyncGpuBlock asyncGpuBlock; - /* forward one batch */ - for (size_t n = 0; n < batchValue_->getNumBatch(); n++) { - MatrixPtr batch2 = batchValue_->getBatchValue(n); - - if (n != 0) { - MatrixPtr batch1 = - batchValue_->getBatchValue(n - 1, batch2->getHeight()); - batch2->mul(*batch1, *weight_->getW(), 1, 1); - } - Argument arg; - arg.value = batch2; - activation_->forward(arg).check(); - } - } - batchValue_->copyBackSeq(*output_.value); -} - -void RecurrentLayer::backwardBatch(int batchSize, - size_t numSequences, - const int* starts) { - if (!batchGrad_) { - batchGrad_.reset(new SequenceToBatch(useGpu_)); - } - batchGrad_->shareIndexWith(*batchValue_); - - size_t numBatch = batchGrad_->getNumBatch(); - bool backwardByBatch = numBatch < numSequences; - - batchGrad_->copyFromSeq(*output_.grad); - { - REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str()); - MatrixPtr weightT = weight_->getW()->getTranspose(); - AsyncGpuBlock asyncGpuBlock; - /* backward one batch */ - for (int n = (int)numBatch - 1; n >= 0; n--) { - MatrixPtr batch2 = batchGrad_->getBatchValue(n); - MatrixPtr batch1 = batchValue_->getBatchValue(n, batch2->getHeight()); - - Argument arg; - arg.value = batch1; - arg.grad = batch2; - activation_->backward(arg).check(); - - if (n != 0) { 
- batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight()); - batch1->mul(*batch2, *weightT, 1, 1); - } - - if (backwardByBatch && weight_->getWGrad()) { - if (n != 0) { - /* backward weight */ - batch1 = batchValue_->getBatchValue(n - 1, batch2->getHeight()); - weight_->getWGrad()->mul(*batch1->getTranspose(), *batch2, 1, 1); - } - } - } - } - - batchGrad_->copyBackSeq(*output_.grad); - - if (!backwardByBatch && weight_->getWGrad()) { - REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str()); - AsyncGpuBlock asyncGpuBlock; - for (size_t seq = 0; seq < numSequences; ++seq) { - int len = starts[seq + 1] - starts[seq]; - if (!reversed_) { - weight_->getWGrad()->mul( - *output_.value->subMatrix(starts[seq], len - 1)->getTranspose(), - *output_.grad->subMatrix(starts[seq] + 1, len - 1), - 1, - 1); - } else { - weight_->getWGrad()->mul( - *output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(), - *output_.grad->subMatrix(starts[seq], len - 1), - 1, - 1); - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/RecurrentLayer.h b/paddle/legacy/gserver/layers/RecurrentLayer.h deleted file mode 100644 index 287ea27a0984729fde5b35aa0807e9f2b29f993f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/RecurrentLayer.h +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#pragma once -#include -#include "Layer.h" -#include "SequenceToBatch.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief RecurrentLayer takes 1 input layer. The output size is the same with - * input layer. - * For each sequence [start, end] it performs the following computation: - * \f[ - * out_{i} = act(in_{i}) \ \ \text{for} \ i = start \\ - * out_{i} = act(in_{i} + out_{i-1} * W) \ \ \text{for} \ start < i <= end - * - * \f] - * If reversed is true, the order is reversed: - * \f[ - * out_{i} = act(in_{i}) \ \ \text{for} \ i = end \\ - * out_{i} = act(in_{i} + out_{i+1} * W) \ \ \text{for} \ start <= i < end - * \f] - * There are two methods to calculate rnn. One way is to compute rnn one - * sequence by one sequence. The other way is to reorganize the input - * into batches, then compute rnn one batch by one batch. Users can select - * them by rnn_use_batch flag. - */ - -class RecurrentLayer : public Layer { - public: - explicit RecurrentLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback) override; - - void resetState() override; - - void setState(LayerStatePtr state) override; - - LayerStatePtr getState() override; - - protected: - /** - * @brief If user do not set --rnn_use_batch=true, it will - * compute rnn forward one sequence by one sequence in default. - * @param batchSize Total words number of all samples in this batch. - * @param numSequences The sample number. - * @param starts Each start position of each samples. - */ - void forwardSequence(int batchSize, size_t numSequences, const int* starts); - /** - * @brief Compute rnn forward by one sequence. - * @param start The start position of this sequence (or sample). - * @param length The length of this sequence (or sample), namely the words - * number of this sequence. 
- */ - void forwardOneSequence(int start, int length); - /** - * @brief Compute rnn backward one sequence by onesequence. - * @param batchSize Total words number of all samples in this batch. - * @param numSequences The sample number. - * @param starts Each start position of each samples. - */ - void backwardSequence(int batchSize, size_t numSequences, const int* starts); - /** - * @brief Compute rnn backward by one sequence. - * @param start The start position of this sequence (or sample). - * @param length The length of this sequence (or sample), namely the words - * number of this sequence. - */ - void backwardOneSequence(int start, int length); - - /** - * @brief Reorganize input into batches and compute rnn forward batch - * by batch. It will convert batch shape to sequence after finishing forward. - * The batch info can refer to SequenceToBatch class. - * @param batchSize Total words number of all samples in this batch. - * @param numSequences The sample number. - * @param starts Each start position of each samples. - */ - virtual void forwardBatch(int batchSize, - size_t numSequences, - const int* starts); - - /** - * @brief Reorganize input into batches and compute rnn forward batch - * by batch. - * @param batchSize Total words number of all samples in this batch. - * @param numSequences The sample number. - * @param starts Each start position of each samples. - */ - virtual void backwardBatch(int batchSize, - size_t numSequences, - const int* starts); - - protected: - std::unique_ptr weight_; - std::unique_ptr bias_; - - /// frameOutput_[i] is used to hold the i-th sample of output_ - std::vector frameOutput_; - MatrixPtr prevOutput_; - /// Whether compute rnn by reverse. - bool reversed_; - /// If compute batch by batch, batchValue_ will be used to save the - /// reorganized input value. - std::unique_ptr batchValue_; - /// If compute batch by batch, batchGrad_ will be used to save the - /// gradient with respect to reorganized input value. 
- std::unique_ptr batchGrad_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/RecurrentLayerGroup.cpp b/paddle/legacy/gserver/layers/RecurrentLayerGroup.cpp deleted file mode 100644 index 39321245995fce2f2bd671593c028fd6038865de..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/RecurrentLayerGroup.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "paddle/legacy/gserver/layers/Layer.h" - -#include "paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * Recurrent layer group is a group of layers, which forward/backward one frame - * after previous frame forward/backward through all layers in layer group. - * It's automatically added by config_parser if some layers are defined - * between RecurrentLayerGroupBegin and RecurrentLayerGroupEnd. 
- */ -class RecurrentLayerGroup : public Layer { - public: - explicit RecurrentLayerGroup(const LayerConfig& config) : Layer(config) {} - - void initSubNetwork(NeuralNetwork* rootNetwork, - const ModelConfig& config, - const std::vector& parameterTypes, - bool useGpu) override; - - void forward(PassType passType) override { - REGISTER_TIMER_INFO("RecurrentGroupFwTime", getName().c_str()); - const std::vector inArgs; - std::vector outArgs; - network_->forward(inArgs, &outArgs, passType); - } - void backward(const UpdateCallback& callback) override { - REGISTER_TIMER_INFO("RecurrentGroupBwTime", getName().c_str()); - network_->backward(nullptr); - - for (auto& para : parameters_) { - para->incUpdate(callback); - } - } - - /** - * @see Layer.accessSubNetwork - */ - void accessSubNetwork( - const std::function& callback) override { - callback(*network_); - } - - private: - std::unique_ptr network_; -}; - -REGISTER_LAYER(recurrent_layer_group, RecurrentLayerGroup); - -void RecurrentLayerGroup::initSubNetwork( - NeuralNetwork* rootNetwork, - const ModelConfig& config, - const std::vector& parameterTypes, - bool useGpu) { - setNeedGradient(true); - - network_.reset(new RecurrentGradientMachine(config_.name(), rootNetwork)); - ParamInitCallback cb = [rootNetwork](int paramId, Parameter* para) { - para->enableSharedType( - PARAMETER_VALUE, - rootNetwork->getParameters()[paramId]->getBuf(PARAMETER_VALUE), - rootNetwork->getParameters()[paramId]->getMat(PARAMETER_VALUE)); - para->enableSharedType( - PARAMETER_GRADIENT, - rootNetwork->getParameters()[paramId]->getBuf(PARAMETER_GRADIENT), - rootNetwork->getParameters()[paramId]->getMat(PARAMETER_GRADIENT)); - }; - network_->init(config, cb, parameterTypes, useGpu); - - for (auto paramId : network_->getParameterIds()) { - ParameterPtr parameter = rootNetwork->getParameters()[paramId]; - parameter->incShared(); - CHECK_EQ(parameter->getDeviceId(), getDeviceId()); - parameters_.push_back(parameter); - } -} - -} // namespace paddle 
diff --git a/paddle/legacy/gserver/layers/ResizeLayer.cpp b/paddle/legacy/gserver/layers/ResizeLayer.cpp deleted file mode 100644 index 8f8aad820f7d6d2be0af74d607d763912c3c0f2a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ResizeLayer.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/BaseMatrix.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { -/** - * @brief A layer for resizing a minibatch matrix h*w to h'*w' - * @note - * origin matrix height * width) - * resize matrix: (height * width / size) * size - */ -class ResizeLayer : public Layer { - public: - explicit ResizeLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback) override; -}; - -REGISTER_LAYER(resize, ResizeLayer); - -bool ResizeLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!Layer::init(layerMap, parameterMap)) return false; - CHECK_EQ(1U, inputLayers_.size()); - - setNeedSequenceInfo(false); - return true; -} - -void ResizeLayer::forward(PassType passType) { - Layer::forward(passType); - const Argument& input = getInput(0); - size_t height = input.value->getHeight(); - size_t width = input.value->getWidth(); - 
CHECK_EQ((height * width) % getSize(), 0UL); - - reserveOutput(height * width / getSize(), getSize()); - MatrixPtr tmp = - Matrix::create(output_.value->getData(), height, width, false, useGpu_); - tmp->assign(*input.value); -} - -void ResizeLayer::backward(const UpdateCallback& callback) { - const Argument& input = getInput(0); - size_t height = input.value->getHeight(); - size_t width = input.value->getWidth(); - - if (!input.grad) { - return; - } - - MatrixPtr tmp = Matrix::create(input.grad->getData(), - height * width / getSize(), - getSize(), - false, - useGpu_); - tmp->add(*output_.grad); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/RotateLayer.cpp b/paddle/legacy/gserver/layers/RotateLayer.cpp deleted file mode 100644 index f205d1a91949cc0eb541fefe426be3a2c0886140..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/RotateLayer.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "RotateLayer.h" - -namespace paddle { - -REGISTER_LAYER(rotate, RotateLayer); - -bool RotateLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 1UL); - height_ = config_.height(); - width_ = config_.width(); - CHECK_GT(height_, 0); - CHECK_GT(width_, 0); - return true; -} - -void RotateLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr input = getInputValue(0); - batchSize_ = input->getHeight(); - size_ = input->getWidth(); - CHECK_GE(size_, height_ * width_); - CHECK_EQ(size_ % (height_ * width_), 0) - << "total size_ is not dividable by (height_ * width_), i.e., " - << "channel number should be an integer"; - channels_ = size_ / (height_ * width_); - - resizeOutput(batchSize_, size_); - - MatrixPtr outV = getOutputValue(); - for (int b = 0; b < batchSize_; b++) { // for each input feat map - for (int c = 0; c < channels_; c++) { // for each feat channel - MatrixPtr inputSample = - Matrix::create(input->getData() + b * size_ + c * height_ * width_, - height_, - width_, - false, - useGpu_); - MatrixPtr outputSample = - Matrix::create(outV->getData() + b * size_ + c * height_ * width_, - width_, - height_, - false, - useGpu_); - inputSample->rotate(outputSample, false, true /* clock-wise */); - } - } - - if (getInputGrad(0)) { - zeroGrad(); - } -} - -void RotateLayer::backward(const UpdateCallback& callback) { - (void)callback; - - MatrixPtr outputGrad = getOutputGrad(); - if (outputGrad == NULL) { - return; - } - // the grad should be rotated in the reverse direction - MatrixPtr preGrad = getInputGrad(0); - - for (int b = 0; b < batchSize_; b++) { // for each input feat map - for (int c = 0; c < channels_; c++) { // for each feat channel - MatrixPtr inputSampleGrad = - Matrix::create(preGrad->getData() + b * size_ + c * height_ * width_, - height_, - width_, - false, - useGpu_); - MatrixPtr outputSampleGrad = Matrix::create( - 
outputGrad->getData() + b * size_ + c * height_ * width_, - width_, - height_, - false, - useGpu_); - MatrixPtr tmpGrad = nullptr; - outputSampleGrad->rotate(tmpGrad, true, false /* anti clock-wise */); - inputSampleGrad->add(*tmpGrad); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/RotateLayer.h b/paddle/legacy/gserver/layers/RotateLayer.h deleted file mode 100644 index 498e24372b8ca17c21ebecbe6a8c8b40217ab259..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/RotateLayer.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { -/** - * A layer for rotating a multi-channel feature map (M x N x C) in the spatial - * domain - * The rotation is 90 degrees in clock-wise for each channel - * \f[ - * y(j,i,:) = x(M-i-1,j,:) - * \f] - * where \f$x\f$ is (M x N x C) input, and \f$y\f$ is (N x M x C) output. 
- * - * The config file api is rotate_layer - * - */ - -class RotateLayer : public Layer { - public: - explicit RotateLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); - - private: - int batchSize_; - int size_; - int height_; - int width_; - int channels_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/RowConvLayer.cpp b/paddle/legacy/gserver/layers/RowConvLayer.cpp deleted file mode 100644 index 1961557dc2d2601091bb0e56fcd884d76d49bc0e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/RowConvLayer.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "RowConvLayer.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(row_conv, RowConvLayer); - -bool RowConvLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - contexLength_ = config_.inputs(0).row_conv_conf().context_length(); - - CHECK_EQ(inputLayers_.size(), 1UL); - weight_.reset(new Weight(contexLength_, getSize(), parameters_[0])); - createFunction(forward_, "RowConv", FuncConfig()); - createFunction(backward_, "RowConvGrad", FuncConfig()); - - return true; -} - -void RowConvLayer::forward(PassType passType) { - Layer::forward(passType); - MatrixPtr input = getInputValue(0); - size_t height = input->getHeight(); - size_t width = input->getWidth(); - CHECK_EQ(width, getSize()); - resetOutput(height, width); - - const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_); - MatrixPtr w = weight_->getW(); - wDims_ = TensorShape({w->getHeight(), w->getWidth()}); - - MatrixPtr outV = getOutputValue(); - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(0), *startPos); - inputs.addArg(*w, wDims_); - outputs.addArg(*getOutputValue(), *startPos, ADD_TO); - - { - REGISTER_TIMER_INFO("RowConvForward", getName().c_str()); - forward_[0]->calc(inputs, outputs); - } - - /* activation */ { - REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); - forwardActivation(); - } -} - -void RowConvLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { - REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); - backwardActivation(); - } - - const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_); - - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), *startPos); - inputs.addArg(*getInputValue(0), *startPos); - inputs.addArg(*weight_->getW(), wDims_); - - MatrixPtr inGrad = getInputGrad(0); - MatrixPtr wGrad = weight_->getWGrad(); - 
size_t h = getInputValue(0)->getHeight(); - size_t w = getInputValue(0)->getWidth(); - outputs.addArg( - inGrad ? (*inGrad) : *(Matrix::create(nullptr, h, w, false, useGpu_)), - *startPos, - ADD_TO); - outputs.addArg( - wGrad ? (*wGrad) - : *(Matrix::create(nullptr, contexLength_, w, false, useGpu_)), - wDims_, - ADD_TO); - - { - REGISTER_TIMER_INFO("RowConvBackward", getName().c_str()); - backward_[0]->calc(inputs, outputs); - } - - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weight_->getParameterPtr()->incUpdate(callback); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/RowConvLayer.h b/paddle/legacy/gserver/layers/RowConvLayer.h deleted file mode 100644 index 3b74df0b1af5caef1a1abd3d3c5b3ae3b67c429b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/RowConvLayer.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" - -namespace paddle { - -/** - * \brief Row Convolution Layer. - */ -class RowConvLayer : public Layer { - public: - explicit RowConvLayer(const LayerConfig& config) : Layer(config) {} - - ~RowConvLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - protected: - // Row convolution weight, context_lenght_ * fan_out. 
- // fan_out is the size of output feature. - std::unique_ptr weight_; - - // The step number to look ahead plus one equals contexLength_. - size_t contexLength_; - TensorShape wDims_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/RowL2NormLayer.cpp b/paddle/legacy/gserver/layers/RowL2NormLayer.cpp deleted file mode 100644 index d5e6e10a0276adb74ec31c13d9e8acc77414a85b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/RowL2NormLayer.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" - -namespace paddle { - -/** - * A layer for L2 normalization in each row, - * \f[ - * out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}} - * \f] - * where the size of \f$in\f$ is (batchSize x dataDim), - * and the size of \f$out\f$ is (batchSize x dataDim). 
- */ - -class RowL2NormLayer : public Layer { - protected: - MatrixPtr inSquare_; - MatrixPtr l2NormReciprocal_; - MatrixPtr dotSum_; - - public: - explicit RowL2NormLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(row_l2_norm, RowL2NormLayer); - -bool RowL2NormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 1U); - - return true; -} - -void RowL2NormLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV = getInputValue(0); - - /* malloc memory for the output_ if necessary */ - size_t batchSize = inV->getHeight(); - size_t dataDim = getSize(); - CHECK_EQ(dataDim, inV->getWidth()); - resetOutput(batchSize, dataDim); - MatrixPtr outV = getOutputValue(); - - Matrix::resizeOrCreate(inSquare_, batchSize, dataDim, false, useGpu_); - inV->square2(*inSquare_); - Matrix::resizeOrCreate(l2NormReciprocal_, batchSize, 1, false, useGpu_); - inSquare_->rowSum(*l2NormReciprocal_); - l2NormReciprocal_->sqrt2(*l2NormReciprocal_); - l2NormReciprocal_->scalarDiv(*l2NormReciprocal_, 1.0); - outV->rowScale(0, *inV, *l2NormReciprocal_); -} - -void RowL2NormLayer::backward(const UpdateCallback& callback) { - MatrixPtr inV = getInputValue(0); - MatrixPtr inG = getInputGrad(0); - MatrixPtr outV = getOutputValue(); - MatrixPtr outG = getOutputGrad(); - size_t batchSize = inV->getHeight(); - - // inG[ij] += outG[ij] / l2NormReciprocal - // inG[ij] += -inV[ij] * l2NormReciprocal * l2NormReciprocal * DotMul(outG[i], - // inV[i]) - if (inG) { - Matrix::resizeOrCreate(dotSum_, batchSize, 1, false, useGpu_); - dotSum_->zeroMem(); - dotSum_->rowDotMul(0, *outG, *outV); - dotSum_->dotMul(*dotSum_, *l2NormReciprocal_); - dotSum_->dotMul(*dotSum_, 
*l2NormReciprocal_); - inSquare_->rowScale(0, *inV, *dotSum_); - inG->sub(*inSquare_); - inG->addRowScale(0, *outG, *l2NormReciprocal_); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SamplingIdLayer.cpp b/paddle/legacy/gserver/layers/SamplingIdLayer.cpp deleted file mode 100644 index dbce63588126c012e3b9713e8be749e0001ddec7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SamplingIdLayer.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "Layer.h" - -namespace paddle { - -/** - * @brief A layer for sampling id from multinomial distribution from the - * input layer. Sampling one id for one sample. The result is stored in - * output_.ids. - * - * The config file api is sampling_id_layer. - */ -class SamplingIdLayer : public Layer { - /// Produces random floating-point values, uniformly distributed on [0, 1). 
- std::uniform_real_distribution rand1_; - std::vector tmpCpuInput_; - - public: - explicit SamplingIdLayer(const LayerConfig& config) - : Layer(config), rand1_(0, 1) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override { - bool ret = Layer::init(layerMap, parameterMap); - CHECK_EQ(1UL, inputLayers_.size()); - if (useGpu_) { - tmpCpuInput_.reserve(inputLayers_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_.push_back(Argument()); - } - } - return ret; - } - - void forward(PassType passType) override { - Layer::forward(passType); - if (useGpu_) { - for (size_t i = 0; i < inputLayers_.size(); i++) { - tmpCpuInput_[i].resizeAndCopyFrom( - getInput(i), false, HPPL_STREAM_DEFAULT); - } - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - forwardImp(tmpCpuInput_[0]); - } else { - forwardImp(getInput(0)); - } - } - - void forwardImp(const Argument& input) { - size_t batchSize = input.getBatchSize(); - IVector::resizeOrCreate(output_.ids, batchSize, useGpu_); - real* buf = input.value->getData(); - int dim = input.value->getWidth(); - std::vector ids(batchSize); - auto& reng = ThreadLocalRandomEngine::get(); - for (size_t i = 0; i < batchSize; ++i) { - double r = rand1_(reng); - int id = dim - 1; - for (int j = 0; j < dim; ++j) { - if ((r -= buf[i * dim + j]) < 0) { - id = j; - break; - } - } - ids[i] = id; - } - output_.ids->copyFrom(ids.data(), batchSize); - } - - void backward(const UpdateCallback& callback) override {} -}; - -REGISTER_LAYER(sampling_id, SamplingIdLayer); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ScaleShiftLayer.cpp b/paddle/legacy/gserver/layers/ScaleShiftLayer.cpp deleted file mode 100644 index 8af78a2e27d2b50572f8bdd6e98696f3d1967eb1..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ScaleShiftLayer.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" - -namespace paddle { - -/** - * A layer applies a linear transformation to each element in each row of - * the input matrix. For each element, the layer first re-scale it and then - * adds a bias to it. - * - * \f[ - * y = wx + b - * \f] - * - * Here, w is the scale and b is the bias. Both w and b are trainable scalars. - * - */ - -class ScaleShiftLayer : public Layer { - protected: - std::unique_ptr scale_; - std::unique_ptr offset_; - - public: - explicit ScaleShiftLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(scale_shift, ScaleShiftLayer); - -bool ScaleShiftLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - CHECK_EQ(inputLayers_.size(), 1U); - scale_.reset(new Weight(1, 1, parameters_[0])); - if (biasParameter_.get() != NULL) { - offset_ = std::unique_ptr(new Weight(1, 1, biasParameter_)); - } - return true; -} - -void ScaleShiftLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV = getInputValue(0); - resetOutput(inV->getHeight(), inV->getWidth()); - MatrixPtr outV = getOutputValue(); - real scaleValue = scale_->getW()->getElement(0, 0); - outV->mulScalar(*inV, scaleValue); - if (offset_) { - 
real offsetValue = offset_->getW()->getElement(0, 0); - outV->add(offsetValue); - } -} - -void ScaleShiftLayer::backward(const UpdateCallback& callback) { - MatrixPtr inV = getInputValue(0); - MatrixPtr inG = getInputGrad(0); - MatrixPtr outV = getOutputValue(); - MatrixPtr outG = getOutputGrad(); - - /* Calculate the parameter gradient for the current layer */ - if (scale_->getWGrad()) { - MatrixPtr rowSumMtx; - Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_); - // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij} - rowSumMtx->sumOfProducts( - /* b= */ *inV, /* c= */ *outG, /* scaleSum= */ 1, /* scaleDest= */ 0.); - // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji} - scale_->getWGrad()->sumCols( - /* b= */ *rowSumMtx, /* scaleSum= */ 1., /* scaleDest= */ 1.); - scale_->getParameterPtr()->incUpdate(callback); - } - if (offset_ && offset_->getWGrad()) { - MatrixPtr rowSumMtx; - Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_); - rowSumMtx->sumRows(*outG, 1., 0.); - offset_->getWGrad()->sumCols(*rowSumMtx, 1., 1.); - offset_->getParameterPtr()->incUpdate(callback); - } - - /* Calculate the input layers error */ - if (inG) { - real scaleValue = scale_->getW()->getElement(0, 0); - inG->add(*outG, scaleValue); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ScaleSubRegionLayer.cpp b/paddle/legacy/gserver/layers/ScaleSubRegionLayer.cpp deleted file mode 100644 index 70d44d2a7ef25df64beb2c861692436d842dac02..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ScaleSubRegionLayer.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ScaleSubRegionLayer.h" -#include "paddle/legacy/utils/Stat.h" -namespace paddle { - -REGISTER_LAYER(scale_sub_region, ScaleSubRegionLayer); - -bool ScaleSubRegionLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - CHECK_EQ(static_cast(inputLayers_.size()), 2); - auto& conf = config_.inputs(0).scale_sub_region_conf(); - value_ = conf.value(); - - createFunction(forward_, "ScaleSubRegion", FuncConfig().set("value", value_)); - createFunction( - backward_, "ScaleSubRegionGrad", FuncConfig().set("value", value_)); - - return true; -} - -void ScaleSubRegionLayer::forward(PassType passType) { - Layer::forward(passType); - auto in0 = getInput(0); - imgH_ = in0.getFrameHeight(); - imgW_ = in0.getFrameWidth(); - if (imgH_ == 0 || imgW_ == 0) { - auto& conf = config_.inputs(0).scale_sub_region_conf(); - imgH_ = conf.image_conf().img_size_y(); - imgW_ = conf.image_conf().img_size(); - } - MatrixPtr imgV = in0.value; - size_t batchSize = imgV->getHeight(); - size_t spatialSize = imgH_ * imgW_; - channelsNum_ = imgV->getWidth() / spatialSize; - shape_ = TensorShape({batchSize, channelsNum_, imgH_, imgW_}); - - resetOutput(batchSize, imgV->getWidth()); - auto& out = getOutput(); - out.setFrameHeight(imgH_); - out.setFrameWidth(imgW_); - - MatrixPtr indicesV = getInputValue(1); - indicesShape_ = TensorShape({batchSize, 6}); - - REGISTER_TIMER_INFO("ScaleSubRegionForward", getName().c_str()); - BufferArgs inArgs; - BufferArgs outArgs; - inArgs.addArg(*imgV, shape_); - inArgs.addArg(*indicesV, indicesShape_); 
- outArgs.addArg(*out.value, shape_, ASSIGN_TO); - forward_[0]->calc(inArgs, outArgs); -} - -void ScaleSubRegionLayer::backward(const UpdateCallback& callback) { - REGISTER_TIMER_INFO("ScaleSubRegionBackward", getName().c_str()); - BufferArgs inArgs; - BufferArgs outArgs; - inArgs.addArg(*getOutputGrad(), shape_); - inArgs.addArg(*getInputValue(1), indicesShape_); - outArgs.addArg(*getInputGrad(0), shape_, ADD_TO); - backward_[0]->calc(inArgs, outArgs); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ScaleSubRegionLayer.h b/paddle/legacy/gserver/layers/ScaleSubRegionLayer.h deleted file mode 100644 index fe431698bc6cd5e52e2c545756b40be8b307e644..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ScaleSubRegionLayer.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" - -namespace paddle { - -/** - * \brief For each instance, this layer can be used to multiply a value to a - * specified sub continuous region. By providing start index and end - * index for C/H/W, you can specify the location and shape of the - * region. - * - * input_0: Input value. - * input_1: Indices value to specify the location an shape of the - * region. 
- */ -class ScaleSubRegionLayer : public Layer { - public: - explicit ScaleSubRegionLayer(const LayerConfig& config) : Layer(config) {} - - ~ScaleSubRegionLayer() {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - - void backward(const UpdateCallback& callback = nullptr); - - protected: - TensorShape shape_; - TensorShape indicesShape_; - size_t imgH_; - size_t imgW_; - size_t channelsNum_; - real value_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ScalingLayer.cpp b/paddle/legacy/gserver/layers/ScalingLayer.cpp deleted file mode 100644 index a8286b6614c3cdfbd720d0719f939018f6ae9579..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ScalingLayer.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * A layer for each row of a matrix, multiplying with a element of a vector, - * which is used in NEURAL TURING MACHINE. - * \f[ - * y.row[i] = w[i] * x.row[i] - * \f] - * where \f$x\f$ is (batchSize x dataDim) input, \f$w\f$ is - * (batchSize x 1) weight vector, and \f$y\f$ is (batchSize x dataDim) output. - * - * The config file api is scaling_layer. 
- */ - -class ScalingLayer : public Layer { - public: - explicit ScalingLayer(const LayerConfig& config) : Layer(config) {} - - ~ScalingLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(scaling, ScalingLayer); - -bool ScalingLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2U); - - return true; -} - -void ScalingLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr weightV = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - - size_t batchSize = inV1->getHeight(); - size_t dataDim = inV1->getWidth(); - - CHECK_EQ(dataDim, getSize()); - CHECK_EQ(weightV->getWidth(), 1U); - CHECK_EQ(weightV->getHeight(), batchSize); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - resetOutput(batchSize, dataDim); - } - - MatrixPtr outV = getOutputValue(); - { - REGISTER_TIMER_INFO("FwScalingTimer", getName().c_str()); - // outV += inV1 * weight - outV->addRowScale(0, *inV1, *weightV); - } -} - -void ScalingLayer::backward(const UpdateCallback& callback) { - MatrixPtr weightV = getInputValue(0); - MatrixPtr inV1 = getInputValue(1); - MatrixPtr inG0 = getInputGrad(0); - MatrixPtr inG1 = getInputGrad(1); - MatrixPtr outG = getOutputGrad(); - - { - REGISTER_TIMER_INFO("BwScalingTimer", getName().c_str()); - - if (inG0) { - // inG0 += outG .* inV1 - inG0->rowDotMul(0, *outG, *inV1); - } - - if (inG1) { - // inG1 += outG * weight; - inG1->addRowScale(0, *outG, *weightV); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ScalingProjection.cpp b/paddle/legacy/gserver/layers/ScalingProjection.cpp deleted file mode 100644 index 4d871cafc4d0194a61044d76a766236209c33d47..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/gserver/layers/ScalingProjection.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Projection.h" - -namespace paddle { - -class ScalingProjection : public Projection { - public: - ScalingProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - CHECK_EQ(parameter->getSize(), 1UL); - weight_.reset(new Weight(1, 1, parameter)); - } - - void forward() { - CHECK(in_->value); - out_->value->add(*in_->value, weight_->getW()->getElement(0, 0)); - } - - void backward(const UpdateCallback& callback) { - if (weight_->getWGrad()) { - auto sum = Matrix::create(in_->value->getHeight(), 1, false, useGpu_); - sum->sumOfProducts(*in_->value, - *out_->grad, - /* scaleSum= */ 1, - /* scaleDest= */ 0); - weight_->getWGrad()->sumCols(*sum, - /* scaleSum= */ 1, - /* scaleDest= */ 1); - parameter_->incUpdate(callback); - } - if (in_->grad) { - in_->grad->add(*out_->grad, weight_->getW()->getElement(0, 0)); - } - } - - protected: - std::unique_ptr weight_; -}; - -REGISTER_PROJECTION(scaling, ScalingProjection); - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.cpp b/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.cpp deleted file mode 100644 index 72fb06814884cc2bcca2c600105077d8cf1459c5..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.cpp +++ /dev/null @@ -1,336 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "SelectiveFullyConnectedLayer.h" -#include -#include -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(selective_fc, SelectiveFullyConnectedLayer); - -bool SelectiveFullyConnectedLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - inputNum_ = inputLayers_.size(); - if (config_.has_selected_colums()) { - inputNum_ -= 1; - } - for (size_t i = 0; i < inputNum_; i++) { - size_t height = inputLayers_[i]->getSize(); - size_t width = getSize(); - // NOTE weight is transpoed - weights_.emplace_back(new Weight(width, height, parameters_[i])); - } - - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - - fullOutput_ = false; - - return true; -} - -void SelectiveFullyConnectedLayer::prefetch() {} - -void SelectiveFullyConnectedLayer::reserveOutput(size_t height, - size_t width, - size_t nnz) { - bool flag = (passType_ == PASS_TEST && - config_.selective_fc_pass_generation() && !fullOutput_); - SetDevice device(output_.deviceId); - if (flag) { - // output_.value is sparse matrix - if (dynamic_cast(output_.value.get()) || - 
dynamic_cast(output_.value.get())) { - output_.value = nullptr; - } - Matrix::resizeOrCreateSparseMatrix(output_.value, - height, - width, - nnz, - FLOAT_VALUE, - SPARSE_CSR, - /*trans=*/false, - /*useGpu=*/useGpu_); - output_.value->copyFrom(*selCols_); - interOutput_ = output_.value; - } else { - if (fullOutput_) { - // output_.value is dense matrix - if (dynamic_cast(output_.value.get()) || - dynamic_cast(output_.value.get())) { - output_.value = nullptr; - } - Matrix::resizeOrCreate(output_.value, - height, - width, - /*trans=*/false, - /*useGpu=*/useGpu_); - interOutput_ = output_.value; - } else { - // output_.value is dense matrix, but width = nnz /height - CHECK_EQ(nnz % height, 0U); - CHECK(nnz / height); - Matrix::resizeOrCreate(output_.value, - height, - nnz / height, - /*trans=*/false, - /*useGpu=*/useGpu_); - interOutput_ = Matrix::createSparseMatrix(output_.value->getData(), - selCols_->getRows(), - selCols_->getCols(), - height, - width, - nnz, - FLOAT_VALUE, - SPARSE_CSR, - /*trans=*/false, - /*useGpu=*/useGpu_); - } - } - interOutput_->zeroMem(); - - if (passType_ != PASS_TEST && needGradient()) { - CHECK_EQ(nnz % height, 0U) << "during training, each sample must have a " - "same number of selected columns."; - CHECK(nnz / height) - << "during training, " - "each sample must have at least one column selected."; - Matrix::resizeOrCreate(output_.grad, - height, - nnz / height, - /*trans=*/false, - /*useGpu=*/useGpu_); - output_.grad->zeroMem(); - } -} - -void SelectiveFullyConnectedLayer::forward(PassType passType) { - REGISTER_TIMER("selective_fc.forward"); - Layer::forward(passType); - - getSelectiveCols(); - size_t height = getInput(0).getBatchSize(); - size_t width = getSize(); - size_t nnz = height * width; - if (!fullOutput_) { - CHECK(selCols_); - CHECK(height == selCols_->getHeight()); - CHECK(width == selCols_->getWidth()); - nnz = selCols_->getElementCnt(); - } - - // Layer::ResetOutput(), here we set outV/outG as SparseMatrix manually - // 
this outV should be used as input of MaxIdLayer and softmax activation - reserveOutput(height, width, nnz); - - bool flag = true; - for (size_t i = 0; i < inputNum_; i++) { - MatrixPtr input = getInputValue(i); - MatrixPtr weight = weights_[i]->getW(); - size_t hsize = input->getHeight(); - size_t wsize = weight->getHeight(); - real scaleT = i == 0 ? real(0) : real(1); - - flag = nnz < (hsize * wsize) * config_.selective_fc_full_mul_ratio() && - !fullOutput_; - if (flag) { - // if the indecies are highly sparse, - // manully compute the multiplication of - // the input vector and the selected rows. - REGISTER_TIMER("selective.plain"); - interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT); - } else { - // if the indecies is not sparse enough, - // use full mul instead - REGISTER_TIMER("selective.mul"); - if (fullOutput_) { - interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT); - } else { - Matrix::resizeOrCreate(mmat_, - hsize, - wsize, - /*trans=*/false, - /*useGpu=*/useGpu_); - mmat_->mul(*input, *weight->getTranspose()); - interOutput_->add3(mmat_); - } - } - } - - if (biases_) { - interOutput_->addBias(*(biases_->getW()), 1); - } - - flag = (passType_ == PASS_TEST && config_.selective_fc_pass_generation() && - !fullOutput_); - if (flag) { - // during generation, output of this layer is a sparse csr matrix, - // which is probably the input of maxid layer - // if the model is trained with multi-class-cross-entroy-with-selfnorm, - // activiation of this layer should be exponential, not softmax. - - Argument arg; - arg.value = Matrix::create(interOutput_->getData(), - 1, - nnz, - /*trans=*/false, - /*useGpu=*/useGpu_); - //! TODO(yuyang18): Why we cannot invoke forwardActivation here? - activation_->forward(arg).check(); - } else /* train and test in train, not generating */ { - // during training, this layer output value is *Matrix*, which is input of - // eg. 
multi-class-cross-entropy - - // while training, every sample has a equal number of selected - // columns to be activated. - // note indices of multi-class-cross-entropy need to be remapped - // to this index. - // e.g. sample = [1,3,5] and 3 is gold, then label is 1 - - forwardActivation(); - } -} - -void SelectiveFullyConnectedLayer::backward(const UpdateCallback& callback) { - backwardActivation(); - MatrixPtr oGrad = getOutputGrad(); - if (!fullOutput_) { - interOutGrad_ = Matrix::createSparseMatrix(oGrad->getData(), - interOutput_->getRows(), - interOutput_->getCols(), - interOutput_->getHeight(), - interOutput_->getWidth(), - interOutput_->getElementCnt(), - FLOAT_VALUE, - SPARSE_CSR, - /*trans=*/false, - /*useGpu=*/useGpu_); - } else { - interOutGrad_ = Matrix::create(oGrad->getData(), - oGrad->getHeight(), - oGrad->getWidth(), - /*trans=*/false, - /*useGpu=*/useGpu_); - } - - if (biases_ && biases_->getWGrad()) { - REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str()); - biases_->getWGrad()->collectBias(*interOutGrad_, 1); - biases_->getParameterPtr()->incUpdate(callback); - } - - // backward is different from FullyConnectedLayer - // because the weight is transposed - for (size_t i = 0; i < inputNum_; i++) { - AsyncGpuBlock block; - MatrixPtr preGrad = getInputGrad(i); - if (preGrad) { - REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); - preGrad->mul(*interOutGrad_, *weights_[i]->getW(), 1, 1); - } - - MatrixPtr wGrad = weights_[i]->getWGrad(); - if (wGrad) { - REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); - MatrixPtr input = getInputValue(i); - wGrad->mul(*interOutGrad_->getTranspose(), *input, 1, 1); - } - - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weights_[i]->getParameterPtr()->incUpdate(callback); - } - } -} - -void paddle::SelectiveFullyConnectedLayer::fillSelectiveData( - const std::shared_ptr>>& candidates) { - if (candidates == nullptr) { - fillFullySelectiveData(); - return; - } - - size_t sampleNum = 
candidates->size(); - size_t outputWidth = getSize(); - size_t nnz = - std::accumulate(candidates->begin(), - candidates->end(), - 0UL, - [](size_t a, const std::pair& arr) { - return a + arr.second; - }); - - Matrix::resizeOrCreateSparseMatrix(this->cpuSelCols_, - sampleNum, - outputWidth, - nnz, - NO_VALUE, - SPARSE_CSR, - false, - false); - CHECK(this->cpuSelCols_ != nullptr); - CpuSparseMatrixPtr selCols = - std::dynamic_pointer_cast(cpuSelCols_); - int* rowOffsets = selCols->getRows(); - int* colIndices = selCols->getCols(); - - rowOffsets[0] = 0; - int idx = 0; - for (size_t i = 0; i < sampleNum; ++i) { - if ((*candidates)[i].second > 0) { - rowOffsets[i + 1] = rowOffsets[i] + (*candidates)[i].second; - for (size_t j = 0; j < (*candidates)[i].second; ++j) { - colIndices[idx] = (*candidates)[i].first[j]; - idx++; - } - } else { - rowOffsets[i + 1] = rowOffsets[i]; - } - } - - CHECK_EQ(static_cast(rowOffsets[sampleNum]), nnz); - if (!useGpu_) { - this->selCols_ = this->cpuSelCols_; - } else { - Matrix::resizeOrCreateSparseMatrix(this->selCols_, - sampleNum, - outputWidth, - nnz, - NO_VALUE, - SPARSE_CSR, - false, - true); - this->selCols_->copyFrom(*cpuSelCols_, HPPL_STREAM_1); - hl_stream_synchronize(HPPL_STREAM_1); - } - - fullOutput_ = false; -} - -void paddle::SelectiveFullyConnectedLayer::getSelectiveCols() { - if (config_.has_selected_colums()) { - this->selCols_ = inputLayers_[inputNum_]->getOutputValue(); - fullOutput_ = false; - } else if (!config_.selective_fc_pass_generation() || selCols_ == nullptr) { - this->fillFullySelectiveData(); - } // else selCols_ is initialized by fillSelectiveData -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.h b/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.h deleted file mode 100644 index 3ba04d9b2ae208eda021a451e94856d9993dc126..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.h +++ /dev/null @@ 
-1,103 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -/** - * @brief The SelectiveFullyConnectedLayer class - * - * SelectiveFullyConnectedLayer differs from FullyConnectedLayer by that it - * requires an additional input to indicate several selected columns, and only - * compute the multiplications between the input matrices and the selected - * columns of the parameter matrices of this layer. If the selected columns is - * not specified, SelectiveFullyConnected layer acts exactly like - * FullyConnectedLayer. - * - * The config file api is selective_fc_layer. - */ -class SelectiveFullyConnectedLayer : public Layer { - protected: - WeightList weights_; - std::unique_ptr biases_; - - private: - /** - * Get selected columns each forward. - */ - void getSelectiveCols(); - - MatrixPtr mmat_; - /// cpuSelCols_ is a CpuSparseMatrix, used to save selected columns. - MatrixPtr cpuSelCols_; - /// CpuSparseMatrix or GpuSparseMatrix. In CPU mode, selCols_ points - /// to cpuSelCols_. - MatrixPtr selCols_; - size_t inputNum_; - - /// interOutput_ shared same memory with output_.value. 
- MatrixPtr interOutput_; - - /// if fullOutput_ is false, interOutGrad_ sparse matrix - MatrixPtr interOutGrad_; - - /// if true, means output_.value is the same as Fc Layer - bool fullOutput_; - - public: - explicit SelectiveFullyConnectedLayer(const LayerConfig& config) - : Layer(config), selCols_(nullptr) {} - - ~SelectiveFullyConnectedLayer() {} - void prefetch() override; - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - Weight& getWeight(int idx) { return *weights_[idx]; } - - /** - * @brief Resize the output matrix size. - * And reset value to zero - */ - void reserveOutput(size_t height, size_t width, size_t nnz); - - /** - * @brief Fill candidates to select several activations as output. - * @param candidates specifies several selected columns of the parameter - * matrices of this layer. - * Multiplications only between the input matrices and the selected columns - * are computed. - * If the candidates is a nullptr, selective fc layer acts exactly like the - * fully connected layer. - * @note CURRENTLY, THIS METHOD IS ONLY USED FOR BEAM SEARCH - */ - void fillSelectiveData( - const std::shared_ptr>>& candidates); - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - private: - /** - * @brief Make SelectiveFC act as FullyConnectedLayer - */ - void fillFullySelectiveData() { fullOutput_ = true; } -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SequenceConcatLayer.cpp b/paddle/legacy/gserver/layers/SequenceConcatLayer.cpp deleted file mode 100644 index 7b598e11acde533564f6eda49d78ea8df99a5056..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SequenceConcatLayer.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * A layer for concatenating the first sequence with the second sequence - * Input: two sequences each containing the same number of instances - * seq1 = [a1, a2, ..., an] - * seq2 = [b1, b2, ..., bn] - * Output: a concatenated sequence of the two input sequences - * out = [a1, b1, a2, b2, ..., an, bn] - */ - -class SequenceConcatLayer : public Layer { - protected: - std::unique_ptr biases_; - - public: - explicit SequenceConcatLayer(const LayerConfig& config) : Layer(config) {} - - ~SequenceConcatLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(seqconcat, SequenceConcatLayer); - -bool SequenceConcatLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - // sequene concatenation layer should have exactly 2 inputs - CHECK_EQ(2U, inputLayers_.size()); - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - - setNeedSequenceInfo(false); - return true; -} - -void SequenceConcatLayer::forward(PassType passType) { - Layer::forward(passType); - - size_t dim = getSize(); - - const Argument& input1 = getInput(0); - size_t numSequences1 = 
input1.getNumSequences(); - auto startPositions1 = input1.sequenceStartPositions->getVector(false); - - const Argument& input2 = getInput(1); - size_t numSequences2 = input2.getNumSequences(); - auto startPositions2 = input2.sequenceStartPositions->getVector(false); - - CHECK_EQ(dim, input1.value->getWidth()); - CHECK_EQ(startPositions1->getData()[numSequences1], input1.getBatchSize()); - CHECK_EQ(numSequences1, startPositions1->getSize() - 1); - - CHECK_EQ(dim, input2.value->getWidth()); - CHECK_EQ(startPositions2->getData()[numSequences2], input2.getBatchSize()); - CHECK_EQ(numSequences2, startPositions2->getSize() - 1); - - CHECK_EQ(numSequences1, numSequences2); - - MatrixPtr inputValue1 = getInputValue(0); - MatrixPtr inputValue2 = getInputValue(1); - - // reset output - reserveOutput(inputValue1->getHeight() + inputValue2->getHeight(), dim); - - MatrixPtr outputValue = getOutputValue(); - - const int* starts1 = startPositions1->getData(); - const int* starts2 = startPositions2->getData(); - - { - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SequenceConcatLayerForward", getName().c_str()); - - size_t offset = 0; - size_t leftNumIns = 0; - size_t rightNumIns = 0; - for (size_t seqId = 0; seqId < numSequences1; ++seqId) { - leftNumIns = starts1[seqId + 1] - starts1[seqId]; - outputValue->subMatrix(offset, leftNumIns) - ->assign(*(inputValue1->subMatrix(starts1[seqId], leftNumIns))); - offset += leftNumIns; - - rightNumIns = starts2[seqId + 1] - starts2[seqId]; - outputValue->subMatrix(offset, rightNumIns) - ->assign(*(inputValue2->subMatrix(starts2[seqId], rightNumIns))); - offset += rightNumIns; - } - - // modify the sequenceStartPositions - ICpuGpuVector::resizeOrCreate( - output_.sequenceStartPositions, numSequences1 + 1, false); - - int* tgtBuf = output_.sequenceStartPositions->getMutableData(false); - - for (size_t seqId = 0; seqId < numSequences1 + 1; ++seqId) { - tgtBuf[seqId] = starts1[seqId] + starts2[seqId]; - } - } - - if (biases_.get() != 
NULL) { - MatrixPtr outV = getOutputValue(); - outV->addBias(*(biases_->getW()), 1); - } - - /* activation */ - forwardActivation(); -} - -void SequenceConcatLayer::backward(const UpdateCallback& callback) { - /* activation */ - backwardActivation(); - - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - - // Increasing the number of gradient - biases_->getParameterPtr()->incUpdate(callback); - } - - MatrixPtr inputGrad1 = getInputGrad(0); - MatrixPtr inputGrad2 = getInputGrad(1); - MatrixPtr outputGrad = getOutputGrad(); - auto startPositions1 = getInput(0).sequenceStartPositions->getVector(false); - auto startPositions2 = getInput(1).sequenceStartPositions->getVector(false); - - size_t numSequences1 = startPositions1->getSize() - 1; - size_t numSequences2 = startPositions2->getSize() - 1; - - CHECK_EQ(numSequences1, numSequences2); - - const int* starts1 = startPositions1->getData(); - const int* starts2 = startPositions2->getData(); - - { - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SequenceConcatLayerBackward", getName().c_str()); - - size_t offset = 0; - size_t leftNumIns = 0; - size_t rightNumIns = 0; - for (size_t seqId = 0; seqId < numSequences1; ++seqId) { - leftNumIns = starts1[seqId + 1] - starts1[seqId]; - if (inputGrad1) { - inputGrad1->subMatrix(starts1[seqId], leftNumIns) - ->add(*(outputGrad->subMatrix(offset, leftNumIns))); - } - offset += leftNumIns; - - rightNumIns = starts2[seqId + 1] - starts2[seqId]; - if (inputGrad2) { - inputGrad2->subMatrix(starts2[seqId], rightNumIns) - ->add(*(outputGrad->subMatrix(offset, rightNumIns))); - } - offset += rightNumIns; - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/legacy/gserver/layers/SequenceLastInstanceLayer.cpp deleted file mode 100644 index 8735d71ba372de894c9852229ed8c77537792ea0..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/gserver/layers/SequenceLastInstanceLayer.cpp +++ /dev/null @@ -1,118 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/utils/Logging.h" - -#include "SequencePoolLayer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * A layer for extracting the last instance of the input sequence. - * Input: a sequence - * If SequenceLevel = kNonseq: - * Output: a sequence containing only the last instance of the input sequence - * If stride_ > 0: - * Output: a shorten sequence. Stride is the step size by which we slide a - * window upon the input sequence, and getting last instance - * operation is then applied to each interval independently. - * If SequenceLevel = kSeq: - * Check input sequence must has sub-sequence - * Output: a sequence containing only the last instance of each sub-sequence - * of the input sequence - * - * The config file api is last_seq and first_seq. 
- */ - -class SequenceLastInstanceLayer : public SequencePoolLayer { - protected: - MatrixPtr tmpSrc_; - MatrixPtr tmpDest_; - std::vector instanceIds_; - - public: - explicit SequenceLastInstanceLayer(const LayerConfig& config) - : SequencePoolLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer); - -bool SequenceLastInstanceLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - SequencePoolLayer::init(layerMap, parameterMap); - reversed_ = config_.select_first(); - - tmpSrc_ = - Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_); - tmpDest_ = - Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_); - - return true; -} - -void SequenceLastInstanceLayer::forward(PassType passType) { - SequencePoolLayer::forward(passType); - - auto starts = startPositions_->getData(false); - MatrixPtr inputValue = getInputValue(0); - MatrixPtr outputValue = getOutputValue(); - - { - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str()); - - instanceIds_.clear(); - for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) { - int insId = reversed_ ? 
starts[seqId] : starts[seqId + 1] - 1; - instanceIds_.push_back(insId); - - outputValue->subMatrix(seqId, 1, tmpDest_) - ->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_))); - } - } - - if (biases_.get() != NULL) { - outputValue->addBias(*(biases_->getW()), 1); - } - - /* activation, should set to 'linear' in most cases */ - forwardActivation(); -} - -void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) { - SequencePoolLayer::backward(callback); - - MatrixPtr inputGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - - if (inputGrad) { - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SequenceLastInstanceLayerBackward", getName().c_str()); - - for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) { - inputGrad->subMatrix(instanceIds_[seqId], 1, tmpDest_) - ->add(*(outputGrad->subMatrix(seqId, 1, tmpSrc_))); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SequencePoolLayer.cpp b/paddle/legacy/gserver/layers/SequencePoolLayer.cpp deleted file mode 100644 index 243b795db428ede1fbb39a5054485a198a14e00c..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SequencePoolLayer.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "SequencePoolLayer.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -bool SequencePoolLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - // seqlastins/max/average layer should have exactly 1 input - CHECK_EQ(1U, inputLayers_.size()); - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - // transform to which sequence type - if (config_.trans_type() == "non-seq") { - type_ = kNonSeq; - } else if (config_.trans_type() == "seq") { - type_ = kSeq; - } else { - LOG(FATAL) << "Unknown trans_type: " << config_.trans_type(); - } - stride_ = config_.seq_pool_stride(); - setNeedSequenceInfo(false); - return true; -} - -void SequencePoolLayer::forward(PassType passType) { - Layer::forward(passType); - - const Argument& input = getInput(0); - CHECK(input.hasSeq() || input.hasSubseq()) - << "Input should be a sequence or subsequence for layer " << getName(); - - newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences(); - size_t dim = getSize(); - // check - CHECK_EQ(dim, input.value->getWidth()); - startPositions_ = - type_ ? input.subSequenceStartPositions : input.sequenceStartPositions; - auto starts = startPositions_->getVector(false); - CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize()); - CHECK_EQ(newBatchSize_, starts->getSize() - 1); - - /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq, - * thus, in this case, output_ has no sequenceStartPositions. - * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this - * case, we should compute the new sequenceStartPositions. 
- */ - if (type_) { - CHECK(input.subSequenceStartPositions) - << "when trans_type = seq, input must hasSubseq"; - output_.degradeSequence(input); - } - if (stride_ > 0) { - CHECK_EQ(input.hasSubseq(), 0UL) - << "sequence stride pooling is invalid for hasSubseq now"; - output_.poolSequenceWithStride(input, stride_, &startPositions_, reversed_); - newBatchSize_ = startPositions_->getSize() - 1; - } - - resetOutput(newBatchSize_, dim); -} - -void SequencePoolLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { backwardActivation(); } - - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - - // Increasing the number of gradient - biases_->getParameterPtr()->incUpdate(callback); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SequencePoolLayer.h b/paddle/legacy/gserver/layers/SequencePoolLayer.h deleted file mode 100644 index 1c019b313093f4ac717e0fc57a9aa798e2951580..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SequencePoolLayer.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { -/** - * A base layer for SequenceLastInstanceLayer/AverageLayer/MaxLayer. - * - * Input: one or more sequences. Each sequence contains some instances. 
- * If SequenceLevel = kNonSeq: - * Output: output size is the number of input sequences (NOT input instances) - * output[i] = seqlastin/average/max_{for each instance in this - * sequence}{input[i]} - * If stride_ > 0: - * Check input sequence must not have sub-sequence - * Output: a shorten sequence. Stride is the step size by which we slide - * a window upon the input sequence, and the pooling operation - * is then applied to each interval independently. - * If SequenceLevel = kSeq: - * Check input sequence must has sub-sequence - * Output: output size is the number of input sub-sequences - * output[i] = seqlastin/average/max_{for each instance in this - * sub-sequence}{input[i]} - * - * The config file api is pooling_layer. - */ - -class SequencePoolLayer : public Layer { - protected: - int type_; - std::unique_ptr biases_; - enum SequenceLevel { kNonSeq = 0, kSeq = 1 }; - size_t newBatchSize_; - ICpuGpuVectorPtr startPositions_; - int stride_; - // Whether the input sequence is reversed or not. - bool reversed_ = false; - - public: - explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SequenceReshapeLayer.cpp b/paddle/legacy/gserver/layers/SequenceReshapeLayer.cpp deleted file mode 100644 index e3d40cab50af1d6eafe28331cdd481ee2b187a56..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SequenceReshapeLayer.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * A layer for reshaping the sequence. Assume the input sequence has - * T instances, the dimension of each instance is M, and the input - * reshape_dim is N, then the output sequence has T*M/N instances, - * the dimension of each instance is N. - * - * Note that T*M/N must be an integer. - */ - -class SequenceReshapeLayer : public Layer { - protected: - std::unique_ptr biases_; - - MatrixPtr reshapedOutputGrad; - - public: - explicit SequenceReshapeLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(seqreshape, SequenceReshapeLayer); - -bool SequenceReshapeLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(1U, inputLayers_.size()); - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - setNeedSequenceInfo(false); - return true; -} - -void SequenceReshapeLayer::forward(PassType passType) { - Layer::forward(passType); - - const Argument& input = getInput(0); - - size_t inDim = input.value->getWidth(); - size_t outDim = getSize(); - - size_t numSequences = 
input.getNumSequences(); - - // by default, we assume each instance as a sequence - IVectorPtr seqStarts; - IVector::resizeOrCreate(seqStarts, input.getBatchSize() + 1, false); - int* startsData = seqStarts->getData(); - for (int i = 0; i < input.getBatchSize() + 1; i++) { - startsData[i] = i; - } - const int* starts = startsData; - - // if there is sequence, then use start positions - if (input.sequenceStartPositions) { - auto startPositions = input.sequenceStartPositions->getVector(false); - starts = startPositions->getData(); - CHECK_EQ(starts[numSequences], input.getBatchSize()); - CHECK_EQ(numSequences, startPositions->getSize() - 1); - } - - for (size_t seqID = 0; seqID < numSequences; seqID++) { - size_t inNumIns = starts[seqID + 1] - starts[seqID]; - size_t outNumIns = inNumIns * inDim / outDim; - CHECK_EQ(outNumIns * outDim, inNumIns * inDim); - } - - MatrixPtr inputValue = getInputValue(0); - - // reset output - reserveOutput(inputValue->getHeight() * inDim / outDim, outDim); - MatrixPtr outputValue = getOutputValue(); - - { - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SequenceReshapeLayerForward", getName().c_str()); - - outputValue->copyFrom(*inputValue); - - // modify the sequenceStartPositions - ICpuGpuVector::resizeOrCreate( - output_.sequenceStartPositions, numSequences + 1, false); - - int* tgtBuf = output_.sequenceStartPositions->getMutableData(false); - - for (size_t seqId = 0; seqId < numSequences + 1; ++seqId) { - tgtBuf[seqId] = starts[seqId] * inDim / outDim; - } - } - - if (biases_.get() != NULL) { - MatrixPtr outV = getOutputValue(); - outV->addBias(*(biases_->getW()), 1); - } - - /* activation */ - forwardActivation(); -} - -void SequenceReshapeLayer::backward(const UpdateCallback& callback) { - /* activation */ - backwardActivation(); - - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - - // Increasing the number of gradient - biases_->getParameterPtr()->incUpdate(callback); - } - - 
MatrixPtr inputGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SequenceReshapeLayerBackward", getName().c_str()); - - if (inputGrad) { - Matrix::resizeOrCreate(reshapedOutputGrad, - inputGrad->getHeight(), - inputGrad->getWidth(), - false, - useGpu_); - reshapedOutputGrad->copyFrom(*outputGrad); - inputGrad->add(*reshapedOutputGrad); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SequenceSliceLayer.cpp b/paddle/legacy/gserver/layers/SequenceSliceLayer.cpp deleted file mode 100644 index 3ed51c4ef2f6e91da94f302c14d1c0cc555886aa..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SequenceSliceLayer.cpp +++ /dev/null @@ -1,224 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -class SequenceSliceLayer : public Layer { - public: - explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - private: - /* - * TODO(caoying) - * In PaddePaddle, currently all matrices are real number types, - * but the second and the (optional) third input which are some - * selected indices of the give sequence to trim the sequence, are actually - * filled with int types so that storing int types information in real number - * matrices is very dangerous, since real numbers will be convered to int - * types. If a user fills this matrix himself, invalid data may occor. 
- */ - - MatrixPtr startIdsOnCpu_; - MatrixPtr endIdsOnCpu_; - - std::vector selectedRows_; - IVectorPtr rowIndice_; - std::vector> inputSeqInfoVec_; - std::vector outSubSeqStartPos_; - std::vector outSeqStartPos_; - - void checkInputs(); - void copySliceIdsToCpu(); - void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends); -}; - -REGISTER_LAYER(seq_slice, SequenceSliceLayer); - -bool SequenceSliceLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - CHECK_GE(inputLayers_.size(), 2U); - CHECK_LE(inputLayers_.size(), 3U); - - setNeedSequenceInfo(false); - return true; -} - -void SequenceSliceLayer::checkInputs() { - const Argument& inputSeq = getInput(0); - CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer " - << "must be a sequence."; - const MatrixPtr indices1 = getInputValue(1); - CHECK_EQ( - indices1->getHeight(), - static_cast(inputSeq.hasSubseq() ? inputSeq.getNumSubSequences() - : inputSeq.getNumSequences())) - << "Height of the second input should be equal to number of sequence " - << "in the first input."; - if (inputLayers_.size() == 3) { - const MatrixPtr indices2 = getInputValue(2); - CHECK_EQ(indices2->getHeight(), indices1->getHeight()) - << "start indices and end indices should have the same height."; - CHECK_EQ(indices2->getWidth(), indices1->getWidth()) - << "start indices and end indices should have the same Width."; - } -} - -void SequenceSliceLayer::copySliceIdsToCpu() { - const MatrixPtr indices1 = getInputValue(1); - if (inputLayers_.size() == 2U) { - if (config_.select_first()) { - Matrix::resizeOrCreate(startIdsOnCpu_, - indices1->getHeight(), - indices1->getWidth(), - false /* trans */, - false /* useGpu */); - startIdsOnCpu_->copyFrom(*indices1); - endIdsOnCpu_ = nullptr; - } else { - Matrix::resizeOrCreate(endIdsOnCpu_, - indices1->getHeight(), - indices1->getWidth(), - false /* trans */, - false /* useGpu 
*/); - endIdsOnCpu_->copyFrom(*indices1); - startIdsOnCpu_ = nullptr; - } - } else if (inputLayers_.size() == 3U) { - Matrix::resizeOrCreate(startIdsOnCpu_, - indices1->getHeight(), - indices1->getWidth(), - false /* trans */, - false /* useGpu */); - startIdsOnCpu_->copyFrom(*indices1); - - const MatrixPtr indices2 = getInputValue(2); - Matrix::resizeOrCreate(endIdsOnCpu_, - indices2->getHeight(), - indices2->getWidth(), - false /* trans */, - false /* useGpu */); - endIdsOnCpu_->copyFrom(*indices2); - } -} - -void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, - const MatrixPtr ends) { - CHECK(starts || ends) << "At least one of the start or end indices " - << "should be given."; - - bool hasSubseq = getInput(0).hasSubseq(); - - outSeqStartPos_.resize(1, 0); - outSubSeqStartPos_.resize(1, 0); - selectedRows_.clear(); - - size_t beamSize = starts ? starts->getWidth() : ends->getWidth(); - size_t rowIdx = 0; - for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) { - for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) { - for (size_t k = 0; k < beamSize; ++k) { - if (starts && starts->getElement(rowIdx, k) == -1.) break; - if (ends && ends->getElement(rowIdx, k) == -1.) break; - - int begPos = inputSeqInfoVec_[i][j]; - if (starts) begPos += starts->getElement(rowIdx, k); - - int endPos = inputSeqInfoVec_[i][j + 1] - 1; - if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); - - int seqLen = endPos - begPos + 1; - CHECK_GT(seqLen, 0); - for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); - hasSubseq - ? 
outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) - : outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen); - } - rowIdx++; - } - if (hasSubseq) outSeqStartPos_.push_back(outSubSeqStartPos_.back()); - } - - if (useGpu_) { - rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); - rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); - } else { - rowIndice_ = - IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); - } - - // create the sequence information for the output. - ICpuGpuVector::resizeOrCreate( - output_.sequenceStartPositions, outSeqStartPos_.size(), false); - output_.sequenceStartPositions->copyFrom( - outSeqStartPos_.data(), outSeqStartPos_.size(), false); - - if (hasSubseq) { - ICpuGpuVector::resizeOrCreate( - output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false); - output_.subSequenceStartPositions->copyFrom( - outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false); - } -} - -void SequenceSliceLayer::forward(PassType passType) { - Layer::forward(passType); - checkInputs(); - - const Argument& inputSeq = getInput(0); - inputSeqInfoVec_.clear(); - Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, - inputSeq.subSequenceStartPositions, - inputSeqInfoVec_); - if (!useGpu_) { - if (inputLayers_.size() == 2U) { - startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr; - endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1); - } else if (inputLayers_.size() == 3U) { - startIdsOnCpu_ = getInputValue(1); - endIdsOnCpu_ = getInputValue(2); - } - } else { - copySliceIdsToCpu(); - } - - /* - * calculate the selected row indices in a batch, and build the output - * sequence information. 
- */ - calSelectedRows(startIdsOnCpu_, endIdsOnCpu_); - - resetOutput(selectedRows_.size(), getSize()); - - getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); -} - -void SequenceSliceLayer::backward(const UpdateCallback& callback) { - getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SequenceToBatch.cpp b/paddle/legacy/gserver/layers/SequenceToBatch.cpp deleted file mode 100644 index 5d0d588e67ad814fe82bb4a89e819829ee32b3b5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SequenceToBatch.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "SequenceToBatch.h" -#include -#include -#include -#include - -namespace paddle { - -void SequenceToBatch::resizeOrCreateBatch(int batchSize, - size_t numSequences, - const int *seqStarts, - bool reversed, - bool prevBatchState) { - CHECK_EQ(seqStarts[numSequences], batchSize); - IVector::resizeOrCreate(seq2BatchIdx_, batchSize, useGpu_); - if (!useGpu_) { - cpuSeq2BatchIdx_ = seq2BatchIdx_; - } else { - IVector::resizeOrCreate(cpuSeq2BatchIdx_, batchSize, false); - } - - /* - * calculate the length of each sequence & sort sequence index by the length - * Exampel: Sequences = {s0, s1, s2} - * s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2 - * seqStartAndLength[3] = {(4, 5, 1), (0, 4, 0), (9, 3, 2)} - */ - struct SeqStartAndLength { - int start_; - int length_; - int seqIdx_; - SeqStartAndLength(int start, int length, int seqIdx) - : start_(start), length_(length), seqIdx_(seqIdx) {} - }; - std::vector seqStartAndLength; - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - int length = seqStarts[seqId + 1] - seqStarts[seqId]; - seqStartAndLength.emplace_back(seqStarts[seqId], length, seqId); - } - std::sort(seqStartAndLength.begin(), - seqStartAndLength.end(), - [](SeqStartAndLength a, SeqStartAndLength b) { - return a.length_ > b.length_; - }); - - /* - * calculate the start position of each batch - * (numBatch equal the maxLength of sequences) - * Exampel: Sequences = {s0, s1, s2} - * s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2 - * numBatch = 5, - * batchIndex = {b0, b1, b2, b3, b4} - * b0: 1 0 2, b1: 1 0 2, b2: 1 0 2, b3: 1 0, b4: 1 - * batchStartPositions[6] = {0, 3, 6, 9, 11, 12} - */ - numBatch_ = (size_t)seqStartAndLength[0].length_; - - IVector::resizeOrCreate(batchStartPositions_, numBatch_ + 1, false); - int *batchStartPositions = batchStartPositions_->getData(); - batchStartPositions[0] = 0; - for (size_t n = 0; n < numBatch_; n++) { - int batchId = batchStartPositions[n]; - for (size_t i = 0; i < seqStartAndLength.size(); ++i) { - size_t seqLength 
= seqStartAndLength[i].length_; - int start = seqStartAndLength[i].start_; - if (n < seqLength) { - if (!reversed) { - cpuSeq2BatchIdx_->getData()[batchId] = start + n; - } else { - cpuSeq2BatchIdx_->getData()[batchId] = start + seqLength - 1 - n; - } - batchId++; - } else { - break; - } - } - batchStartPositions[n + 1] = batchId; - } - if (useGpu_) { - seq2BatchIdx_->copyFrom(*cpuSeq2BatchIdx_); - } - if (prevBatchState) { - IVector::resizeOrCreate(seqIdx_, numSequences, useGpu_); - IVector::resizeOrCreate(seqEndIdxInBatch_, numSequences, useGpu_); - if (!useGpu_) { - cpuSeqIdx_ = seqIdx_; - cpuSeqEndIdxInBatch_ = seqEndIdxInBatch_; - } else { - IVector::resizeOrCreate(cpuSeqIdx_, numSequences, false); - IVector::resizeOrCreate(cpuSeqEndIdxInBatch_, numSequences, false); - } - int *seqIdx = cpuSeqIdx_->getData(); - int *seqEndIdxInBatch = cpuSeqEndIdxInBatch_->getData(); - for (size_t i = 0; i < seqStartAndLength.size(); ++i) { - seqIdx[i] = seqStartAndLength[i].seqIdx_; - } - for (size_t i = 0; i < seqStartAndLength.size(); ++i) { - if (seqStartAndLength[i].length_ > 0) { - seqEndIdxInBatch[seqStartAndLength[i].seqIdx_] = - batchStartPositions[seqStartAndLength[i].length_ - 1] + i; - } else { - seqEndIdxInBatch[seqStartAndLength[i].seqIdx_] = 0; - } - } - if (useGpu_) { - seqIdx_->copyFrom(*cpuSeqIdx_); - seqEndIdxInBatch_->copyFrom(*cpuSeqEndIdxInBatch_); - } - } -} - -void SequenceToBatch::resizeOrCreate(Matrix &seqValue) { - Matrix::resizeOrCreate(batchValue_, - seqValue.getHeight(), - seqValue.getWidth(), - /* trans= */ false, - useGpu_); -} - -MatrixPtr SequenceToBatch::getBatchValue(int batchId, int numRows) { - return getBatchValue(*batchValue_, batchId, numRows); -} - -MatrixPtr SequenceToBatch::getBatchValue(Matrix &batchValue, - int batchId, - int numRows) { - int *batchStartPositions = batchStartPositions_->getData(); - int start = batchStartPositions[batchId]; - int maxRows = batchStartPositions[batchId + 1] - batchStartPositions[batchId]; - if 
(numRows == 0) { - numRows = maxRows; - } else { - CHECK_LE(numRows, maxRows); - } - return batchValue.subMatrix(start, numRows); -} - -void SequenceToBatch::prevOutput2Batch(Matrix &src, Matrix &dst) { - sequence2BatchCopy(dst, src, *seqIdx_, true); -} - -void SequenceToBatch::getSeqOutputFromBatch(Matrix &sequence, Matrix &batch) { - sequence2BatchCopy(sequence, batch, *seqEndIdxInBatch_, true); -} - -void SequenceToBatch::sequence2BatchCopy(Matrix &batch, - Matrix &sequence, - IVector &seq2BatchIdx, - bool seq2batch) { - int seqWidth = sequence.getWidth(); - int batchCount = batch.getHeight(); - real *batchData = batch.getData(); - real *seqData = sequence.getData(); - int *idxData = seq2BatchIdx.getData(); - - if (useGpu_) { - hl_sequence2batch_copy( - batchData, seqData, idxData, seqWidth, batchCount, seq2batch); - } else { - if (seq2batch) { -#ifdef PADDLE_USE_MKLML - const int blockMemSize = 8 * 1024; - const int blockSize = blockMemSize / sizeof(real); -#pragma omp parallel for collapse(2) - for (int i = 0; i < batchCount; ++i) { - for (int j = 0; j < seqWidth; j += blockSize) { - memcpy(batch.rowBuf(i) + j, - sequence.rowBuf(idxData[i]) + j, - (j + blockSize > seqWidth) ? 
(seqWidth - j) * sizeof(real) - : blockMemSize); - } - } -#else - for (int i = 0; i < batchCount; ++i) { - memcpy(batch.rowBuf(i), - sequence.rowBuf(idxData[i]), - seqWidth * sizeof(real)); - } -#endif - } else { -#ifdef PADDLE_USE_MKLML -#pragma omp parallel for -#endif - for (int i = 0; i < batchCount; ++i) { - memcpy(sequence.rowBuf(idxData[i]), - batch.rowBuf(i), - seqWidth * sizeof(real)); - } - } - } -} - -void SequenceToBatch::sequence2BatchAdd(Matrix &batch, - Matrix &sequence, - IVector &seq2BatchIdx, - bool seq2batch) { - int seqWidth = sequence.getWidth(); - int batchCount = batch.getHeight(); - real *batchData = batch.getData(); - real *seqData = sequence.getData(); - int *idxData = seq2BatchIdx.getData(); - - if (useGpu_) { - hl_sequence2batch_add( - batchData, seqData, idxData, seqWidth, batchCount, seq2batch); - } else { - for (int i = 0; i < batchCount; ++i) { - if (seq2batch) { - batch.subMatrix(i, 1)->add(*sequence.subMatrix(idxData[i], 1)); - } else { - sequence.subMatrix(idxData[i], 1)->add(*batch.subMatrix(i, 1)); - } - } - } -} - -void SequenceToBatch::copyFromSeq(Matrix &seqValue) { - Matrix::resizeOrCreate(batchValue_, - seqValue.getHeight(), - seqValue.getWidth(), - /* trans= */ false, - useGpu_); - sequence2BatchCopy(*batchValue_, seqValue, *seq2BatchIdx_, true); -} - -void SequenceToBatch::copyBackSeq(Matrix &seqValue) { - sequence2BatchCopy(*batchValue_, seqValue, *seq2BatchIdx_, false); -} - -void SequenceToBatch::copy(Matrix &seqValue, - Matrix &batchValue, - bool seq2batch) { - sequence2BatchCopy(batchValue, seqValue, *seq2BatchIdx_, seq2batch); -} - -void SequenceToBatch::add(Matrix &seqValue, - Matrix &batchValue, - bool seq2batch) { - sequence2BatchAdd(batchValue, seqValue, *seq2BatchIdx_, seq2batch); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SequenceToBatch.h b/paddle/legacy/gserver/layers/SequenceToBatch.h deleted file mode 100644 index 
7ed517937d4a015b6b11de16412cac7599f5f8b9..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SequenceToBatch.h +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" - -namespace paddle { - -/* - * This class can used to modify the matrix structure of sequence matrix into - * batch structure. - * sequence matrix: [C1_s ... Cn_s | ...... | C1_t ... Cn_t] - * batch matrix: [C1_s ... C1_t | ...... | Cn_s ... Cn_t] - * Cn_s is the state for sequence s at time n. - * - * Exampel: sequence matrix = {{0, 0, 0, 0}, {1, 1, 1, 1, 1}, {2, 2, 2}} - * s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2 - * batch matrix = {{1, 0, 2}, {1, 0, 2}, {1, 0, 2}, {1, 0}, {1}} - * b0: 1 0 2, b1: 1 0 2, b2: 1 0 2, b3: 1 0, b4: 1 - * - * Use: - * Input: seqMatrix, seqStarts(Sequence Start Positions) - * Output: batchMatrix - * 1. SequenceToBatch seq2batch; - * 2. seq2batch.resizeOrCreateBatch(seqStarts); // calculate seq2BatchIdx - * 3. 
seq2batch.copy(seqMatrix, batchMatrix, true); // copy seq to batch matrix - * - */ -class SequenceToBatch { - public: - explicit SequenceToBatch(bool useGpu) : useGpu_(useGpu) {} - - /* resize and calculate the batchIndex_ */ - void resizeOrCreateBatch(int batchSize, - size_t numSequences, - const int *seqStarts, - bool reversed, - bool prevBatchState = false); - - /* sequence matrix and batch matrix copy: - * seq2batch: copy(seqValue, batchValue, true); - * batch2seq: copy(seqValue, batchValue, false); - */ - void copy(Matrix &seqValue, Matrix &batchValue, bool seq2batch); - /* sequence/batch matrix add to batch/sequence matrix */ - void add(Matrix &seqValue, Matrix &batchValue, bool seq2batch); - MatrixPtr getBatchValue(Matrix &batchValue, int batchId, int numRows = 0); - - size_t getNumBatch() const { return numBatch_; } - - /* resize or create a batch matrix(batchValue_) */ - void resizeOrCreate(Matrix &seqValue); - /* copy seqValue to batchValue_ */ - void copyFromSeq(Matrix &seqValue); - /* copy batchValue_ to seqValue */ - void copyBackSeq(Matrix &seqValue); - MatrixPtr getBatchValue(int batchId, int numRows = 0); - MatrixPtr getBatchValue() { return batchValue_; } - /*tranfer preBatchOutput to batch struct*/ - void prevOutput2Batch(Matrix &src, Matrix &dst); - /*get sequence output from batch struct*/ - void getSeqOutputFromBatch(Matrix &sequence, Matrix &batch); - - /* Copy the index from another seq2batch. 
*/ - void shareIndexWith(const SequenceToBatch &seq2batch) { - CHECK(useGpu_ == seq2batch.useGpu_); - batchStartPositions_ = seq2batch.batchStartPositions_; - seq2BatchIdx_ = seq2batch.seq2BatchIdx_; - cpuSeq2BatchIdx_ = seq2batch.cpuSeq2BatchIdx_; - numBatch_ = seq2batch.numBatch_; - } - - protected: - void sequence2BatchCopy(Matrix &batch, - Matrix &sequence, - IVector &seq2BatchIdx, - bool seq2batch); - void sequence2BatchAdd(Matrix &batch, - Matrix &sequence, - IVector &seq2BatchIdx, - bool seq2batch); - - IVectorPtr batchStartPositions_; - IVectorPtr seq2BatchIdx_; - IVectorPtr cpuSeq2BatchIdx_; - IVectorPtr cpuSeqIdx_; - IVectorPtr cpuSeqEndIdxInBatch_; - IVectorPtr seqIdx_; - IVectorPtr seqEndIdxInBatch_; - size_t numBatch_; - bool useGpu_; - MatrixPtr batchValue_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SliceProjection.cpp b/paddle/legacy/gserver/layers/SliceProjection.cpp deleted file mode 100644 index b474f2db759adfad337f9485a5a38588b6839c54..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SliceProjection.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Projection.h" - -namespace paddle { - -/** - * SliceProjection can slice the input value into multiple parts, - * and then select some of them to merge into a new output. - * - * First, calculate the slices that need to be merged into the output. 
- * slices = input.slices().for_output() - * - * Second, merge each slice into the output. - * for(auto slice: slices) { - * out.addAtOffset(slice, offset); - * } - * - * Input slices as output: s0, s1, ...: - * ----------------------- - * |///| |//////| | - * |/s0| |//s1//| | - * |///| |//////| | - * ----------------------- - * Output, merge s0, s1, ... into one output: - * ---------------- - * |///|//////| | - * |/s0|//s1//|...| - * |///|//////| | - * ---------------- - * - * The config file api is slice_projection. - */ -class SliceProjection : public Projection { - public: - SliceProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu); - virtual void forward(); - virtual void backward(const UpdateCallback& callback); - - protected: - std::vector> slices_; -}; - -REGISTER_PROJECTION(slice, SliceProjection); - -/** - * Constructed function. - * @note SliceProjection should not have any parameter. - */ -SliceProjection::SliceProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - CHECK(!parameter) << "'slice' projection should not have any parameter"; - - slices_.reserve(config.slices_size()); - for (const auto& slice : config.slices()) { - slices_.push_back(std::make_pair(slice.start(), slice.end())); - } -} - -void SliceProjection::forward() { - size_t offset = 0; - for (auto& slice : slices_) { - auto slice_out = in_->value->subColMatrix(slice.first, slice.second); - out_->value->addAtOffset(*slice_out, offset); - offset += slice_out->getWidth(); - } -} - -void SliceProjection::backward(const UpdateCallback& callback) { - if (in_->grad) { - size_t offset = 0; - for (auto& slice : slices_) { - auto slice_out = in_->grad->subColMatrix(slice.first, slice.second); - slice_out->addAtOffset(*out_->grad, offset); - offset += slice_out->getWidth(); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SlopeInterceptLayer.cpp 
b/paddle/legacy/gserver/layers/SlopeInterceptLayer.cpp deleted file mode 100644 index 9168fd7dda6dcdcd9e272acbf6337f1c8468e6f0..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SlopeInterceptLayer.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief A layer for applying a slope and an intercept to the input - * element-wise. - * This layer is used in NEURAL TURING MACHINE. - * @note There is no activation and weight in this layer. - * - * \f[ - * y = ax + b - * \f] - * - * Here, a is scale and b is offset, which are provided as attributes of the - * layer. - * - * The config file api is slope_intercept_layer. 
- */ - -class SlopeInterceptLayer : public Layer { - public: - explicit SlopeInterceptLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(slope_intercept, SlopeInterceptLayer); - -bool SlopeInterceptLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 1U); - - return true; -} - -void SlopeInterceptLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV = getInputValue(0); - - /* malloc memory for the output_ if necessary */ - size_t batchSize = inV->getHeight(); - size_t size = getSize(); - - CHECK_EQ(size, inV->getWidth()); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - reserveOutput(batchSize, size); - } - - MatrixPtr outV = getOutputValue(); - { - REGISTER_TIMER_INFO("FwSlopeInterceptTimer", getName().c_str()); - outV->mulScalar(*inV, config_.slope()); - outV->add(config_.intercept()); - } -} - -void SlopeInterceptLayer::backward(const UpdateCallback& callback) { - MatrixPtr inG = getInputGrad(0); - MatrixPtr outG = getOutputGrad(); - - if (inG) { - REGISTER_TIMER_INFO("BwSlopeInterceptTimer", getName().c_str()); - inG->add(*outG, config_.slope()); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.cpp b/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.cpp deleted file mode 100644 index b445a399ef691ce6b6a8b6ff927b8c8e1f04dcfd..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "SpatialPyramidPoolLayer.h" - -namespace paddle { - -REGISTER_LAYER(spp, SpatialPyramidPoolLayer); - -ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW, - size_t imgSizeH, - size_t channels, - size_t pyramidLevel, - std::string& poolType) { - ProjectionConfig config; - config.set_type("pool"); - PoolConfig* conf = config.mutable_pool_conf(); - conf->set_channels(channels); - conf->set_img_size(imgSizeW); - conf->set_img_size_y(imgSizeH); - conf->set_pool_type(poolType); - - int numBins = std::pow(2, pyramidLevel); - - int sizeH = std::ceil(imgSizeH / static_cast(numBins)); - int paddingH = (sizeH * numBins - imgSizeH + 1) / 2; - int outSizeH = outputSize(imgSizeH, sizeH, paddingH, sizeH, true); - - int sizeW = std::ceil(imgSizeW / static_cast(numBins)); - int paddingW = (sizeW * numBins - imgSizeW + 1) / 2; - int outSizeW = outputSize(imgSizeW, sizeW, paddingW, sizeW, true); - - conf->set_stride(sizeW); - conf->set_stride_y(sizeH); - conf->set_size_x(sizeW); - conf->set_size_y(sizeH); - conf->set_padding(paddingW); - conf->set_padding_y(paddingH); - conf->set_output_x(outSizeW); - conf->set_output_y(outSizeH); - config.set_output_size(outSizeH * outSizeW * channels); - return config; -} - -size_t SpatialPyramidPoolLayer::getSize() { - CHECK_EQ(inputLayers_.size(), 1UL); - size_t layerSize = 0; - const ImageConfig& conf = config_.inputs(0).spp_conf().image_conf(); - imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); - imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); - if (imgSizeH_ == 0) { - imgSizeH_ = 
conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); - } - if (imgSizeW_ == 0) { - imgSizeW_ = conf.img_size(); - } - - size_t outputH = 1; - size_t outputW = (std::pow(4, pyramidHeight_) - 1) / (4 - 1); - - layerSize = outputH * outputW * channels_; - return layerSize; -} - -bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - CHECK_EQ(config_.inputs_size(), 1); - - const SppConfig& sppConf = config_.inputs(0).spp_conf(); - pyramidHeight_ = sppConf.pyramid_height(); - poolType_ = sppConf.pool_type(); - - const ImageConfig& imageConf = sppConf.image_conf(); - channels_ = imageConf.channels(); - imgSizeW_ = imageConf.img_size(); - imgSizeH_ = imageConf.has_img_size_y() ? imageConf.img_size_y() : imgSizeW_; - poolProjections_.reserve(pyramidHeight_); - projCol_.reserve(pyramidHeight_); - projOutput_.resize(pyramidHeight_); - - size_t startCol = 0; - size_t endCol = 0; - for (size_t i = 0; i < pyramidHeight_; i++) { - poolProjections_.emplace_back(PoolProjection::create( - getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_), - nullptr, - useGpu_)); - endCol += poolProjections_[i]->getOutputSize(); - projCol_.push_back(std::make_pair(startCol, endCol)); - startCol = endCol; - } - CHECK_EQ(endCol, getSize()); - return true; -} - -void SpatialPyramidPoolLayer::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = getInput(0).getBatchSize(); - resetOutput(batchSize, getSize()); - for (size_t i = 0; i < pyramidHeight_; i++) { - size_t startCol = projCol_[i].first; - size_t endCol = projCol_[i].second; - projOutput_[i].value = output_.value->subColMatrix(startCol, endCol); - if (output_.grad) { - projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol); - } - } - for (size_t i = 0; i < pyramidHeight_; i++) { - poolProjections_[i]->forward(&getInput(0), &projOutput_[i], passType); - } -} - -void SpatialPyramidPoolLayer::backward(const 
UpdateCallback& callback) { - for (size_t i = 0; i < pyramidHeight_; i++) { - if (poolProjections_[i]) { - poolProjections_[i]->backward(callback); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.h b/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.h deleted file mode 100644 index 6d8ed9c87889a93664f09dbaf2a84bd00b1757ad..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.h +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "PoolProjection.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { -/** - * @brief A layer for spatial pyramid pooling on the input image by taking - * the max, average, etc. within regions, so that the result vector of - * different sized images are of the same size. - * - * The config file api is spp_layer. 
- */ - -class SpatialPyramidPoolLayer : public Layer { - protected: - size_t channels_; - size_t imgSizeW_; - size_t imgSizeH_; - size_t pyramidHeight_; - std::string poolType_; - - std::vector> poolProjections_; - std::vector projOutput_; - std::vector> projCol_; - - public: - explicit SpatialPyramidPoolLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - ProjectionConfig getConfig(size_t sizeX_, - size_t sizeY_, - size_t channels, - size_t pyamidLevel_, - std::string& poolType_); - size_t getSize(); - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/legacy/gserver/layers/SubNestedSequenceLayer.cpp deleted file mode 100644 index f363c2ac8dd22fc8b8e1d7fca27e5beb935d42de..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SubNestedSequenceLayer.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -class SubNestedSequenceLayer : public Layer { - public: - explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - - private: - /* - * This functions generates the indices of rows in a batch according to the - * indices of selected sub-sequence in each sequence. - * - * Examples: - * selectedIndices: - * [ - * [0, 1, -1], - * [0, 1, 2], - * [0, -1, -1], - * [0, 2, 3], - * ] - * inputSeqInfo: - * [ - * [0,3,4], - * [4,5,7,10,15], - * [15,20], - * [20,22,23,25,28] - * ] - * - * ths output is saved to private member rowIndice_; - * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27] - */ - - void calSelectedRows(const MatrixPtr selectedIndices, - const std::vector>& inputSeqInfo); - - /* - * TODO(caoying) - * In PaddePaddle, currently all matrices are real number types, - * but the second is some selected indices of the give sequence to trim - * the nested sequence, are actually filled with int types so that storing - * int types information in real number matrices is very dangerous, since - * real numbers will be convered to int types. If a user fills this matrix - * himself, invalid data may occor. - * - * if the second input of this layer is on GPU memory, copy it to CPU memory. - */ - MatrixPtr selIdsCpu_; - - /* - * reorganize sequenceStartPositions and subSequenceStartPositions - * into a 2d vector to facilitate the sequence selection process. - */ - std::vector> inputSeqInfoVec_; - - /* store the final selected row indices in a batch */ - IVectorPtr rowIndice_; - /* rowIndice_ and selectedRows_ actually share a same memory. 
*/ - std::vector selectedRows_; -}; - -REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer); - -bool SubNestedSequenceLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - CHECK_EQ(2U, inputLayers_.size()); - setNeedSequenceInfo(false); - return true; -} - -void SubNestedSequenceLayer::calSelectedRows( - const MatrixPtr selectedIndices, - const std::vector>& inputSeqInfo) { - selectedRows_.clear(); - - std::vector outSeqStartInfo(1, 0); - std::vector outSubSeqStartInfo(1, 0); - - size_t seqNum = selectedIndices->getHeight(); - size_t beamSize = selectedIndices->getWidth(); - for (size_t i = 0; i < seqNum; ++i) { - for (size_t j = 0; j < beamSize; ++j) { - if (selectedIndices->getElement(i, j) == -1.) break; - size_t selSubSeqIdx = selectedIndices->getElement(i, j); - CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); - - size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] - - inputSeqInfoVec_[i][selSubSeqIdx]; - for (size_t k = 0; k < subSeqLen; ++k) - selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k); - outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen); - } - outSeqStartInfo.push_back(outSubSeqStartInfo.back()); - } - - if (useGpu_) { - rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); - rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); - } else { - rowIndice_ = - IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); - } - - // create the sequence information for the output. 
- ICpuGpuVector::resizeOrCreate( - output_.sequenceStartPositions, outSeqStartInfo.size(), false); - output_.sequenceStartPositions->copyFrom( - outSeqStartInfo.data(), outSeqStartInfo.size(), false); - - ICpuGpuVector::resizeOrCreate( - output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false); - output_.subSequenceStartPositions->copyFrom( - outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false); -} - -void SubNestedSequenceLayer::forward(PassType passType) { - Layer::forward(passType); - - const Argument& inputSeq = getInput(0); - CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " - << "must be a nested sequence."; - const MatrixPtr selectedIndices = getInputValue(1); - CHECK_EQ(size_t(inputSeq.getNumSequences()), selectedIndices->getHeight()); - - if (dynamic_cast(selectedIndices.get())) { - /* - * Currently, the second input for this layer is generated by - * kmax_sequence_score_layer whose output is always stored on CPU, - * or a data_layer which canbe on GPU. - * - * If the second input is on GPU, copy it to CPU memory, because this - * input always uses very few memory, and operations related to it are - * all logic control, not computations. 
- */ - Matrix::resizeOrCreate(selIdsCpu_, - selectedIndices->getHeight(), - selectedIndices->getWidth(), - false /* trans */, - false /* useGpu */); - selIdsCpu_->copyFrom(*selectedIndices); - } else { - selIdsCpu_ = selectedIndices; - } - - Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, - inputSeq.subSequenceStartPositions, - inputSeqInfoVec_); - calSelectedRows(selIdsCpu_, inputSeqInfoVec_); - - resetOutput(selectedRows_.size(), getSize()); - getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); -} - -void SubNestedSequenceLayer::backward(const UpdateCallback& callback) { - MatrixPtr inputSeqGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - - if (inputSeqGrad) outputGrad->addToRows(*inputSeqGrad, *rowIndice_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SubSequenceLayer.cpp b/paddle/legacy/gserver/layers/SubSequenceLayer.cpp deleted file mode 100644 index 36796f04739054bb19d4a3ce656e248898ba4b17..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SubSequenceLayer.cpp +++ /dev/null @@ -1,226 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * A layer for taking the subsequence according to given offset and size - * Input: original sequence, offset, size - * Output: subsequence - */ - -class SubSequenceLayer : public Layer { - protected: - std::unique_ptr biases_; - MatrixPtr tmpSrc_; - MatrixPtr tmpDest_; - - public: - explicit SubSequenceLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(subseq, SubSequenceLayer); - -bool SubSequenceLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - // sequene concatenation layer should have exactly 2 inputs - CHECK_EQ(3U, inputLayers_.size()); - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - - tmpSrc_ = - Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_); - tmpDest_ = - Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_); - - setNeedSequenceInfo(false); - return true; -} - -void SubSequenceLayer::forward(PassType passType) { - Layer::forward(passType); - - size_t dim = getSize(); - - const Argument& input = getInput(0); - size_t numSequences1 = input.getNumSequences(); - auto startPositions1 = input.sequenceStartPositions->getVector(false); - - const Argument& offsetSeq = getInput(1); - size_t numSequences2 = offsetSeq.getNumSequences(); - auto startPositions2 = offsetSeq.sequenceStartPositions->getVector(false); - - const Argument& sizeSeq = getInput(2); - size_t numSequences3 = 
sizeSeq.getNumSequences(); - auto startPositions3 = sizeSeq.sequenceStartPositions->getVector(false); - - CHECK_EQ(dim, input.value->getWidth()); - - CHECK_EQ(startPositions1->getData()[numSequences1], input.getBatchSize()); - CHECK_EQ(numSequences1, startPositions1->getSize() - 1); - - CHECK_EQ(startPositions2->getData()[numSequences2], offsetSeq.getBatchSize()); - CHECK_EQ(numSequences2, startPositions2->getSize() - 1); - - CHECK_EQ(startPositions3->getData()[numSequences3], sizeSeq.getBatchSize()); - CHECK_EQ(numSequences3, startPositions3->getSize() - 1); - - CHECK_EQ(numSequences1, numSequences2); - CHECK_EQ(numSequences2, numSequences3); - - MatrixPtr inputValue = input.value; - IVectorPtr offsetValue; - IVectorPtr sizeValue; - - if (useGpu_) { - // copy to cpu - IVector::resizeOrCreate(offsetValue, offsetSeq.ids->getSize(), false); - IVector::resizeOrCreate(sizeValue, sizeSeq.ids->getSize(), false); - offsetValue->copyFrom(*offsetSeq.ids); - sizeValue->copyFrom(*sizeSeq.ids); - } else { - offsetValue = offsetSeq.ids; - sizeValue = sizeSeq.ids; - } - - CHECK_EQ(offsetValue->getSize(), numSequences1); - CHECK_EQ(sizeValue->getSize(), numSequences1); - - int* offsets = offsetValue->getData(); - int* sizes = sizeValue->getData(); - - // get total height of output - size_t height = 0; - for (size_t seqId = 0; seqId < numSequences1; seqId++) { - height += sizes[seqId]; - } - - // reset output - resetOutput(height, dim); - - MatrixPtr outputValue = getOutputValue(); - - const int* starts1 = startPositions1->getData(); - - { - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SubSequenceLayerForward", getName().c_str()); - - size_t offsetIn = 0; - size_t offsetOut = 0; - size_t size = 0; - for (size_t seqId = 0; seqId < numSequences1; ++seqId) { - offsetIn = starts1[seqId] + offsets[seqId]; - size = sizes[seqId]; - - outputValue->subMatrix(offsetOut, size, tmpDest_) - ->assign(*(inputValue->subMatrix(offsetIn, size, tmpSrc_))); - - offsetOut += size; - } - - // 
modify the sequenceStartPositions - ICpuGpuVector::resizeOrCreate( - output_.sequenceStartPositions, numSequences1 + 1, false); - - int* tgtBuf = output_.sequenceStartPositions->getMutableData(false); - int offset = 0; - for (size_t seqId = 0; seqId < numSequences1; ++seqId) { - tgtBuf[seqId] = offset; - offset += sizes[seqId]; - } - tgtBuf[numSequences1] = offset; - } - - if (biases_.get() != NULL) { - MatrixPtr outV = getOutputValue(); - outV->addBias(*(biases_->getW()), 1); - } - - /* activation */ - forwardActivation(); -} - -void SubSequenceLayer::backward(const UpdateCallback& callback) { - /* activation */ - backwardActivation(); - - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - - // Increasing the number of gradient - biases_->getParameterPtr()->incUpdate(callback); - } - - MatrixPtr inputGrad1 = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - auto startPositions1 = getInput(0).sequenceStartPositions->getVector(false); - size_t numSequences1 = startPositions1->getSize() - 1; - const int* starts1 = startPositions1->getData(); - - const Argument& offsetSeq = getInput(1); - const Argument& sizeSeq = getInput(2); - IVectorPtr offsetValue; - IVectorPtr sizeValue; - - if (useGpu_) { - // copy to cpu - IVector::resizeOrCreate(offsetValue, offsetSeq.ids->getSize(), false); - IVector::resizeOrCreate(sizeValue, sizeSeq.ids->getSize(), false); - offsetValue->copyFrom(*offsetSeq.ids); - sizeValue->copyFrom(*sizeSeq.ids); - } else { - offsetValue = offsetSeq.ids; - sizeValue = sizeSeq.ids; - } - - int* offsets = offsetValue->getData(); - int* sizes = sizeValue->getData(); - { - AsyncGpuBlock asyncGpuBlock; - REGISTER_TIMER_INFO("SubSequenceLayerBackward", getName().c_str()); - - int offsetIn = 0; - int offsetOut = 0; - int size = 0; - for (size_t seqId = 0; seqId < numSequences1; ++seqId) { - offsetIn = starts1[seqId] + offsets[seqId]; - size = sizes[seqId]; - - inputGrad1->subMatrix(offsetIn, size, 
tmpDest_) - ->add(*(outputGrad->subMatrix(offsetOut, size, tmpSrc_))); - offsetOut += size; - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SumToOneNormLayer.cpp b/paddle/legacy/gserver/layers/SumToOneNormLayer.cpp deleted file mode 100644 index 410f4dd7c90e67488bc3dda6dfad551032890d65..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SumToOneNormLayer.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * A layer for sum-to-one normalization, - * which is used in NEURAL TURING MACHINE. - * \f[ - * out[i] = \frac {in[i]} {\sum_{k=1}^N in[k]} - * \f] - * where \f$in\f$ is a (batchSize x dataDim) input vector, - * and \f$out\f$ is a (batchSize x dataDim) output vector. - * - * The config file api is sum_to_one_norm_layer. 
- */ - -class SumToOneNormLayer : public Layer { - protected: - /// reciprocalRowSum_ = \f$1 / \sum_{k=1}^N in[k]\f$ - MatrixPtr reciprocalRowSum_; - /// dotSum = output_.grad \f$.*\f$ output_.value - MatrixPtr dotSum_; - - public: - explicit SumToOneNormLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; - -REGISTER_LAYER(sum_to_one_norm, SumToOneNormLayer); - -bool SumToOneNormLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 1U); - - return true; -} - -void SumToOneNormLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr inV = getInputValue(0); - - /* malloc memory for the output_ if necessary */ - size_t batchSize = inV->getHeight(); - size_t dataDim = getSize(); - - CHECK_EQ(dataDim, inV->getWidth()); - - { - REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); - resetOutput(batchSize, dataDim); - } - - MatrixPtr outV = getOutputValue(); - { - REGISTER_TIMER_INFO("FwSumToOneNormTimer", getName().c_str()); - - Matrix::resizeOrCreate(reciprocalRowSum_, batchSize, 1, false, useGpu_); - inV->rowSum(*reciprocalRowSum_); - - // todo: matrix checks - CHECK_GT(reciprocalRowSum_->getMin(), 0.0); - - reciprocalRowSum_->scalarDiv(*reciprocalRowSum_, 1.0); - - // outV = inV * reciprocalRowSum - outV->rowScale(0, *inV, *reciprocalRowSum_); - } -} - -void SumToOneNormLayer::backward(const UpdateCallback& callback) { - MatrixPtr inV = getInputValue(0); - MatrixPtr inG = getInputGrad(0); - MatrixPtr outV = getOutputValue(); - MatrixPtr outG = getOutputGrad(); - - size_t batchSize = inV->getHeight(); - - if (inG) { - REGISTER_TIMER_INFO("BwSumToOneTimer", getName().c_str()); - - Matrix::resizeOrCreate(dotSum_, batchSize, 1, false, useGpu_); - - // 
dotSum = outG .* outV - dotSum_->zeroMem(); - dotSum_->rowDotMul(0, *outG, *outV); - - // inG += -1 * (dotSum / rowSum) - dotSum_->dotMul(*dotSum_, *reciprocalRowSum_); - inG->rowAdd(0, *inG, *dotSum_, -1.0); - // inG += outG * (1/rowSum) - inG->addRowScale(0, *outG, *reciprocalRowSum_); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SwitchOrderLayer.cpp b/paddle/legacy/gserver/layers/SwitchOrderLayer.cpp deleted file mode 100644 index 513f3df7bcaf854835ec0e500d47c23469d5aa46..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SwitchOrderLayer.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "SwitchOrderLayer.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(switch_order, SwitchOrderLayer); - -bool SwitchOrderLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - auto& img_conf = config_.inputs(0).image_conf(); - size_t inD = img_conf.img_size_z(); - size_t inH = - img_conf.has_img_size_y() ? 
img_conf.img_size_y() : img_conf.img_size(); - size_t inW = img_conf.img_size(); - size_t inC = img_conf.channels(); - inH = inH * inD; - inDims_ = TensorShape({0, inC, inH, inW}); - outDims_ = TensorShape(4); - - auto& reshape_conf = config_.reshape_conf(); - for (int i = 0; i < reshape_conf.height_axis_size(); i++) { - heightAxis_.push_back(reshape_conf.height_axis(i)); - } - for (int i = 0; i < reshape_conf.width_axis_size(); i++) { - widthAxis_.push_back(reshape_conf.width_axis(i)); - } - createFunction(nchw2nhwc_, "NCHW2NHWC", FuncConfig()); - createFunction(nhwc2nchw_, "NHWC2NCHW", FuncConfig()); - return true; -} - -void SwitchOrderLayer::setOutDims() { - outDims_.setDim(0, inDims_[0]); - outDims_.setDim(1, inDims_[2]); - outDims_.setDim(2, inDims_[3]); - outDims_.setDim(3, inDims_[1]); - reshapeHeight_ = 1; - for (size_t i = 0; i < heightAxis_.size(); i++) { - reshapeHeight_ *= outDims_[heightAxis_[i]]; - } - output_.setFrameHeight(reshapeHeight_); - reshapeWidth_ = 1; - for (size_t i = 0; i < widthAxis_.size(); i++) { - reshapeWidth_ *= outDims_[widthAxis_[i]]; - } - output_.setFrameWidth(reshapeWidth_); -} - -void SwitchOrderLayer::setInDims() { - MatrixPtr input = inputLayers_[0]->getOutputValue(); - size_t batchSize = input->getHeight(); - inDims_.setDim(0, batchSize); - int d = inputLayers_[0]->getOutput().getFrameDepth(); - d = (d == 0 ? 
1 : d); - int h = inputLayers_[0]->getOutput().getFrameHeight(); - if (h != 0) inDims_.setDim(2, h * d); - int w = inputLayers_[0]->getOutput().getFrameWidth(); - if (w != 0) inDims_.setDim(3, w); - int totalCount = input->getElementCnt(); - int channels = totalCount / (inDims_[0] * inDims_[2] * inDims_[3]); - if (channels != 0) inDims_.setDim(1, channels); -} - -void SwitchOrderLayer::forward(PassType passType) { - Layer::forward(passType); - setInDims(); - setOutDims(); - resetOutput(outDims_[0], outDims_[1] * outDims_[2] * outDims_[3]); - if (heightAxis_.size() > 0) { - resetOutput(reshapeHeight_, reshapeWidth_); - } - - // switch NCHW to NHWC - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(0), inDims_); - outputs.addArg(*getOutputValue(), outDims_); - nchw2nhwc_[0]->calc(inputs, outputs); - forwardActivation(); -} - -void SwitchOrderLayer::backward(const UpdateCallback& callback) { - (void)callback; - backwardActivation(); - - // switch NHWC to NCHW - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), outDims_); - outputs.addArg(*getInputGrad(0), inDims_, ADD_TO); - nhwc2nchw_[0]->calc(inputs, outputs); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/SwitchOrderLayer.h b/paddle/legacy/gserver/layers/SwitchOrderLayer.h deleted file mode 100644 index 8a551a2bba698374841e73dc4dbad403034dd300..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/SwitchOrderLayer.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" - -namespace paddle { - -/** - * \brief This layer calculate softmax in image channel dimension. - */ -class SwitchOrderLayer : public Layer { - public: - explicit SwitchOrderLayer(const LayerConfig& config) : Layer(config) {} - - ~SwitchOrderLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; - void setInDims(); - void setOutDims(); - - protected: - std::vector> nchw2nhwc_; - std::vector> nhwc2nchw_; - TensorShape inDims_; - TensorShape outDims_; - std::vector heightAxis_; - std::vector widthAxis_; - size_t reshapeHeight_; - size_t reshapeWidth_; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/TableProjection.cpp b/paddle/legacy/gserver/layers/TableProjection.cpp deleted file mode 100644 index 326e241d07558cdb3c70afc1b112dc32c949d1f0..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/TableProjection.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "TableProjection.h" - -namespace paddle { - -REGISTER_PROJECTION(table, TableProjection); - -TableProjection::TableProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - table_.reset( - new Weight(config.input_size(), config.output_size(), parameter)); -} - -void TableProjection::prefetch(const Argument* in) { - CHECK(in->ids); - auto* sparseParam = - dynamic_cast(table_->getW().get()); - if (sparseParam) { - sparseParam->addRows(in->ids); - } -} - -void TableProjection::forward() { - CHECK(in_->ids); - out_->value->selectRows(*table_->getW(), *in_->ids); -} - -void TableProjection::backward(const UpdateCallback& callback) { - if (table_->getWGrad()) { - CHECK(in_->ids); - out_->grad->addToRows(*table_->getWGrad(), *in_->ids); - parameter_->incUpdate(callback); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/TableProjection.h b/paddle/legacy/gserver/layers/TableProjection.h deleted file mode 100644 index 60286149f4227fbc758dca7864c6d1f67782c7ae..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/TableProjection.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "Projection.h" - -namespace paddle { - -/** - * Table projection takes index data input. It select rows from parameter - * where row_id is in input_ids: - * \f[ - * out.row[i] += table.row[ids[i]] - * \f] - * where \f$out\f$ is out, \f$table\f$ is parameter, \f$ids\f$ is input_ids, - * and \f$i\f$ is row_id. - * - * The config file api is table_projection. - * - * @note If \f$ids[i] = -1\f$, it will be ignored. - */ -class TableProjection : public Projection { - public: - TableProjection(const ProjectionConfig& config, - const ParameterPtr& parameter, - bool useGpu); - /** - * If use sparse row matrix as parameter, prefetch feature ids in input label. - */ - virtual void prefetch(const Argument* in); - virtual void forward(); - virtual void backward(const UpdateCallback& callback); - - protected: - std::unique_ptr table_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/TensorLayer.cpp b/paddle/legacy/gserver/layers/TensorLayer.cpp deleted file mode 100644 index 7f874bce0f2bdf7ab4771e470e2e4535693ecf68..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/TensorLayer.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "TensorLayer.h" - -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(tensor, TensorLayer); - -bool TensorLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* initialize the weightList */ - CHECK_EQ(inputLayers_.size(), 2LU); - CHECK(parameters_[0]); - CHECK(!parameters_[1]); - - // Option the parameters - size_t height = inputLayers_[0]->getSize(); - size_t width = inputLayers_[1]->getSize(); - CHECK_EQ(width * height * getSize(), parameters_[0]->getSize()); - - for (size_t i = 0; i < getSize(); ++i) { - // create a new weight - Weight* w = new Weight(height, width, parameters_[0], i * width * height); - - // append the new weight to the list - weights_.emplace_back(w); - } - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - - return true; -} - -void TensorLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInputValue(0)->getHeight(); - int size = getSize(); - - { resetOutput(batchSize, size); } - - MatrixPtr outV = getOutputValue(); - /* add the bias-vector */ - if (biases_.get() != NULL) { - outV->addBias(*(biases_->getW()), 1); - } - - /* e1 * W * trans(e2) */ { - MatrixPtr input1 = getInputValue(0); - MatrixPtr input2 = getInputValue(1); - MatrixPtr tmpMat = Matrix::create(input2->getHeight(), - input2->getWidth(), - /* trans= */ false, - input2->useGpu()); - REGISTER_TIMER_INFO("TensorFwMulTimer", getName().c_str()); - for (size_t i = 0; i < getSize(); ++i) { - MatrixPtr weights = weights_[i]->getW(); - tmpMat->mul(*input1, *weights, 1, 0); - outV->rowDotMul(i, *tmpMat, *input2); - } - } - - /* activation */ { forwardActivation(); } -} - -void TensorLayer::backward(const UpdateCallback& 
callback) { - /* Do derivation */ { backwardActivation(); } - - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - bool syncFlag = hl_get_sync_flag(); - - /* Calculate the W-gradient for the current layer */ - MatrixPtr input1 = getInputValue(0); - MatrixPtr input2 = getInputValue(1); - MatrixPtr oGrad = getOutputGrad(); - MatrixPtr tmpMat = Matrix::create(input1->getHeight(), - input1->getWidth(), - /* trans= */ false, - input1->useGpu()); - - /* trans(grad * e1) * e2 */ { - REGISTER_TIMER_INFO("TensorGradMulTimer", getName().c_str()); - for (size_t i = 0; i < getSize(); ++i) { - if (weights_[i]->getWGrad()) { - tmpMat->rowScale(i, *input1, *oGrad); - MatrixPtr input1_T = tmpMat->getTranspose(); - weights_[i]->getWGrad()->mul(*input1_T, *input2, 1, 1); - } - } - } - - hl_set_sync_flag(false); - - /* Calculate the input layers error */ { - MatrixPtr preGrad1 = getInputGrad(0); - MatrixPtr preGrad2 = getInputGrad(1); - - REGISTER_TIMER_INFO("TensorBpMulTimer", getName().c_str()); - for (size_t i = 0; i < getSize(); ++i) { - MatrixPtr weights = weights_[i]->getW(); - - if (NULL != preGrad1) { /* (grad * e2) * trans(W) */ - tmpMat->rowScale(i, *input2, *oGrad); - MatrixPtr weights_T = weights->getTranspose(); - preGrad1->mul(*tmpMat, *weights_T, 1, 1); - } - if (NULL != preGrad2) { /* (grad * e1) * W */ - tmpMat->rowScale(i, *input1, *oGrad); - preGrad2->mul(*tmpMat, *weights, 1, 1); - } - } - } - hl_set_sync_flag(syncFlag); - parameters_[0]->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/TensorLayer.h b/paddle/legacy/gserver/layers/TensorLayer.h deleted file mode 100644 index fc491a7c9f223cf0dff6d878c6ec27a858c7c7b7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/TensorLayer.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -/** - * @brief TensorLayer takes two input vectors. - * \f[ - * y_{i} = x_{1} * W_{i} * x_{2}^{\rm T}, i=0, 1, ...,K-1 - * \f] - * - * - \f$x_{1}\f$: the first input, size is M. - * - \f$x_{2}\f$: the second input, size is N. - * - y: output, size is K. - * - \f$y_{i}\f$: i-th element of y. - * - \f$W_{i}\f$: the i-th learned weight, dimensions: [M, N]. - * - \f$x_{2}^{\rm T}\f$: the transpose of \f$x_{2}\f$. - * - * The config file api is tensor_layer. - */ - -class TensorLayer : public Layer { - protected: - WeightList weights_; - std::unique_ptr biases_; - - public: - explicit TensorLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - Weight& getWeight(int idx) { return *weights_[idx]; } - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/TransLayer.cpp b/paddle/legacy/gserver/layers/TransLayer.cpp deleted file mode 100644 index fd1d435ea5f53785c9c416146c642637adc786a8..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/TransLayer.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "TransLayer.h" -#include "paddle/legacy/utils/Logging.h" -namespace paddle { - -REGISTER_LAYER(trans, TransLayer); - -bool TransLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* the size of inputs for trans-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1); - - return true; -} - -void TransLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - MatrixPtr input = getInputValue(0); - int height = input->getHeight(); - int width = input->getWidth(); - - resizeOutput(width, height); - - MatrixPtr outV = getOutputValue(); - - /* outV's memory has been allocated, so memAlloc = false */ - input->transpose(outV, false); - if (getInputGrad(0)) { - zeroGrad(); - } -} - -void TransLayer::backward(const UpdateCallback& callback) { - (void)callback; - - MatrixPtr outputGrad = getOutputGrad(); - if (outputGrad == NULL) { - return; - } - MatrixPtr preGrad = getInputGrad(0); - if (preGrad) { - MatrixPtr transGrad = Matrix::create(preGrad->getHeight(), - preGrad->getWidth(), - /* trans= */ false, - preGrad->useGpu()); - outputGrad->transpose(transGrad, false); - preGrad->add(*transGrad); - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/TransLayer.h b/paddle/legacy/gserver/layers/TransLayer.h deleted file mode 100644 index 
0a6b13933f83f30a07ed63d722dbb612c64edae7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/TransLayer.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" - -namespace paddle { -/** - * A layer for transposing a minibatch matrix. - * \f[ - y = x^\mathrm{T} - * \f] - * where \f$x\f$ is (M x N) input, and \f$y\f$ is (N x M) output. - * - * The config file api is trans_layer. - */ -class TransLayer : public Layer { - public: - explicit TransLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr) override; -}; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/TransposedFullMatrixProjection.cpp b/paddle/legacy/gserver/layers/TransposedFullMatrixProjection.cpp deleted file mode 100644 index c8533dc7d78ec4fd3629e29e6c1c3e73c6acdc17..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/TransposedFullMatrixProjection.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Projection.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * @brief TransposedFullMatrixProjection performs full matrix multiplication: - * out.row[i] += in.row[i] * weight.transpose - * - * The config file api is trans_full_matrix_projection. - */ -class TransposedFullMatrixProjection : public Projection { - public: - TransposedFullMatrixProjection(const ProjectionConfig& config, - ParameterPtr parameter, - bool useGPu); - virtual void forward(); - virtual void backward(const UpdateCallback& callback); - - protected: - std::unique_ptr weight_; -}; - -REGISTER_PROJECTION(trans_fc, TransposedFullMatrixProjection); - -TransposedFullMatrixProjection::TransposedFullMatrixProjection( - const ProjectionConfig& config, ParameterPtr parameter, bool useGpu) - : Projection(config, parameter, useGpu) { - weight_.reset( - new Weight(config.output_size(), config.input_size(), parameter)); -} - -void TransposedFullMatrixProjection::forward() { - REGISTER_TIMER_INFO("FwMulTimer", getName().c_str()); - out_->value->mul(*(in_->value), *(weight_->getW()->getTranspose()), 1, 1); -} - -void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) { - bool syncFlag = hl_get_sync_flag(); - - /* Calculate the W-gradient for the current layer */ - if (weight_->getWGrad()) { - REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); - weight_->getWGrad()->mul( - *(out_->grad->getTranspose()), *(in_->value), 1, 1); - } - - // If callback does not change value, backprop error asynchronously so that - // we can do the callback concurrently. 
- // This is still a little bit dangerous since theoretically for - // SyncMultiGpuMachine it is possible that the value copyback can still - // happen at the same time as the error backprop where the value is being - // used. - hl_set_sync_flag(false); - - /* Calculate the input layers error */ - if (in_->grad) { - REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); - in_->grad->mul(*(out_->grad), *(weight_->getW()), 1, 1); - } - - hl_set_sync_flag(syncFlag); - parameter_->incUpdate(callback); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/UpsampleLayer.cpp b/paddle/legacy/gserver/layers/UpsampleLayer.cpp deleted file mode 100644 index 3ff5332e6401acc3a28c9808fddd4812a7323544..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/UpsampleLayer.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include "UpsampleLayer.h" -#include "iostream" - -namespace paddle { - -REGISTER_LAYER(upsample, UpsampleLayer); - -size_t UpsampleLayer::getOutputSize() { - if (upsampleSize_ == 0) { - upsampleSize_ = imgSize_ * scale_ - static_cast(padOutX_); - upsampleSizeY_ = imgSizeY_ * scaleY_ - static_cast(padOutY_); - } - return upsampleSize_ * upsampleSizeY_ * channels_; -} - -bool UpsampleLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2U); - CHECK_EQ(config_.inputs_size(), 2); - const auto& conf = config_.inputs(0).upsample_conf(); - const auto& img_conf = conf.image_conf(); - - imgSizeY_ = - img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size(); - imgSize_ = img_conf.img_size(); - channels_ = img_conf.channels(); - - CHECK((conf.has_upsample_size()) || (conf.has_scale())) - << "scale or upsample_size is required."; - - if (conf.has_upsample_size()) { - upsampleSize_ = conf.upsample_size(); - upsampleSizeY_ = upsampleSize_; - if (conf.has_upsample_size_y()) { - upsampleSizeY_ = conf.upsample_size_y(); - } - } else { - if (!conf.has_scale_y()) { - scale_ = scaleY_ = conf.scale_y(); - CHECK_GT(static_cast(scale_), 1); - } else { - scale_ = conf.scale(); - scaleY_ = conf.scale_y(); - } - padOutX_ = conf.pad_out_x(); - padOutY_ = conf.pad_out_y(); - CHECK(!padOutX_ || scale_ == 2) - << "Output height padding compensation requires scale_ == 2"; - CHECK(!padOutY_ || scaleY_ == 2) - << "Output width padding compensation requires scaleY_ == 2"; - upsampleSize_ = upsampleSizeY_ = 0; - } - return true; -} - -void UpsampleLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr input = getInputValue(0); - MatrixPtr mask = inputLayers_[1]->getOutput("mask").value; - - size_t batchSize = input->getHeight(); - size_t outSize = getOutputSize(); - - CHECK_EQ(input->getWidth(), mask->getWidth()); - CHECK_EQ(mask->getHeight(), batchSize); - 
resetOutput(batchSize, outSize); - - MatrixPtr output = getOutputValue(); - output->upsampleForward(*input, - *mask, - imgSize_, - imgSizeY_, - channels_, - upsampleSize_, - upsampleSizeY_); -} - -void UpsampleLayer::backward(const UpdateCallback& callback) { - MatrixPtr mask = inputLayers_[1]->getOutput("mask").value; - MatrixPtr inputGrad = getInputGrad(0); - MatrixPtr outputGrad = getOutputGrad(); - inputGrad->upsampleBackward(*outputGrad, - *mask, - imgSize_, - imgSizeY_, - channels_, - upsampleSize_, - upsampleSizeY_); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/UpsampleLayer.h b/paddle/legacy/gserver/layers/UpsampleLayer.h deleted file mode 100644 index 2fe5938244c81ab25c66083cc1ad63ba15618aa1..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/UpsampleLayer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "Layer.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Stat.h" - -namespace paddle { - -/** - * This layer transpose the pooling process. - * It takes two input, the first input is the input data, and - * the second is the mask data from the max-pool-with-mask layer. 
- * - */ - -class UpsampleLayer : public Layer { - public: - explicit UpsampleLayer(const LayerConfig& config) : Layer(config) {} - ~UpsampleLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - - size_t getOutputSize(); - - protected: - size_t scale_, scaleY_; - size_t upsampleSize_, upsampleSizeY_; - size_t padOutX_, padOutY_; - size_t imgSize_, imgSizeY_; - size_t channels_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ValidationLayer.cpp b/paddle/legacy/gserver/layers/ValidationLayer.cpp deleted file mode 100644 index 9956fd2ed41464eae096911620e160f5ecd89da3..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ValidationLayer.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include - -#include "ValidationLayer.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -bool ValidationLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - return Layer::init(layerMap, parameterMap); -} - -void ValidationLayer::forward(PassType passType) { - Layer::forward(passType); - - MatrixPtr output = getInputValue(*getOutputLayer()); - CHECK(output); - IVectorPtr label = getInputLabel(*getLabelLayer()); - CHECK(label); - validationImp(output, label); -} - -void ValidationLayer::backward(const UpdateCallback& callback) { - (void)callback; -} - -bool AucValidation::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - bool ret = ValidationLayer::init(layerMap, parameterMap); - EvaluatorConfig config; - config.set_name(getName()); - config.set_type("last-column-auc"); - config.add_input_layers(inputLayers_[0]->getName()); - config.add_input_layers(inputLayers_[1]->getName()); - if (3 == inputLayers_.size()) { - config.add_input_layers(inputLayers_[2]->getName()); - } - evaluator_.reset(Evaluator::create(config)); - passBegin_ = false; - return ret; -} - -void AucValidation::validationImp(MatrixPtr output, IVectorPtr label) { - if (!passBegin_) { - passBegin_ = true; - evaluator_->start(); - } - - bool supportWeight = (3 == inputLayers_.size()) ? true : false; - MatrixPtr weight = supportWeight ? 
getInputValue(*inputLayers_[2]) : nullptr; - if (dynamic_cast(output.get())) { - size_t height = output->getHeight(); - size_t width = output->getWidth(); - Matrix::resizeOrCreate(cpuOutput_, - height, - width, - /* trans=*/false, - /* useGpu=*/false); - cpuOutput_->copyFrom(*output); - IVector::resizeOrCreate(cpuLabel_, height, false); - cpuLabel_->copyFrom(*label); - - if (supportWeight) { - Matrix::resizeOrCreate(cpuWeight_, height, (size_t)1, false, false); - cpuWeight_->copyFrom(*weight); - } - - output = cpuOutput_; - label = cpuLabel_; - weight = cpuWeight_; - } - - for (size_t i = 0; i < output->getHeight(); i++) { - float y1 = output->getData()[i * output->getWidth() + 1]; - int* labels = label->getData(); - predictArray_.push_back(PredictionResult(y1, labels[i])); - } - std::vector arguments; - if (3 == inputLayers_.size()) { - arguments.resize(3); - arguments[2].value = weight; - } else { - arguments.resize(2); - } - arguments[0].value = output; - arguments[1].ids = label; - evaluator_->evalImp(arguments); -} - -void AucValidation::onPassEnd() { - if (!FLAGS_predict_file.empty()) { - std::ofstream fs(FLAGS_predict_file); - CHECK(fs) << "Fail to open " << FLAGS_predict_file; - for (auto& res : predictArray_) { - fs << res.out << " " << res.label << std::endl; - } - } - - evaluator_->finish(); - LOG(INFO) << *evaluator_; - passBegin_ = false; - predictArray_.clear(); -} - -bool PnpairValidation::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - bool ret = ValidationLayer::init(layerMap, parameterMap); - if (!ret) return ret; - CHECK_GE(inputLayers_.size(), 3UL); - CHECK_LE(inputLayers_.size(), 4UL); - EvaluatorConfig config; - config.set_name(getName()); - config.set_type("pnpair"); - config.add_input_layers(inputLayers_[0]->getName()); - config.add_input_layers(inputLayers_[1]->getName()); - config.add_input_layers(inputLayers_[2]->getName()); - if (4 == inputLayers_.size()) { - config.add_input_layers(inputLayers_[3]->getName()); - } 
- evaluator_.reset(Evaluator::create(config)); - passBegin_ = false; - return true; -} - -void PnpairValidation::validationImp(MatrixPtr output, IVectorPtr label) { - if (!passBegin_) { - passBegin_ = true; - evaluator_->start(); - } - MatrixPtr weight = - (4 == inputLayers_.size()) ? getInputValue(*inputLayers_[3]) : nullptr; - IVectorPtr info = getInputLabel(*getInfoLayer()); - std::vector arguments; - if (4 == inputLayers_.size()) { - arguments.resize(4); - arguments[3].value = weight; - } else { - arguments.resize(3); - } - arguments[0].value = output; - arguments[1].ids = label; - arguments[2].ids = info; - evaluator_->evalImp(arguments); -} - -void PnpairValidation::onPassEnd() { - if (!FLAGS_predict_file.empty()) { - (dynamic_cast(evaluator_.get()))->printPredictResults(); - } - evaluator_->finish(); - LOG(INFO) << *evaluator_; - passBegin_ = false; -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/ValidationLayer.h b/paddle/legacy/gserver/layers/ValidationLayer.h deleted file mode 100644 index fbc94e8ef570e2eec1d3737aca97bbf91c1392b2..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/ValidationLayer.h +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include - -#include "Layer.h" -#include "paddle/legacy/gserver/evaluators/Evaluator.h" - -DECLARE_int32(trainer_id); - -namespace paddle { - -class ValidationLayer : public Layer { - public: - explicit ValidationLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - LayerPtr getOutputLayer() { return inputLayers_[0]; } - - LayerPtr getLabelLayer() { return inputLayers_[1]; } - - LayerPtr getInfoLayer() { - assert(inputLayers_.size() > 2); - return inputLayers_[2]; - } - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback = nullptr) override; - - virtual void validationImp(MatrixPtr outputValue, IVectorPtr label) = 0; - - void onPassEnd() override = 0; -}; - -/* - * AucValidation - */ -class AucValidation : public ValidationLayer { - public: - explicit AucValidation(const LayerConfig& config) - : ValidationLayer(config), - cpuOutput_(nullptr), - cpuLabel_(nullptr), - cpuWeight_(nullptr) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void validationImp(MatrixPtr outputValue, IVectorPtr label) override; - - void onPassEnd() override; - - struct PredictionResult { - PredictionResult(real __out, int __label) : out(__out), label(__label) {} - real out; - int label; - }; - std::vector predictArray_; - - private: - bool passBegin_; - std::unique_ptr evaluator_; - MatrixPtr cpuOutput_; - IVectorPtr cpuLabel_; - MatrixPtr cpuWeight_; -}; - -/* - * positive-negative pair rate Validation - */ -class PnpairValidation : public ValidationLayer { - public: - explicit PnpairValidation(const LayerConfig& config) - : ValidationLayer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void validationImp(MatrixPtr outputValue, IVectorPtr label) override; - - void onPassEnd() override; - - private: - bool passBegin_; - std::unique_ptr evaluator_; 
-}; - -typedef std::shared_ptr ValidationLayerPtr; -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/WarpCTCLayer.cpp b/paddle/legacy/gserver/layers/WarpCTCLayer.cpp deleted file mode 100644 index 6b1656a523d4ac630ec3fd8d934ab44844a0d1f8..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/WarpCTCLayer.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "WarpCTCLayer.h" - -namespace paddle { - -REGISTER_LAYER(warp_ctc, WarpCTCLayer); - -bool WarpCTCLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parament class */ - Layer::init(layerMap, parameterMap); - - CHECK_EQ(inputLayers_.size(), 2UL); - - /* The inputLayers_[0] must be sequence output without softmax */ - numClasses_ = config_.size(); - CHECK_GE(numClasses_, 2UL); - CHECK_EQ(numClasses_, inputLayers_[0]->getSize()); - - blank_ = config_.blank(); - CHECK_LT(blank_, numClasses_); - - normByTimes_ = config_.norm_by_times(); - - // We don't need sequenceStartPositions because each sample of output_ is - // for the cost of one sequence. 
- setNeedSequenceInfo(false); - - return true; -} - -void WarpCTCLayer::forward(PassType passType) { - Layer::forward(passType); - - const Argument& output = getInput(0); - const Argument& labels = getInput(1); - - CHECK(output.sequenceStartPositions); - CHECK(labels.sequenceStartPositions); - CHECK(labels.ids); - - size_t numSequences = labels.sequenceStartPositions->getSize() - 1; - CHECK_EQ(numSequences, output.sequenceStartPositions->getSize() - 1); - - resizeOutput(numSequences, 1); - - const int* cpuLabelStartPositions = - labels.sequenceStartPositions->getData(false); - const int* cpuOutputStartPositions = - output.sequenceStartPositions->getData(false); - - std::vector cpuLabelLengths(numSequences); - std::vector cpuOutputLengths(numSequences); - for (size_t i = 0; i < numSequences; i++) { - cpuLabelLengths[i] = - cpuLabelStartPositions[i + 1] - cpuLabelStartPositions[i]; - cpuOutputLengths[i] = - cpuOutputStartPositions[i + 1] - cpuOutputStartPositions[i]; - } - - /* Get the maximum sequence length */ - maxSequenceLength_ = 0; - maxSequenceLength_ = *std::max_element( - cpuOutputLengths.data(), cpuOutputLengths.data() + numSequences); - - Matrix::resizeOrCreate(batchValue_, - /* height */ numSequences * maxSequenceLength_, - /* width */ numClasses_, - /* trans */ false, - /* useGpu */ useGpu_); - - Matrix::resizeOrCreate(batchGrad_, - /* height */ numSequences * maxSequenceLength_, - /* width */ numClasses_, - /* trans */ false, - /* useGpu */ useGpu_); - batchGrad_->zeroMem(); - - seq2batchPadding(output.value, batchValue_, output.sequenceStartPositions); - - /* labels always in CPU memory */ - IVector::resizeOrCreate(cpuLabels_, - /* size */ (labels.ids)->getSize(), - /* useGpu */ false); - cpuLabels_->copyFrom(*(labels.ids)); - - /* labels always in CPU memory */ - Matrix::resizeOrCreate(cpuCosts_, - /* height */ numSequences, - /* width */ 1, - /* trans */ false, - /* useGpu */ false); - - /* Init warp-ctc options */ - hl_warpctc_options_t options; - 
hl_warpctc_init(blank_, useGpu_, &options); - - /* Get the needed workspace size */ - size_t workspaceBytes = 0; - hl_warpctc_get_workspace_size(cpuLabelLengths.data(), - cpuOutputLengths.data(), - numClasses_, - numSequences, - &options, - &workspaceBytes); - CHECK_GT(workspaceBytes, 0UL); - - size_t workspaceLength = workspaceBytes / sizeof(real) + 1; - Vector::resizeOrCreate(workspace_, - /* size */ workspaceLength, - /* useGpu */ useGpu_); - - hl_warpctc_compute_loss(batchValue_->getData(), - batchGrad_->getData(), - cpuLabels_->getData(), - cpuLabelLengths.data(), - cpuOutputLengths.data(), - numClasses_, - numSequences, - cpuCosts_->getData(), - workspace_->getData(), - &options); - - /* Copy the costs */ - output_.value->copyFrom(*cpuCosts_); -} - -void WarpCTCLayer::backward(const UpdateCallback& callback) { - (void)callback; - - const Argument& output = getInput(0); - CHECK(batchGrad_); - - batch2seqPadding( - output.grad, batchGrad_, output.sequenceStartPositions, normByTimes_); -} - -void WarpCTCLayer::seq2batchPadding(const MatrixPtr& seqValue, - MatrixPtr& batchValue, - const ICpuGpuVectorPtr& seqStartPositions) { - size_t numSequences = seqStartPositions->getSize() - 1; - const int* seqStartPositionsData = seqStartPositions->getData(useGpu_); - - real* seqData = seqValue->getData(); - real* batchData = batchValue->getData(); - if (useGpu_) { - hl_sequence2batch_copy_padding(batchData, - seqData, - seqStartPositionsData, - numClasses_, - maxSequenceLength_, - numSequences, - false, - true); - } else { - for (size_t i = 0; i < maxSequenceLength_; i++) { - for (size_t j = 0; j < numSequences; j++) { - size_t sequenceStart = seqStartPositionsData[j]; - size_t sequenceLength = - seqStartPositionsData[j + 1] - seqStartPositionsData[j]; - if (i < sequenceLength) { - memcpy(batchData + (i * numSequences + j) * numClasses_, - seqData + (sequenceStart + i) * numClasses_, - numClasses_ * sizeof(real)); - } else { - memset(batchData + (i * numSequences + j) * 
numClasses_, - 0, - numClasses_ * sizeof(real)); - } - } - } - } -} - -void WarpCTCLayer::batch2seqPadding(const MatrixPtr& seqValue, - MatrixPtr& batchValue, - const ICpuGpuVectorPtr& seqStartPositions, - bool normByTimes) { - size_t numSequences = seqStartPositions->getSize() - 1; - const int* seqStartPositionsData = seqStartPositions->getData(useGpu_); - - real* seqData = seqValue->getData(); - real* batchData = batchValue->getData(); - if (useGpu_) { - hl_sequence2batch_copy_padding(batchData, - seqData, - seqStartPositionsData, - numClasses_, - maxSequenceLength_, - numSequences, - normByTimes, - false); - } else { - for (size_t i = 0; i < numSequences; i++) { - int sequenceStart = seqStartPositionsData[i]; - int sequenceLength = - seqStartPositionsData[i + 1] - seqStartPositionsData[i]; - real scale = normByTimes ? (1.0f / (real)sequenceLength) : 1.0f; - for (int j = 0; j < sequenceLength; j++) { - for (size_t k = 0; k < numClasses_; k++) { - seqData[(sequenceStart + j) * numClasses_ + k] = - batchData[(j * numSequences + i) * numClasses_ + k] * scale; - } - } - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/layers/WarpCTCLayer.h b/paddle/legacy/gserver/layers/WarpCTCLayer.h deleted file mode 100644 index 3017ca794ecc14f5a3cbd0b302a4953a191a5065..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/layers/WarpCTCLayer.h +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Layer.h" - -namespace paddle { - -/** - * @brief A layer integrating the open-source warp-ctc library - * to compute connectionist - * temporal classification cost. - * - * The config file api is warp_ctc_layer. - */ -class WarpCTCLayer : public Layer { - public: - explicit WarpCTCLayer(const LayerConfig& config) : Layer(config) {} - ~WarpCTCLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - - protected: - /** - * sequence matrix and batch matrix copy: - * sequence (s0, s0, s0, s0; s1, s1; s2, s2, s2; s3) - * batch (s0, s1, s2, s3; s0, s1, s2, 0; s0, 0, s2, 0; s0, 0, 0, 0) - */ - void seq2batchPadding(const MatrixPtr& seqValue, - MatrixPtr& batchValue, - const ICpuGpuVectorPtr& seqStartPositions); - void batch2seqPadding(const MatrixPtr& seqValue, - MatrixPtr& batchValue, - const ICpuGpuVectorPtr& seqStartPositions, - bool normByTimes); - - protected: - size_t numClasses_; - size_t blank_; - size_t maxSequenceLength_; - bool normByTimes_; - - MatrixPtr batchValue_; - MatrixPtr batchGrad_; - VectorPtr workspace_; - - IVectorPtr cpuLabels_; - MatrixPtr cpuCosts_; -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/tests/.gitignore b/paddle/legacy/gserver/tests/.gitignore deleted file mode 100644 index 7f1845d7ec4c35ec39da427d9961a17b84e4980d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/.gitignore +++ /dev/null @@ -1 +0,0 @@ -pyDataProviderBase.py diff --git a/paddle/legacy/gserver/tests/CMakeLists.txt b/paddle/legacy/gserver/tests/CMakeLists.txt deleted file mode 100644 index 93ddf5aa233017d4f5139a8add6c69ef3a4682b4..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/CMakeLists.txt +++ /dev/null @@ -1,103 +0,0 @@ -# gserver 
pacakge unittests -add_simple_unittest(test_LinearChainCRF) -add_simple_unittest(test_RecurrentLayer) - -if(NOT MOBILE_INFERENCE) - add_simple_unittest(test_MultinomialSampler) -endif() - -function(gserver_test TARGET) - add_unittest_without_exec(${TARGET} - ${TARGET}.cpp - LayerGradUtil.cpp) - add_test(NAME ${TARGET} - COMMAND ${TARGET}) -endfunction() - -add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/concat_dotmul_a.conf - COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/* ${CMAKE_CURRENT_BINARY_DIR} -) -add_custom_target(copy_gserver_conf ALL DEPENDS concat_dotmul_a.conf) - -gserver_test(test_LayerGrad) -gserver_test(test_CRFLayerGrad) -gserver_test(test_CrossEntropyOverBeamGrad) -gserver_test(test_SeqSliceLayerGrad) -gserver_test(test_ActivationGrad) -gserver_test(test_ConvTrans) -gserver_test(test_PriorBox) -gserver_test(test_DetectionOutput) -gserver_test(test_ConvUnify) -gserver_test(test_BatchNorm) -gserver_test(test_KmaxSeqScore) -gserver_test(test_Expand) -gserver_test(test_MaxPoolingWithMaskOutput) -gserver_test(test_Upsample) - -set(PYTHON_PATH - ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d - ${PADDLE_BINARY_DIR}/python/:${PADDLE_BINARY_DIR}/paddle/legacy/gserver/tests) -function(gserver_test_with_python TARGET) - add_unittest_without_exec(${TARGET} ${TARGET}.cpp) - add_test(NAME ${TARGET} - COMMAND ${PYTHON_PATH} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET} - WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle/) -endfunction() - -gserver_test_with_python(test_PyDataProvider2) -if(WITH_PYTHON) - gserver_test_with_python(test_PyDataProvider) -endif() -if(NOT MOBILE_INFERENCE) - gserver_test_with_python(test_CompareTwoNets) - # TODO(yuyang18): There is some bug in test_RecurrentGradientMachine, I will fix it. 
- gserver_test_with_python(test_RecurrentGradientMachine) -endif() - -########## test_MKLDNN layers and activations ########## -if(WITH_MKLDNN) - add_unittest_without_exec(test_MKLDNN - test_MKLDNN.cpp - MKLDNNTester.cpp - LayerGradUtil.cpp) - add_test(NAME test_MKLDNN - COMMAND ${PYTHON_PATH} ${CMAKE_CURRENT_BINARY_DIR}/test_MKLDNN - WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle) -endif() - -############### test_WarpCTCLayer ####################### -if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE) - add_unittest_without_exec(test_WarpCTCLayer - test_WarpCTCLayer.cpp) - add_test(NAME test_WarpCTCLayer - COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_WarpCTCLayer --warpctc_dir=${WARPCTC_LIB_DIR} - WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle) -endif() - -if(NOT MOBILE_INFERENCE) - ################## test_Evaluator ############# - add_unittest(test_Evaluator - test_Evaluator.cpp) - - ########### test_NetworkCompare ############### - add_unittest_without_exec(test_NetworkCompare - test_NetworkCompare.cpp) - if(WITH_GPU) - set(use_gpu true) - else() - set(use_gpu false) - endif() - add_test(NAME test_NetworkCompare - COMMAND ${PYTHON_PATH} ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=${use_gpu} - WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle) - - ############ test_CompareSparse ################ - add_unittest_without_exec(test_CompareSparse - test_CompareSparse.cpp) - if(NOT ON_TRAVIS) - add_test(NAME test_CompareSparse - COMMAND ${PYTHON_PATH} ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port -n 6 - ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse - WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle/) - endif() -endif() diff --git a/paddle/legacy/gserver/tests/LayerGradUtil.cpp b/paddle/legacy/gserver/tests/LayerGradUtil.cpp deleted file mode 100644 index f08c1cd1d50d3ead5373a7af64619c8c0ddc78be..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/LayerGradUtil.cpp +++ /dev/null @@ -1,854 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "LayerGradUtil.h" - -DECLARE_bool(thread_local_rand_use_global_seed); - -namespace paddle { -real getCostSum(LayerPtr& testLayer, MatrixPtr weights) { - testLayer->forward(PASS_GC); - std::vector outArgs; - outArgs.push_back(testLayer->getOutput()); - if (weights) { - outArgs[0].value->dotMul(*outArgs[0].value, *weights); - } - return Argument::sum(outArgs); -} - -real getDiffAndPrint(real newCost1, - real newCost2, - real callbackCount, - char fill, - string testLayerName, - string name, - real step, - real delta) { - EXPECT_FALSE(std::isnan(newCost1)); - EXPECT_FALSE(std::isnan(newCost2)); - - real trueDelta = (newCost1 - newCost2) * (callbackCount / 2.); - real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1; - LOG(INFO) << setiosflags(ios::left) << setfill(fill) << setw(20) - << testLayerName << " " << setw(20) << name << "step=" << setw(15) - << step << "cost1=" << setw(10) << newCost1 << "cost2=" << setw(10) - << newCost2 << "true_delta=" << setw(15) << trueDelta - << "analytic_delta=" << setw(15) << delta << "diff=" << diff - << (abs(diff) > 0.01 ? 
" ***" : ""); - if (fabs(diff - 1) < 0.02) { - LOG(INFO) << "The previous diff might be caused by not accumulating" - << " parameter gradients in backward()"; - } - return diff; -} - -void testState(LayerPtr testLayer, - vector& dataLayers, - vector& datas) { - auto batchSize = datas[0].getBatchSize(); - Argument data; - ICpuGpuVectorPtr sequenceStartPositions = - ICpuGpuVector::create(2, /* useGpu= */ false); - sequenceStartPositions->getMutableData(false)[0] = 0; - sequenceStartPositions->getMutableData(false)[1] = batchSize; - data.sequenceStartPositions = sequenceStartPositions; - testLayer->resetState(); - for (size_t j = 0; j < datas.size(); ++j) { - if (datas[j].value) { - data.value = datas[j].value; - } - if (datas[j].ids) { - data.ids = datas[j].ids; - } - dataLayers[j]->setData(data); - dataLayers[j]->forward(PASS_TEST); - } - testLayer->forward(PASS_TEST); - Argument batchOut; - batchOut.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); - - sequenceStartPositions->getMutableData(false)[1] = 1; - testLayer->resetState(); - - auto testLayerState = [&](int batchId) { - for (size_t j = 0; j < datas.size(); ++j) { - if (datas[j].value) { - data.value = datas[j].value->subMatrix(batchId, 1); - } - if (datas[j].ids) { - data.ids = IVector::create( - datas[j].ids->getData() + batchId, 1, FLAGS_use_gpu); - } - dataLayers[j]->setData(data); - dataLayers[j]->forward(PASS_TEST); - } - - testLayer->forward(PASS_TEST); - Argument out; - out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); - if (batchOut.value) { - size_t dim = batchOut.value->getWidth(); - ASSERT_TRUE((bool)out.value); - EXPECT_EQ(dim, out.value->getWidth()); - EXPECT_EQ(1UL, out.value->getHeight()); - auto ret = std::mismatch(batchOut.value->getData() + batchId * dim, - batchOut.value->getData() + (batchId + 1) * dim, - out.value->getData()); - if (ret.second != out.value->getData() + dim) { - // If reaches here, the test will fail - EXPECT_EQ(*ret.first, *ret.second); - 
} - } else if (batchOut.ids) { - ASSERT_TRUE((bool)out.ids); - EXPECT_EQ(1UL, out.ids->getSize()); - EXPECT_EQ(batchOut.ids->getElement(batchId), out.ids->getElement(0)); - } - }; - - CHECK_GT(batchSize, 0); - std::vector statePtrs; - statePtrs.reserve(batchSize); - - // Test layer setState() and getState() - for (int i = 0; i < batchSize; ++i) { - statePtrs.push_back(testLayer->getState()); - testLayerState(i); - } - for (int k = 0; k < batchSize - 1; ++k) { - testLayer->setState(statePtrs[k]); - for (int i = k; i < batchSize; ++i) { - testLayerState(i); - } - } -} - -void testBatchState(LayerPtr testLayer, - vector& dataLayers, - vector& datas) { - auto batchSize = datas[0].getBatchSize(); - Argument data; - /*two sequences*/ - size_t numSequences = 2; - ICpuGpuVectorPtr sequenceStartPositions = - ICpuGpuVector::create(numSequences + 1, /* useGpu= */ false); - int* cpuStarts = sequenceStartPositions->getMutableData(false); - int len = ::rand() % (batchSize - 1); - cpuStarts[0] = 0; - cpuStarts[1] = len > 0 ? len : 1; - cpuStarts[2] = batchSize; - - data.sequenceStartPositions = sequenceStartPositions; - for (size_t j = 0; j < datas.size(); ++j) { - if (datas[j].value) { - data.value = datas[j].value; - } - if (datas[j].ids) { - data.ids = datas[j].ids; - } - dataLayers[j]->setData(data); - dataLayers[j]->forward(PASS_TEST); - } - testLayer->resetState(); - testLayer->forward(PASS_TEST); - Argument batchOut; - batchOut.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); - - /*split one miniBatch into two miniBatchs*/ - std::vector seqSplitPos; - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - int len = ::rand() % (cpuStarts[seqId + 1] - cpuStarts[seqId]); - len = len > 0 ? 
len : 1; - seqSplitPos.push_back(cpuStarts[seqId] + len); - } - - std::vector start; /*seq start pos in source data*/ - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - start.push_back(cpuStarts[seqId]); - } - testLayer->resetState(); - Argument splitData; - for (size_t batchId = 0; batchId < 2; ++batchId) { - size_t splitBatchSize = 0; - std::vector seqLens; - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - int seqLen = (batchId == 0) ? seqSplitPos[seqId] - cpuStarts[seqId] - : cpuStarts[seqId + 1] - seqSplitPos[seqId]; - seqLens.push_back(seqLen); - splitBatchSize += seqLen; - } - ICpuGpuVectorPtr cpuSeqStartPos = - ICpuGpuVector::create(3, /* useGpu= */ false); - int* seqStartPosData = cpuSeqStartPos->getMutableData(false); - seqStartPosData[0] = 0; - seqStartPosData[1] = seqLens[0]; - seqStartPosData[2] = splitBatchSize; - - CHECK_GT(splitBatchSize, size_t(0)); - splitData.sequenceStartPositions = cpuSeqStartPos; - for (size_t j = 0; j < datas.size(); ++j) { - if (datas[j].value) { - Matrix::resizeOrCreate(splitData.value, - splitBatchSize, - datas[j].value->getWidth(), - false, - FLAGS_use_gpu); - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - if (seqLens[seqId]) { - splitData.value->subMatrix(seqStartPosData[seqId], seqLens[seqId]) - ->copyFrom( - *datas[j].value->subMatrix(start[seqId], seqLens[seqId])); - } - } - } - if (datas[j].ids) { - IVector::resizeOrCreate(splitData.ids, splitBatchSize, FLAGS_use_gpu); - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - if (seqLens[seqId]) { - splitData.ids->subVec(seqStartPosData[seqId], seqLens[seqId]) - ->copyFrom(*datas[j].ids->subVec(start[seqId], seqLens[seqId])); - } - } - } - dataLayers[j]->setData(splitData); - dataLayers[j]->forward(PASS_TEST); - } - - testLayer->forward(PASS_TEST); - Argument out; - out.resizeAndCopyFrom(testLayer->getOutput(), /* useGpu= */ false); - if (batchOut.value) { - size_t dim = batchOut.value->getWidth(); - ASSERT_TRUE((bool)out.value); - 
EXPECT_EQ(dim, out.value->getWidth()); - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - if (seqLens[seqId]) { - out.value->subMatrix(seqStartPosData[seqId], seqLens[seqId]) - ->sub(*batchOut.value->subMatrix(start[seqId], seqLens[seqId])); - } - } - } - - std::vector args; - args.push_back(out); - ASSERT_NEAR(0, Argument::sum(args), 1e-5) << "testBatchState failed"; - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - start[seqId] += seqLens[seqId]; - } - } -} - -double genPerturbation(const real* oldGrad, real* newGrad, size_t dim) { - double gradNorm = 0, dNorm = 0; - for (size_t i = 0; i < dim; ++i) { - newGrad[i] = 2. * rand() / RAND_MAX - 1; // NOLINT - dNorm += newGrad[i] * newGrad[i]; - gradNorm += oldGrad[i] * oldGrad[i]; - } - if (gradNorm > 0) { - real s = 0.5 * sqrt(gradNorm / dNorm); - for (size_t i = 0; i < dim; ++i) { - newGrad[i] = s * newGrad[i] + oldGrad[i]; - } - } - double delta = 0; - for (size_t i = 0; i < dim; ++i) { - delta += oldGrad[i] * newGrad[i]; - } - return delta; -} - -void initWeight(MatrixPtr& weights) { - MatrixPtr tmpMat = weights->clone(); - for (int i = 0; i < int(tmpMat->getElementCnt()); i++) { - tmpMat->getData()[i] = (11 - 2 * (i % 11)); - } - weights->copyFrom(*tmpMat); -} - -void initBatchState(LayerPtr dataLayer, - LayerPtr testLayer, - LayerStatePtr state, - bool useGpu) { - int sequenceNum = dataLayer->getOutput().getNumSequences(); - MatrixPtr prevBatchOutput = - Matrix::create(sequenceNum, testLayer->getSize(), false, useGpu); - MatrixPtr prevBatchState = - Matrix::create(sequenceNum, testLayer->getSize(), false, useGpu); - prevBatchOutput->randomizeUniform(); - prevBatchState->randomizeUniform(); - state->value.clear(); - state->value.push_back(prevBatchOutput); - state->value.push_back(prevBatchState); -} - -void initDataLayer(TestConfig testConf, - std::vector* dataLayers, - vector* datas, - LayerMap* layerMap, - string testLayerName, - size_t batchSize, - bool trans, - bool useGpu) { - 
ICpuGpuVectorPtr sequenceStartPositions; - ICpuGpuVectorPtr subSequenceStartPositions; - IVectorPtr cpuSequenceDims; - for (size_t i = 0; i < testConf.inputDefs.size(); ++i) { - if (testConf.inputDefs[i].inputType != INPUT_SEQUENCE_LABEL) continue; - - const std::vector& labelSeqStartPositions = - testConf.inputDefs[i].labelSeqStartPositions; - if (labelSeqStartPositions.size() != 0) { - CHECK(!sequenceStartPositions); - CHECK_GE(static_cast(labelSeqStartPositions.size()), 2); - - sequenceStartPositions = - ICpuGpuVector::create(labelSeqStartPositions.size(), useGpu); - sequenceStartPositions->copyFrom( - labelSeqStartPositions.data(), labelSeqStartPositions.size(), useGpu); - } - } - - for (size_t i = 0; i < testConf.inputDefs.size(); ++i) { - LayerConfig config; - config.set_name(testConf.inputDefs[i].name); - config.set_type("data"); - config.set_size(testConf.inputDefs[i].dim); - LayerPtr layer = LayerPtr(new DataLayer(config)); - size_t numSequence = sequenceStartPositions - ? sequenceStartPositions->getSize() - 1 - : batchSize / 10 + 1; - - Argument data; - auto fillData = [&](bool trans, int height, int width) { - int newHeight = trans ? height : width; - int newWidth = trans ? 
width : height; - data.value = Matrix::create(newHeight, newWidth, false, useGpu); - data.grad = Matrix::create(newHeight, newWidth, false, useGpu); - }; - switch (testConf.inputDefs[i].inputType) { - case INPUT_DATA: - case INPUT_SEQUENCE_DATA: - case INPUT_HASSUB_SEQUENCE_DATA: - case INPUT_DATA_TARGET: - case INPUT_SEQUENCE_MDIM_DATA: - fillData(trans, layer->getSize(), batchSize); - data.value->randomizeUniform(); - // make sure that multi-class-cross-entry won't encounter negatives - // make sure that multi_binary_label satisfies 0~1 - data.value->add(-0.5); - if (testLayerName != "prelu") { - data.value->sigmoid(*data.value); - } - data.grad->zeroMem(); - break; - case INPUT_LABEL: - case INPUT_SEQUENCE_LABEL: - if (testConf.inputDefs[i].labelInitValue.size() != 0) { - const std::vector& labelInitValue = - testConf.inputDefs[i].labelInitValue; - CHECK_EQ(labelInitValue.size(), batchSize); - data.ids = VectorT::create(batchSize, useGpu); - data.ids->copyFrom(labelInitValue.data(), batchSize); - } else { - data.ids = VectorT::create(batchSize, useGpu); - // now rand number can be 0 to inputDefs[i].dim - data.ids->rand(testConf.inputDefs[i].dim); - } - break; - case INPUT_SPARSE_NON_VALUE_DATA: - data.value = makeRandomSparseMatrix( - batchSize, - layer->getSize(), - /* withValue= */ false, - useGpu, - testConf.inputDefs[i].sparse.equalNnzPerSample); - break; - case INPUT_SPARSE_FLOAT_VALUE_DATA: - data.value = makeRandomSparseMatrix(batchSize, - layer->getSize(), - /* withValue= */ true, - useGpu); - break; - case INPUT_DENSE_DIM_DATA: - fillData(trans, layer->getSize(), numSequence); - data.value->randomizeUniform(); - data.value->add(-0.5); - data.value->sigmoid(*data.value); - data.grad->zeroMem(); - break; - case INPUT_SELF_DEFINE_DATA: { - if (testConf.inputDefs[i].ids.size()) { - data.ids = IVector::create(testConf.inputDefs[i].ids.size(), useGpu); - data.ids->copyFrom(testConf.inputDefs[i].ids.data(), - testConf.inputDefs[i].ids.size()); - } else if 
(testConf.inputDefs[i].selfDefinedData) { - size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); - size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); - CHECK_GT(static_cast(height), 0); - CHECK_GT(static_cast(width), 0); - data.value = Matrix::create(height, width, false, useGpu); - data.grad = Matrix::create(height, width, false, useGpu); - data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); - data.grad->zeroMem(); - } else { - LOG(FATAL) << "No self-defined data are given."; - return; - } - - const std::vector& labelSeqStartPositions = - testConf.inputDefs[i].labelSeqStartPositions; - if (labelSeqStartPositions.size() != 0) { - CHECK_GE(static_cast(labelSeqStartPositions.size()), 2); - - sequenceStartPositions = - ICpuGpuVector::create(labelSeqStartPositions.size(), useGpu); - sequenceStartPositions->copyFrom(labelSeqStartPositions.data(), - labelSeqStartPositions.size(), - useGpu); - data.sequenceStartPositions = sequenceStartPositions; - } - - const std::vector& labelSubSeqStartPositions = - testConf.inputDefs[i].labelSubSeqStartPositions; - if (labelSubSeqStartPositions.size() != 0) { - CHECK_GE(static_cast(labelSubSeqStartPositions.size()), 2); - - subSequenceStartPositions = - ICpuGpuVector::create(labelSubSeqStartPositions.size(), useGpu); - subSequenceStartPositions->copyFrom(labelSubSeqStartPositions.data(), - labelSubSeqStartPositions.size(), - useGpu); - data.subSequenceStartPositions = subSequenceStartPositions; - } - break; - } - default: - LOG(FATAL) << " unknown inputType "; - return; - } - if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA || - testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA || - testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL || - testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA) { - if (!sequenceStartPositions) { - generateSequenceStartPositions(batchSize, sequenceStartPositions); - } - data.sequenceStartPositions = sequenceStartPositions; - } - if 
(testConf.inputDefs[i].inputType == INPUT_HASSUB_SEQUENCE_DATA) { - if (!subSequenceStartPositions) { - generateSubSequenceStartPositions(sequenceStartPositions, - subSequenceStartPositions); - } - data.subSequenceStartPositions = subSequenceStartPositions; - } - if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_MDIM_DATA) { - if (!cpuSequenceDims) { - generateMDimSequenceData(sequenceStartPositions, cpuSequenceDims); - } - data.cpuSequenceDims = cpuSequenceDims; - } - - DataLayerPtr dataLayer = std::dynamic_pointer_cast(layer); - dataLayer->setData(data); - dataLayer->forward(PASS_GC); - dataLayers->push_back(dataLayer); - (*layerMap)[config.name()] = layer; - datas->push_back(data); - } -} - -void initTestLayer(TestConfig testConf, - LayerMap* layerMap, - std::vector* parameters, - LayerPtr* testLayer) { - ParameterMap parameterMap; - size_t index = 0; - LayerConfig testConfig = testConf.layerConfig; - CHECK_EQ(testConf.inputDefs.size(), - size_t(testConf.layerConfig.inputs_size())); - - auto initParameter = [&](string paraName, - size_t paraSize, - bool isStatic, - bool initialize, - ParameterConfig paraConfig) { - paraConfig.set_name(paraName); - paraConfig.set_size(paraSize); - paraConfig.set_is_static(isStatic); - auto para = - std::make_shared(paraConfig, FLAGS_use_gpu, initialize); - para->enableType(PARAMETER_VALUE); - if (!para->isStatic()) { - para->enableType(PARAMETER_GRADIENT); - para->enableType(PARAMETER_MOMENTUM); - } - para->randomize(); - para->setID(index++); - parameters->push_back(para); - parameterMap[paraConfig.name()] = para; - }; - - for (size_t i = 0; i < testConf.inputDefs.size(); i++) { - InputDef inputDef = testConf.inputDefs[i]; - size_t paraSize = inputDef.paraSize; - bool sparse = inputDef.sparse.sparse; - LayerInputConfig& input = *(testConfig.mutable_inputs(i)); - input.set_input_layer_name(inputDef.name); - - if (paraSize) { - constexpr int kParaNameLen = 20; - char paraName[kParaNameLen]; - snprintf(paraName, kParaNameLen, 
"para_%d", (int)i); - input.set_input_parameter_name(paraName); - ParameterConfig paraConfig; - paraConfig.set_is_sparse(sparse); - paraConfig.set_format(inputDef.sparse.format); - if (sparse) { - paraConfig.add_dims((*layerMap)[input.input_layer_name()]->getSize()); - paraConfig.add_dims(testConf.layerConfig.size()); - } - CHECK_GE(testConf.paramInitialStd, 0); - paraConfig.set_initial_mean(testConf.paramInitialMean); - paraConfig.set_initial_std(testConf.paramInitialStd); - initParameter(paraName, paraSize, inputDef.isStatic, false, paraConfig); - } - } - if (testConf.biasSize) { - testConfig.set_bias_parameter_name("bias"); - ParameterConfig paraConfig; - initParameter(testConfig.bias_parameter_name(), - testConf.biasSize, - testConf.staticBias, - true, - paraConfig); - } - - *testLayer = Layer::create(testConfig); - (*layerMap)[testConfig.name()] = *testLayer; - (*testLayer)->init((*layerMap), parameterMap); - (*testLayer)->setNeedGradient(true); -} - -void testPerturbParameter(TestConfig testConf, - const MatrixPtr weights, - const LayerStatePtr state, - real cost, - real callbackCount, - real* maxDiff, - LayerPtr testLayer, - std::vector* parameters) { - char fill = ' '; - for (auto& parameter : *parameters) { - if (parameter->isStatic()) { - continue; - } - - size_t dim = parameter->getSize(); - CpuVector oldPara(dim); - CpuVector newPara(dim); - VectorPtr v = parameter->getBuf(PARAMETER_VALUE); - oldPara.copyFrom(*parameter->getBuf(PARAMETER_VALUE)); - real* newp = newPara.getData(); - real* oldp = oldPara.getData(); - CpuVector cpuGrad(*parameter->getBuf(PARAMETER_GRADIENT)); - vector d(dim); - - double delta = genPerturbation(cpuGrad.getData(), &d[0], dim); - // use a step such that delta / cost is FLAGS_checkgrad_eps - real step = - (delta != 0) ? 
cost / delta * FLAGS_checkgrad_eps : FLAGS_checkgrad_eps; - if (fabs(step) < 1e-6) step = 1e-6; - delta *= step; - - // compute newCost - real newCost[2]; - for (int k = 0; k < 2; k++) { - for (size_t i = 0; i < dim; ++i) { - newp[i] = (k == 0) ? oldp[i] + step * d[i] : oldp[i] - step * d[i]; - } - if (testConf.testBatchState) { - testLayer->setState(state); - } - parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); - parameter->setValueUpdated(); - newCost[k] = getCostSum(testLayer, weights); - } - real diff = getDiffAndPrint(newCost[0], - newCost[1], - callbackCount, - fill, - testLayer->getName(), - parameter->getName(), - step, - delta); - *maxDiff = std::max(*maxDiff, abs(diff)); - // restore parameter - parameter->getBuf(PARAMETER_VALUE)->copyFrom(oldPara); - parameter->setValueUpdated(); - fill = (fill == ' ') ? '.' : ' '; - } -} - -void testPerturbInput(TestConfig testConf, - const MatrixPtr weights, - const LayerStatePtr state, - real cost, - real callbackCount, - real* maxDiff, - LayerPtr testLayer, - std::vector dataLayers) { - char fill = ' '; - for (size_t index = 0; index < testConf.inputDefs.size(); index++) { - InputType inputType = testConf.inputDefs[index].inputType; - if (inputType != INPUT_DATA && inputType != INPUT_SEQUENCE_DATA && - inputType != INPUT_HASSUB_SEQUENCE_DATA) { - continue; - } - - MatrixPtr outV = dataLayers[index]->getOutputValue(); - int height = outV->getHeight(); - int width = outV->getWidth(); - size_t dim = height * width; - - CpuMatrix oldPara(height, width); - CpuMatrix newPara(height, width); - oldPara.copyFrom(*outV); - real* newp = newPara.getData(); - real* oldp = oldPara.getData(); - CpuMatrix cpuGrad(height, width); - cpuGrad.copyFrom(*(dataLayers[index]->getOutputGrad())); - CpuMatrix d(height, width); - real* data = d.getData(); - - double delta = genPerturbation(cpuGrad.getData(), data, dim); - // use a step such that delta / cost is FLAGS_checkgrad_eps - real step = - (delta != 0) ? 
cost / delta * FLAGS_checkgrad_eps : FLAGS_checkgrad_eps; - if (fabs(step) < 1e-6) step = 1e-6; - delta *= step; - - real newCost[2]; - for (int k = 0; k < 2; k++) { - for (size_t i = 0; i < dim; ++i) { - newp[i] = - (k == 0) ? oldp[i] + step * data[i] : oldp[i] - step * data[i]; - } - if (testConf.testBatchState) { - testLayer->setState(state); - } - outV->copyFrom(newPara); - newCost[k] = getCostSum(testLayer, weights); - } - - real diff = getDiffAndPrint(newCost[0], - newCost[1], - callbackCount, - fill, - testLayer->getName(), - dataLayers[index]->getName(), - step, - delta); - *maxDiff = std::max(*maxDiff, abs(diff)); - // restore parameter - outV->copyFrom(oldPara); - fill = (fill == ' ') ? '.' : ' '; - } -} - -void testLayerGradKernel(TestConfig testConf, - string testLayerName, - size_t batchSize, - bool trans, - bool useGpu, - bool useWeight, - float epsilon) { -#ifndef PADDLE_WITH_CUDA - if (useGpu) return; -#endif - FLAGS_use_gpu = useGpu; - FLAGS_prev_batch_state = testConf.testBatchState; - MatrixPtr weights = nullptr; - testConf.layerConfig.set_name(testLayerName); - LOG(INFO) << " layer_type=" << testConf.layerConfig.type() - << " useGpu=" << useGpu; - - // data layer initialize - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer(testConf, - &dataLayers, - &datas, - &layerMap, - testLayerName, - batchSize, - trans, - useGpu); - // test layer initialize - std::vector parameters; - LayerPtr testLayer; - initTestLayer(testConf, &layerMap, ¶meters, &testLayer); - - LayerStatePtr state = std::make_shared(); - if (testConf.testBatchState) { - initBatchState(dataLayers[0], testLayer, state, useGpu); - testLayer->resetState(); - testLayer->setState(state); - } - - testLayer->forward(PASS_GC); - if (useWeight && weights == nullptr) { - weights = testLayer->getOutput().value->clone(0, 0, useGpu); - initWeight(weights); - } - std::vector outArgs; - outArgs.push_back(testLayer->getOutput()); - if (useWeight) { - outArgs[0].value = 
outArgs[0].value->clone(0, 0, useGpu); - outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights); - } - - real cost = Argument::sum(outArgs); - LOG(INFO) << " cost " << cost; - EXPECT_FALSE(std::isnan(cost)); - - // Test whether the callback is called for a parameter - if (testLayer->getOutputGrad()) { - useWeight ? testLayer->getOutput().grad->copyFrom(*weights) - : testLayer->getOutputGrad()->resetOne(); - } - vector callbackFlags(parameters.size(), 0); - auto callback = [&](Parameter* para) { ++callbackFlags[para->getID()]; }; - testLayer->backward(callback); - - // do forward and backward for another time to test that gradient is doubled - int callbackCount = 1; - if (testConf.testAccumulate) { - if (testConf.testBatchState) { - testLayer->setState(state); - } - testLayer->forward(PASS_GC); - if (testLayer->getOutputGrad()) { - useWeight ? testLayer->getOutput().grad->copyFrom(*weights) - : testLayer->getOutputGrad()->resetOne(); - } - testLayer->backward(callback); - ++callbackCount; - } - for (size_t i = 0; i < parameters.size(); ++i) { - EXPECT_EQ(parameters[i]->isStatic() ? 
0 : callbackCount, callbackFlags[i]); - } - - // Test whether the layer's forward calculation is stable - // by adding perturbation to its parameters or its input layers - real maxDiff = 0; - testPerturbParameter(testConf, - weights, - state, - cost, - callbackCount, - &maxDiff, - testLayer, - ¶meters); - testPerturbInput(testConf, - weights, - state, - cost, - callbackCount, - &maxDiff, - testLayer, - dataLayers); - EXPECT_LE(fabs(maxDiff), epsilon); - - if (testConf.testState) { - testState(testLayer, dataLayers, datas); - } - if (testConf.testBatchState) { - testBatchState(testLayer, dataLayers, datas); - } -} - -void testLayerGrad(TestConfig testConf, - string testLayerName, - size_t batchSize, - bool trans, - bool useGpu, - bool useWeight, - float epsilon) { - testLayerGradKernel( - testConf, testLayerName, batchSize, trans, useGpu, useWeight, epsilon); - bool isStaticTest = false; - LayerConfig testConfig = testConf.layerConfig; - for (size_t i = 0; i < testConf.inputDefs.size(); i++) { - InputDef inputDef = testConf.inputDefs[i]; - // Some layer must set isStatic true, like DataNormLayer - // so use !isStatic in if - if (inputDef.paraSize && (!inputDef.isStatic)) { - testConf.inputDefs[i].isStatic = true; - isStaticTest = true; - } - } - - if (testConf.biasSize) { - testConf.staticBias = true; - isStaticTest = true; - } - if (isStaticTest) { - testLayerGradKernel( - testConf, testLayerName, batchSize, trans, useGpu, useWeight, epsilon); - } -} - -void testProjectionGrad(ProjectionConfig conf, - InputType inputType, - size_t parameterSize, - size_t batchSize, - bool useGpu, - bool testState, - int biasSize, - bool sharedBias) { - TestConfig config; - conf.set_name(conf.type()); - config.layerConfig.set_type("mixed"); - config.layerConfig.set_size(conf.output_size()); - config.biasSize = biasSize == 0 ? 
config.layerConfig.size() : biasSize; - config.layerConfig.set_bias_size(config.biasSize); - config.layerConfig.set_shared_biases(sharedBias); - config.inputDefs.push_back({inputType, - "layer_0", - static_cast(conf.input_size()), - parameterSize}); - *config.layerConfig.add_inputs()->mutable_proj_conf() = conf; - config.testState = testState; - testLayerGrad(config, "mixed", batchSize, false, useGpu); -} - -void testOperatorGrad(TestConfig& config, - OperatorConfig& operatorConf, - size_t batchSize, - bool useGpu, - bool testState) { - config.layerConfig.set_type("mixed"); - - operatorConf.set_output_size(config.layerConfig.size()); - for (size_t i = 0; i < config.inputDefs.size(); ++i) { - operatorConf.add_input_indices(i); - operatorConf.add_input_sizes(config.inputDefs[i].dim); - } - - config.testState = testState; - testLayerGrad(config, "mixed", batchSize, false, useGpu); -} -} // namespace paddle diff --git a/paddle/legacy/gserver/tests/LayerGradUtil.h b/paddle/legacy/gserver/tests/LayerGradUtil.h deleted file mode 100644 index 941989a1da49d215b9ed4af72e732d6a62fd225d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/LayerGradUtil.h +++ /dev/null @@ -1,329 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" - -#include "paddle/testing/TestUtil.h" -using namespace std; // NOLINT - -namespace paddle { -enum InputType { - INPUT_DATA, // dense vector - INPUT_LABEL, // id - INPUT_DATA_TARGET, // dense vector, but no gradient - INPUT_SEQUENCE_DATA, - INPUT_HASSUB_SEQUENCE_DATA, // sequence has sub-sequence - INPUT_SEQUENCE_MDIM_DATA, - INPUT_SEQUENCE_LABEL, - INPUT_SPARSE_NON_VALUE_DATA, - INPUT_SPARSE_FLOAT_VALUE_DATA, - INPUT_DENSE_DIM_DATA, // using sequence length to init dense data - INPUT_SELF_DEFINE_DATA, // support customizing for input value -}; - -struct ParaSparse { - bool sparse; - string format; - // if equalNnzPerSample is set true, - // every row of the sparse matrix in a format of CSR has a same - // number of nnz values. Currently, this flag is only used for - // selective_fc layer - bool equalNnzPerSample; - ParaSparse(const string& formatIn = "") { // NOLINT - if (formatIn == "") { - sparse = false; - } else { - sparse = true; - } - equalNnzPerSample = false; - } - ParaSparse(const string& formatIn, bool equalNnz) { - format = formatIn; - sparse = true; - equalNnzPerSample = equalNnz; - } -}; - -struct InputDef { - InputType inputType; - string name; - size_t dim; - size_t paraSize; - ParaSparse sparse; - bool isStatic; - std::vector labelInitValue; - std::vector labelSeqStartPositions; - std::vector labelSubSeqStartPositions; - std::vector ids; - MatrixPtr selfDefinedData; - - InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { - inputType = type; - name = nameIn; - dim = dimIn; - paraSize = sizeIn; - sparse = {""}; - isStatic = false; - } - - InputDef(InputType type, - string nameIn, - MatrixPtr selfDefinedData, - std::vector selfDefinedSeqStartPos = {}, - std::vector selfDefinedSubSeqStartPos = {}) - : labelSeqStartPositions(selfDefinedSeqStartPos), - labelSubSeqStartPositions(selfDefinedSubSeqStartPos), - 
selfDefinedData(selfDefinedData) { - inputType = type; - name = nameIn; - dim = 0; - sparse = {""}; - paraSize = 0; - isStatic = false; - } - - InputDef(InputType type, - string nameIn, - const std::vector& ids, - const std::vector& selfDefinedSeqStartPos = {}, - const std::vector& selfDefinedSubSeqStartPos = {}) - : labelSeqStartPositions(selfDefinedSeqStartPos), - labelSubSeqStartPositions(selfDefinedSubSeqStartPos), - ids(ids) { - selfDefinedData = nullptr; - inputType = type; - name = nameIn; - dim = 0; - sparse = {""}; - paraSize = 0; - isStatic = false; - } - - InputDef(InputType type, - string nameIn, - size_t dimIn, - size_t sizeIn, - const std::vector& labelInitValue, - const std::vector& labelSeqStartPositions) - : labelInitValue(labelInitValue), - labelSeqStartPositions(labelSeqStartPositions) { - inputType = type; - name = nameIn; - dim = dimIn; - paraSize = sizeIn; - sparse = {""}; - isStatic = false; - } - - InputDef(InputType type, - string nameIn, - size_t dimIn, - size_t sizeIn, - ParaSparse sparseIn) { - inputType = type; - name = nameIn; - dim = dimIn; - paraSize = sizeIn; - sparse = sparseIn; - } -}; - -struct TestConfig { - LayerConfig layerConfig; - std::vector inputDefs; - size_t biasSize; - real paramInitialMean; - real paramInitialStd; - bool testAccumulate; - bool testState; - bool staticBias; - bool testBatchState; - TestConfig() - : biasSize(0), - paramInitialMean(0.0), - paramInitialStd(1.0), - testAccumulate(true), - testState(false), - staticBias(false), - testBatchState(false) {} -}; - -real getCostSum(ParameterPtr& parameter, - CpuVector& cpuPara, - LayerPtr& testLayer, - MatrixPtr weights = nullptr); - -real getDiffAndPrint(real newCost1, - real newCost2, - real callbackCount, - char fill, - string testLayerName, - string name, - real step, - real delta); - -/** - * @brief verify that sequentially running forward() one timestamp at one time - * has same result as running forward() with one whole sequence - * - * @param 
testLayer[in/out] testLayer - * @param dataLayers[in/out] dataLayers - * @param datas[in/out] data of dataLayers - */ -void testState(LayerPtr testLayer, - vector& dataLayers, - vector& datas); - -/** - * @brief verify that sequentially running forward() with short sequences one - * time has same result as running forward() with long sequences. - * - * @param testLayer[in/out] testLayer - * @param dataLayers[in/out] dataLayers - * @param datas[in/out] data of dataLayers - */ -void testBatchState(LayerPtr testLayer, - vector& dataLayers, - vector& datas); - -/** - * @brief Generate a perturbation so that it is roughly aligned with the - * gradient direction. This is to make sure that change along this - * direction will make cost increase (or decrease) in a meaningful - * way so that the finite difference can be used to approximate the - * directional dirivative well. - * - * @param oldGrad[in] input gradient - * newGrad[out] output gradient - * dim dimension of oldGrad/newGrad - * - * @return sum_i(oldGrad[i] * newGrad[i]) - */ -double genPerturbation(const real* oldGrad, real* newGrad, size_t dim); - -void initWeight(MatrixPtr& weights); - -void initBatchState(LayerPtr dataLayer, - LayerPtr testLayer, - LayerStatePtr state, - bool useGpu); - -/** - * @brief initialize the dataLayer by its inputType - * - * @param testConf[in] test config - * dataLayers[out] dataLayers - * datas[out] initialized data of dataLayers - * layerMap[out] layerMap - */ -void initDataLayer(TestConfig testConf, - std::vector* dataLayers, - vector* datas, - LayerMap* layerMap, - string testLayerName, - size_t batchSize, - bool trans, - bool useGpu); - -/** - * @brief initialize the parameter of testLayer - * - * @param testConf[in/out] test config - * layerMap[out] layerMap - * parameters[out] parameters of testLayer - * testLayer[out] testLayer - */ -void initTestLayer(TestConfig testConf, - LayerMap* layerMap, - std::vector* parameters, - LayerPtr* testLayer); - -/** - * @brief Test 
whether the layer's forward calculation is stable by adding - * perturbation to its parameters - * - * @param testConf[in] test config - * weights[in] weights of testLayer - * state[in] state of testLayer - * cost[in] input cost - * callbackCount[in] number of done callback - * maxDiff[in/out] max of all previous diff - * testLayer[in/out] testLayer - * parameters[in/out] parameters of testLayer - */ -void testPerturbParameter(TestConfig testConf, - const MatrixPtr weights, - const LayerStatePtr state, - real cost, - real callbackCount, - real* maxDiff, - LayerPtr testLayer, - std::vector* parameters); - -/** - * @brief Test whether the layer's forward calculation is stable by adding - * perturbation to its input layers - * - * @param testConf[in] test config - * weights[in] weights of testLayer - * state[in] state of testLayer - * cost[in] input cost - * callbackCount[in] number of done callback - * maxDiff[in/out] max of all previous diff - * testLayer[in/out] testLayer - * dataLayers[in/out] dataLayers - */ -void testPerturbInput(TestConfig testConf, - const MatrixPtr weights, - const LayerStatePtr state, - real cost, - real callbackCount, - real* maxDiff, - LayerPtr testLayer, - std::vector dataLayers); - -void testLayerGradKernel(TestConfig testConf, - string testLayerName, - size_t batchSize, - bool trans, - bool useGpu, - bool useWeight = false, - float epsilon = 0.02); - -void testLayerGrad(TestConfig testConf, - string testLayerName, - size_t batchSize, - bool trans, - bool useGpu, - bool useWeight = false, - float epsilon = 0.02); - -void testProjectionGrad(ProjectionConfig conf, - InputType inputType, - size_t parameterSize, - size_t batchSize, - bool useGpu, - bool testState = false, - int biasSize = 0, - bool sharedBias = false); - -void testOperatorGrad(TestConfig& config, - OperatorConfig& operatorConf, - size_t batchSize, - bool useGpu, - bool testState = false); - -} // namespace paddle diff --git a/paddle/legacy/gserver/tests/MKLDNNTester.cpp 
b/paddle/legacy/gserver/tests/MKLDNNTester.cpp deleted file mode 100644 index b550ba9c72d85830dbf12485a6a645a6b5360026..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/MKLDNNTester.cpp +++ /dev/null @@ -1,580 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MKLDNNTester.h" -#include "paddle/legacy/gserver/layers/MKLDNNBase.h" -#include "paddle/legacy/gserver/layers/MKLDNNLayer.h" -#include "paddle/legacy/trainer/Trainer.h" - -namespace paddle { - -// init data layer and test layer of both dnn and reference -void MKLDNNTester::reset(const TestConfig& dnn, - const TestConfig& ref, - size_t batchSize) { - const bool trans = false; - const bool useGpu = false; - - // clear - configs_.clear(); - layerNames_.clear(); - dataLayers_.clear(); - datas_.clear(); - layerMaps_.clear(); - parameters_.clear(); - testLayers_.clear(); - - // resize - configs_.resize(NUM); - layerNames_.resize(NUM); - dataLayers_.resize(NUM); - datas_.resize(NUM); - layerMaps_.resize(NUM); - parameters_.resize(NUM); - testLayers_.resize(NUM); - - // reset configs and layer names - configs_[DNN] = dnn; - configs_[REF] = ref; - layerNames_[DNN] = "mkldnn"; // the first is mkldnn layer - layerNames_[REF] = "reference"; // second is reference layer - - // reset others - for (size_t i = 0; i < NUM; ++i) { - configs_[i].layerConfig.set_name(layerNames_[i]); - initDataLayer(configs_[i], - &(dataLayers_[i]), - 
&(datas_[i]), - &(layerMaps_[i]), - layerNames_[i], - batchSize, - trans, - useGpu); - initTestLayer( - configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); - } - refLayer_ = testLayers_[REF]; - dnnLayer_ = testLayers_[DNN]; - EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); - EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); - setInputImgSize(); - - // for comparison with Paddle reference results, - // need manually add cpu device output for test - MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast(dnnLayer_); - if (dnnLayer) { - dnnLayer->addOutputArgument(CPU_DEVICE); - } -} - -void MKLDNNTester::setInputImgSize() { - for (size_t n = 0; n < dataLayers_.size(); ++n) { - for (size_t i = 0; i < dataLayers_[n].size(); ++i) { - // TODO(TJ): fix me when concat and elewise ready - dataLayers_[n][i]->getOutput().setFrameHeight(ih_); - dataLayers_[n][i]->getOutput().setFrameWidth(iw_); - } - } -} - -// init randome parameters of ref, and copy to mkldnn -void MKLDNNTester::randomWgtDatas() { - EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); - const bool isBN = refLayer_->getType() == "batch_norm"; - for (size_t i = 0; i < parameters_[REF].size(); ++i) { - const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); - const VectorPtr& refValue = parameters_[REF][i]->getBuf(PARAMETER_VALUE); - parameters_[REF][i]->randomize(); - if (isBN && i == 2) { - // this param is moving average in batch norm, which must larger than 0 - real offset = fabs(refValue->getMin()) + 1.0; - refValue->add(offset); - } - dnnValue->copyFrom(*refValue); - - VLOG(MKLDNN_TESTS) << "Random weight " << parameters_[DNN][i]->getName(); - printVector(dnnValue); - } -} - -// random botdata of ref layer and copy same to mkldnn -void MKLDNNTester::randomBotDatas() { - CHECK_EQ(dataLayers_.size(), NUM); - for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { - dataLayers_[REF][i]->getOutputValue()->randomizeUniform(); - 
dataLayers_[DNN][i]->getOutputValue()->copyFrom( - *(dataLayers_[REF][i]->getOutputValue())); - VLOG(MKLDNN_TESTS) << "Random Foward, InputValue " << i; - printMatrix(dataLayers_[REF][i]->getOutputValue()); - } -} - -void MKLDNNTester::randomTopDiffs() { - refLayer_->getOutputGrad()->randomizeUniform(); - dnnLayer_->getOutput(CPU_DEVICE) - .grad->copyFrom(*(refLayer_->getOutputGrad())); - VLOG(MKLDNN_TESTS) << "Random Backward, OutputGrad"; - printMatrix(refLayer_->getOutputGrad()); -} - -void MKLDNNTester::checkForward() { - VLOG(MKLDNN_TESTS) << "Check Forward"; - printTopDatas(); - double delta = - compareMatrix(refLayer_->getOutputValue(), dnnLayer_->getOutputValue()); - EXPECT_LE(fabs(delta), eps_); -} - -void MKLDNNTester::checkBackwardData() { - VLOG(MKLDNN_TESTS) << "Check Backward Data"; - const bool isBN = refLayer_->getType() == "batch_norm"; - for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { - const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); - const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); - VLOG(MKLDNN_ALL) << "MKLDNN Backward Result: InputGrad " << i; - printMatrix(dnnDiff); - VLOG(MKLDNN_ALL) << "Reference Backward Result: InputGrad " << i; - printMatrix(refDiff); - - double delta = compareMatrix(refDiff, dnnDiff); - EXPECT_LE(fabs(delta), eps_); - if (isBN) { - // the other two inputs in batch norm are for moving mean and var - // do not have grad to compare - break; - } - } -} - -void MKLDNNTester::checkBackwardWgts() { - VLOG(MKLDNN_TESTS) << "Check Backward Weight"; - CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); - vector dnnWgts; // used to temply save mkldnn weights - saveWgt(parameters_[DNN], dnnWgts); - - MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast(dnnLayer_); - if (dnnLayer) { - dnnLayer->convertWeightsToPaddle(); - } - for (size_t i = 0; i < parameters_[DNN].size(); ++i) { - const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); - const VectorPtr& ref = 
parameters_[REF][i]->getBuf(PARAMETER_VALUE); - VLOG(MKLDNN_ALL) << "MKLDNN Result: weight value" - << parameters_[DNN][i]->getName(); - printVector(dnn); - VLOG(MKLDNN_ALL) << "Reference Result: weight value " - << parameters_[REF][i]->getName(); - printVector(ref); - - double delta = compareVector(ref, dnn); - EXPECT_LE(fabs(delta), eps_); - } - - VLOG(MKLDNN_ALL) << "Restore dnn weights before comapre"; - restoreWgt(dnnWgts, parameters_[DNN]); -} - -void MKLDNNTester::saveWgt(const vector& from, - vector& to) { - const bool useGpu = false; - to.resize(from.size()); - for (size_t i = 0; i < to.size(); ++i) { - const VectorPtr& wgt = from[i]->getBuf(PARAMETER_VALUE); - to[i] = Vector::create(wgt->getSize(), useGpu); - to[i]->copyFrom(*wgt); - } -} - -void MKLDNNTester::restoreWgt(const vector& from, - vector& to) { - CHECK_EQ(from.size(), to.size()); - for (size_t i = 0; i < from.size(); ++i) { - const VectorPtr& wgt = to[i]->getBuf(PARAMETER_VALUE); - wgt->copyFrom(*from[i]); - } -} - -// clear parameters grad -void MKLDNNTester::clearWgtDiffs(size_t id) { - CHECK_LE(id, parameters_.size()); - for (size_t n = 0; n < parameters_.size(); ++n) { - if (id == n || id == parameters_.size()) { - for (size_t i = 0; i < parameters_[n].size(); ++i) { - const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); - if (grad) { - grad->zeroMem(); - } - } - } - } -} - -void MKLDNNTester::clearBotDiffs(size_t id) { - CHECK_LE(id, dataLayers_.size()); - for (size_t n = 0; n < dataLayers_.size(); ++n) { - if (id == n || id == dataLayers_.size()) { - // clear inputs layers of this specific layer - for (size_t i = 0; i < dataLayers_[n].size(); ++i) { - dataLayers_[n][i]->getOutputGrad()->zeroMem(); - } - } - } -} - -void MKLDNNTester::clearTopDatas(size_t id) { - CHECK_LE(id, testLayers_.size()); - for (size_t i = 0; i < testLayers_.size(); ++i) { - if (id == i || id == testLayers_.size()) { - testLayers_[i]->getOutputValue()->zeroMem(); - } - } -} - -void 
MKLDNNTester::printTopDatas() { - if (!log_) { - return; - } - - for (int n = 0; n < NUM; ++n) { - VLOG(MKLDNN_ALL) << testLayers_[n]->getType() - << " Forward Result: OutputValue"; - printMatrix(testLayers_[n]->getOutputValue()); - } -} - -void MKLDNNTester::printMatrix(const MatrixPtr& m) { - if (!log_) { - return; - } - - std::ostringstream ostr; - m->print(ostr); - VLOG(MKLDNN_ALL) << std::endl << ostr.str(); -} - -void MKLDNNTester::printVector(const VectorPtr& v) { - if (!log_) { - return; - } - - std::ostringstream ostr; - v->print(ostr, v->getSize()); - VLOG(MKLDNN_ALL) << std::endl << ostr.str(); -} - -double MKLDNNTester::getDelta(const real* refer, - const real* value, - size_t len, - const float failRate, - const float thres) { - double delta = 0, sum = 0; - int failCnt = 0; - const double eps = 1e-5; - double maxRatio = 0; - for (size_t i = 0; i < len; ++i) { - double ref = fabs(refer[i]); - double val = fabs(value[i]); - double diff = fabs(refer[i] - value[i]); - delta += diff; - sum += ref; - if (ref < eps && val < eps) { // both values are very small - continue; - } - double ratio = diff / ref; - if (ratio > thres) { - maxRatio = std::max(maxRatio, ratio); - failCnt++; - } - } - EXPECT_FALSE(std::isinf(sum)); - EXPECT_FALSE(std::isnan(sum)); - EXPECT_FALSE(std::isnan(delta)); - VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len - << ", delta: " << delta / sum << ", failCnt:" << failCnt; - double res = sum > eps ? delta / sum : eps; - return (failCnt / (float)len) > failRate ? 
maxRatio : res; -} - -double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { - CHECK_EQ(m1->getElementCnt(), m2->getElementCnt()); - return getDelta(m1->getData(), m2->getData(), m1->getElementCnt()); -} - -double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { - CHECK_EQ(v1->getSize(), v2->getSize()); - return getDelta(v1->getData(), v2->getData(), v1->getSize()); -} - -void MKLDNNTester::runOnce() { - // test forward - randomBotDatas(); - dnnLayer_->forward(passType_); - refLayer_->forward(passType_); - checkForward(); - - if (passType_ == PASS_TEST) { - return; - } - - // test backward - // simple updater - UpdateCallback updateCallback = [](Parameter* para) { - auto& grad = para->getBuf(PARAMETER_GRADIENT); - auto& value = para->getBuf(PARAMETER_VALUE); - real lr = 1e-2; - value->add(*grad, lr); - grad->zeroMem(); - }; - randomTopDiffs(); - dnnLayer_->backward(updateCallback); - refLayer_->backward(updateCallback); - checkBackwardData(); - checkBackwardWgts(); - - // clear buffers - // ref code will addto the diff, dnn code will writeto it - // and clearTopDatas(REF) should be coverd by ref layers - clearBotDiffs(REF); - clearWgtDiffs(REF); - // it is necessary to clear bottom diffs when only activation is dnn type - if (configs_[DNN].layerConfig.active_type().compare(0, 7, "mkldnn_") == 0) { - clearBotDiffs(DNN); - } -} - -void MKLDNNTester::run(const TestConfig& dnn, - const TestConfig& ref, - size_t batchSize, - size_t inputImgH, - size_t inputImgW, - PassType passType, - bool printDetails, - size_t iter, - float epsilon) { - CHECK(dnn.layerConfig.type().compare(0, 7, "mkldnn_") == 0 || - dnn.layerConfig.active_type().compare(0, 7, "mkldnn_") == 0) - << "should be MKLDNN layer or MKLDNN activation"; - if (dnn.layerConfig.type() == ref.layerConfig.type()) { - VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " - << dnn.layerConfig.active_type() << " vs " - << ref.layerConfig.active_type(); - } else { - 
VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " - << dnn.layerConfig.type() << " vs " - << ref.layerConfig.type(); - } - - ih_ = inputImgH; - iw_ = inputImgW; - passType_ = passType; - log_ = printDetails; - iter_ = iter; - eps_ = epsilon; - - // Firstly test mkldnn init from PARAM_FORMAT_ORIGINAL weight - reset(dnn, ref, batchSize); - randomWgtDatas(); - clearWgtDiffs(); - clearBotDiffs(); - for (size_t i = 0; i < iter_; ++i) { - VLOG(MKLDNN_TESTS) << "Check Iteration " << i; - runOnce(); - } - - if (parameters_[DNN].empty()) { - // has no paramters - return; - } - - // After run some iterations, the mkldnn weight has been stored in dnnLayer - // and we can also get the mkldnn weight parameter header format. - // Weight parameter should always be index 0 (and bias index 1). - // TODO(TJ): should also consider mean and var format when batchnorm ready - int dnnWgtFmt = parameters_[DNN][0]->getHeaderFormat(); - int refWgtFmt = parameters_[REF][0]->getHeaderFormat(); - if (dnnWgtFmt == refWgtFmt) { - // weight format are equal, so no need check more - return; - } - - // then save the weights and restart again - vector dnnWgts, refWgts; - CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); - saveWgt(parameters_[DNN], dnnWgts); - saveWgt(parameters_[REF], refWgts); - - // restart again with dnn weight format - reset(dnn, ref, batchSize); - // TODO(TJ): should also considerate mean and var format when batchnorm ready - parameters_[DNN][0]->setHeaderFormat(dnnWgtFmt); - - // restore wgt - restoreWgt(dnnWgts, parameters_[DNN]); - restoreWgt(refWgts, parameters_[REF]); - clearWgtDiffs(); - clearBotDiffs(); - - for (size_t i = 0; i < iter_; ++i) { - VLOG(MKLDNN_TESTS) << "Check Iteration " << i; - runOnce(); - } -} - -void MKLDNNTester::initArgument(DataIn& data, - const std::string& configPath, - const size_t iter) { - TrainerConfigHelper config(configPath); - size_t batchSize = config.getOptConfig().batch_size(); - data.inArgs.resize(iter); - 
data.outGrads.resize(iter); - data.paraValues.clear(); - for (const auto& layer_name : config.getModelConfig().input_layer_names()) { - auto layer_config = std::find_if(config.getModelConfig().layers().begin(), - config.getModelConfig().layers().end(), - [=](const LayerConfig& layer_config) { - return layer_config.name() == layer_name; - }); - CHECK(layer_config != config.getModelConfig().layers().end()); - - size_t layerSize = layer_config->size(); - for (size_t i = 0; i < iter; ++i) { - Argument arg; - arg.value = Matrix::create(batchSize, layerSize, false, false); - arg.grad = Matrix::create(batchSize, layerSize, false, false); - arg.value->randomizeUniform(); - arg.value->add(-0.5); - arg.value->sigmoid(*arg.value); - arg.grad->zeroMem(); - arg.ids = VectorT::create(batchSize, false); - arg.ids->rand(layerSize); - generateSequenceStartPositions(batchSize, arg.sequenceStartPositions); - data.inArgs[i].push_back(arg); - } - } - - for (const auto& layer_name : config.getModelConfig().output_layer_names()) { - auto layer_config = std::find_if(config.getModelConfig().layers().begin(), - config.getModelConfig().layers().end(), - [=](const LayerConfig& layer_config) { - return layer_config.name() == layer_name; - }); - CHECK(layer_config != config.getModelConfig().layers().end()); - - size_t layerSize = layer_config->size(); - for (size_t i = 0; i < iter; ++i) { - MatrixPtr grad = Matrix::create(batchSize, layerSize, false, false); - grad->randomizeUniform(); - data.outGrads[i].push_back(grad); - } - } - - for (const auto& para_config : config.getModelConfig().parameters()) { - VectorPtr value = Vector::create(para_config.size(), false); - value->randnorm(0, 2); - data.paraValues.push_back(value); - } -} - -void MKLDNNTester::getOutResult(const std::string& configPath, - DataIn& in, - DataOut& out, - bool use_mkldnn, - size_t iter) { - FLAGS_use_gpu = false; - FLAGS_use_mkldnn = use_mkldnn; - *ThreadLocalRand::getSeed() = 1; - srand(1); - - Trainer trainer; - auto 
config = std::make_shared(configPath); - trainer.init(config, false); - auto gradientMachine = trainer.getGradientMachine(); - std::vector parameters = gradientMachine->getParameters(); - for (size_t i = 0; i < in.paraValues.size(); i++) { - parameters[i]->getBuf(PARAMETER_VALUE)->copyFrom(*in.paraValues[i]); - } - UpdateCallback simpleUpdate = [](Parameter* para) { - auto& grad = para->getBuf(PARAMETER_GRADIENT); - auto& value = para->getBuf(PARAMETER_VALUE); - real lr = 1e-2; - value->add(*grad, lr); - grad->zeroMem(); - }; - - vector outArgs; - gradientMachine->start(); - out.outValues.clear(); - out.paraValues.clear(); - for (size_t i = 0; i < iter; ++i) { - VLOG(MKLDNN_TESTS) << "runing iteration " << i; - gradientMachine->forward(in.inArgs[i], &outArgs, PASS_TRAIN); - // save forward result - for (size_t k = 0; k < outArgs.size(); k++) { - const MatrixPtr& src = outArgs[k].value; - MatrixPtr dst = - Matrix::create(src->getHeight(), src->getWidth(), false, false); - if (typeid(*src) == typeid(MKLDNNMatrix)) { - MKLDNNMatrixPtr dnnSrc = std::dynamic_pointer_cast(src); - dnnSrc->copyTo(*dst); - } else { - dst->copyFrom(*src); - } - out.outValues.push_back(dst); - } - - // random backward input - for (size_t k = 0; k < outArgs.size(); k++) { - outArgs[k].grad->copyFrom(*in.outGrads[i][k]); - } - gradientMachine->backward(simpleUpdate); - } - gradientMachine->finish(); - - // save param value - for (size_t i = 0; i < in.paraValues.size(); i++) { - VectorPtr val = Vector::create( - parameters[i]->getBuf(PARAMETER_VALUE)->getSize(), false); - val->copyFrom(*parameters[i]->getBuf(PARAMETER_VALUE)); - out.paraValues.push_back(val); - } -} - -void MKLDNNTester::compareResult(DataOut& ref, DataOut& dnn, float eps) { - CHECK_EQ(ref.outValues.size(), dnn.outValues.size()); - CHECK_EQ(ref.paraValues.size(), dnn.paraValues.size()); - for (size_t i = 0; i < ref.outValues.size(); i++) { - VLOG(MKLDNN_TESTS) << "compare value index: " << i; - 
EXPECT_LE(fabs(compareMatrix(ref.outValues[i], dnn.outValues[i])), eps); - } - for (size_t i = 0; i < ref.paraValues.size(); i++) { - VLOG(MKLDNN_TESTS) << "compare param index: " << i; - EXPECT_LE(fabs(compareVector(ref.paraValues[i], dnn.paraValues[i])), eps); - } -} - -void MKLDNNTester::runNetTest(const std::string& configPath, - size_t iter, - float eps) { - DataIn in; - initArgument(in, configPath, iter); - DataOut outCpu, outDnn; - VLOG(MKLDNN_TESTS) << "runing cpu network"; - getOutResult(configPath, in, outCpu, false, iter); - VLOG(MKLDNN_TESTS) << "runing mkldnn network"; - getOutResult(configPath, in, outDnn, true, iter); - - compareResult(outCpu, outDnn, eps); -} - -} // namespace paddle diff --git a/paddle/legacy/gserver/tests/MKLDNNTester.h b/paddle/legacy/gserver/tests/MKLDNNTester.h deleted file mode 100644 index 086846ce537857eb76ffca492246677eb7982a42..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/MKLDNNTester.h +++ /dev/null @@ -1,143 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include "LayerGradUtil.h" -#include "paddle/legacy/gserver/layers/MKLDNNBase.h" -#include "paddle/legacy/gserver/layers/MKLDNNLayer.h" - -namespace paddle { - -/** - * @brief test the functionality of MKLDNNlayers and MKLDNNActivations - * refer to paddle original function - */ -class MKLDNNTester { - enum { - DNN = 0, // MKLDNN layer - REF = 1, // Reference layer - NUM = 2, // Number of total - }; - - struct DataIn { - std::vector> inArgs; - std::vector> outGrads; - std::vector paraValues; - }; - - struct DataOut { - std::vector outValues; - std::vector paraValues; - }; - - protected: - std::vector configs_; - vector layerNames_; - vector> dataLayers_; - vector> datas_; - vector layerMaps_; - vector> parameters_; - vector testLayers_; - LayerPtr refLayer_, dnnLayer_; - - /// run some iterations, all the result should pass - size_t iter_; - /// whether to print out the details - bool log_; - /// epsilon - float eps_; - /// input image size, default 1 - size_t ih_, iw_; - /// passType, PASS_TRAIN, PASS_TEST or PASS_GC (Gradient Check pass) - PassType passType_; - - public: - explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) { - iter_ = iter; - eps_ = epsilon; - log_ = false; - passType_ = PASS_TRAIN; - } - - ~MKLDNNTester() {} - - public: - void run(const TestConfig& dnn, - const TestConfig& ref, - size_t batchSize, - size_t inputImgH = 1, - size_t inputImgW = 1, - PassType passType = PASS_TRAIN, - bool printDetails = false, - size_t iter = 3, - float epsilon = 1e-4); - static void runNetTest(const std::string& configPath, - size_t iter = 2, - float eps = 1e-4); - static void initArgument(DataIn& data, - const std::string& configPath, - size_t iter = 2); - static void getOutResult(const std::string& configPath, - DataIn& in, - DataOut& out, - bool use_mkldnn, - size_t iter = 2); - - private: - void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize); - void setInputImgSize(); - void runOnce(); - 
- void randomWgtDatas(); - void randomBotDatas(); - void randomTopDiffs(); - - void checkForward(); - void checkBackwardData(); - void checkBackwardWgts(); - - // clear specific layer, clear all when id equals NUM - void clearWgtDiffs(size_t id = NUM); - void clearBotDiffs(size_t id = NUM); - void clearTopDatas(size_t id = NUM); - - void printTopDatas(); - void printMatrix(const MatrixPtr& m); - void printVector(const VectorPtr& v); - - void saveWgt(const vector& from, vector& to); - void restoreWgt(const vector& from, vector& to); - - static double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2); - static double compareVector(const VectorPtr& v1, const VectorPtr& v2); - static void compareResult(DataOut& ref, DataOut& dnn, float eps = 1e-4); - - /** - * Get delta percent - * if many(>failRate) wrong(abs(val-ref)/abs(ref) > thres) points - * return the max(diff/ref) - * else return sum(abs(diff)) / sum(abs(ref)) - * The return value should be smaller than eps when passing. - */ - static double getDelta(const real* refer, - const real* value, - size_t len, - const float failRate = 1e-3, - const float thres = 0.1); -}; - -} // namespace paddle diff --git a/paddle/legacy/gserver/tests/Sequence/dummy.list b/paddle/legacy/gserver/tests/Sequence/dummy.list deleted file mode 100644 index 0e52665e11298965df5738f69c5bcefcc8bab0f9..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/Sequence/dummy.list +++ /dev/null @@ -1 +0,0 @@ -dummy_file_no_use diff --git a/paddle/legacy/gserver/tests/Sequence/tour_dict_phrase.dict b/paddle/legacy/gserver/tests/Sequence/tour_dict_phrase.dict deleted file mode 100644 index 41f68e7f5c054dc60300843dae8f1bf741dd13ff..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/Sequence/tour_dict_phrase.dict +++ /dev/null @@ -1,158 +0,0 @@ -, -的 -。 -酒店 -房间 -了 -很 -也 -不错 -是 -! -有 -服务 -就是 -都 -住 -一 -在 -好 -月湖 -不 -可以 -. 
-且 -就 -离 -方便 -早餐 -还是 -近 -位置 -干净 -床上用品 -、 -价格 -挺 -强烈推荐 -感觉 -卫生 -本来 -挺好 -性价比 -房 -前台 -下次 -交通 -不过 -很方便 -给 -没 -这个 -不少 -还有 -十一 -来 -还会 -停电 -推荐 -流 -服务员 -新 -舒适 -选择 -热情 -简直 -吃饭 -安静 -吃 -很干净 -地理位置 -便利 -得 -这 -子 -杯子 -很多 -周围 -適 -第 -天一广场 -整体 -好吃 -* -尚可 -品质 -2 -时候 -家 -出差 -又 -较 -便宜 -整洁 -啊 -汉庭 -交通便利 -旁边 -对 -去过 -次 -利落 -合 -换 -窗户 -温馨 -最 -两 -应该 -只有 -适中 -出去玩 -很安静 -商务 -对面 -道歉 -乾 -地铁站 -居然 -不远 -总体来说 -泳池 -地段 -全家 -相对 -晚 -天一阁 -电脑 -來 -呀 -一人 -口头 -上网 -刷牙 -相当 -天 -合理 -准备 -通知 -第一天 -水温 -出来 -五星级 -快 -无 -楼层 -各方面 -华润万家 -宁波 -选 -放心 -浄 -主要原因 -安排 -客户 -一次性杯子 -起 -床垫 -一早 diff --git a/paddle/legacy/gserver/tests/Sequence/tour_train_wdseg b/paddle/legacy/gserver/tests/Sequence/tour_train_wdseg deleted file mode 100644 index 2cdf7f7e14e53fbc9070432d86a6cb21ad566cc7..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/Sequence/tour_train_wdseg +++ /dev/null @@ -1,10 +0,0 @@ -2 酒店 有 很 舒适 的 床垫 子 , 床上用品 也 应该 是 一人 一 换 , 感觉 很 利落 对 卫生 很 放心 呀 。 -2 很 温馨 , 也 挺 干净 的 * 地段 不错 , 出来 就 有 全家 , 离 地铁站 也 近 , 交通 很方便 * 就是 都 不 给 刷牙 的 杯子 啊 , 就 第一天 给 了 一次性杯子 * -2 位置 方便 , 强烈推荐 , 十一 出去玩 的 时候 选 的 , 对面 就是 华润万家 , 周围 吃饭 的 也 不少 。 -2 交通便利 , 吃 很 便利 , 乾 浄 、 安静 , 商务 房 有 电脑 、 上网 快 , 价格 可以 , 就 早餐 不 好吃 。 整体 是 不错 的 。 適 合 出差 來 住 。 -2 本来 准备 住 两 晚 , 第 2 天 一早 居然 停电 , 且 无 通知 , 只有 口头 道歉 。 总体来说 性价比 尚可 , 房间 较 新 , 还是 推荐 . -2 这个 酒店 去过 很多 次 了 , 选择 的 主要原因 是 离 客户 最 便宜 相对 又 近 的 酒店 -2 挺好 的 汉庭 , 前台 服务 很 热情 , 卫生 很 整洁 , 房间 安静 , 水温 适中 , 挺好 ! 
-2 HowardJohnson 的 品质 , 服务 相当 好 的 一 家 五星级 。 房间 不错 、 泳池 不错 、 楼层 安排 很 合理 。 还有 就是 地理位置 , 简直 一 流 。 就 在 天一阁 、 月湖 旁边 , 离 天一广场 也 不远 。 下次 来 宁波 还会 住 。 -2 酒店 很干净 , 很安静 , 很 温馨 , 服务员 服务 好 , 各方面 都 不错 * -2 挺好 的 , 就是 没 窗户 , 不过 对 得 起 这 价格 diff --git a/paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest b/paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest deleted file mode 100644 index 3aa890d8aa1e15d2e448ee98e655bc8b499e72a5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest +++ /dev/null @@ -1,14 +0,0 @@ -2 酒店 有 很 舒适 的 床垫 子 , 床上用品 也 应该 是 一人 一 换 , 感觉 很 利落 对 卫生 很 放心 呀 。 -2 很 温馨 , 也 挺 干净 的 * 地段 不错 , 出来 就 有 全家 , 离 地铁站 也 近 , 交通 很方便 * 就是 都 不 给 刷牙 的 杯子 啊 , 就 第一天 给 了 一次性杯子 * - -2 位置 方便 , 强烈推荐 , 十一 出去玩 的 时候 选 的 , 对面 就是 华润万家 , 周围 吃饭 的 也 不少 。 -2 交通便利 , 吃 很 便利 , 乾 浄 、 安静 , 商务 房 有 电脑 、 上网 快 , 价格 可以 , 就 早餐 不 好吃 。 整体 是 不错 的 。 適 合 出差 來 住 。 -2 本来 准备 住 两 晚 , 第 2 天 一早 居然 停电 , 且 无 通知 , 只有 口头 道歉 。 总体来说 性价比 尚可 , 房间 较 新 , 还是 推荐 . - -2 这个 酒店 去过 很多 次 了 , 选择 的 主要原因 是 离 客户 最 便宜 相对 又 近 的 酒店 -2 挺好 的 汉庭 , 前台 服务 很 热情 , 卫生 很 整洁 , 房间 安静 , 水温 适中 , 挺好 ! 
- -2 HowardJohnson 的 品质 , 服务 相当 好 的 一 家 五星级 。 房间 不错 、 泳池 不错 、 楼层 安排 很 合理 。 还有 就是 地理位置 , 简直 一 流 。 就 在 天一阁 、 月湖 旁边 , 离 天一广场 也 不远 。 下次 来 宁波 还会 住 。 -2 酒店 很干净 , 很安静 , 很 温馨 , 服务员 服务 好 , 各方面 都 不错 * -2 挺好 的 , 就是 没 窗户 , 不过 对 得 起 这 价格 - diff --git a/paddle/legacy/gserver/tests/Sequence/train.list b/paddle/legacy/gserver/tests/Sequence/train.list deleted file mode 100644 index 1109a2449252cb9bfcb10ece4cf9a96e655e5a25..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/Sequence/train.list +++ /dev/null @@ -1 +0,0 @@ -legacy/gserver/tests/Sequence/tour_train_wdseg diff --git a/paddle/legacy/gserver/tests/Sequence/train.list.nest b/paddle/legacy/gserver/tests/Sequence/train.list.nest deleted file mode 100644 index a67df35024f456d517899f37272b0f74d822f03d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/Sequence/train.list.nest +++ /dev/null @@ -1 +0,0 @@ -legacy/gserver/tests/Sequence/tour_train_wdseg.nest diff --git a/paddle/legacy/gserver/tests/__init__.py b/paddle/legacy/gserver/tests/__init__.py deleted file mode 100644 index f662d6826321eb840739382558f76327d27b5847..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/paddle/legacy/gserver/tests/concat_dotmul_a.conf b/paddle/legacy/gserver/tests/concat_dotmul_a.conf deleted file mode 100644 index db02ca7e80de63618a7abf7b3673840627cd8c93..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/concat_dotmul_a.conf +++ /dev/null @@ -1,31 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000) - -data = data_layer(name ="input", size=1000) - -with mixed_layer(size=1000) as layer1: - layer1 += dotmul_projection(input=data) - -with mixed_layer(size=1000) as layer2: - layer2 += dotmul_projection(input=data) - -concat = concat_layer(input=[layer1, layer2]) - -outputs(concat) diff --git a/paddle/legacy/gserver/tests/concat_dotmul_b.conf b/paddle/legacy/gserver/tests/concat_dotmul_b.conf deleted file mode 100644 index 5e64970e4440a4f1d8c9282faa486963b3515a9d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/concat_dotmul_b.conf +++ /dev/null @@ -1,29 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -settings(batch_size=1000) - -data = data_layer(name ="input", size=1000) - -proj1 = dotmul_projection(input=data) - -proj2 = dotmul_projection(input=data) - -concat = concat_layer(input=[proj1, proj2]) - -outputs(concat) diff --git a/paddle/legacy/gserver/tests/concat_fullmatrix_a.conf b/paddle/legacy/gserver/tests/concat_fullmatrix_a.conf deleted file mode 100644 index 940d1efc58fe9c21028c1b1e31c46648ab518cbe..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/concat_fullmatrix_a.conf +++ /dev/null @@ -1,35 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -settings(batch_size=10) - -data = data_layer(name ="input", size=100) - -# fc1 is equal to fc2 -# note that in mixed_layer, default bias_attr=False, -# and default act=LinearActivation(). 
-fc1 = fc_layer(input=data, size=1000, - bias_attr=False, - act=LinearActivation()) - -with mixed_layer(size=1000) as fc2: - fc2 += full_matrix_projection(input=data) - -concat = concat_layer(input=[fc1, fc2]) - -outputs(concat) diff --git a/paddle/legacy/gserver/tests/concat_fullmatrix_b.conf b/paddle/legacy/gserver/tests/concat_fullmatrix_b.conf deleted file mode 100644 index 931e5b38efa019e1f0afbd59a00d4115a4aab67a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/concat_fullmatrix_b.conf +++ /dev/null @@ -1,29 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -settings(batch_size=10) - -data = data_layer(name ="input", size=100) - -proj1 = full_matrix_projection(input=data, size=1000) - -proj2 = full_matrix_projection(input=data, size=1000) - -concat = concat_layer(input=[proj1, proj2]) - -outputs(concat) diff --git a/paddle/legacy/gserver/tests/concat_slice_a.conf b/paddle/legacy/gserver/tests/concat_slice_a.conf deleted file mode 100644 index dccf911089e16f4f97b1470ee39d192d4557d4bd..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/concat_slice_a.conf +++ /dev/null @@ -1,41 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -settings(batch_size=10) - -data = data_layer(name ="input", size=8*16*16) - -conv1 = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation()) -conv2 = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation()) - -proj1 = slice_projection(input=conv1, slices=[(0, 4), (4, 12)]) - -proj2 = slice_projection(input=conv2, slices=[(1, 5), (5, 15)]) - -concat = concat_layer(input=[proj1, proj2]) - -outputs(concat) - diff --git a/paddle/legacy/gserver/tests/concat_slice_b.conf b/paddle/legacy/gserver/tests/concat_slice_b.conf deleted file mode 100644 index 29686ef2810370af3f84b60b2450d5c7d2e7663d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/concat_slice_b.conf +++ /dev/null @@ -1,41 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -settings(batch_size=10) - -data = data_layer(name ="input", size=8*16*16) - -conv1 = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation()) -conv2 = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation()) - -proj1 = slice_projection(input=conv1, slices=[(0, 12)]) - -proj2 = slice_projection(input=conv2, slices=[(1, 15)]) - -concat = concat_layer(input=[proj1, proj2]) - -outputs(concat) - diff --git a/paddle/legacy/gserver/tests/concat_table_a.conf b/paddle/legacy/gserver/tests/concat_table_a.conf deleted file mode 100644 index 047cb44d156daa93ba50cc259144217990685055..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/concat_table_a.conf +++ /dev/null @@ -1,32 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from paddle.trainer_config_helpers import * - -settings(batch_size=300) - -data = data_layer(name ="input", size=10000) - -# emb1 is equal to emb2, note that bias_attr=false -# and act=LinearActivation() in default. -emb1 = embedding_layer(input=data, size=128) - -with mixed_layer(size=128) as emb2: - emb2 += table_projection(input=data) - -concat = concat_layer(input=[emb1, emb2]) - -outputs(concat) diff --git a/paddle/legacy/gserver/tests/concat_table_b.conf b/paddle/legacy/gserver/tests/concat_table_b.conf deleted file mode 100644 index c666ab994276721b66884e59fe89e816d086df8b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/concat_table_b.conf +++ /dev/null @@ -1,29 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from paddle.trainer_config_helpers import * - -settings(batch_size=300) - -data = data_layer(name ="input", size=10000) - -proj1 = table_projection(input=data, size=128) - -proj2 = table_projection(input=data, size=128) - -concat = concat_layer(input=[proj1, proj2]) - -outputs(concat) diff --git a/paddle/legacy/gserver/tests/img_conv_a.conf b/paddle/legacy/gserver/tests/img_conv_a.conf deleted file mode 100644 index 3ad15c64fe5b793768f5a108f4ce60d15fd5da4a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/img_conv_a.conf +++ /dev/null @@ -1,40 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=10) -data = data_layer(name ="input", size=8*16*16) -conv1 = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation()) -conv2 = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation()) - -concat = concat_layer(input=[conv1, conv2]) - -conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=True, - act=LinearActivation(), - groups=2) - -outputs(concat, conv) diff --git a/paddle/legacy/gserver/tests/img_conv_b.conf b/paddle/legacy/gserver/tests/img_conv_b.conf deleted file mode 100644 index e68008155e97256e4bc865016a507c96995bd2eb..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/img_conv_b.conf +++ /dev/null @@ -1,32 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=10) -data = data_layer(name ="input", size=8*16*16) -proj1 = conv_projection(input=data, filter_size=1, filter_size_y=1, - num_channels=8, num_filters=16, stride=1) -proj2 = conv_projection(input=data, filter_size=1, filter_size_y=1, - num_channels=8, num_filters=16, stride=1) -concat = concat_layer(input=[proj1, proj2], bias_attr=False, act=ReluActivation()) - -proj = conv_projection(input=data, filter_size=1, filter_size_y=1, - num_channels=8, num_filters=16, stride=1, groups=2) - -with mixed_layer(bias_attr=True, act=LinearActivation()) as conv: - conv += proj - -outputs(concat, conv) diff --git a/paddle/legacy/gserver/tests/img_conv_c.conf b/paddle/legacy/gserver/tests/img_conv_c.conf deleted file mode 100644 index 4598ffbdb2f1452cacaf9715409263922828bcb0..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/img_conv_c.conf +++ /dev/null @@ -1,43 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=10) -data = data_layer(name ="input", size=8*16*16) -conv1 = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation(), - layer_type="exconv") -conv2 = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=False, - act=ReluActivation(), - layer_type="exconv") - -concat = concat_layer(input=[conv1, conv2]) - -conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=16, stride=1, - bias_attr=True, - act=LinearActivation(), - groups=2, - layer_type="exconv") - -outputs(concat, conv) diff --git a/paddle/legacy/gserver/tests/img_conv_cudnn.py b/paddle/legacy/gserver/tests/img_conv_cudnn.py deleted file mode 100644 index fd889ee1ce882e8bf3b1cc9605548d4aadfc4662..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/img_conv_cudnn.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=10) -data = data_layer(name="input", size=8 * 16 * 16) -conv = img_conv_layer( - input=data, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - stride=1, - bias_attr=True, - act=LinearActivation(), - groups=2, - layer_type="cudnn_conv") - -outputs(conv) diff --git a/paddle/legacy/gserver/tests/img_conv_exconv.py b/paddle/legacy/gserver/tests/img_conv_exconv.py deleted file mode 100644 index 5aca6da5acf3320c72bd617241166b2925dcd027..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/img_conv_exconv.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=10) -data = data_layer(name="input", size=8 * 16 * 16) -conv = img_conv_layer( - input=data, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - stride=1, - bias_attr=True, - act=LinearActivation(), - groups=2, - layer_type="exconv") - -outputs(conv) diff --git a/paddle/legacy/gserver/tests/img_pool_a.conf b/paddle/legacy/gserver/tests/img_pool_a.conf deleted file mode 100644 index afd271055d974734fc589a51401542b4bed99534..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/img_pool_a.conf +++ /dev/null @@ -1,44 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=10) -data = data_layer(name ="input", size=8*16*16) -conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, - num_filters=8,stride=1) -maxpool = img_pool_layer(input=conv, - pool_size=3, - pool_size_y=5, - num_channels=8, - stride=1, - stride_y=2, - padding=1, - padding_y=2, - pool_type=MaxPooling(), -) -avgpool = img_pool_layer(input=conv, - pool_size=3, - pool_size_y=5, - num_channels=8, - stride=1, - stride_y=2, - padding=1, - padding_y=2, - pool_type=AvgPooling(), -) - -outputs([maxpool, avgpool]) diff --git a/paddle/legacy/gserver/tests/img_pool_b.conf b/paddle/legacy/gserver/tests/img_pool_b.conf deleted file mode 100644 index e8deb9edbe755c1bcf8ea0180125ff7c470b0e0a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/img_pool_b.conf +++ /dev/null @@ -1,44 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=10) -data = data_layer(name ="input", size=8*16*16) -conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, - num_channels=8, num_filters=8, stride=1) -maxpool = img_pool_layer(input=conv, - pool_size=3, - pool_size_y=5, - num_channels=8, - stride=1, - stride_y=2, - padding=1, - padding_y=2, - pool_type=CudnnMaxPooling(), -) - -avgpool = img_pool_layer(input=conv, - pool_size=3, - pool_size_y=5, - num_channels=8, - stride=1, - stride_y=2, - padding=1, - padding_y=2, - pool_type=CudnnAvgPooling(), -) - -outputs([maxpool, avgpool]) diff --git a/paddle/legacy/gserver/tests/mkldnn_branch_net.conf b/paddle/legacy/gserver/tests/mkldnn_branch_net.conf deleted file mode 100644 index 8d5146abb0ebd7f5d6c512457f3cb5c84eac20f5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/mkldnn_branch_net.conf +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=16) -channels = get_config_arg("channels", int, 2) - -def two_conv(input, group_name): - out1 = img_conv_layer(input=input, - name=group_name+'_conv1_', - filter_size=1, - num_filters=channels, - padding=0, - shared_biases=True, - act=ReluActivation()) - - out2 = img_conv_layer(input=input, - name=group_name+'_conv2_', - filter_size=3, - num_filters=channels, - padding=1, - shared_biases=True, - act=ReluActivation()) - return out1, out2 - -def two_conv_bn(input, group_name): - out1, out2 = two_conv(input, group_name) - out1 = batch_norm_layer(input=out1, - name=group_name+'_bn1_', - use_global_stats=False, - act=ReluActivation()) - - out2 = batch_norm_layer(input=out2, - name=group_name+'_bn2_', - use_global_stats=False, - act=ReluActivation()) - return out1, out2 - -def two_conv_pool(input, group_name): - out1, out2 = two_conv(input, group_name) - out1 = img_pool_layer(input=out1, - name=group_name+'_pool1_', - pool_size=3, - stride=2, - padding=0, - pool_type=MaxPooling()) - - out2 = img_pool_layer(input=out2, - name=group_name+'_pool2_', - pool_size=5, - stride=2, - padding=1, - pool_type=MaxPooling()) - return out1, out2 - -def two_fc(input, group_name): - out1 = fc_layer(input=input, - name=group_name+'_fc1_', - size=channels, - bias_attr=False, - act=LinearActivation()) - - out2 = fc_layer(input=input, - name=group_name+'_fc2_', - size=channels, - bias_attr=False, - act=LinearActivation()) - return out1, out2 - -data = data_layer(name ="input", size=channels*16*16) - -tmp = img_conv_layer(input=data, - num_channels=channels, - filter_size=3, - num_filters=channels, - padding=1, - shared_biases=True, - act=ReluActivation()) - -a1, a2 = two_conv(tmp, 'conv_branch') -tmp = addto_layer(input=[a1, a2], - act=ReluActivation(), - bias_attr=False) - -tmp = img_pool_layer(input=tmp, - pool_size=3, - stride=2, - padding=1, - pool_type=AvgPooling()) - -b1, b2 = two_conv_pool(tmp, 'pool_branch') -tmp = 
concat_layer(input=[b1, b2]) - -tmp = img_pool_layer(input=tmp, - num_channels=channels*2, - pool_size=3, - stride=2, - padding=1, - pool_type=MaxPooling()) - -tmp = img_conv_layer(input=tmp, - filter_size=3, - num_filters=channels, - padding=1, - stride=2, - shared_biases=True, - act=LinearActivation(), - bias_attr=False) - -tmp = batch_norm_layer(input=tmp, - use_global_stats=False, - act=ReluActivation()) - -c1, c2 = two_conv_bn(tmp, 'bn_branch') -tmp = addto_layer(input=[c1, c2], - act=ReluActivation(), - bias_attr=False) - -tmp = fc_layer(input=tmp, size=channels, - bias_attr=True, - act=ReluActivation()) - -d1, d2 = two_fc(tmp, 'fc_branch') -tmp = addto_layer(input=[d1, d2]) - -out = fc_layer(input=tmp, size=10, - bias_attr=True, - act=SoftmaxActivation()) - -outputs(out) diff --git a/paddle/legacy/gserver/tests/mkldnn_simple_net.conf b/paddle/legacy/gserver/tests/mkldnn_simple_net.conf deleted file mode 100644 index 0e9d6b31fa8776136b4eee29311383ae6bb21644..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/mkldnn_simple_net.conf +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -settings(batch_size=16) -channels = get_config_arg("channels", int, 2) - -data = data_layer(name ="input", size=channels*16*16) - -tmp = img_conv_layer(input=data, - num_channels=channels, - filter_size=3, - num_filters=channels, - padding=1, - shared_biases=True, - act=ReluActivation()) - -tmp = img_pool_layer(input=tmp, - pool_size=3, - stride=1, - padding=0, - pool_type=AvgPooling()) - -tmp = img_conv_layer(input=tmp, - filter_size=3, - num_filters=channels, - padding=1, - shared_biases=True, - act=LinearActivation(), - bias_attr=False) - -tmp = batch_norm_layer(input=tmp, - use_global_stats=False, - act=ReluActivation()) - -tmp = img_pool_layer(input=tmp, - pool_size=3, - stride=2, - padding=1, - pool_type=MaxPooling()) - -tmp = img_cmrnorm_layer(input=tmp, size=5, scale=0.0001, power=0.75) - -tmp = fc_layer(input=tmp, - size=channels, - bias_attr=False, - act=ReluActivation()) - -out = fc_layer(input=tmp, - size=10, - bias_attr=True, - act=SoftmaxActivation()) - -outputs(out) diff --git a/paddle/legacy/gserver/tests/pyDataProvider.py b/paddle/legacy/gserver/tests/pyDataProvider.py deleted file mode 100644 index 85ea90d6eec25eb709b19d06a18c7a955078be04..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/pyDataProvider.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-import numpy -import struct -import traceback - - -def header_creator(): - ret = "" - ret += struct.pack('i', 3) # slot num - ret += struct.pack('i', 1) # sequence flag - ret += struct.pack('i', 0) # slot0 dense type - ret += struct.pack('i', 3) # slot0 dim - ret += struct.pack('i', 1) # slot1 sparse non value type - ret += struct.pack('i', 7) # slot1 dim - ret += struct.pack('i', 3) # slot2 index type - ret += struct.pack('i', 2) # slot2 dim - return ret - - -def dense_value_creator(sample_num): - ret = "" - ret += struct.pack('i', sample_num) # slot0 sample num - for i in range(sample_num): # slot0 value - ret += struct.pack('f', 1.0) - ret += struct.pack('f', 2.0) - ret += struct.pack('f', 3.0) - return ret - - -def sparse_value_creator(sample_num): - ret = "" - ret += struct.pack('i', sample_num) # slot1 sample num - for i in range(sample_num): # slot1 index - ret += struct.pack('i', i * 2) - ret += struct.pack('i', sample_num * 2) #slot1 length - for i in range(sample_num): # slot1 value - ret += struct.pack('i', 1) - ret += struct.pack('i', 2) - return ret - - -def index_value_creator(sample_num): - ret = "" - ret += struct.pack('i', sample_num) # slot2 sample num - for i in range(sample_num): # slot2 value - ret += struct.pack('i', 0) - return ret - - -def sequenceStartPositions_creator(): - ret = "" - ret += struct.pack('i', 2) # slot0 sequence num - ret += struct.pack('i', 0) # slot0 sequence value1 - ret += struct.pack('i', 1) # slot0 sequence value2 - ret += struct.pack('i', 1) # slot1 sequence num - ret += struct.pack('i', 0) # slot1 sequence value1 - ret += struct.pack('i', 2) # slot2 sequence num - ret += struct.pack('i', 0) # slot2 sequence value1 - ret += struct.pack('i', 1) # slot2 sequence value2 - return ret - - -def subSequenceStartPositions_creator(): - ret = "" - ret += struct.pack('i', 3) # slot0 subsequence num - ret += struct.pack('i', 0) # slot0 subsequence value1 - ret += struct.pack('i', 1) # slot0 subsequence value2 - ret += 
struct.pack('i', 2) # slot0 subsequence value3 - ret += struct.pack('i', 2) # slot1 subsequence num - ret += struct.pack('i', 0) # slot1 subsequence value1 - ret += struct.pack('i', 1) # slot1 subsequence value2 - ret += struct.pack('i', 3) # slot2 subsequence num - ret += struct.pack('i', 0) # slot2 subsequence value1 - ret += struct.pack('i', 1) # slot2 subsequence value2 - ret += struct.pack('i', 2) # slot2 subsequence value3 - return ret - - -class SimpleDataProvider: - def __init__(self, *file_list): - self.file_list = file_list - - def shuffle(self): - pass - - def reset(self): - pass - - def getHeader(self): - return header_creator() - - def getNextBatch(self, batch_size): - ret = "" - ret += struct.pack('i', 2) # batch size - ret += dense_value_creator(2) # slot0 - ret += sparse_value_creator(2) # slot1 - ret += index_value_creator(2) # slot2 - ret += sequenceStartPositions_creator() - return ret - - -class SimpleNestDataProvider: - def __init__(self, *file_list): - self.file_list = file_list - - def shuffle(self): - pass - - def reset(self): - pass - - def getHeader(self): - return header_creator() - - def getNextBatch(self, batch_size): - ret = "" - ret += struct.pack('i', 2) # batch size - ret += dense_value_creator(4) # slot0 - ret += sparse_value_creator(4) # slot1 - ret += index_value_creator(4) # slot2 - ret += sequenceStartPositions_creator() - ret += subSequenceStartPositions_creator() - return ret - - -if __name__ == "__main__": - # test code - data_provider = SimpleDataProvider('./test_batch') - print len(data_provider.getHeader()) - print len(data_provider.getNextBatch(2)) - - data_provider = SimpleNestDataProvider('./test_batch') - print len(data_provider.getHeader()) - print len(data_provider.getNextBatch(2)) diff --git a/paddle/legacy/gserver/tests/pyDataProvider/pyDataProviderList b/paddle/legacy/gserver/tests/pyDataProvider/pyDataProviderList deleted file mode 100644 index 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/paddle/legacy/gserver/tests/pyDataProvider/trainer.conf b/paddle/legacy/gserver/tests/pyDataProvider/trainer.conf deleted file mode 100644 index 7d910df20d4077a6645c42e418816cfaeb28d7e5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/pyDataProvider/trainer.conf +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -################################### Data Configuration ################### -TrainData(PyData(type="py", - files = "./gserver/tests/pyDataProvider/pyDataProviderList", - load_data_module="pyDataProvider", - load_data_object="SimpleDataProvider")) - -################################### Algorithm Configuration ############# -Settings( - learning_rate_decay_a = 1e-05, - learning_rate_decay_b = 1e-06, - learning_rate = 0.001, - batch_size = 1, - algorithm = 'sgd', - num_batches_per_send_parameter = 1, - num_batches_per_get_parameter = 1, -) - -################################### Network Configuration ############### -Layer(type = "data", name = "input1", size = 3) -Layer(type = "data", name = "input2", size = 7) - -Layer(inputs = [Input("input1", - decay_rate = 0.12, - initial_std = 0.02, - parameter_name = "_layer1_1.w"), - Input("input2", - decay_rate = 0.12, - initial_std = 0.02, - parameter_name = "_layer1_2.w"), - ], - name = "layer1", - bias = Bias(parameter_name = "_layer1.bias"), - active_type = "sigmoid", - type = "fc", - size = 100) -Layer(inputs = [Input("layer1", - decay_rate = 0.06, - initial_std = 0.02, - parameter_name = "_layer2.w")], - name = "layer2", - bias = Bias(parameter_name = "_layer2.bias"), - active_type = "sigmoid", - type = "fc", - size = 100) -Layer(inputs = [Input("layer2", - decay_rate = 0.02, - initial_std = 0.02, - parameter_name = "_layer_output.w")], - name = "output", - bias = Bias(parameter_name = "_layer_output.bias"), - active_type = "softmax", - type = "fc", - size = 10) - -Layer(type = "data", name = "label", size = 1) -Layer(inputs = [Input("output"), Input("label")], - type = "multi-class-cross-entropy", - name = "cost") -Inputs("input1", "input2", "label") -Outputs("cost") diff --git a/paddle/legacy/gserver/tests/rnn_data_provider.py b/paddle/legacy/gserver/tests/rnn_data_provider.py deleted file mode 100644 index 18b2191f44e3c85a8db767e3ec242d1fbcfb087a..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/gserver/tests/rnn_data_provider.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -from paddle.trainer.PyDataProvider2 import * - -# Note that each config should has an independent provider -# in current design of PyDataProvider2. -####################################################### -data = [ - [[[1, 3, 2], [4, 5, 2]], 0], - [[[0, 2], [2, 5], [0, 1, 2]], 1], -] - - -# Used for sequence_nest_rnn.conf -@provider( - input_types=[integer_value_sub_sequence(10), integer_value(3)], - should_shuffle=False) -def process_subseq(settings, file_name): - for d in data: - yield d - - -# Used for sequence_rnn.conf -@provider( - input_types=[integer_value_sequence(10), integer_value(3)], - should_shuffle=False) -def process_seq(settings, file_name): - for d in data: - seq = [] - for subseq in d[0]: - seq += subseq - yield seq, d[1] - - -# Used for sequence_nest_rnn_multi_input.conf -@provider( - input_types=[integer_value_sub_sequence(10), integer_value(3)], - should_shuffle=False) -def process_subseq2(settings, file_name): - for d in data: - yield d - - -# Used for sequence_rnn_multi_input.conf -@provider( - input_types=[integer_value_sequence(10), integer_value(3)], - should_shuffle=False) -def process_seq2(settings, file_name): - for d in data: - seq = [] - for subseq in d[0]: - seq += subseq - yield seq, d[1] - - 
-########################################################### -data2 = [ - [[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]], 0], - [[[0, 2], [2, 5], [0, 1, 2]], [[1, 5], [4], [2, 3, 6, 1]], 1], -] - - -# Used for sequence_nest_rnn_multi_unequalength_inputs.conf -@provider( - input_types=[ - integer_value_sub_sequence(10), integer_value_sub_sequence(10), - integer_value(2) - ], - should_shuffle=False) -def process_unequalength_subseq(settings, file_name): - for d in data2: - yield d - - -# Used for sequence_rnn_multi_unequalength_inputs.conf -@provider( - input_types=[ - integer_value_sequence(10), integer_value_sequence(10), integer_value(2) - ], - should_shuffle=False) -def process_unequalength_seq(settings, file_name): - for d in data2: - words1 = reduce(lambda x, y: x + y, d[0]) - words2 = reduce(lambda x, y: x + y, d[1]) - yield words1, words2, d[2] - - -########################################################### -data3 = [ - [[[1, 2], [4, 5, 2]], [1, 2], 0], - [[[0, 2], [2, 5], [0, 1, 2]], [2, 3, 0], 1], -] - - -# Used for sequence_nest_mixed_inputs.conf -@provider( - input_types=[ - integer_value_sub_sequence(10), integer_value_sequence(10), - integer_value(2) - ], - should_shuffle=False) -def process_mixed(settings, file_name): - for d in data3: - yield d diff --git a/paddle/legacy/gserver/tests/sequenceGen.py b/paddle/legacy/gserver/tests/sequenceGen.py deleted file mode 100644 index d5ec8ac23f1f4af7178ac432832bc0c5b4c9eb65..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequenceGen.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import os -import sys - -from paddle.trainer.PyDataProvider2 import * - - -def hook(settings, dict_file, **kwargs): - settings.word_dict = dict_file - settings.input_types = [ - integer_value_sequence(len(settings.word_dict)), integer_value(3) - ] - settings.logger.info('dict len : %d' % (len(settings.word_dict))) - - -@provider(init_hook=hook, should_shuffle=False) -def process(settings, file_name): - with open(file_name, 'r') as fdata: - for line in fdata: - label, comment = line.strip().split('\t') - label = int(''.join(label.split())) - words = comment.split() - words = [ - settings.word_dict[w] for w in words if w in settings.word_dict - ] - yield words, label - - -## for hierarchical sequence network -def hook2(settings, dict_file, **kwargs): - settings.word_dict = dict_file - settings.input_types = [ - integer_value_sub_sequence(len(settings.word_dict)), - integer_value_sequence(3) - ] - settings.logger.info('dict len : %d' % (len(settings.word_dict))) - - -@provider(init_hook=hook2, should_shuffle=False) -def process2(settings, file_name): - with open(file_name) as fdata: - labels = [] - sentences = [] - for line in fdata: - if (len(line)) > 1: - label, comment = line.strip().split('\t') - label = int(''.join(label.split())) - words = comment.split() - words = [ - settings.word_dict[w] for w in words - if w in settings.word_dict - ] - labels.append(label) - sentences.append(words) - else: - yield sentences, labels - labels = [] - sentences = [] diff --git a/paddle/legacy/gserver/tests/sequence_layer_group.conf 
b/paddle/legacy/gserver/tests/sequence_layer_group.conf deleted file mode 100644 index ad1b61d5821fd20135e61bb95abdea16d27a6a9a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_layer_group.conf +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -######################## data source ################################ -dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' -dict_file = dict() -for line_count, line in enumerate(open(dict_path, "r")): - dict_file[line.strip()] = line_count - -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/train.list', - test_list=None, - module='sequenceGen', - obj='process', - args={"dict_file": dict_file}) - -settings(batch_size=5) -######################## network configure ################################ -dict_dim = len(open(dict_path, 'r').readlines()) -word_dim = 128 -hidden_dim = 256 -label_dim = 3 - -data = data_layer(name="word", size=dict_dim) - -emb = embedding_layer(input=data, size=word_dim) - -# (lstm_input + lstm) is equal to lstmemory -with mixed_layer(size=hidden_dim * 4) as lstm_input: - lstm_input += full_matrix_projection(input=emb) - -lstm = lstmemory_group( - input=lstm_input, - size=hidden_dim, - act=TanhActivation(), - gate_act=SigmoidActivation(), - state_act=TanhActivation()) - 
-lstm_last = last_seq(input=lstm) - -with mixed_layer( - size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: - output += full_matrix_projection(input=lstm_last) - -outputs( - classification_cost( - input=output, label=data_layer( - name="label", size=1))) diff --git a/paddle/legacy/gserver/tests/sequence_lstm.conf b/paddle/legacy/gserver/tests/sequence_lstm.conf deleted file mode 100644 index 6ab70e70713f31de31b5cd544cf132e7d0af0f2f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_lstm.conf +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -######################## data source ################################ -dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' -dict_file = dict() -for line_count, line in enumerate(open(dict_path, "r")): - dict_file[line.strip()] = line_count - -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/train.list', - test_list=None, - module='sequenceGen', - obj='process', - args={"dict_file": dict_file}) - -settings(batch_size=5) -######################## network configure ################################ -dict_dim = len(open(dict_path, 'r').readlines()) -word_dim = 128 -hidden_dim = 256 -label_dim = 3 -sparse_update = get_config_arg("sparse_update", bool, False) - -data = data_layer(name="word", size=dict_dim) - -emb = embedding_layer( - input=data, - size=word_dim, - param_attr=ParamAttr(sparse_update=sparse_update)) - -with mixed_layer(size=hidden_dim * 4) as lstm_input: - lstm_input += full_matrix_projection(input=emb) - -lstm = lstmemory( - input=lstm_input, - act=TanhActivation(), - gate_act=SigmoidActivation(), - state_act=TanhActivation()) - -lstm_last = last_seq(input=lstm) - -with mixed_layer( - size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: - output += full_matrix_projection(input=lstm_last) - -outputs( - classification_cost( - input=output, label=data_layer( - name="label", size=1))) diff --git a/paddle/legacy/gserver/tests/sequence_nest_layer_group.conf b/paddle/legacy/gserver/tests/sequence_nest_layer_group.conf deleted file mode 100644 index 75c36b118979760e034f81e3127a748651f53347..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_nest_layer_group.conf +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -######################## data source ################################ -dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' -dict_file = dict() -for line_count, line in enumerate(open(dict_path, "r")): - dict_file[line.strip()] = line_count - -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/train.list.nest', - test_list=None, - module='sequenceGen', - obj='process2', - args={"dict_file": dict_file}) - -settings(batch_size=2) -######################## network configure ################################ -dict_dim = len(open(dict_path, 'r').readlines()) -word_dim = 128 -hidden_dim = 256 -label_dim = 3 - -data = data_layer(name="word", size=dict_dim) - -emb_group = embedding_layer(input=data, size=word_dim) - - -# (lstm_input + lstm) is equal to lstmemory -def lstm_group(lstm_group_input): - with mixed_layer(size=hidden_dim * 4) as group_input: - group_input += full_matrix_projection(input=lstm_group_input) - - lstm_output = lstmemory_group( - input=group_input, - name="lstm_group", - size=hidden_dim, - act=TanhActivation(), - gate_act=SigmoidActivation(), - state_act=TanhActivation()) - return lstm_output - - -lstm_nest_group = recurrent_group( - input=SubsequenceInput(emb_group), step=lstm_group, name="lstm_nest_group") -# hasSubseq ->(seqlastins) seq -lstm_last = last_seq( - input=lstm_nest_group, agg_level=AggregateLevel.TO_SEQUENCE) - -# seq ->(expand) hasSubseq -lstm_expand = expand_layer( - input=lstm_last, - expand_as=emb_group, - expand_level=ExpandLevel.FROM_SEQUENCE) - 
-# hasSubseq ->(average) seq -lstm_average = pooling_layer( - input=lstm_expand, - pooling_type=AvgPooling(), - agg_level=AggregateLevel.TO_SEQUENCE) - -with mixed_layer( - size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: - output += full_matrix_projection(input=lstm_average) - -outputs( - classification_cost( - input=output, label=data_layer( - name="label", size=1))) diff --git a/paddle/legacy/gserver/tests/sequence_nest_rnn.conf b/paddle/legacy/gserver/tests/sequence_nest_rnn.conf deleted file mode 100644 index bc3b22c2a946a62c7a9d3163d3863a090d63539c..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_nest_rnn.conf +++ /dev/null @@ -1,74 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -######################## data source ################################ -define_py_data_sources2(train_list='legacy/gserver/tests/Sequence/dummy.list', - test_list=None, - module='rnn_data_provider', - obj='process_subseq') - - -settings(batch_size=2, learning_rate=0.01) -######################## network configure ################################ -dict_dim = 10 -word_dim = 8 -hidden_dim = 8 -label_dim = 3 - -data = data_layer(name="word", size=dict_dim) - -emb = embedding_layer(input=data, size=word_dim) - -# This hierachical RNN is designed to be equivalent to the simple RNN in -# sequence_rnn.conf - -def outer_step(x): - outer_mem = memory(name="outer_rnn_state", size=hidden_dim) - def inner_step(y): - inner_mem = memory(name="inner_rnn_state", - size=hidden_dim, - boot_layer=outer_mem) - out = fc_layer(input=[y, inner_mem], - size=hidden_dim, - act=TanhActivation(), - bias_attr=True, - name="inner_rnn_state") - return out - - inner_rnn_output = recurrent_group( - step=inner_step, - name="inner", - input=x) - last = last_seq(input=inner_rnn_output, name="outer_rnn_state") - - # "return last" won't work, because recurrent_group only support the input - # sequence type is same as return sequence type. 
- return inner_rnn_output - -out = recurrent_group( - name="outer", - step=outer_step, - input=SubsequenceInput(emb)) - -rep = last_seq(input=out) -prob = fc_layer(size=label_dim, - input=rep, - act=SoftmaxActivation(), - bias_attr=True) - -outputs(classification_cost(input=prob, - label=data_layer(name="label", size=label_dim))) diff --git a/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_input.conf b/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_input.conf deleted file mode 100644 index 165ab229897d32ce2cae1d483b3ffd81392a355a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_input.conf +++ /dev/null @@ -1,76 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -######################## data source ################################ -define_py_data_sources2(train_list='legacy/gserver/tests/Sequence/dummy.list', - test_list=None, - module='rnn_data_provider', - obj='process_subseq') - - -settings(batch_size=2, learning_rate=0.01) -######################## network configure ################################ -dict_dim = 10 -word_dim = 8 -hidden_dim = 8 -label_dim = 3 - -data = data_layer(name="word", size=dict_dim) - -emb = embedding_layer(input=data, size=word_dim) - -# This hierachical RNN is designed to be equivalent to the simple RNN in -# sequence_rnn.conf - -def outer_step(wid, x): - outer_mem = memory(name="outer_rnn_state", size=hidden_dim) - def inner_step(y, wid): - z = embedding_layer(input=wid, size=word_dim) - inner_mem = memory(name="inner_rnn_state", - size=hidden_dim, - boot_layer=outer_mem) - out = fc_layer(input=[y, z, inner_mem], - size=hidden_dim, - act=TanhActivation(), - bias_attr=True, - name="inner_rnn_state") - return out - - inner_rnn_output = recurrent_group( - step=inner_step, - name="inner", - input=[x, wid]) - last = last_seq(input=inner_rnn_output, name="outer_rnn_state") - - # "return last" should also work. But currently RecurrentGradientMachine - # does not handle it, and will report error: In hierachical RNN, all out - # links should be from sequences now. 
- return inner_rnn_output - -out = recurrent_group( - name="outer", - step=outer_step, - input=[SubsequenceInput(data), SubsequenceInput(emb)]) - -rep = last_seq(input=out) -prob = fc_layer(size=label_dim, - input=rep, - act=SoftmaxActivation(), - bias_attr=True) - -outputs(classification_cost(input=prob, - label=data_layer(name="label", size=label_dim))) diff --git a/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py b/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py deleted file mode 100644 index 9a48b7f25c454b492d20e807f09f6d788af44681..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-from paddle.trainer_config_helpers import * - -######################## data source ################################ -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/dummy.list', - test_list=None, - module='rnn_data_provider', - obj='process_unequalength_subseq') - -settings(batch_size=2, learning_rate=0.01) -######################## network configure ################################ -dict_dim = 10 -word_dim = 8 -hidden_dim = 8 -label_dim = 2 - -speaker1 = data_layer(name="word1", size=dict_dim) -speaker2 = data_layer(name="word2", size=dict_dim) - -emb1 = embedding_layer(input=speaker1, size=word_dim) -emb2 = embedding_layer(input=speaker2, size=word_dim) - - -# This hierarchical RNN is designed to be equivalent to the simple RNN in -# sequence_rnn_multi_unequalength_inputs.conf -def outer_step(x1, x2): - index = [0] - - def inner_step(ipt): - index[0] += 1 - i = index[0] - outer_mem = memory(name="outer_rnn_state_%d" % i, size=hidden_dim) - - def inner_step_impl(y): - inner_mem = memory( - name="inner_rnn_state_" + y.name, - size=hidden_dim, - boot_layer=outer_mem) - out = fc_layer( - input=[y, inner_mem], - size=hidden_dim, - act=TanhActivation(), - bias_attr=True, - name='inner_rnn_state_' + y.name) - return out - - encoder = recurrent_group( - step=inner_step_impl, name='inner_%d' % i, input=ipt) - last = last_seq(name="outer_rnn_state_%d" % i, input=encoder) - return encoder, last - - encoder1, sentence_last_state1 = inner_step(ipt=x1) - encoder2, sentence_last_state2 = inner_step(ipt=x2) - - encoder1_expand = expand_layer( - input=sentence_last_state1, expand_as=encoder2) - - return [encoder1_expand, encoder2] - - -encoder1_rep, encoder2_rep = recurrent_group( - name="outer", - step=outer_step, - input=[SubsequenceInput(emb1), SubsequenceInput(emb2)], - targetInlink=emb2) - -encoder1_last = last_seq(input=encoder1_rep) -encoder1_expandlast = expand_layer(input=encoder1_last, expand_as=encoder2_rep) -context = mixed_layer( - input=[ - 
identity_projection(encoder1_expandlast), - identity_projection(encoder2_rep) - ], - size=hidden_dim) - -rep = last_seq(input=context) -prob = fc_layer( - size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True) - -outputs( - classification_cost( - input=prob, label=data_layer( - name="label", size=label_dim))) diff --git a/paddle/legacy/gserver/tests/sequence_recurrent.py b/paddle/legacy/gserver/tests/sequence_recurrent.py deleted file mode 100644 index e2c6a7935c28838fb12fc6e44d99dd59636bf7dd..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_recurrent.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from paddle.trainer_config_helpers import * - -######################## data source ################################ -dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' -dict_file = dict() -for line_count, line in enumerate(open(dict_path, "r")): - dict_file[line.strip()] = line_count - -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/train.list', - test_list=None, - module='sequenceGen', - obj='process', - args={"dict_file": dict_file}) - -settings(batch_size=5) -######################## network configure ################################ -dict_dim = len(open(dict_path, 'r').readlines()) -word_dim = 128 -hidden_dim = 128 -label_dim = 3 - -# This config is designed to be equivalent with sequence_recurrent_group.py - -data = data_layer(name="word", size=dict_dim) - -emb = embedding_layer( - input=data, size=word_dim, param_attr=ParamAttr(name="emb")) - -recurrent = recurrent_layer(input=emb, bias_attr=False, act=SoftmaxActivation()) - -recurrent_last = last_seq(input=recurrent) - -with mixed_layer( - size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: - output += full_matrix_projection(input=recurrent_last) - -outputs( - classification_cost( - input=output, label=data_layer( - name="label", size=1))) diff --git a/paddle/legacy/gserver/tests/sequence_recurrent_group.py b/paddle/legacy/gserver/tests/sequence_recurrent_group.py deleted file mode 100644 index b4638bd9075ff5cdd4a5ed1bc0e0d133f9a9ab86..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_recurrent_group.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from paddle.trainer_config_helpers import * - -######################## data source ################################ -dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' -dict_file = dict() -for line_count, line in enumerate(open(dict_path, "r")): - dict_file[line.strip()] = line_count - -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/train.list', - test_list=None, - module='sequenceGen', - obj='process', - args={"dict_file": dict_file}) - -settings(batch_size=5) -######################## network configure ################################ -dict_dim = len(open(dict_path, 'r').readlines()) -word_dim = 128 -hidden_dim = 128 -label_dim = 3 - -# This config is designed to be equivalent with sequence_recurrent.py - -data = data_layer(name="word", size=dict_dim) - -emb = embedding_layer( - input=data, size=word_dim, param_attr=ParamAttr(name="emb")) - - -def step(y): - mem = memory(name="rnn_state", size=hidden_dim) - with mixed_layer( - name="rnn_state", - size=hidden_dim, - bias_attr=False, - act=SoftmaxActivation()) as out: - out += identity_projection(input=y) - out += full_matrix_projection( - input=mem, param_attr=ParamAttr(name="___recurrent_layer_0__")) - return out - - -recurrent = recurrent_group(name="rnn", step=step, input=emb) - -recurrent_last = last_seq(input=recurrent) - -with mixed_layer( - size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: - output += full_matrix_projection(input=recurrent_last) - -outputs( - classification_cost( - input=output, label=data_layer( - name="label", size=1))) diff 
--git a/paddle/legacy/gserver/tests/sequence_rnn.conf b/paddle/legacy/gserver/tests/sequence_rnn.conf deleted file mode 100644 index 3133595c9ce4c25683c06d326a5ebe9d2bf13077..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_rnn.conf +++ /dev/null @@ -1,57 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -######################## data source ################################ -define_py_data_sources2(train_list='legacy/gserver/tests/Sequence/dummy.list', - test_list=None, - module='rnn_data_provider', - obj='process_seq') - - -settings(batch_size=2, learning_rate=0.01) -######################## network configure ################################ -dict_dim = 10 -word_dim = 8 -hidden_dim = 8 -label_dim = 3 - -data = data_layer(name="word", size=dict_dim) - -emb = embedding_layer(input=data, size=word_dim) - -def step(y): - mem = memory(name="rnn_state", size=hidden_dim) - out = fc_layer(input=[y, mem], - size=hidden_dim, - act=TanhActivation(), - bias_attr=True, - name="rnn_state") - return out - -out = recurrent_group( - name="rnn", - step=step, - input=emb) - -rep = last_seq(input=out) -prob = fc_layer(size=label_dim, - input=rep, - act=SoftmaxActivation(), - bias_attr=True) - -outputs(classification_cost(input=prob, - label=data_layer(name="label", size=label_dim))) diff --git 
a/paddle/legacy/gserver/tests/sequence_rnn_matched_inputs.py b/paddle/legacy/gserver/tests/sequence_rnn_matched_inputs.py deleted file mode 100644 index 921cef04dda0da396a79592b09d7a7e7177462d5..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_rnn_matched_inputs.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from paddle.trainer_config_helpers import * - -######################## data source ################################ -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/dummy.list', - test_list=None, - module='rnn_data_provider', - obj='process_mixed') - -settings(batch_size=2, learning_rate=0.01) -######################## network configure ################################ -dict_dim = 10 -word_dim = 2 -hidden_dim = 2 -label_dim = 2 - -data1 = data_layer(name="word1", size=dict_dim) -data2 = data_layer(name="word2", size=dict_dim) -label = data_layer(name="label", size=label_dim) - -encoding = embedding_layer(input=data2, size=word_dim) - -subseq = embedding_layer(input=data1, size=word_dim) -seq = embedding_layer(input=data2, size=word_dim) -nonseq = embedding_layer(input=label, size=word_dim) - - -# This hierarchical RNN is designed to be equivalent to the simple RNN in -# sequence_rnn_mixed_inputs.conf -def outer_step(subseq, seq, nonseq, encoding): - outer_mem = memory(name="outer_rnn_state", size=hidden_dim) - - def 
inner_step(subseq, seq, nonseq): - inner_mem = memory( - name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem) - - out = fc_layer( - input=[subseq, seq, nonseq, inner_mem], - size=hidden_dim, - act=TanhActivation(), - bias_attr=True, - name='inner_rnn_state') - return out - - decoder = recurrent_group( - step=inner_step, name='inner', input=[subseq, seq, nonseq]) - last = last_seq(name="outer_rnn_state", input=decoder) - context = simple_attention( - encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last) - return context - - -out = recurrent_group( - name="outer", - step=outer_step, - input=[ - subseq, expand_layer( - seq, expand_as=subseq, - expand_level=ExpandLevel.FROM_SEQUENCE), expand_layer( - nonseq, - expand_as=subseq, - expand_level=ExpandLevel.FROM_NO_SEQUENCE), - StaticInput(encoding) - ]) - -rep = last_seq(input=out) -prob = fc_layer( - size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True) - -outputs(classification_cost(input=prob, label=label)) diff --git a/paddle/legacy/gserver/tests/sequence_rnn_mixed_inputs.py b/paddle/legacy/gserver/tests/sequence_rnn_mixed_inputs.py deleted file mode 100644 index c7bcaf6c4b21272e1c95d6de7e69e4558d52b9c6..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_rnn_mixed_inputs.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from paddle.trainer_config_helpers import * - -######################## data source ################################ -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/dummy.list', - test_list=None, - module='rnn_data_provider', - obj='process_mixed') - -settings(batch_size=2, learning_rate=0.01) -######################## network configure ################################ -dict_dim = 10 -word_dim = 2 -hidden_dim = 2 -label_dim = 2 - -data1 = data_layer(name="word1", size=dict_dim) -data2 = data_layer(name="word2", size=dict_dim) -label = data_layer(name="label", size=label_dim) - -encoding = embedding_layer(input=data2, size=word_dim) - - -# This hierarchical RNN is designed to be equivalent to the simple RNN in -# sequence_rnn_matched_inputs.conf -def outer_step(subseq, seq, nonseq, encoding): - outer_mem = memory(name="outer_rnn_state", size=hidden_dim) - - def inner_step(data1, data2, label): - inner_mem = memory( - name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem) - - subseq = embedding_layer(input=data1, size=word_dim) - seq = embedding_layer(input=data2, size=word_dim) - nonseq = embedding_layer(input=label, size=word_dim) - - print_layer(input=[data1, seq, label, inner_mem]) - out = fc_layer( - input=[subseq, seq, nonseq, inner_mem], - size=hidden_dim, - act=TanhActivation(), - bias_attr=True, - name='inner_rnn_state') - return out - - decoder = recurrent_group( - step=inner_step, name='inner', - input=[subseq, StaticInput(seq), nonseq]) - last = last_seq(name="outer_rnn_state", input=decoder) - context = simple_attention( - encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last) - return context - - -out = recurrent_group( - name="outer", - step=outer_step, - input=[data1, data2, StaticInput(label), StaticInput(encoding)]) - -rep = last_seq(input=out) -prob = fc_layer( - size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True) - -outputs(classification_cost(input=prob, label=label)) diff --git 
a/paddle/legacy/gserver/tests/sequence_rnn_multi_input.conf b/paddle/legacy/gserver/tests/sequence_rnn_multi_input.conf deleted file mode 100644 index bf4be779a23e081cef33ce2b2734ad91cfa33c0d..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_rnn_multi_input.conf +++ /dev/null @@ -1,58 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -######################## data source ################################ -define_py_data_sources2(train_list='legacy/gserver/tests/Sequence/dummy.list', - test_list=None, - module='rnn_data_provider', - obj='process_seq') - - -settings(batch_size=2, learning_rate=0.01) -######################## network configure ################################ -dict_dim = 10 -word_dim = 8 -hidden_dim = 8 -label_dim = 3 - -data = data_layer(name="word", size=dict_dim) - -emb = embedding_layer(input=data, size=word_dim) - -def step(y, wid): - z = embedding_layer(input=wid, size=word_dim) - mem = memory(name="rnn_state", size=hidden_dim) - out = fc_layer(input=[y, z, mem], - size=hidden_dim, - act=TanhActivation(), - bias_attr=True, - name="rnn_state") - return out - -out = recurrent_group( - name="rnn", - step=step, - input=[emb, data]) - -rep = last_seq(input=out) -prob = fc_layer(size=label_dim, - input=rep, - act=SoftmaxActivation(), - bias_attr=True) - 
-outputs(classification_cost(input=prob, - label=data_layer(name="label", size=label_dim))) diff --git a/paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py b/paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py deleted file mode 100644 index 3612b49c2279874a378d4aaed81623f7d0d2ea2f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from paddle.trainer_config_helpers import * - -######################## data source ################################ -define_py_data_sources2( - train_list='legacy/gserver/tests/Sequence/dummy.list', - test_list=None, - module='rnn_data_provider', - obj='process_unequalength_seq') - -settings(batch_size=2, learning_rate=0.01) -######################## network configure ################################ -dict_dim = 10 -word_dim = 8 -hidden_dim = 8 -label_dim = 2 - -speaker1 = data_layer(name="word1", size=dict_dim) -speaker2 = data_layer(name="word2", size=dict_dim) - -emb1 = embedding_layer(input=speaker1, size=word_dim) -emb2 = embedding_layer(input=speaker2, size=word_dim) - -# This hierachical RNN is designed to be equivalent to the RNN in -# sequence_nest_rnn_multi_unequalength_inputs.conf - - -def step(x1, x2): - def calrnn(y): - mem = memory(name='rnn_state_' + y.name, size=hidden_dim) - out = fc_layer( - input=[y, mem], - size=hidden_dim, - act=TanhActivation(), - bias_attr=True, - name='rnn_state_' + y.name) - return out - - encoder1 = calrnn(x1) - encoder2 = calrnn(x2) - return [encoder1, encoder2] - - -encoder1_rep, encoder2_rep = recurrent_group( - name="stepout", step=step, input=[emb1, emb2]) - -encoder1_last = last_seq(input=encoder1_rep) -encoder1_expandlast = expand_layer(input=encoder1_last, expand_as=encoder2_rep) -context = mixed_layer( - input=[ - identity_projection(encoder1_expandlast), - identity_projection(encoder2_rep) - ], - size=hidden_dim) - -rep = last_seq(input=context) -prob = fc_layer( - size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True) - -outputs( - classification_cost( - input=prob, label=data_layer( - name="label", size=label_dim))) diff --git a/paddle/legacy/gserver/tests/test_ActivationGrad.cpp b/paddle/legacy/gserver/tests/test_ActivationGrad.cpp deleted file mode 100644 index f468d229a889e02bf79baa29576c638acbd8eb08..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/gserver/tests/test_ActivationGrad.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); -DECLARE_bool(thread_local_rand_use_global_seed); - -void testActivation(const string& act) { - LOG(INFO) << "test activation: " << act; - size_t size = 10; - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("addto"); - config.layerConfig.set_size(size); - config.layerConfig.set_active_type(act); - config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); - config.layerConfig.add_inputs(); - for (auto useGpu : {false, true}) { - testLayerGrad(config, - act + "_activation", - 100, - /* trans= */ false, - useGpu, - /* useWeight */ true); - } -} - -TEST(Activation, activation) { - auto types = ActivationFunction::getAllRegisteredTypes(); - std::set excluded{"sequence_softmax"}; - for (auto type : types) { - if (excluded.count(type)) continue; - testActivation(type); - } -} - -void testSequenceSoftmaxAct(bool hasSubseq) { - LOG(INFO) << "test activation: sequence softmax"; - - const size_t size = 1; - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("addto"); - 
config.layerConfig.set_size(size); - config.layerConfig.set_active_type("sequence_softmax"); - config.inputDefs.push_back( - {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, - "layer_0", - 1, - 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "sequence_softmax", - 100, - /* trans= */ false, - useGpu, - /* useWeight */ true); - } -} - -TEST(SequenceSoftmaxActivation, activation) { - for (auto hasSubseq : {false, true}) { - LOG(INFO) << "hasSubseq = " << hasSubseq; - testSequenceSoftmaxAct(hasSubseq); - } -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_BatchNorm.cpp b/paddle/legacy/gserver/tests/test_BatchNorm.cpp deleted file mode 100644 index e21fa16074406645be88eeb454d743531f825041..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_BatchNorm.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/utils/GlobalConstants.h" - -#include "LayerGradUtil.h" -#include "paddle/legacy/cuda/include/hl_batch_norm.h" -#include "paddle/legacy/math/tests/TensorCheck.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); -DECLARE_int32(gpu_id); -DECLARE_double(checkgrad_eps); -DECLARE_bool(thread_local_rand_use_global_seed); -DECLARE_bool(prev_batch_state); - -// Test that the batchNormLayer can be followed by a ConvLayer -TEST(Layer, batchNorm) { - FLAGS_use_gpu = false; - TestConfig configBN; - const int CHANNELS = 6272; - const int IMG_SIZE = 1; - configBN.layerConfig.set_type("batch_norm"); - configBN.layerConfig.set_name("bn"); - configBN.layerConfig.set_size(CHANNELS * IMG_SIZE * IMG_SIZE); - configBN.layerConfig.set_active_type("relu"); - configBN.biasSize = CHANNELS; - configBN.inputDefs.push_back({INPUT_DATA, - "layer_0", - /* dim= */ IMG_SIZE * IMG_SIZE * CHANNELS, - /* paraSize= */ CHANNELS}); - - configBN.inputDefs.push_back( - {INPUT_DATA, "layer_1_running_mean", 1, CHANNELS}); - configBN.inputDefs.back().isStatic = true; - configBN.inputDefs.push_back( - {INPUT_DATA, "layer_2_running_var", 1, CHANNELS}); - configBN.inputDefs.back().isStatic = true; - - LayerInputConfig* input = configBN.layerConfig.add_inputs(); - configBN.layerConfig.add_inputs(); - configBN.layerConfig.add_inputs(); - - ImageConfig* img_conf = input->mutable_image_conf(); - img_conf->set_channels(CHANNELS); - img_conf->set_img_size(IMG_SIZE); - - // Setting up conv-layer config - TestConfig config; - config.biasSize = 64; - config.layerConfig.set_type("exconv"); - config.layerConfig.set_num_filters(64); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - config.inputDefs.push_back({INPUT_DATA, "bn", 6272, 204800}); - input = 
config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(5); - conv->set_filter_size_y(5); - conv->set_channels(128); - conv->set_padding(1); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(1); - conv->set_filter_channels(conv->channels() / conv->groups()); - conv->set_img_size(7); - conv->set_output_x(3); - config.layerConfig.set_size(conv->output_x() * conv->output_x() * - config.layerConfig.num_filters()); - config.layerConfig.set_name("conv"); - - // data layer initialize - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer(configBN, - &dataLayers, - &datas, - &layerMap, - "batch_norm", - 100, - false, - false); - // test layer initialize - std::vector parameters; - LayerPtr bnLayer; - initTestLayer(configBN, &layerMap, ¶meters, &bnLayer); - - std::vector parameters2; - LayerPtr convLayer; - initTestLayer(config, &layerMap, ¶meters2, &convLayer); - - bnLayer->forward(PASS_GC); - convLayer->forward(PASS_GC); - - CHECK_EQ(static_cast(convLayer->getOutputValue()->getHeight()), 100); - CHECK_EQ(static_cast(convLayer->getOutputValue()->getWidth()), 576); -} - -#ifdef PADDLE_WITH_CUDA -void batchNormInference(int n, int c, int h, int w) { - MatrixPtr input = std::make_shared(n, c * h * w); - MatrixPtr cudnnOut = std::make_shared(n, c * h * w); - MatrixPtr cudaOut = std::make_shared(n, c * h * w); - MatrixPtr cudnnCheck = std::make_shared(n, c * h * w); - MatrixPtr cudaCheck = std::make_shared(n, c * h * w); - input->randomizeUniform(); - cudnnOut->zeroMem(); - cudaOut->zeroMem(); - - MatrixPtr scale = std::make_shared(1, c); - scale->randomizeUniform(); - MatrixPtr bias = std::make_shared(1, c); - bias->randomizeUniform(); - - MatrixPtr movingMean = std::make_shared(1, c); - movingMean->randomizeUniform(); - - MatrixPtr movingVar = std::make_shared(1, c); - movingVar->randomizeUniform(); - movingVar->clip(0.01, 50); - - hl_tensor_descriptor ioDesc; 
- hl_tensor_descriptor bnDesc; - hl_create_tensor_descriptor(&ioDesc); - hl_create_tensor_descriptor(&bnDesc); - hl_tensor_reshape(ioDesc, n, c, h, w); - hl_tensor_reshape(bnDesc, 1, c, 1, 1); - - double EPS = 1E-5; - hl_batch_norm_forward_inference(ioDesc, - input->getData(), - ioDesc, - cudnnOut->getData(), - bnDesc, - scale->getData(), - bias->getData(), - movingMean->getData(), - movingVar->getData(), - EPS); - - hl_batch_norm_cuda_inference(input->getData(), - cudaOut->getData(), - scale->getData(), - bias->getData(), - movingMean->getData(), - movingVar->getData(), - EPS, - n, - c, - h, - w); - - cudnnCheck->copyFrom(*cudnnOut); - cudaCheck->copyFrom(*cudaOut); - autotest::TensorCheckErr(*cudnnCheck, *cudaCheck); - - hl_destroy_tensor_descriptor(ioDesc); - hl_destroy_tensor_descriptor(bnDesc); -} - -TEST(BatchNorm, Inference) { - batchNormInference(33, 267, 1, 1); - batchNormInference(19, 105, 4, 4); -} -#endif - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_CRFLayerGrad.cpp b/paddle/legacy/gserver/tests/test_CRFLayerGrad.cpp deleted file mode 100644 index 1dafd1de4d82f1d306626090c30cf9203fa24dd0..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_CRFLayerGrad.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/gserver/layers/LinearChainCRF.h" - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT - -DECLARE_int32(gpu_id); -DECLARE_bool(thread_local_rand_use_global_seed); - -static inline bool getNextSequence(std::vector& seq, int numClasses) { - for (auto& v : seq) { - if (++v < numClasses) { - return true; - } - v = 0; - } - return false; -} - -// log(exp(x) + exp(y)) -static inline real logSum(real x, real y) { - real maxValue = std::max(x, y); - if (std::isinf(maxValue)) { - return -std::numeric_limits::infinity(); - } else { - return maxValue + log(exp(x - maxValue) + exp(y - maxValue)); - } -} - -static inline std::vector genRandLabels(int numClasses, int length) { - std::vector labels(length); - for (int i = 0; i < length; ++i) { - labels[i] = rand() % numClasses; // NOLINT - } - return labels; -} - -TEST(CRFLayer, cost) { - const int numClasses = 4; - CpuVector para(numClasses * (numClasses + 2)); - real* a = para.getData(); - real* b = para.getData() + numClasses; - real* w = para.getData() + 2 * numClasses; - LinearChainCRF crf(4, para.getData()); - for (int length : {1, 2, 3, 10}) { - for (int tries = 0; tries < 10; ++tries) { - CpuMatrix x(length, numClasses); - x.randomizeUniform(); - para.randnorm(0, 2); - - std::vector goldenLabels = genRandLabels(numClasses, length); - - real cost = crf.forward(x.getData(), goldenLabels.data(), length); - - real logZ = -std::numeric_limits::infinity(); - real logNominator = -std::numeric_limits::infinity(); - std::vector testResult(length, 0); - do { - real score = a[testResult.front()]; - score += x.getElement(0, testResult.front()); - for (int k = 1; k < length; ++k) { - score += x.getElement(k, testResult[k]) + - w[numClasses * testResult[k - 1] + 
testResult[k]]; - } - score += b[testResult.back()]; - logZ = logSum(logZ, score); - - if (goldenLabels == testResult) { - logNominator = score; - } - } while (getNextSequence(testResult, numClasses)); - - real trueCost = -logNominator + logZ; - - real diff = fabs(trueCost - cost); - diff /= fabs(cost) < fabs(trueCost) ? fabs(cost) : fabs(trueCost); - VLOG(1) << "cost=" << cost << " trueCost=" << trueCost << " diff=" << diff - << std::endl; - if (typeid(real) == typeid(double)) { // NOLINT - EXPECT_LE(diff, 1e-10); - } else { - EXPECT_LE(diff, 5e-3); - } - } - } -} - -inline real epsilon() { return typeid(real) == typeid(double) ? 1e-10 : 0.06; } - -TestConfig initTestConfig(size_t numClasses, bool withWeight) { - TestConfig config; - config.layerConfig.set_type("crf"); - config.layerConfig.set_size(numClasses); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, - "layer_0", - numClasses, - numClasses * (numClasses + 2)}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back( - {INPUT_SEQUENCE_LABEL, "layer_label", numClasses, 0}); - config.layerConfig.add_inputs(); - - if (withWeight) { - config.inputDefs.push_back({INPUT_DENSE_DIM_DATA, "layer_weight", 1, 0}); - config.layerConfig.add_inputs(); - } - - return config; -} - -TEST(Layer, CRFLayer) { - size_t numClasses = 10; - for (int tries = 0; tries < 5; ++tries) { - TestConfig config = initTestConfig(numClasses, /* withWeight= */ false); - for (int length : {1, 3, 100}) { - // Not support GPU now - testLayerGrad(config, - "crf", - length, - /* trans= */ false, - /* useGpu= */ false, - /* useWeight= */ false, - epsilon()); - } - } -} - -TEST(Layer, CRFLayerUseWeight) { - size_t numClasses = 10; - for (int tries = 0; tries < 5; ++tries) { - TestConfig config = initTestConfig(numClasses, /* withWeight= */ true); - for (int length : {1, 3, 100}) { - // Not support GPU now - testLayerGrad(config, - "crf", - length, - /* trans= */ false, - /* useGpu= */ false, - /* useWeight= */ 
false, - epsilon()); - } - } -} - -int main(int argc, char** argv) { - initMain(argc, argv); - hl_start(); - hl_init(FLAGS_gpu_id); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_CompareSparse.cpp b/paddle/legacy/gserver/tests/test_CompareSparse.cpp deleted file mode 100644 index 11b633a5885180ae227f6e93330117b567d4a4ab..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_CompareSparse.cpp +++ /dev/null @@ -1,228 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "paddle/legacy/trainer/Trainer.h" - -#include -#include - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -static const string& configFile1 = "legacy/gserver/tests/sequence_lstm.conf"; - -DECLARE_bool(use_gpu); -DECLARE_string(config); -DECLARE_int32(gpu_id); -DECLARE_int32(seed); -DECLARE_int32(num_passes); -DECLARE_int32(saving_period); - -DECLARE_int32(num_gradient_servers); -DECLARE_int32(port); -DECLARE_bool(local); -DECLARE_bool(use_old_updater); -DECLARE_bool(parallel_nn); -DECLARE_string(config_args); -DEFINE_double(max_diff_ratio, - 0.0f, - "max diff ratio allowed for parameters value"); - -int gNumDevices = 0; - -std::vector trainerOnePassTest(const string& configFile, - bool sparseUpdate, - int trainerCount = 1, - bool useGpu = false) { - FLAGS_use_gpu = useGpu; - FLAGS_config = configFile; - FLAGS_trainer_count = trainerCount; - FLAGS_config_args = sparseUpdate ? "sparse_update=1" : "sparse_update=0"; - - LOG(INFO) << " useGpu=" << useGpu << " trainerCount=" << trainerCount - << " configFile=" << configFile << " sparseUpdate=" << sparseUpdate; - srand(FLAGS_seed); - *ThreadLocalRand::getSeed() = FLAGS_seed; - ThreadLocalRandomEngine::get().seed(FLAGS_seed); - if (useGpu) { - CHECK_LE(trainerCount, gNumDevices); - } - - std::vector> pservers; - if (!FLAGS_local) { - int numPorts = FLAGS_ports_num + FLAGS_ports_num_for_sparse; - pservers.resize(numPorts); - - for (int i = 0; i < numPorts; ++i) { - pservers[i].reset(new ParameterServer2(std::string(), FLAGS_port + i)); - pservers[i]->init(); - pservers[i]->start(); - } - } - - Trainer trainer; - trainer.init(TrainerConfigHelper::createFromFlagConfig()); - trainer.train(); - return trainer.getGradientMachine()->getParameters(); -} - -std::vector& getDenseParameters() { - static std::vector denseParameters; - if (denseParameters.empty()) { - // use dense training as base - FLAGS_local = true; - denseParameters = trainerOnePassTest(configFile1, false); - } - - 
return denseParameters; -} - -void checkBuffer(real* A, - const char* desA, - real* B, - const char* desB, - size_t len, - double maxDiffRatio) { - double maxDiff = 0; - double maxValue = 0; - for (size_t i = 0; i < len; ++i) { - double diff = fabs(A[i] - B[i]); - maxValue = std::max(maxValue, std::max(fabs(A[i]), fabs(B[i]))); - maxDiff = std::max(maxDiff, diff); - } - EXPECT_LE(maxDiff / maxValue, maxDiffRatio); - LOG(INFO) << " maxDiff=" << maxDiff << " maxValue=" << maxValue - << " maxDiff/maxValue=" << maxDiff / maxValue << "\n\n"; -} - -void compareValue(const vector& parametersA, - const vector& parametersB, - double maxDiffRatio = 0.0) { - LOG(INFO) << "\n\n--------------------------------" - << " Check Gradient Machine Parameters:" - << " -------------------------------------\n"; - for (size_t i = 0; i < parametersA.size(); ++i) { - ParameterPtr parameterA, parameterB; - parameterA = parametersA[i]; - parameterB = parametersB[i]; - - CpuVector paraA(parameterA->getSize()); - CpuVector paraB(parameterB->getSize()); - paraA.copyFrom(*parameterA->getBuf(PARAMETER_VALUE)); - paraB.copyFrom(*parameterB->getBuf(PARAMETER_VALUE)); - - LOG(INFO) << "\n\n----------- PARAMETER_VALUE: " << parameterA->getName() - << " ; size : " << paraA.getSize() << " ------------"; - checkBuffer(paraA.getData(), - "para_A", - paraB.getData(), - "para_B", - paraA.getSize(), - maxDiffRatio); - } -} - -TEST(compareSparse, cpu) { - FLAGS_local = 1; // disable remote sparse update in parameter config - std::vector parameters = trainerOnePassTest(configFile1, true); - compareValue(getDenseParameters(), parameters); -} - -TEST(compareSparse, remote_cpu) { - FLAGS_local = 0; // will enable remote sparse update - FLAGS_ports_num_for_sparse = 5; - std::vector parameters = trainerOnePassTest(configFile1, true); - compareValue(getDenseParameters(), parameters); -} - -TEST(compareSparse, cpu10_local_vs_remote) { - FLAGS_local = 1; // disable remote sparse update in parameter config - 
std::vector localParameters = - trainerOnePassTest(configFile1, true, 2); - - FLAGS_local = 0; // will enable remote sparse update - FLAGS_ports_num_for_sparse = 5; - std::vector remoteParameters = - trainerOnePassTest(configFile1, true, 2); - - compareValue(localParameters, remoteParameters); -} - -TEST(compareSparse, multiGradientMachine) { - int numGpu; -#ifdef PADDLE_TYPE_DOUBLE - double eps = 1e-8; -#else - double eps = 1e-4; -#endif - numGpu = hl_get_device_count(); - for (bool local : {false, true}) { - FLAGS_local = local; - FLAGS_ports_num_for_sparse = 5; - for (bool useGpu : {false, true}) { -#ifndef PADDLE_WITH_CUDA - if (useGpu) continue; -#endif - FLAGS_parallel_nn = useGpu; - LOG(INFO) << " local=" << local << " useGpu=" << useGpu; - int trainerCount = useGpu ? numGpu : 2; - std::vector parameters = - trainerOnePassTest(configFile1, true, trainerCount, useGpu); - compareValue(getDenseParameters(), parameters, eps); - } - } - FLAGS_parallel_nn = false; -} - -TEST(compareSparse, NeuralNetwork) { -#ifdef PADDLE_TYPE_DOUBLE - double eps = 1e-8; -#else - double eps = 1e-4; -#endif - for (bool local : {false, true}) { - FLAGS_local = local; - FLAGS_ports_num_for_sparse = 5; - for (bool useGpu : {false, true}) { -#ifndef PADDLE_WITH_CUDA - if (useGpu) continue; -#endif - FLAGS_parallel_nn = useGpu; - LOG(INFO) << " local=" << local << " useGpu=" << useGpu; - int trainerCount = 1; - std::vector parameters = - trainerOnePassTest(configFile1, true, trainerCount, useGpu); - compareValue(getDenseParameters(), parameters, useGpu ? 
eps : 0); - } - } - FLAGS_parallel_nn = false; -} - -int main(int argc, char** argv) { - // FIXME(tonyyang-svail): - // Turn off this test due CI failure: - // https://paddleci.ngrok.io/viewLog.html?buildId=27608&buildTypeId=Paddle_PrCi&tab=buildLog&_focus=10430 - return 0; - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - initPython(argc, argv); - - gNumDevices = hl_get_device_count(); - FLAGS_num_passes = 1; // train one pass - FLAGS_saving_period = 100000; // do not save parameter - - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_CompareTwoNets.cpp b/paddle/legacy/gserver/tests/test_CompareTwoNets.cpp deleted file mode 100644 index e19c34abbd8a84660a9e79bcbf602437bfc92832..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_CompareTwoNets.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include -#include - -#include "paddle/legacy/trainer/Trainer.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_int32(gpu_id); - -DECLARE_bool(local); -DECLARE_bool(use_gpu); - -DECLARE_string(config); -DECLARE_string(nics); - -DEFINE_bool(need_high_accuracy, - false, - "whether need to run in double accuracy"); -DEFINE_double( - max_diff_ratio, - 0.0f, - "max diff ratio allowed for outputs and parameters (value/gradient)"); -DECLARE_bool(thread_local_rand_use_global_seed); -DECLARE_int32(seed); - -static const string& config_file_a = - "legacy/gserver/tests/sequence_recurrent.py"; -static const string& config_file_b = - "legacy/gserver/tests/sequence_recurrent_group.py"; - -struct ComData { - vector outArgs; - vector parameters; -}; - -void calcGradient(ComData& data, const string configFile) { - FLAGS_config = configFile; - - FLAGS_local = true; - FLAGS_use_gpu = false; - - FLAGS_nics = ""; - - *ThreadLocalRand::getSeed() = FLAGS_seed; - srand(FLAGS_seed); - - Trainer trainer; - trainer.init(TrainerConfigHelper::createFromFlagConfig(), false); - - data.parameters = trainer.getGradientMachine()->getParameters(); - - DataBatch dataBatch; - int32_t batchSize = trainer.getConfig().opt_config().batch_size(); - - trainer.getDataProvider()->reset(); - trainer.getDataProvider()->setSkipShuffle(); - trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch); - - CHECK(dataBatch.getSize()) << "No data from data provider"; - vector& inArgs = dataBatch.getStreams(); - - trainer.getGradientMachine()->start(); - trainer.getGradientMachine()->forwardBackward( - inArgs, &data.outArgs, PASS_TRAIN); - - trainer.getGradientMachine()->finish(); -} - -void checkBuffer(real* A, - const char* desA, - real* B, - const char* desB, - size_t len, - size_t width = 1) { - int nNum = 0; - real maxVal = 0; - for (size_t i = 0; i < len; ++i) { - maxVal = std::max(maxVal, std::max(A[i], B[i])); - } - real maxDiff = 0; - for (size_t i 
= 0; i < len; ++i) { - real diff = fabs(A[i] - B[i]); - maxDiff = std::max(maxDiff, diff); - if (diff > maxVal * FLAGS_max_diff_ratio) { - nNum++; - VLOG(1) << "Row: " << i / width << ", " << desA << " : " << A[i] << " " - << desB << " : " << B[i] << " diff=" << diff; - } - } - EXPECT_EQ(0, nNum); - LOG(INFO) << "maxValue=" << maxVal << " maxDiff=" << maxDiff << "\n\n"; -} - -void compareGradient(ComData& comDataA, ComData& comDataB) { - vector outArgsA = comDataA.outArgs; - vector outArgsB = comDataB.outArgs; - - for (size_t i = 0; i < outArgsA.size(); ++i) { - CpuMatrix matA(outArgsA[i].value->getHeight(), - outArgsA[i].value->getWidth()); - CpuMatrix matB(outArgsB[i].value->getHeight(), - outArgsB[i].value->getWidth()); - - matA.copyFrom(*outArgsA[i].value); - matB.copyFrom(*outArgsB[i].value); - - LOG(INFO) << "\n--------------------------------" - << " Check Network Output_" << i << ":" - << " -------------------------------------\n"; - checkBuffer(matA.getData(), - "network A output", - matB.getData(), - "network B output", - matA.getElementCnt(), - matA.getWidth()); - } - - vector& parametersA = comDataA.parameters; - vector& parametersB = comDataB.parameters; - - LOG(INFO) << "\n\n--------------------------------" - << " Check Gradient Machine Parameters:" - << " -------------------------------------\n"; - for (size_t i = 0; i < parametersA.size(); ++i) { - ParameterPtr parameterA, parameterB; - parameterA = parametersA[i]; - parameterB = parametersB[i]; - - CpuVector paraA(parameterA->getSize()); - CpuVector paraB(parameterB->getSize()); - paraA.copyFrom(*parameterA->getBuf(PARAMETER_VALUE)); - paraB.copyFrom(*parameterB->getBuf(PARAMETER_VALUE)); - - LOG(INFO) << "\n\n----------- PARAMETER_VALUE: " << parameterA->getName() - << " ; size : " << paraA.getSize() << " ------------"; - checkBuffer(paraA.getData(), - "Network A", - paraB.getData(), - "Network B", - paraA.getSize()); - - CpuVector gradA(*parameterA->getBuf(PARAMETER_GRADIENT)); - CpuVector 
gradB(*parameterB->getBuf(PARAMETER_GRADIENT)); - - LOG(INFO) << "\n\n----------- PARAMETER_GRADIENT: " << parameterA->getName() - << " ; size : " << gradA.getSize() << " -----------"; - checkBuffer(gradA.getData(), - "Network A", - gradB.getData(), - "Network B", - gradA.getSize()); - } -} - -TEST(Trainer, create) { - ComData dataA; - calcGradient(dataA, config_file_a); - LOG(INFO) << "\n\nforwardBackward of Network A is finished\n\n"; - - ComData dataB; - calcGradient(dataB, config_file_b); - LOG(INFO) << "\n\nforwardBackward of the Network B is finished\n\n"; - - compareGradient(dataA, dataB); -} - -int main(int argc, char** argv) { - FLAGS_thread_local_rand_use_global_seed = true; - paddle::initMain(argc, argv); - testing::InitGoogleTest(&argc, argv); - initPython(argc, argv); - -#ifndef PADDLE_TYPE_DOUBLE - if (FLAGS_need_high_accuracy) { - LOG(INFO) << "skip test due to it's need high accuracy"; - return 0; - } - if (FLAGS_max_diff_ratio == 0.0f) { - FLAGS_max_diff_ratio = 1e-5; - LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio - << " in low accuracy mode"; - } -#else - if (FLAGS_max_diff_ratio == 0.0f) { - FLAGS_max_diff_ratio = 1e-10; - LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio - << " in high accuracy mode"; - } -#endif - - int ret = RUN_ALL_TESTS(); - return ret; -} diff --git a/paddle/legacy/gserver/tests/test_ConvTrans.cpp b/paddle/legacy/gserver/tests/test_ConvTrans.cpp deleted file mode 100644 index 4ea0a3d379b010fcb6ccb91a28e653a53cfe66d8..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_ConvTrans.cpp +++ /dev/null @@ -1,244 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/utils/GlobalConstants.h" - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); -DECLARE_int32(gpu_id); -DECLARE_double(checkgrad_eps); -DECLARE_bool(thread_local_rand_use_global_seed); -DECLARE_bool(prev_batch_state); - -// Test that the convTrans forward is the same as conv backward -TEST(Layer, convTransLayerFwd) { - // Setting up conv-trans layer - TestConfig configt; - configt.biasSize = 3; - configt.layerConfig.set_type("exconvt"); - configt.layerConfig.set_num_filters(3); - configt.layerConfig.set_partial_sum(1); - configt.layerConfig.set_shared_biases(true); - - configt.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); - LayerInputConfig* input = configt.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(2); - conv->set_filter_size_y(4); - conv->set_channels(16); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(1); - conv->set_filter_channels(3 / conv->groups()); - conv->set_img_size(16); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - configt.layerConfig.set_size(conv->img_size() * conv->img_size() * - configt.layerConfig.num_filters()); - 
configt.layerConfig.set_name("convTrans"); - - // data layer initialize - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer( - configt, &dataLayers, &datas, &layerMap, "convTrans", 100, false, false); - // test layer initialize - std::vector parameters; - LayerPtr convtLayer; - initTestLayer(configt, &layerMap, ¶meters, &convtLayer); - convtLayer->getBiasParameter()->zeroMem(); - convtLayer->forward(PASS_GC); - - // Setting up conv-layer config - TestConfig config; - config.biasSize = 16; - config.layerConfig.set_type("exconv"); - config.layerConfig.set_num_filters(16); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - config.inputDefs.push_back({INPUT_DATA, "layer_1", 768, 384}); - input = config.layerConfig.add_inputs(); - conv = input->mutable_conv_conf(); - conv->set_filter_size(2); - conv->set_filter_size_y(4); - conv->set_channels(3); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(1); - conv->set_filter_channels(conv->channels() / conv->groups()); - conv->set_img_size(16); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - config.layerConfig.set_size(conv->output_x() * conv->output_x() * - config.layerConfig.num_filters()); - config.layerConfig.set_name("conv"); - - // data layer initialize - std::vector dataLayers2; - LayerMap layerMap2; - vector datas2; - initDataLayer( - config, &dataLayers2, &datas2, &layerMap2, "conv", 100, false, false); - // test layer initialize - std::vector parameters2; - LayerPtr convLayer; - initTestLayer(config, &layerMap2, ¶meters2, &convLayer); - - // Sync convLayer and convtLayer parameter - convLayer->getBiasParameter()->zeroMem(); - convLayer->getParameters()[0] - ->getBuf(PARAMETER_VALUE) - ->copyFrom(*(convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE))); - - // Set convLayer outputGrad as 
convTransLayer input value - convLayer->forward(PASS_GC); - convLayer->getOutput().grad->copyFrom(*(dataLayers[0]->getOutputValue())); - - vector callbackFlags(parameters2.size(), 0); - auto callback = [&](Parameter* para) { ++callbackFlags[para->getID()]; }; - convLayer->backward(callback); - - // Check that the convLayer backward is the same as convTransLayer forward - checkMatrixEqual(convtLayer->getOutputValue(), - dataLayers2[0]->getOutputGrad()); -} - -// Do one forward pass of convTrans layer and check to see if its output -// matches the given result -void doOneConvtTest(size_t imgSize, - size_t output_x, - size_t stride, - size_t padding, - size_t filter_size, - MatrixPtr& result) { - TestConfig configt; - configt.biasSize = 1; - configt.layerConfig.set_type("exconvt"); - configt.layerConfig.set_num_filters(1); - configt.layerConfig.set_partial_sum(1); - configt.layerConfig.set_shared_biases(true); - - configt.inputDefs.push_back( - {INPUT_DATA, "layer_0", output_x * output_x, filter_size * filter_size}); - LayerInputConfig* input = configt.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(filter_size); - conv->set_filter_size_y(filter_size); - conv->set_channels(1); - conv->set_padding(padding); - conv->set_padding_y(padding); - conv->set_stride(stride); - conv->set_stride_y(stride); - conv->set_groups(1); - conv->set_filter_channels(1); - conv->set_img_size(imgSize); - conv->set_output_x(output_x); - - configt.layerConfig.set_size(conv->img_size() * conv->img_size() * - configt.layerConfig.num_filters()); - configt.layerConfig.set_name("convTrans"); - - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer( - configt, &dataLayers, &datas, &layerMap, "convTrans", 1, false, false); - dataLayers[0]->getOutputValue()->zeroMem(); - dataLayers[0]->getOutputValue()->add(1.0); - - // test layer initialize - std::vector parameters; - LayerPtr convtLayer; - initTestLayer(configt, &layerMap, 
¶meters, &convtLayer); - convtLayer->getBiasParameter()->zeroMem(); - convtLayer->getParameters()[0]->zeroMem(); - convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->add(1.0); - convtLayer->forward(PASS_GC); - - checkMatrixEqual(convtLayer->getOutputValue(), result); -} - -TEST(Layer, convTransLayerFwd2) { - MatrixPtr result; - result = Matrix::create(1, 5 * 5, false, false); - result->zeroMem(); - result->add(1.0); - doOneConvtTest(/* imgSize */ 5, - /* output_x */ 1, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 5, - result); - - real resultData[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4, - 4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1}; - result->setData(resultData); - doOneConvtTest(/* imgSize */ 5, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 4, - result); - - real resultData2[] = {1, 2, 2, 2, 1, 2, 4, 4, 4, 2, 2, 4, 4, - 4, 2, 2, 4, 4, 4, 2, 1, 2, 2, 2, 1}; - result->setData(resultData2); - doOneConvtTest(/* imgSize */ 5, - /* output_x */ 2, - /* stride */ 2, - /* padding */ 1, - /* filter_size */ 5, - result); - - real resultData3[] = {1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 4, - 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1}; - result->setData(resultData3); - doOneConvtTest(/* imgSize */ 5, - /* output_x */ 2, - /* stride */ 2, - /* padding */ 0, - /* filter_size */ 3, - result); -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_ConvUnify.cpp b/paddle/legacy/gserver/tests/test_ConvUnify.cpp deleted file mode 100644 index d4ca158352d9e4bf859b31b7c7410518bdc20ac6..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_ConvUnify.cpp +++ /dev/null @@ -1,315 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/utils/GlobalConstants.h" - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); -DECLARE_int32(gpu_id); -DECLARE_double(checkgrad_eps); -DECLARE_bool(thread_local_rand_use_global_seed); -DECLARE_bool(prev_batch_state); - -// Do one forward pass of ConvLayer using either exconv or cudnn_conv -MatrixPtr doOneConvTest(size_t imgSize, - size_t output_x, - size_t stride, - size_t padding, - size_t filter_size, - size_t channel, - size_t numfilters, - size_t groups, - MatrixPtr& inputData, - real* param, - bool useGpu, - bool isDeconv = false) { - TestConfig config; - config.biasSize = numfilters; - string layerType; - if (useGpu) { - layerType = (isDeconv) ? "cudnn_convt" : "cudnn_conv"; - } else { - layerType = (isDeconv) ? 
"exconvt" : "exconv"; - } - config.layerConfig.set_type(layerType); - config.layerConfig.set_num_filters(numfilters); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - size_t weightSize = channel * filter_size * filter_size * - config.layerConfig.num_filters() / groups; - if (isDeconv) { - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", output_x * output_x * channel, weightSize}); - config.layerConfig.set_size(imgSize * imgSize * - config.layerConfig.num_filters()); - } else { - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", imgSize * imgSize * channel, weightSize}); - config.layerConfig.set_size(output_x * output_x * - config.layerConfig.num_filters()); - } - - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(filter_size); - conv->set_filter_size_y(filter_size); - conv->set_channels(channel); - conv->set_padding(padding); - conv->set_padding_y(padding); - conv->set_stride(stride); - conv->set_stride_y(stride); - conv->set_groups(groups); - conv->set_img_size(imgSize); - conv->set_output_x(output_x); - - if (isDeconv) { - conv->set_filter_channels(numfilters / groups); - } else { - conv->set_filter_channels(channel / groups); - } - - config.layerConfig.set_name("conv"); - - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer( - config, &dataLayers, &datas, &layerMap, "conv", 1, false, useGpu); - dataLayers[0]->getOutputValue()->zeroMem(); - dataLayers[0]->getOutputValue()->copyFrom(*inputData); - - // test layer initialize - std::vector parameters; - LayerPtr convLayer; - initTestLayer(config, &layerMap, ¶meters, &convLayer); - convLayer->getBiasParameter()->zeroMem(); - convLayer->getParameters()[0]->zeroMem(); - convLayer->getParameters()[0] - ->getBuf(PARAMETER_VALUE) - ->copyFrom(param, weightSize); - convLayer->forward(PASS_GC); - - return convLayer->getOutputValue(); -} - -TEST(Layer, 
convParaUnified) { -#ifdef PADDLE_WITH_CUDA - MatrixPtr input, resultCpu, resultGpu; - - /// TEST1 for conv /// - input = Matrix::create(1, 4 * 4, false, false); - real inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - real param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - - input->setData(inputData); - - resultCpu = doOneConvTest(/* imgSize */ 4, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 3, - /*channel*/ 1, - /*numfilters*/ 2, - /*groups*/ 1, - input, - param, - /*useGpu*/ false); - - resultGpu = doOneConvTest(/* imgSize */ 4, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 3, - /*channel*/ 1, - /*numfilters*/ 2, - /*groups*/ 1, - input, - param, - /*useGpu*/ true); - checkMatrixEqual(resultCpu, resultGpu); - - /// TEST1 for deconv /// - input = Matrix::create(1, 2 * 2, false, false); - real inputDataT[] = {1, 2, 3, 4}; - input->setData(inputDataT); - - resultCpu = doOneConvTest(/* imgSize */ 4, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 3, - /*channel*/ 1, - /*numfilters*/ 2, - /*groups*/ 1, - input, - param, - /*useGpu*/ false, - /*isDeconv*/ true); - - resultGpu = doOneConvTest(/* imgSize */ 4, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 3, - /*channel*/ 1, - /*numfilters*/ 2, - /*groups*/ 1, - input, - param, - /*useGpu*/ true, - /*isDeconv*/ true); - checkMatrixEqual(resultCpu, resultGpu); - - /// TEST2 for conv /// - input = Matrix::create(1, 3 * 3 * 2, false, false); - real inputData2[] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; - real param2[] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1}; - - input->setData(inputData2); - - resultCpu = doOneConvTest(/* imgSize */ 3, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 2, - /*channel*/ 2, - /*numfilters*/ 2, - /*groups*/ 1, - input, - param2, - /*useGpu*/ false); - - resultGpu = 
doOneConvTest(/* imgSize */ 3, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 2, - /*channel*/ 2, - /*numfilters*/ 2, - /*groups*/ 1, - input, - param2, - /*useGpu*/ true); - checkMatrixEqual(resultCpu, resultGpu); - - /// TEST3 for conv /// - real param3[] = {1, 2, 3, 4, 4, 3, 2, 1}; - - resultCpu = doOneConvTest(/* imgSize */ 3, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 2, - /*channel*/ 2, - /*numfilters*/ 2, - /*groups*/ 2, - input, - param3, - /*useGpu*/ false); - - resultGpu = doOneConvTest(/* imgSize */ 3, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 2, - /*channel*/ 2, - /*numfilters*/ 2, - /*groups*/ 2, - input, - param3, - /*useGpu*/ true); - checkMatrixEqual(resultCpu, resultGpu); - - /// TEST2 for deconv /// - input = Matrix::create(1, 2 * 2 * 2, false, false); - real inputData2T[] = {1, 2, 3, 4, 5, 6, 7, 8}; - input->setData(inputData2T); - - resultCpu = doOneConvTest(/* imgSize */ 3, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 2, - /*channel*/ 2, - /*numfilters*/ 2, - /*groups*/ 1, - input, - param2, - /*useGpu*/ false, - /*isDeconv*/ true); - - resultGpu = doOneConvTest(/* imgSize */ 3, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 2, - /*channel*/ 2, - /*numfilters*/ 2, - /*groups*/ 1, - input, - param2, - /*useGpu*/ true, - /*isDeconv*/ true); - checkMatrixEqual(resultCpu, resultGpu); - - /// TEST3 for deconv /// - resultCpu = doOneConvTest(/* imgSize */ 3, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 2, - /*channel*/ 2, - /*numfilters*/ 2, - /*groups*/ 2, - input, - param3, - /*useGpu*/ false, - /*isDeconv*/ true); - - resultGpu = doOneConvTest(/* imgSize */ 3, - /* output_x */ 2, - /* stride */ 1, - /* padding */ 0, - /* filter_size */ 2, - /*channel*/ 2, - /*numfilters*/ 2, - /*groups*/ 2, - input, - param3, - /*useGpu*/ true, - /*isDeconv*/ true); - 
checkMatrixEqual(resultCpu, resultGpu); -#endif -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/legacy/gserver/tests/test_CrossEntropyOverBeamGrad.cpp deleted file mode 100644 index 34eb0dedffeba46c662a0e69ce9ba82f474a8358..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ /dev/null @@ -1,352 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include - -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT - -DECLARE_int32(gpu_id); -DECLARE_bool(thread_local_rand_use_global_seed); - -const size_t MAX_SEQ_NUM = 23; -const size_t MAX_SEQ_LEN = 50; -const size_t MAX_BEAM_SIZE = 27; - -const size_t SEED = (size_t)(time(NULL)); - -struct SingleBeamExpansion { - vector seqStartPos; - vector subSeqStartPos; - vector candidateScores; - - // TODO(caoying): store this into Argument.ids - vector selectedIndices; - - vector groundTruth; - vector inBeam; - vector rowIdxInBeam; - vector colIdxInBeam; - - void resetGroundTruth(size_t n) { - groundTruth.clear(); - groundTruth.resize(n, -1); - - inBeam.clear(); - inBeam.resize(n, 0); - - rowIdxInBeam.clear(); - rowIdxInBeam.resize(n, -1); - - colIdxInBeam.clear(); - colIdxInBeam.resize(n, -1); - } -}; - -inline float randFloat() { - return static_cast(rand()) / static_cast(RAND_MAX); -} - -void genRand(real* numbers, size_t n) { - default_random_engine generator; - uniform_real_distribution distribution(0.0, 1.0); - for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); -} - -vector randSampling(real range, int n) { - CHECK_GE(range, n); - vector num(range); - iota(begin(num), end(num), 0.); - if (range == n) return num; - - random_shuffle(begin(num), end(num)); - num.resize(n); - sort(begin(num), end(num)); - return num; -} - -void genCandidateScores(bool hasSubseq, - size_t beamSize, - SingleBeamExpansion& prevBeam, - SingleBeamExpansion& curBeam) { - vector& seqStartPos = curBeam.seqStartPos; - seqStartPos.resize(1, 0); - vector& subSeqStartPos = curBeam.subSeqStartPos; - subSeqStartPos.resize(1, 0); - - srand(SEED); - if (prevBeam.selectedIndices.size()) { - if (prevBeam.subSeqStartPos.size() > 1) { - int seqIdx = 1; - // samples in previous beam are nested sequences. 
- for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { - for (size_t j = 0; j < beamSize; ++j) { - if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; - subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + - subSeqStartPos.back()); - } - if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { - seqStartPos.push_back(subSeqStartPos.back()); - seqIdx++; - } - } - } else { - for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { - if (i && i % beamSize == 0) { - seqStartPos.push_back(subSeqStartPos.back()); - if (i == prevBeam.selectedIndices.size()) break; - } - if (prevBeam.selectedIndices[i] == -1.) continue; - subSeqStartPos.push_back(subSeqStartPos.back() + - (1 + (rand() % MAX_SEQ_LEN))); - } - } - } else { - // the first beam expansion - int seqNum = 1 + (rand() % MAX_SEQ_NUM); - for (int i = 0; i < seqNum; ++i) { - if (hasSubseq) { - for (size_t j = 0; j < 1 + (rand() % MAX_SEQ_NUM); ++j) - subSeqStartPos.push_back(subSeqStartPos.back() + - (1 + (rand() % MAX_SEQ_LEN))); - seqStartPos.push_back(subSeqStartPos.back()); - } else { - seqStartPos.push_back(seqStartPos.back() + - (1 + (rand() % MAX_SEQ_LEN))); - } - } - } - - size_t totalSeqNum = hasSubseq ? 
subSeqStartPos.back() : seqStartPos.back(); - curBeam.candidateScores.resize(totalSeqNum, 0.); - genRand(curBeam.candidateScores.data(), totalSeqNum); -} - -void genSelectedIndices(size_t beamSize, - vector& seqStartPos, - vector& selectedIndices) { - size_t selectedIdsCount = beamSize * (seqStartPos.size() - 1); - selectedIndices.resize(selectedIdsCount, -1.); - - for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { - int seqLen = seqStartPos[i + 1] - seqStartPos[i]; - int n = min(seqLen, static_cast(beamSize)); - vector ids = randSampling(seqLen, n); - memcpy(selectedIndices.data() + i * beamSize, - ids.data(), - sizeof(real) * ids.size()); - } -} - -void genGroundTruth(vector& beamExpansions, - size_t beamSize) { - SingleBeamExpansion& beam = beamExpansions[1]; - size_t seqNum = beam.seqStartPos.size() - 1; - for (size_t i = 2; i < beamExpansions.size(); ++i) - CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1); - - srand(SEED); - - // initialize the first beam. - beam.resetGroundTruth(seqNum); - for (size_t i = 0; i < seqNum; ++i) { - if (randFloat() > 0.5) { - /* - * force the randomly generated label falls in the beam by chance 0.5. - * otherwise, when sequence length is relatively long and beam size is - * relatively small, the gold sequences falls off the beam at in the - * first search. 
- */ - real* begPos = beam.selectedIndices.data() + i * beamSize; - beam.colIdxInBeam[i] = - rand() % count_if(begPos, begPos + beamSize, [](const real& val) { - return val != -1.; - }); - beam.groundTruth[i] = - beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]]; - beam.inBeam[i] = 1; - } else { - int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]); - beam.groundTruth[i] = label; - - real* begPos = beam.selectedIndices.data() + i * beamSize; - real* endPos = begPos + beamSize; - real* lblPos = find(begPos, endPos, real(label)); - if (lblPos != endPos) { - beam.inBeam[i] = 1; - beam.colIdxInBeam[i] = lblPos - begPos; - } - } - beam.rowIdxInBeam[i] = i; - } - - // iterate over each beam expansions - for (size_t i = 2; i < beamExpansions.size(); ++i) { - SingleBeamExpansion& curBeam = beamExpansions[i]; - SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; - curBeam.resetGroundTruth(seqNum); - - // iterate over each sequence - for (size_t j = 0; j < seqNum; ++j) { - if (!prevBeam.inBeam[j]) continue; - - // gold sequence falls in the beam in previous search. - real* begPos = prevBeam.selectedIndices.data(); - int offset = - prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j]; - curBeam.rowIdxInBeam[j] = count_if( - begPos, begPos + offset, [](const real& val) { return val != -1.; }); - - if (randFloat() > 0.5) { - // force the randomly generated label falls in the beam by chance 0.5. 
- - real* start = - curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; - int n = rand() % count_if(start, start + beamSize, [](const real& val) { - return val != -1.; - }); - curBeam.colIdxInBeam[j] = n; - curBeam.groundTruth[j] = *(start + n); - curBeam.inBeam[j] = 1; - } else { - CHECK_LE((size_t)curBeam.rowIdxInBeam[j] + 1, - curBeam.subSeqStartPos.size() - 1); - int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; - int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j] + 1]; - CHECK_GT(size_t(end), size_t(start)); - int label = rand() % (end - start); - - curBeam.groundTruth[j] = label; - real* findBeg = - curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; - real* lblPos = - find(findBeg, findBeg + beamSize, static_cast(label)); - if (lblPos != (findBeg + beamSize)) { - curBeam.inBeam[j] = 1; - curBeam.colIdxInBeam[j] = lblPos - findBeg; - } - } - } - } -} - -void genOneBeam(size_t beamSize, - bool hasSubseq, - SingleBeamExpansion& prevBeam, - SingleBeamExpansion& curBeam) { - genCandidateScores(hasSubseq, beamSize, prevBeam, curBeam); - genSelectedIndices(beamSize, - hasSubseq ? curBeam.subSeqStartPos : curBeam.seqStartPos, - curBeam.selectedIndices); -} - -void genRandomBeamExpansion(size_t expansionCount, - size_t beamSize, - vector& beamExpansions) { - beamExpansions.clear(); - beamExpansions.resize(expansionCount + 1); - - // beamExpansions[0] is reserved. 
- for (size_t i = 1; i <= expansionCount; ++i) - genOneBeam(beamSize, bool(i - 1), beamExpansions[i - 1], beamExpansions[i]); - genGroundTruth(beamExpansions, beamSize); -} - -void testCrossEntropyOverBeam(bool useGpu, - size_t beamSize, - vector& beams) { - TestConfig config; - config.layerConfig.set_type("cross_entropy_over_beam"); - - size_t seqNum = 0; - for (size_t i = 1; i < beams.size(); ++i) { - const SingleBeamExpansion& beam = beams[i]; - // create scores for all the candidates - MatrixPtr candidateScorePtr = - Matrix::create(beam.candidateScores.size(), 1, false, false); - candidateScorePtr->copyFrom(beam.candidateScores.data(), - beam.candidateScores.size()); - - ostringstream paramName; - paramName << "candidate_scores_" << i; - - if (beam.subSeqStartPos.size() > 1) { - seqNum = beam.subSeqStartPos.size() - 1; - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - paramName.str(), - candidateScorePtr, - beam.seqStartPos, - beam.subSeqStartPos}); - } else { - seqNum = beam.seqStartPos.size() - 1; - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - paramName.str(), - candidateScorePtr, - beam.seqStartPos}); - } - config.layerConfig.add_inputs(); - - // create indices for the selected candidates - MatrixPtr selectedCandidates = - Matrix::create(seqNum, beamSize, false, false); - selectedCandidates->copyFrom(beam.selectedIndices.data(), - beam.selectedIndices.size()); - paramName.clear(); - paramName << "selected_candidates_" << i; - config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, paramName.str(), selectedCandidates}); - config.layerConfig.add_inputs(); - - // create the ground truth - paramName.clear(); - paramName << "label_" << i; - config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth}); - config.layerConfig.add_inputs(); - } - - testLayerGrad( - config, "cross_entropy_over_beam", seqNum, false, useGpu, false); -} - -TEST(Layer, CrossEntropyOverBeam) { - LOG(INFO) << "SEED = " << SEED; - const size_t 
beamSize = 1 + rand() % MAX_BEAM_SIZE; - LOG(INFO) << "beamSize = " << beamSize; - - // TODO(caoying): test with random beam expansions. - const size_t expansionCount = 3; - vector beams; - genRandomBeamExpansion(expansionCount, beamSize, beams); - - for (bool useGpu : {false, true}) - testCrossEntropyOverBeam(useGpu, beamSize, beams); -} - -int main(int argc, char** argv) { - initMain(argc, argv); - hl_start(); - hl_init(FLAGS_gpu_id); - FLAGS_thread_local_rand_use_global_seed = true; - srand(SEED); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_DetectionOutput.cpp b/paddle/legacy/gserver/tests/test_DetectionOutput.cpp deleted file mode 100644 index 486521426553c76729fa4c287b8fbc5f9c064b61..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_DetectionOutput.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -// Do one forward pass of priorBox layer and check to see if its output -// matches the given result -void doOneDetectionOutputTest(MatrixPtr& inputLoc, - MatrixPtr& inputConf, - MatrixPtr& inputPriorBox, - size_t feature_map_width, - size_t feature_map_height, - real nms_threshold, - bool use_gpu, - MatrixPtr& result) { - // Setting up the detection output layer - TestConfig configt; - configt.layerConfig.set_type("detection_output"); - LayerInputConfig* input = configt.layerConfig.add_inputs(); - configt.layerConfig.add_inputs(); - configt.layerConfig.add_inputs(); - - DetectionOutputConfig* detOutput = input->mutable_detection_output_conf(); - detOutput->set_width(feature_map_width); - detOutput->set_height(feature_map_height); - detOutput->set_nms_threshold(nms_threshold); - detOutput->set_num_classes(2); - detOutput->set_nms_top_k(20); - detOutput->set_keep_top_k(10); - detOutput->set_background_id(0); - detOutput->set_confidence_threshold(0.01); - detOutput->set_input_num(1); - configt.inputDefs.push_back({INPUT_DATA_TARGET, "priorbox", 32, 0}); - configt.inputDefs.push_back({INPUT_DATA, "input_loc", 16, 0}); - configt.inputDefs.push_back({INPUT_DATA, "input_conf", 8, 0}); - - // data layer initialize - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer( - configt, &dataLayers, &datas, &layerMap, "priorbox", 1, false, use_gpu); - - dataLayers[0]->getOutputValue()->copyFrom(*inputPriorBox); - dataLayers[1]->getOutputValue()->copyFrom(*inputLoc); - dataLayers[2]->getOutputValue()->copyFrom(*inputConf); - - // test layer initialize - bool store_FLAGS_use_gpu = FLAGS_use_gpu; - FLAGS_use_gpu = use_gpu; - std::vector parameters; - LayerPtr detectionOutputLayer; - initTestLayer(configt, &layerMap, ¶meters, &detectionOutputLayer); - FLAGS_use_gpu = store_FLAGS_use_gpu; 
- detectionOutputLayer->forward(PASS_GC); - checkMatrixEqual(detectionOutputLayer->getOutputValue(), result); -} - -TEST(Layer, detectionOutputLayerFwd) { - bool useGpu = false; - // CPU case 1. - MatrixPtr inputLoc; - MatrixPtr inputConf; - MatrixPtr inputPriorBox; - MatrixPtr result, result2, result3, result4; - real nmsTreshold = 0.01; - real inputLocData[] = {0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1}; - real inputConfData[] = {0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6}; - real inputPriorBoxData[] = {0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2, - 0.2, 0.2, 0.6, 0.6, 0.1, 0.1, 0.2, 0.2, - 0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2, - 0.4, 0.4, 0.8, 0.8, 0.1, 0.1, 0.2, 0.2}; - real resultData[] = { - 0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031}; - inputLoc = Matrix::create(1, 16, false, useGpu); - inputConf = Matrix::create(1, 8, false, useGpu); - inputPriorBox = Matrix::create(1, 32, false, useGpu); - result = Matrix::create(1, 7, false, useGpu); - inputLoc->setData(inputLocData); - inputConf->setData(inputConfData); - inputPriorBox->setData(inputPriorBoxData); - result->setData(resultData); - doOneDetectionOutputTest(inputLoc, - inputConf, - inputPriorBox, - /* feature_map_width */ 1, - /* feature_map_height */ 1, - nmsTreshold, - useGpu, - result); - - // CPU case 2. - nmsTreshold = 0.2; - result2 = Matrix::create(2, 7, false, useGpu); - real resultData2[] = {0, - 1, - 0.68997443, - 0.099959746, - 0.099959746, - 0.50804031, - 0.50804031, - 0, - 1, - 0.59868765, - 0.29995975, - 0.29995975, - 0.70804024, - 0.70804024}; - result2->setData(resultData2); - doOneDetectionOutputTest(inputLoc, - inputConf, - inputPriorBox, - /* feature_map_width */ 1, - /* feature_map_height */ 1, - nmsTreshold, - useGpu, - result2); - -#ifdef PADDLE_WITH_CUDA - // GPU case 1. 
- useGpu = true; - inputLoc = Matrix::create(1, 16, false, useGpu); - inputConf = Matrix::create(1, 8, false, useGpu); - inputPriorBox = Matrix::create(1, 32, false, useGpu); - inputLoc->copyFrom(inputLocData, 16); - inputConf->copyFrom(inputConfData, 8); - inputPriorBox->copyFrom(inputPriorBoxData, 32); - - nmsTreshold = 0.01; - result3 = Matrix::create(1, 7, false, useGpu); - result3->copyFrom(resultData, 7); - doOneDetectionOutputTest(inputLoc, - inputConf, - inputPriorBox, - /* feature_map_width */ 1, - /* feature_map_height */ 1, - nmsTreshold, - useGpu, - result3); - - // GPU case 2. - nmsTreshold = 0.2; - result4 = Matrix::create(2, 7, false, useGpu); - result4->copyFrom(resultData2, 14); - doOneDetectionOutputTest(inputLoc, - inputConf, - inputPriorBox, - /* feature_map_width */ 1, - /* feature_map_height */ 1, - nmsTreshold, - useGpu, - result4); -#endif -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_Evaluator.cpp b/paddle/legacy/gserver/tests/test_Evaluator.cpp deleted file mode 100644 index 8aab50d23e56e449d86f22a315c45432253cdd07..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_Evaluator.cpp +++ /dev/null @@ -1,267 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/trainer/Trainer.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); -DECLARE_int32(gpu_id); -DECLARE_bool(thread_local_rand_use_global_seed); - -enum InputType { - INPUT_DATA, // dense vector - INPUT_LABEL, // id - INPUT_DATA_TARGET, // dense vector, but no gradient - INPUT_SEQUENCE_DATA, - INPUT_SEQUENCE_LABEL, - INPUT_SPARSE_NON_VALUE_DATA -}; - -struct InputDef { - InputType inputType; - string name; - size_t dim; -}; - -struct TestConfig { - EvaluatorConfig evaluatorConfig; - std::vector inputDefs; - bool testAccumulate; - TestConfig() : testAccumulate(true) {} -}; - -void testEvaluator(TestConfig testConf, - string testEvaluatorName, - size_t batchSize, - bool useGpu) { -#ifndef PADDLE_WITH_CUDA - if (useGpu) return; -#endif - FLAGS_use_gpu = useGpu; - testConf.evaluatorConfig.set_name(testEvaluatorName); - LOG(INFO) << " evaluator_type=" << testConf.evaluatorConfig.type() - << " useGpu=" << useGpu; - - std::vector arguments; - for (size_t i = 0; i < testConf.inputDefs.size(); ++i) { - Argument data; - size_t dim = testConf.inputDefs[i].dim; - switch (testConf.inputDefs[i].inputType) { - case INPUT_DATA: - case INPUT_SEQUENCE_DATA: - case INPUT_DATA_TARGET: - data.value = Matrix::create(batchSize, dim, false, useGpu); - data.value->randomizeUniform(); - - // make sure output > 0 && output < 1 - data.value->add(-0.5); - data.value->sigmoid(*data.value); - break; - case INPUT_LABEL: - case INPUT_SEQUENCE_LABEL: - data.ids = VectorT::create(batchSize, useGpu); - data.ids->rand(dim); // now rand number can be 0 to inputDefs[i].dim. 
- break; - case INPUT_SPARSE_NON_VALUE_DATA: - data.value = makeRandomSparseMatrix(batchSize, - dim, - /* withValue= */ false, - useGpu); - break; - default: - LOG(FATAL) << " unknown inputType "; - return; - } - - ICpuGpuVectorPtr sequenceStartPositions; - if (testConf.inputDefs[i].inputType == INPUT_SEQUENCE_DATA || - testConf.inputDefs[i].inputType == INPUT_SEQUENCE_LABEL) { - if (!sequenceStartPositions) { - generateSequenceStartPositions(batchSize, sequenceStartPositions); - } - data.sequenceStartPositions = sequenceStartPositions; - } - - arguments.push_back(data); - } - - Evaluator* testEvaluator = Evaluator::create(testConf.evaluatorConfig); - double totalScore = 0.0; - testEvaluator->start(); - totalScore += testEvaluator->evalImp(arguments); - testEvaluator->updateSamplesNum(arguments); - testEvaluator->finish(); - LOG(INFO) << *testEvaluator; - - std::vector names; - testEvaluator->getNames(&names); - paddle::Error err; - for (auto& name : names) { - auto value = testEvaluator->getValue(name, &err); - ASSERT_TRUE(err.isOK()); - LOG(INFO) << name << " " << value; - auto tp = testEvaluator->getType(name, &err); - ASSERT_TRUE(err.isOK()); - ASSERT_EQ(testConf.evaluatorConfig.type(), tp); - } - - double totalScore2 = 0.0; - if (testConf.testAccumulate) { - testEvaluator->start(); - totalScore2 += testEvaluator->evalImp(arguments); - testEvaluator->finish(); - EXPECT_LE(fabs(totalScore - totalScore2), 1.0e-5); - } -} - -void testEvaluatorAll(TestConfig testConf, - string testEvaluatorName, - size_t batchSize) { - testEvaluator(testConf, testEvaluatorName, batchSize, true); - testEvaluator(testConf, testEvaluatorName, batchSize, false); -} - -TEST(Evaluator, detection_map) { - TestConfig config; - config.evaluatorConfig.set_type("detection_map"); - config.evaluatorConfig.set_overlap_threshold(0.5); - config.evaluatorConfig.set_background_id(0); - config.evaluatorConfig.set_ap_type("Integral"); - config.evaluatorConfig.set_evaluate_difficult(0); - - 
config.inputDefs.push_back({INPUT_DATA, "output", 7}); - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "label", 6}); - config.evaluatorConfig.set_evaluate_difficult(false); - testEvaluatorAll(config, "detection_map", 100); - - config.evaluatorConfig.set_evaluate_difficult(true); - testEvaluatorAll(config, "detection_map", 100); -} - -TEST(Evaluator, classification_error) { - TestConfig config; - config.evaluatorConfig.set_type("classification_error"); - config.evaluatorConfig.set_top_k(5); - - config.inputDefs.push_back({INPUT_DATA, "output", 50}); - config.inputDefs.push_back({INPUT_LABEL, "label", 50}); - testEvaluatorAll(config, "classification_error", 100); - config.inputDefs.push_back({INPUT_DATA, "weight", 1}); - testEvaluatorAll(config, "classification_error_weight", 100); - - // multi binary labels - config.inputDefs.clear(); - config.inputDefs.push_back({INPUT_DATA, "output", 100}); - config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "label", 100}); - // Not support GPU - testEvaluator(config, "classification_error_multi_binary_label", 50, false); - - config.evaluatorConfig.set_classification_threshold(0.4); - config.inputDefs.push_back({INPUT_DATA, "weight", 1}); - // Not support GPU - testEvaluator( - config, "classification_error_weight_multi_binary_label", 50, false); -} - -TEST(Evaluator, sum) { - TestConfig config; - config.evaluatorConfig.set_type("sum"); - - // sum of output - config.inputDefs.push_back({INPUT_DATA, "output", 10}); - testEvaluatorAll(config, "sum_output", 200); - config.inputDefs.push_back({INPUT_DATA, "weight", 1}); - testEvaluatorAll(config, "sum_output_weight", 200); - - // sum of label - config.inputDefs.clear(); - config.inputDefs.push_back({INPUT_LABEL, "label", 10}); - testEvaluatorAll(config, "sum_label", 200); - config.inputDefs.push_back({INPUT_DATA, "weight", 1}); - testEvaluatorAll(config, "sum_label_weight", 200); -} - -TEST(Evaluator, last_column_sum) { - TestConfig config; - 
config.evaluatorConfig.set_type("last-column-sum"); - - config.inputDefs.push_back({INPUT_DATA, "output", 50}); - testEvaluatorAll(config, "last-column-sum", 200); - config.inputDefs.push_back({INPUT_DATA, "weight", 1}); - testEvaluatorAll(config, "last-column-sum_weight", 200); -} - -TEST(Evaluator, last_column_auc) { - TestConfig config; - config.evaluatorConfig.set_type("last-column-auc"); - - config.inputDefs.push_back({INPUT_DATA, "output", 2}); - config.inputDefs.push_back({INPUT_LABEL, "label", 2}); - testEvaluatorAll(config, "last-column-auc", 500); - config.inputDefs.push_back({INPUT_DATA, "weight", 1}); - testEvaluatorAll(config, "last-column-auc_weight", 200); -} - -TEST(Evaluator, precision_recall) { - TestConfig config; - config.evaluatorConfig.set_type("precision_recall"); - - config.inputDefs.push_back({INPUT_DATA, "output", 10}); - config.inputDefs.push_back({INPUT_LABEL, "label", 10}); - testEvaluatorAll(config, "precision_recall", 200); - config.inputDefs.push_back({INPUT_DATA, "weight", 1}); - testEvaluatorAll(config, "precision_recall_weight", 200); - - LOG(INFO) << "positive_label = 5"; - config.evaluatorConfig.set_positive_label(5); - testEvaluatorAll(config, "precision_recall_weight", 200); - - // multi binary labels - config.inputDefs.clear(); - config.evaluatorConfig.set_positive_label(-1); - config.inputDefs.push_back({INPUT_DATA, "output", 10}); - config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "label", 10}); - // Not support GPU - testEvaluator(config, "precision_recall_multi_binary_label", 100, false); - - LOG(INFO) << "classification_threshold = 0.4"; - config.evaluatorConfig.set_classification_threshold(0.4); - config.inputDefs.push_back({INPUT_DATA, "weight", 1}); - // Not support GPU - testEvaluator( - config, "precision_recall_weight_multi_binary_label", 100, false); -} - -TEST(Evaluator, ctc_error_evaluator) { - TestConfig config; - config.evaluatorConfig.set_type("ctc_edit_distance"); - - 
config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "output", 32}); - config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "label", 1}); - testEvaluatorAll(config, "ctc_error_evaluator", 100); -} - -int main(int argc, char** argv) { - initMain(argc, argv); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_Expand.cpp b/paddle/legacy/gserver/tests/test_Expand.cpp deleted file mode 100644 index fa1c86d13f4b3d5d9f6e0e5c4442818154134cef..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_Expand.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -// Do one forward pass of expand layer and check to see if its output -// matches the given result.(Test onlyCPU currently.) -void doOneExpandTest(string trans_type, - bool hasSubseq, - bool useGpu, - Argument& input1, - Argument& input2, - Argument& result) { - FLAGS_use_gpu = false; - // Setting up the expand layer - TestConfig config; - config.layerConfig.set_type("expand"); - - auto inputType1 = - trans_type == "non-seq" ? 
INPUT_DENSE_DIM_DATA : INPUT_SEQUENCE_DATA; - config.inputDefs.push_back({inputType1, "layer0", 1, 0}); - auto inputType2 = - hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA; - - config.inputDefs.push_back({inputType2, "layer1", 1, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.set_trans_type(trans_type); - - // data layer initialize - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer( - config, &dataLayers, &datas, &layerMap, "expand", 1, false, useGpu); - dataLayers[0]->getOutput() = input1; - dataLayers[1]->getOutput() = input2; - - // test layer initialize - std::vector parameters; - LayerPtr expandLayer; - initTestLayer(config, &layerMap, ¶meters, &expandLayer); - expandLayer->forward(PASS_GC); - checkMatrixEqual(expandLayer->getOutputValue(), result.value); -} - -TEST(Layer, ExpandLayerFwd) { - bool useGpu = false; - - // Assume batch_size =3 in all cases. - - // CPU case 1. non-seq expand to seq - // input1 = 1,2,3 - // input2 = [4,5],[6],[7,8,9] - // result = [1,1],[2],[3,3,3] - Argument input1, input2, result; - input1.value = Matrix::create(3, 1, false, useGpu); - real input1Data[] = {1, 2, 3}; - input1.value->setData(input1Data); - - input2.value = Matrix::create(6, 1, false, useGpu); - real input2Data[] = {4, 5, 6, 7, 8, 9}; - input2.value->setData(input2Data); - input2.sequenceStartPositions = ICpuGpuVector::create(4, useGpu); - int input2Seq[] = {0, 2, 3, 6}; - input2.sequenceStartPositions->copyFrom(input2Seq, 4, useGpu); - - result.value = Matrix::create(6, 1, false, useGpu); - real resultData[] = {1, 1, 2, 3, 3, 3}; - result.value->setData(resultData); - - doOneExpandTest("non-seq", false, useGpu, input1, input2, result); - - // CPU case 2. non-seq expand to sub-seq - // NOTE: input1.batch_size == input2.sequencelength in this case. 
- // i.e, input1 expands by input2.sequence - // input1 = 1,2,3 - // input2 = [[4,5]],[[6]],[[7],[8,9]] - // result = [[1,1]],[[2]],[[3],[3,3]] - input2.subSequenceStartPositions = ICpuGpuVector::create(5, useGpu); - int input2SubSeq[] = {0, 2, 3, 4, 6}; - input2.subSequenceStartPositions->copyFrom(input2SubSeq, 5, useGpu); - - doOneExpandTest("non-seq", true, useGpu, input1, input2, result); - - // CPU case 3. seq expand to sub-seq - // input1 = [1,2],[3],[4] - // input2 = [[4,5]],[[6]],[[7],[8,9]] - // result = [[1,1]],[[2]],[[3],[4,4]] - Matrix::resizeOrCreate(input1.value, 4, 1, false, useGpu); - real input1Data_case3[] = {1, 2, 3, 4}; - input1.value->setData(input1Data_case3); - - input1.sequenceStartPositions = ICpuGpuVector::create(4, useGpu); - int input1Seq[] = {0, 2, 3, 4}; - input1.sequenceStartPositions->copyFrom(input1Seq, 4, useGpu); - - real resultData_case3[] = {1, 1, 2, 3, 4, 4}; - result.value->setData(resultData_case3); - - doOneExpandTest("seq", true, useGpu, input1, input2, result); -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_KmaxSeqScore.cpp b/paddle/legacy/gserver/tests/test_KmaxSeqScore.cpp deleted file mode 100644 index e15b4e5038cddda00acdd06b7748984b03094e6e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_KmaxSeqScore.cpp +++ /dev/null @@ -1,164 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/utils/GlobalConstants.h" - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); -DECLARE_int32(gpu_id); -DECLARE_bool(thread_local_rand_use_global_seed); - -vector randSampling(int range, int n) { - CHECK_GE(range, n); - vector num(range); - iota(begin(num), end(num), 0); - if (range == n) return num; - - random_shuffle(begin(num), end(num)); - num.resize(n); - return num; -} - -void genRandomSeqInfo(vector& seqStartPosition, - vector& subSeqStartPosition) { - const int maxSeqNum = 100; - // generate random start position information - int seqNum = 1 + (rand() % maxSeqNum); - seqStartPosition.resize(seqNum + 1, 0); - subSeqStartPosition.resize(1, 0); - - for (int i = 0; i < seqNum; ++i) { - int subSeqLen = 1 + (rand() % maxSeqNum); - for (int j = 0; j < subSeqLen; ++j) - subSeqStartPosition.push_back(subSeqStartPosition.back() + subSeqLen); - seqStartPosition[i + 1] = subSeqStartPosition.back(); - } -} - -void genRandomGroundTruth(real* values, - vector>& groundTruth, - vector& startPos, - size_t beamSize) { - groundTruth.resize(startPos.size() - 1, vector(beamSize, -1)); - for (size_t i = 0; i < startPos.size() - 1; ++i) { - int seqLen = startPos[i + 1] - startPos[i]; - vector pos = - randSampling(seqLen, min(static_cast(beamSize), seqLen)); - for (size_t j = 0; j < pos.size(); ++j) { - groundTruth[i][j] = pos[j]; - values[startPos[i] + pos[j]] = 1.; - } - } -} - -void checkLayerOut(vector> groundTruth, - real* layerOut, - size_t beamSize) { - for (size_t i = 0; i < groundTruth.size(); ++i) { - int begPos = i * beamSize; - vector tmp(layerOut + begPos, layerOut + begPos + beamSize); - sort(begin(tmp), 
end(tmp)); - sort(begin(groundTruth[i]), end(groundTruth[i])); - for (size_t j = 0; j < beamSize; ++j) CHECK_EQ(tmp[j], groundTruth[i][j]); - } -} - -TEST(Layer, kmaxSeqScoreLayer) { - const size_t maxBeamSize = 100; - size_t beamSize = 1 + (rand() % maxBeamSize); - - vector seqStartPosition; - vector subSeqStartPosition; - genRandomSeqInfo(seqStartPosition, subSeqStartPosition); - MatrixPtr inValue = - Matrix::create(subSeqStartPosition.back(), 1, false, false); - - std::vector mode = {false}; -#ifdef PADDLE_WITH_CUDA - mode.push_back(true); -#endif - - for (auto hasSubseq : {false, true}) { - vector> groundTruth; - inValue->randomizeUniform(); - genRandomGroundTruth(inValue->getData(), - groundTruth, - hasSubseq ? subSeqStartPosition : seqStartPosition, - beamSize); - - for (auto useGpu : mode) { - TestConfig config; - config.layerConfig.set_type("kmax_seq_score"); - config.layerConfig.set_beam_size(beamSize); - - if (hasSubseq) { - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - "scores", - inValue, - seqStartPosition, - subSeqStartPosition}); - } else { - config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, "scores", inValue, seqStartPosition}); - } - config.layerConfig.add_inputs(); - - // data layer initialize - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer( - config, - &dataLayers, - &datas, - &layerMap, - "kmax_seq_score", - 100 /* actually this parameter is unused in self-defined input*/, - false, - useGpu); - // test layer initialize - std::vector parameters; - LayerPtr kmaxSeqScoreLayer; - FLAGS_use_gpu = useGpu; - initTestLayer(config, &layerMap, ¶meters, &kmaxSeqScoreLayer); - kmaxSeqScoreLayer->forward(PASS_TRAIN); - - const MatrixPtr outValue = kmaxSeqScoreLayer->getOutputValue(); - CHECK_EQ(outValue->getHeight(), - hasSubseq ? 
subSeqStartPosition.size() - 1 - : seqStartPosition.size() - 1); - CHECK_EQ(outValue->getWidth(), beamSize); - checkLayerOut(groundTruth, outValue->getData(), beamSize); - } - } -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - FLAGS_thread_local_rand_use_global_seed = true; - srand((size_t)(time(NULL))); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_LayerGrad.cpp b/paddle/legacy/gserver/tests/test_LayerGrad.cpp deleted file mode 100644 index 979cf8ee673291d66f8704f2deda6c7160f4b228..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_LayerGrad.cpp +++ /dev/null @@ -1,2532 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef PADDLE_WITH_CUDA -#include -#endif -#include -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/math/MathUtils.h" - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); -DECLARE_int32(gpu_id); -DECLARE_double(checkgrad_eps); -DECLARE_bool(thread_local_rand_use_global_seed); -DECLARE_bool(prev_batch_state); - -TEST(Operator, dot_mul) { - TestConfig config; - config.layerConfig.set_size(10); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); - operatorConf.set_type("dot_mul"); - operatorConf.set_dotmul_scale(-1); - - testOperatorGrad(config, operatorConf, 100, false, false); -} - -TEST(Projection, context) { - for (auto contextStart : {-5, -3, -1, 0, 3}) { - for (auto contextLength : {1, 2, 5, 7}) { - for (auto batchSize : {1, 2, 5, 20}) { - for (auto trainablePadding : {false, true}) { - LOG(INFO) << " contextStart=" << contextStart - << " contextLength=" << contextLength - << " batchSize=" << batchSize - << " trainablePadding=" << trainablePadding; - ProjectionConfig conf; - conf.set_type("context"); - conf.set_input_size(10); - conf.set_context_start(contextStart); - conf.set_context_length(contextLength); - conf.set_trainable_padding(trainablePadding); - conf.set_output_size(conf.context_length() * conf.input_size()); - int pad = - std::max(0, -conf.context_start()) + - std::max(0, conf.context_start() + conf.context_length() - 1); - for (auto useGpu : {false, true}) { - testProjectionGrad( - conf, - INPUT_SEQUENCE_DATA, - trainablePadding ? 
conf.input_size() * pad : 0, - batchSize, - useGpu, - contextStart + contextLength <= 1); // = testState - } - } - } - } - } -} - -TEST(Projection, trans_fc) { - ProjectionConfig conf; - conf.set_type("trans_fc"); - conf.set_input_size(50); - conf.set_output_size(20); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 1000, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, fc) { - ProjectionConfig conf; - conf.set_type("fc"); - conf.set_input_size(10); - conf.set_output_size(20); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 200, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, dot_mul) { - ProjectionConfig conf; - conf.set_type("dot_mul"); - conf.set_input_size(20); - conf.set_output_size(20); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 20, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, table) { - ProjectionConfig conf; - conf.set_type("table"); - conf.set_input_size(10); - conf.set_output_size(20); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_LABEL, - /* parameterSize */ 200, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, identity) { - ProjectionConfig conf; - conf.set_type("identity"); - conf.set_input_size(10); - conf.set_output_size(10); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 0, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, slice) { - ProjectionConfig conf; - conf.set_type("slice"); - conf.set_input_size(100); - SliceConfig& slice1 = *conf.add_slices(); - slice1.set_start(10); - slice1.set_end(20); - SliceConfig& slice2 = *conf.add_slices(); - slice2.set_start(50); - slice2.set_end(70); - conf.set_output_size(30); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 0, - /* batchSize */ 10, - useGpu); 
- } -} - -TEST(Projection, scaling) { - ProjectionConfig conf; - conf.set_type("scaling"); - conf.set_input_size(10); - conf.set_output_size(10); - for (auto useGpu : {false}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 1, - /* batchSize */ 100, - useGpu); - } -} - -void testProjectionConv(size_t groups, bool isDeconv) { - const int NUM_FILTERS = 18; - const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 2; - const int CHANNELS = 3; - const int IMAGE_SIZE = 16; - -#if CUDNN_VERSION >= 6000 - const int DILATION = 2; -#else - const int DILATION = 1; -#endif - - ProjectionConfig conf; - if (isDeconv) { - conf.set_type("convt"); - } else { - conf.set_type("conv"); - } - conf.set_num_filters(NUM_FILTERS); - - ConvConfig* conv = conf.mutable_conv_conf(); - conv->set_filter_size(FILTER_SIZE); - conv->set_filter_size_y(FILTER_SIZE_Y); - conv->set_channels(CHANNELS); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_dilation(DILATION); - conv->set_dilation_y(DILATION); - conv->set_groups(groups); - if (isDeconv) { - conv->set_filter_channels(NUM_FILTERS / conv->groups()); - } else { - conv->set_filter_channels(conv->channels() / conv->groups()); - } - conv->set_img_size(IMAGE_SIZE); - int output_x = outputSize(conv->img_size(), - (conv->filter_size() - 1) * DILATION + 1, - conv->padding(), - conv->stride(), - /* caffeMode */ true); - int output_y = outputSize(conv->img_size(), - (conv->filter_size_y() - 1) * DILATION + 1, - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true); - conv->set_output_x(output_x); - conv->set_output_y(output_y); - LOG(INFO) << "DILATION:" << DILATION << "; output_x: " << output_x - << "; output_y: " << output_y; - if (isDeconv) { - int deconv_image_x = imageSize(output_x, - (conv->filter_size() - 1) * DILATION + 1, - conv->padding(), - conv->stride(), - /* caffeMode */ true); - int deconv_image_y = imageSize(output_y, - (conv->filter_size_y() - 1) * 
DILATION + 1, - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true); - - LOG(INFO) << " deconv_image_x: " << deconv_image_x - << "; deconv_image_y: " << deconv_image_y; - conf.set_input_size(output_x * output_y * CHANNELS); - conf.set_output_size(deconv_image_x * deconv_image_y * NUM_FILTERS); - } else { - conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); - conf.set_output_size(output_x * output_y * NUM_FILTERS); - } - - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * - FILTER_SIZE_Y / groups, - /* batchSize */ 100, - true, - false, - NUM_FILTERS, - true); -} - -#ifdef PADDLE_WITH_CUDA -TEST(Projection, conv) { - /// test ConvProjection - testProjectionConv(1, false); - testProjectionConv(3, false); - /// test ConvTransProjection - testProjectionConv(1, true); - testProjectionConv(3, true); -} -#endif - -TEST(Layer, BilinearInterpLayer) { - TestConfig config; - config.layerConfig.set_type("bilinear_interp"); - config.biasSize = 0; - config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); - - LayerInputConfig* input = config.layerConfig.add_inputs(); - BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf(); - ImageConfig* image = bilinear->mutable_image_conf(); - image->set_img_size(32); - image->set_img_size_y(32); - image->set_channels(4); - - for (auto useGpu : {false, true}) { - for (auto outSize : {32, 64}) { - bilinear->set_out_size_x(outSize); - bilinear->set_out_size_y(outSize); - testLayerGrad(config, "bilinear_interp", 10, false, useGpu); - } - } -} - -TEST(Layer, concat) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("concat"); - config.layerConfig.set_size(15); - config.layerConfig.set_active_type("sigmoid"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, 
true}) { - testLayerGrad(config, "concat", 100, false, useGpu); - } -} - -TEST(Layer, AddtoLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("addto"); - config.layerConfig.set_size(10); - config.layerConfig.set_active_type("sigmoid"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "addto", 100, false, useGpu); - } -} - -TEST(Layer, CTCLayer) { - TestConfig config; - config.layerConfig.set_type("ctc"); - config.layerConfig.set_norm_by_times(false); - config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "ctc", - 100, - /* trans */ false, /* useGpu */ - useGpu); - } -} - -TEST(Layer, cosSimLayer) { - TestConfig config; - config.layerConfig.set_type("cos"); - config.layerConfig.set_size(1); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 50, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "cos", 100, false, useGpu); - } -} - -TEST(Layer, CosSimVecMatLayer) { - TestConfig config; - config.layerConfig.set_type("cos_vm"); - config.layerConfig.set_size(5); // output size - config.layerConfig.set_cos_scale(2.0); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, 
"cos_vm", 100, false, useGpu); - } -} - -void testDepthwiseConvLayer(const string& type, bool useGpu) { - TestConfig config; - config.biasSize = 32; - config.layerConfig.set_type(type); - config.layerConfig.set_num_filters(32); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(2); - conv->set_filter_size_y(3); - conv->set_channels(16); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(16); - conv->set_filter_channels(conv->channels() / conv->groups()); - conv->set_img_size(16); - conv->set_img_size_y(8); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true)); - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - config.layerConfig.num_filters()); - - testLayerGrad(config, "depthwise_conv", 100, false, useGpu); - // Use small batch_size and useWeight=true to test biasGrad - testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02); -} - -TEST(Layer, depthwiseConvLayer) { - // 'depthwise_conv' is a sepecial case of 'exconv' whose - // groups size equals to the input channels size. 
- testDepthwiseConvLayer("exconv", /* useGpu= */ false); -#ifdef PADDLE_WITH_CUDA - testDepthwiseConvLayer("exconv", /* useGpu= */ true); -#endif -} - -void testConvLayer(const string& type, bool trans, bool useGpu) { - TestConfig config; - config.biasSize = 16; - config.layerConfig.set_type(type); - config.layerConfig.set_num_filters(16); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - int dilation = 2; - if (type == "cudnn_conv") { -#if CUDNN_VERSION >= 6000 - dilation = 2; -#else - dilation = 1; -#endif - } - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 768, 192}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(2); - conv->set_filter_size_y(2); - conv->set_channels(3); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_dilation(dilation); - conv->set_dilation_y(dilation); - conv->set_groups(1); - conv->set_filter_channels(conv->channels() / conv->groups()); - conv->set_img_size(16); - conv->set_img_size_y(16); - conv->set_output_x(outputSize(conv->img_size(), - (conv->filter_size() - 1) * dilation + 1, - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - conv->set_output_y(outputSize(conv->img_size_y(), - (conv->filter_size_y() - 1) * dilation + 1, - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true)); - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - config.layerConfig.num_filters()); - - testLayerGrad(config, "conv", 100, trans, useGpu); - // Use small batch_size and useWeight=true to test biasGrad - testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); -} - -TEST(Layer, convLayer) { - testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false); -#ifdef PADDLE_WITH_CUDA - testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true); - testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true); 
-#endif -} - -void testConvTransLayer(const string& type, bool trans, bool useGpu) { - TestConfig config; - config.biasSize = 3; - config.layerConfig.set_type(type); - config.layerConfig.set_num_filters(3); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(2); - conv->set_filter_size_y(4); - conv->set_channels(16); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(1); - conv->set_filter_channels(3 / conv->groups()); - conv->set_img_size(16); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - - config.layerConfig.set_size(conv->img_size() * conv->img_size() * - config.layerConfig.num_filters()); - - testLayerGrad(config, "convTrans", 100, trans, useGpu); - // Use small batch_size and useWeight=true to test biasGrad - testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); -} - -TEST(Layer, convTransLayer) { - for (auto useGpu : {false, true}) { - testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); - } -#ifdef PADDLE_WITH_CUDA - testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); -#endif -} - -TEST(Layer, blockExpandLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("blockexpand"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 6144, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - BlockExpandConfig* blockExpand = input->mutable_block_expand_conf(); - blockExpand->set_img_size_x(64); - blockExpand->set_img_size_y(32); - blockExpand->set_channels(3); - blockExpand->set_padding_x(0); - blockExpand->set_padding_y(0); - blockExpand->set_block_x(4); - 
blockExpand->set_block_y(32); - blockExpand->set_stride_x(2); - blockExpand->set_stride_y(2); - blockExpand->set_output_x(outputSize(blockExpand->img_size_x(), - blockExpand->block_x(), - blockExpand->padding_x(), - blockExpand->stride_x(), - /* caffeMode */ false)); - blockExpand->set_output_y(outputSize(blockExpand->img_size_y(), - blockExpand->block_y(), - blockExpand->padding_y(), - blockExpand->stride_y(), - /* caffeMode */ false)); - config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() * - blockExpand->channels()); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "blockexpand", 100, false, useGpu); - } -} - -TEST(Layer, maxoutLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("maxout"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - MaxOutConfig* maxout = input->mutable_maxout_conf(); - ImageConfig* image = maxout->mutable_image_conf(); - - image->set_img_size(32); - image->set_img_size_y(32); - image->set_channels(4); - maxout->set_groups(2); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "maxout", 10, false, useGpu); - } -} - -void testFcLayer(string format, size_t nnz) { - TestConfig config; - config.biasSize = 1024; - config.layerConfig.set_type("fc"); - config.layerConfig.set_size(1024); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_drop_rate(0.1); - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", 2048, nnz, ParaSparse(format)}); - config.layerConfig.add_inputs(); - - LOG(INFO) << config.inputDefs[0].sparse.sparse << " " - << config.inputDefs[0].sparse.format; - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "fc", - 100, - /* trans */ false, - useGpu, - /* weight */ true); - } -} - -TEST(Layer, fcLayer) { - testFcLayer("", 1024 * 1024 * 2); - testFcLayer("csc", 1024 * 10); - testFcLayer("csr", 1024 * 10); -} - -TEST(Layer, 
SelectiveFullyConnectedLayer) { - TestConfig config; - size_t nin = 16; - size_t nout = 256; - config.layerConfig.set_type("selective_fc"); - config.layerConfig.set_size(nout); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_has_selected_colums(true); - config.layerConfig.set_selective_fc_pass_generation(false); - config.biasSize = nout; - - config.inputDefs.push_back({INPUT_DATA, "input0", nin, nin * nout}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back( - {INPUT_SPARSE_NON_VALUE_DATA, "index", nout, 0, ParaSparse("csr", true)}); - config.layerConfig.add_inputs(); - - testLayerGrad(config, - "selective_fc", - 100, - /* trans= */ false, - /* useGup= */ false, - false); -#ifdef PADDLE_WITH_CUDA - testLayerGrad(config, - "selective_fc", - 100, - /* trans= */ false, - /* useGup= */ true, - false); -#endif -} - -TEST(Layer, DataNormLayer) { - TestConfig config; - config.layerConfig.set_type("data_norm"); - config.layerConfig.set_size(20); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 100}); - config.inputDefs.back().isStatic = true; - config.layerConfig.add_inputs(); - - for (auto strategy : {"z-score", "min-max", "decimal-scaling"}) { - config.layerConfig.set_data_norm_strategy(strategy); - // The parameters are static, so not support GPU now - testLayerGrad(config, - "data_norm", - 200, - /* trans */ false, - /* useGpu */ false); - } -} - -TEST(Layer, hsigmoidLayer) { - TestConfig config; - config.layerConfig.set_type("hsigmoid"); - config.layerConfig.set_num_classes(5); - config.layerConfig.set_size(1); - config.biasSize = config.layerConfig.num_classes() - 1; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 200}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 5, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "hsigmoid", - 100, - /* trans */ false, - /* useGpu */ useGpu); - } 
-} - -TEST(Layer, multi_cross) { - TestConfig config; - config.layerConfig.set_type("multi-class-cross-entropy"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad( - config, "multi-class-cross-entropy", 100, /* trans */ false, useGpu); - } -} - -TEST(Layer, multi_binary_label_sparse_mat) { - TestConfig config; - config.layerConfig.set_type("multi_binary_label_cross_entropy"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "multi_binary_label_cross_entropy", - 100, - /* trans */ false, - useGpu); - } -} - -TEST(layer, multi_binary_label_id) { - TestConfig config; - config.layerConfig.set_type("multi_binary_label_cross_entropy"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "multi_binary_label_cross_entropy", - 100, - /* trans */ false, - useGpu); - } -} - -TEST(Layer, multi_cross_with_selfnorm) { - TestConfig config; - config.layerConfig.set_type("multi_class_cross_entropy_with_selfnorm"); - config.layerConfig.set_softmax_selfnorm_alpha(0.1); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // Not support GPU now - testLayerGrad(config, - "multi_class_cross_entropy_with_selfnorm", - 
100, - /* trans */ false, - /* useGpu */ false); -} - -TEST(Layer, multi_cross_soft) { - TestConfig config; - config.layerConfig.set_type("soft_binary_class_cross_entropy"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "soft_binary_class_cross_entropy", - 100, - /* trans */ false, - useGpu); - } -} - -TEST(Layer, square_error) { - TestConfig config; - config.layerConfig.set_type("square_error"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); - } -} - -TEST(Layer, sparse_square_error) { - TestConfig config; - config.layerConfig.set_type("square_error"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // "GpuSparseMatrix" as label is not supported - testLayerGrad(config, - "square_error", - 100, - /* trans */ false, - /* useGpu */ false); -} - -TEST(Layer, sparse_float_square_error) { - TestConfig config; - config.layerConfig.set_type("square_error"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_SPARSE_FLOAT_VALUE_DATA, "layer_1", 50, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // "GpuSparseMatrix" as label is not supported - testLayerGrad(config, - "square_error", - 100, - /* trans */ false, - /* useGpu */ false); -} - -TEST(Layer, 
square_error_weighted) { - TestConfig config; - config.layerConfig.set_type("square_error"); - config.biasSize = 0; - config.testAccumulate = false; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); - } -} - -TEST(Layer, huber_regression_loss) { - TestConfig config; - config.layerConfig.set_type("huber_regression"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - for (auto delta : {1, 3, 5}) { - config.layerConfig.set_delta(delta); - testLayerGrad(config, "huber_regression", 100, /* trans */ false, useGpu); - } - } -} - -TEST(Layer, huber_two_class) { - TestConfig config; - config.layerConfig.set_type("huber_classification"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 2, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "huber_two_class", 100, /* trans */ false, useGpu); - } -} - -void testExpandLayer(string trans_type, bool hasSubseq) { - TestConfig config; - config.layerConfig.set_type("expand"); - - config.inputDefs.push_back( - {trans_type == "non-seq" ? INPUT_DENSE_DIM_DATA : INPUT_SEQUENCE_DATA, - "layer_0", - 10, - 0}); - config.inputDefs.push_back( - {hasSubseq ? 
INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, - "layer_1", - 10, - 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.set_trans_type(trans_type); - LOG(INFO) << " trans_type=" << trans_type << " hasSubseq=" << hasSubseq; - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "expand", 30, false, useGpu); - } -} - -TEST(Layer, ExpandLayer) { - testExpandLayer("non-seq", false); // non-seq expand to seq - testExpandLayer("non-seq", true); // non-seq expand to hasSubseq - testExpandLayer("seq", true); // seq expand to hasSubseq -} - -void testDegradeLayer(bool hasSubseq, - string layer_type, - string trans_type, - int stride) { - TestConfig config; - config.layerConfig.set_type(layer_type); - config.layerConfig.set_size(10); - config.layerConfig.set_seq_pool_stride(stride); - config.biasSize = 0; - - config.inputDefs.push_back( - {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, - "layer_0", - 10, - 0}); - config.layerConfig.add_inputs(); - config.layerConfig.set_trans_type(trans_type); - - auto testDegradeLayerGrad = [](TestConfig& config, string layer_type) { - for (auto useGpu : {false, true}) { - testLayerGrad(config, layer_type, 100, false, useGpu); - } - }; - - if (layer_type == "average") { - for (auto strategy : {"average", "sum", "squarerootn"}) { - LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type - << " average_strategy=" << strategy - << " seq_pool_stride=" << stride; - config.layerConfig.set_average_strategy(strategy); - testDegradeLayerGrad(config, layer_type); - } - } else { - LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type - << " seq_pool_stride=" << stride; - testDegradeLayerGrad(config, layer_type); - } -} - -TEST(Layer, MaxLayer) { - testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq - testDegradeLayer(false, - "max", - "non-seq", - 5); // seq max to a shorten seq, stride window = 5 - testDegradeLayer(true, "max", 
"non-seq", -1); // hasSubseq max to non-seq - testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq -} - -TEST(Layer, SequenceLastInstanceLayer) { - testDegradeLayer(false, - "seqlastins", - "non-seq", - -1); // seq seqlastins to non-seq - testDegradeLayer(false, - "seqlastins", - "non-seq", - 5); // seq seqlastins to a shorten seq, stride window = 5 - testDegradeLayer(true, - "seqlastins", - "non-seq", - -1); // hasSubseq seqlastins to non-seq - testDegradeLayer(true, - "seqlastins", - "seq", - -1); // hasSubseq seqlastins to seq -} - -TEST(Layer, AverageLayer) { - testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq - testDegradeLayer(false, - "average", - "non-seq", - 5); // seq average to a shorten seq, stride window = 5 - testDegradeLayer(true, - "average", - "non-seq", - -1); // hasSubseq average to non-seq - testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq -} - -TEST(Layer, SequenceConcatLayer) { - TestConfig config; - config.layerConfig.set_type("seqconcat"); - config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "seqconcat", 100, false, useGpu); - } -} - -TEST(Layer, SequenceReshapeLayer) { - TestConfig config; - config.layerConfig.set_type("seqreshape"); - config.layerConfig.set_size(10); - - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 100, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "seqreshape", 100, false, useGpu); - } -} - -TEST(Layer, ConvShiftLayer) { - TestConfig config; - config.layerConfig.set_type("conv_shift"); - config.layerConfig.set_size(10); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - 
config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // Not support GPU now - testLayerGrad(config, "conv_shift", 100, false, false); -} - -TEST(Layer, PowerLayer) { - TestConfig config; - config.layerConfig.set_type("power"); - config.layerConfig.set_size(10); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "power", 100, false, useGpu); - } -} - -TEST(Layer, ConvexCombinationLayer) { - TestConfig config; - config.layerConfig.set_type("convex_comb"); - config.layerConfig.set_size(20); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "convex_comb", 100, false, useGpu); - } -} - -TEST(Layer, InterpolationLayer) { - TestConfig config; - config.layerConfig.set_type("interpolation"); - config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_2", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "interpolation", 100, false, useGpu); - } -} - -TEST(Layer, DotProdLayer) { - TestConfig config; - config.layerConfig.set_type("dot_prod"); - config.layerConfig.set_size(1); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - 
- for (auto useGpu : {false, true}) { - testLayerGrad(config, "dot_prod", 10, false, useGpu); - } -} - -TEST(Layer, OuterProdLayer) { - TestConfig config; - config.layerConfig.set_type("out_prod"); - config.layerConfig.set_size(100); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "out_prod", 100, false, useGpu); - } -} - -TEST(Layer, SlopeInterceptLayer) { - TestConfig config; - config.layerConfig.set_type("slope_intercept"); - config.layerConfig.set_size(10); - config.layerConfig.set_slope(1.0); - config.layerConfig.set_intercept(0.1); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "slope_intercept", 100, false, useGpu); - } -} - -TEST(Layer, ScalingLayer) { - TestConfig config; - config.layerConfig.set_type("scaling"); - config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "scaling", 100, false, useGpu); - } -} - -void testNormLayer(const string& normType, bool trans, bool useGpu) { - TestConfig config; - config.layerConfig.set_type("norm"); - config.layerConfig.set_active_type("relu"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - NormConfig* norm = input->mutable_norm_conf(); - norm->set_norm_type(normType); - norm->set_channels(16); - norm->set_size(5); - norm->set_scale(0.001); - norm->set_pow(0.75); - norm->set_blocked(0); - norm->set_img_size(14); - norm->set_img_size_y(7); - 
norm->set_output_x(norm->img_size()); - norm->set_output_y(norm->img_size_y()); - if (norm->norm_type() == "cmrnorm" || - norm->norm_type() == "cmrnorm-projection") { - norm->set_scale(norm->scale() / norm->size()); - } else { - norm->set_scale(norm->scale() / (norm->size() * norm->size())); - } - - config.layerConfig.set_size(norm->output_x() * norm->output_y() * - norm->channels()); - config.biasSize = 0; - - testLayerGrad(config, "norm", 100, trans, useGpu); -} - -TEST(Layer, NormLayer) { - testNormLayer("cmrnorm-projection", - /* trans= */ false, /* useGpu= */ - true); - testNormLayer("cmrnorm-projection", - /* trans= */ false, /* useGpu= */ - false); -} - -void setPoolConfig(TestConfig* config, - PoolConfig* pool, - const string& poolType) { - (*config).biasSize = 0; - (*config).layerConfig.set_type("pool"); - (*config).layerConfig.set_num_filters(16); - - int kw = 3, kh = 3; - int pw = 0, ph = 0; - int sw = 2, sh = 2; - pool->set_pool_type(poolType); - pool->set_channels(16); - pool->set_size_x(kw); - pool->set_size_y(kh); - pool->set_start(0); - pool->set_padding(pw); - pool->set_padding_y(ph); - pool->set_stride(sw); - pool->set_stride_y(sh); - - int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); - int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); - pool->set_output_x(ow); - pool->set_output_y(oh); -} - -void testPoolLayer(const string& poolType, - bool trans, - bool useGpu, - bool excludeMode = true) { - TestConfig config; - config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - PoolConfig* pool = input->mutable_pool_conf(); - - pool->set_img_size(14); - pool->set_img_size_y(14); - pool->set_exclude_mode(excludeMode); - setPoolConfig(&config, pool, poolType); - config.layerConfig.set_size(pool->output_x() * pool->output_y() * - pool->channels()); - - testLayerGrad(config, "pool", 100, trans, useGpu); -} - -#ifdef PADDLE_WITH_CUDA -void 
testPoolLayer2(const string& poolType, bool trans, bool useGpu) { - TestConfig config; - config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - PoolConfig* pool = input->mutable_pool_conf(); - - pool->set_size_y(4); - pool->set_stride_y(3); - pool->set_img_size(10); - pool->set_img_size_y(20); - setPoolConfig(&config, pool, poolType); - pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / - ((float)pool->stride_y()) + - 1.5); - config.layerConfig.set_size(pool->output_x() * pool->output_y() * - pool->channels()); - - testLayerGrad(config, "pool", 100, trans, useGpu); -} -#endif - -TEST(Layer, PoolLayer) { - testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); - testPoolLayer("avg-projection", - /* trans= */ false, - /* useGpu= */ false, - /* excludeMode= */ false); - testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); - testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ false); - -#ifdef PADDLE_WITH_CUDA - testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); - testPoolLayer("avg-projection", - /* trans= */ false, - /* useGpu= */ true, - /* excludeMode= */ false); - testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true); - testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); - testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); - testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); - testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); - testPoolLayer2("cudnn-avg-incl-pad-pool", - /* trans= */ false, - /* useGpu= */ true); - testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ true); -#endif -} - -void setPool3DConfig(TestConfig* config, - PoolConfig* pool, - const string& poolType) { - // filter size - const int NUM_FILTERS = 16; - const int FILTER_SIZE = 3; - const int 
FILTER_SIZE_Y = 3; - const int FILTER_SIZE_Z = 3; - const int CHANNELS = 16; - - (*config).biasSize = 0; - (*config).layerConfig.set_type("pool3d"); - (*config).layerConfig.set_num_filters(NUM_FILTERS); - - int kw = FILTER_SIZE, kh = FILTER_SIZE_Y, kd = FILTER_SIZE_Z; - int pw = 0, ph = 0, pd = 0; - int sw = 2, sh = 2, sd = 2; - - pool->set_pool_type(poolType); - pool->set_pool_type("avg"); - pool->set_channels(CHANNELS); - pool->set_size_x(kw); - pool->set_size_y(kh); - pool->set_size_z(kd); - pool->set_padding(0); - pool->set_padding_y(0); - pool->set_padding_z(0); - pool->set_stride(sw); - pool->set_stride_y(sh); - pool->set_stride_z(sd); - pool->set_start(0); - int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); - int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); - int od = outputSize(pool->img_size_z(), kd, pd, sd, /* caffeMode */ false); - pool->set_output_x(ow); - pool->set_output_y(oh); - pool->set_output_z(od); -} - -void testPool3DLayer(const string& poolType, bool trans, bool useGpu) { - TestConfig config; - config.inputDefs.push_back({INPUT_DATA, "layer_0", 11664, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - PoolConfig* pool = input->mutable_pool_conf(); - - const int IMAGE_SIZE = 9; - const int IMAGE_SIZE_Y = 9; - const int IMAGE_SIZE_Z = 9; - - pool->set_img_size(IMAGE_SIZE); - pool->set_img_size_y(IMAGE_SIZE_Y); - pool->set_img_size_z(IMAGE_SIZE_Z); - - setPool3DConfig(&config, pool, poolType); - config.layerConfig.set_size(pool->output_x() * pool->output_y() * - pool->channels()); - - testLayerGrad(config, "pool3d", 100, trans, useGpu); -} - -TEST(Layer, Pool3DLayer) { - testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false); - testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false); -#ifdef PADDLE_WITH_CUDA - testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true); - testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true); -#endif -} - -void 
testSppLayer(const string& poolType, - const int pyramidHeight, - bool trans, - bool useGpu) { - TestConfig config; - config.layerConfig.set_type("spp"); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - SppConfig* sppConfig = input->mutable_spp_conf(); - sppConfig->set_pool_type(poolType); - sppConfig->set_pyramid_height(pyramidHeight); - ImageConfig* imageConfig = sppConfig->mutable_image_conf(); - imageConfig->set_channels(16); - imageConfig->set_img_size(10); - imageConfig->set_img_size_y(20); - int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); - config.layerConfig.set_size(outputSize * imageConfig->channels()); - testLayerGrad(config, "spp", 100, trans, useGpu); -} - -TEST(Layer, SpatialPyramidPoolLayer) { - for (auto useGpu : {false, true}) { - for (auto pyramidHeight : {1, 2, 3}) { - testSppLayer("avg-projection", pyramidHeight, false, useGpu); - testSppLayer("max-projection", pyramidHeight, false, useGpu); - } - } -} - -TEST(Layer, rankCostLayer) { - TestConfig config; - config.layerConfig.set_type("rank-cost"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "rank-cost", 100, false, useGpu); - } -} - -TEST(Layer, sumCostLayer) { - TestConfig config; - config.layerConfig.set_type("sum_cost"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "sum_cost", 100, false, useGpu); - } -} - -TEST(Layer, weightedRankCostLayer) { - TestConfig config; - config.layerConfig.set_type("rank-cost"); - 
config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_3", 1, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "weighted-rank-cost", 100, false, useGpu); - } -} - -TEST(Layer, TensorLayer) { - TestConfig config; - config.layerConfig.set_type("tensor"); - config.layerConfig.set_size(10); - config.layerConfig.set_active_type("sigmoid"); - config.biasSize = config.layerConfig.size(); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 250}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 5, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "tensor", 100, false, useGpu); - } -} - -TEST(Layer, RecurrentLayer) { - TestConfig config; - config.layerConfig.set_type("recurrent"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("tanh"); - config.biasSize = 4; - - config.inputDefs.push_back( - {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 4, /* paraSize= */ 16}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - for (auto reversed : {false, true}) { - config.layerConfig.set_reversed(reversed); - config.testState = !reversed; - testLayerGrad( - config, "recurrent", 50, /* trans= */ false, useGpu, false, 1.0); - } - } -} - -TEST(Layer, LstmLayer) { - TestConfig config; - config.layerConfig.set_type("lstmemory"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("tanh"); - config.layerConfig.set_active_state_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 28; - - config.inputDefs.push_back( - 
{INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 64}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - for (auto reversed : {false, true}) { - config.layerConfig.set_reversed(reversed); - config.testState = !reversed; - testLayerGrad( - config, "lstmemory", 100, /* trans= */ false, useGpu, false, 0.02); - } - } - for (auto useGpu : {true}) { - config.testBatchState = true; - config.layerConfig.set_reversed(false); - testLayerGrad(config, "lstmemory", 10, /* trans= */ false, useGpu); - } -} - -TEST(Layer, MDLstmLayer) { - TestConfig config; - config.layerConfig.set_type("mdlstmemory"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_active_state_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 4 * 9; - - config.inputDefs.push_back( - {INPUT_SEQUENCE_MDIM_DATA, "layer_0", 4 * 5, 4 * 4 * 5}); - config.layerConfig.add_inputs(); - config.layerConfig.add_directions(true); - config.layerConfig.add_directions(true); - - for (auto useGpu : {false, true}) { - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 2; j++) { - config.layerConfig.set_directions(0, bool(i)); - config.layerConfig.set_directions(1, bool(j)); - testLayerGrad(config, "mdlstmemory", 100, false, useGpu); - } - } - } -} - -TEST(Layer, ParameterReluLayer) { - auto testParameterReluLayer = [&](size_t inputSize, size_t channels) { - TestConfig config; - config.layerConfig.set_type("prelu"); - config.inputDefs.push_back({INPUT_DATA, "layer_0", inputSize, channels}); - config.layerConfig.add_inputs(); - config.layerConfig.set_size(inputSize); - config.layerConfig.set_partial_sum(inputSize / - channels); // size of feature map - for (auto useGpu : {false, true}) { - testLayerGrad(config, "prelu", 100, false, useGpu); - } - }; - - testParameterReluLayer(192, 1); - testParameterReluLayer(192, 3); - testParameterReluLayer(192, 192); -} - -TEST(Layer, ResizeLayer) { - 
TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("resize"); - config.layerConfig.set_size(64); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 16, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "resize", 100, false, useGpu); - } -} - -TEST(Layer, RotateLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("rotate"); - const int CHANNEL = 2; - const int HEIGHT = 8; - const int WIDTH = 4; - const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL; - config.layerConfig.set_size(INPUT_SIZE); - config.layerConfig.set_height(HEIGHT); - config.layerConfig.set_width(WIDTH); - config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "rotate", 100, false, useGpu); - } -} - -TEST(Layer, NCELayer) { - TestConfig config; - size_t numClasses = 4; - config.layerConfig.set_type("nce"); - config.layerConfig.set_size(1); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_num_classes(numClasses); - config.biasSize = numClasses; - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 16 * numClasses}); - config.inputDefs.push_back( - {INPUT_LABEL, "label", /* dim= */ numClasses, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto withWeight : {false, true}) { - if (withWeight) { - config.inputDefs.push_back( - {INPUT_DATA_TARGET, "weight", /* dim= */ 1, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - } - - for (auto isIdLabel : {false, true}) { - config.inputDefs[1] = { - isIdLabel ? 
INPUT_LABEL : INPUT_SPARSE_NON_VALUE_DATA, - "label", - /* dim= */ numClasses, - /* paraSize= */ 0}; - - for (auto withDist : {false, true}) { - config.layerConfig.clear_neg_sampling_dist(); - if (withDist) { - double sum = 0; - for (size_t i = 0; i < numClasses; ++i) { - real p = rand(); // NOLINT use rand_r - config.layerConfig.add_neg_sampling_dist(p); - sum += p; - } - for (size_t i = 0; i < numClasses; ++i) { - real p = config.layerConfig.neg_sampling_dist(i) / sum; - config.layerConfig.set_neg_sampling_dist(i, p); - } - } - LOG(INFO) << "NCELayer " - << " isIdLabel=" << isIdLabel << " withWeight=" << withWeight - << " withDist=" << withDist; - // Not support GPU now - testLayerGrad(config, - "nce", - 100, - /* trans= */ false, - /* useGpu */ false); - } - } - } -} - -TEST(Layer, GatedRecurrentLayer) { - TestConfig config; - config.layerConfig.set_type("gated_recurrent"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 12; - - config.inputDefs.push_back( - {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - for (auto reversed : {false, true}) { - config.layerConfig.set_reversed(reversed); - config.testState = !reversed; - testLayerGrad(config, "gated_recurrent", 100, /* trans= */ false, useGpu); - } - } -} - -TEST(Layer, GruStepLayer) { - TestConfig config; - config.layerConfig.set_type("gru_step"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 12; - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); - config.inputDefs.push_back( - {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - 
testLayerGrad(config, "gruStep", 100, /* trans= */ false, useGpu); - } -} - -TEST(Layer, LstmStepLayer) { - TestConfig config; - config.layerConfig.set_type("lstm_step"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_active_state_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 12; - config.testAccumulate = false; - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 0}); - config.inputDefs.push_back( - {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "lstmStep", 100, /* trans= */ false, useGpu); - } -} - -void testBatchNormLayer(const string& type, bool trans, bool useGpu) { - TestConfig config; - const int CHANNELS = 10; - const int IMG_SIZE = 16; - const int IMG_SIZE_Y = 8; - size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y; - config.layerConfig.set_type(type); - config.layerConfig.set_size(size); - config.layerConfig.set_active_type("sigmoid"); - config.biasSize = CHANNELS; - config.inputDefs.push_back({INPUT_DATA, - "layer_0", - /* dim= */ size, - /* paraSize= */ CHANNELS}); - - config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS}); - config.inputDefs.back().isStatic = true; - config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, CHANNELS}); - config.inputDefs.back().isStatic = true; - - LayerInputConfig* input = config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - ImageConfig* img_conf = input->mutable_image_conf(); - img_conf->set_channels(CHANNELS); - img_conf->set_img_size(IMG_SIZE); - img_conf->set_img_size_y(IMG_SIZE_Y); - - testLayerGrad(config, - "batch_norm", - 64, - /* trans= */ trans, - useGpu, - /* useWeight */ true); -} - -TEST(Layer, BatchNormalizationLayer) { - 
testBatchNormLayer("batch_norm", false, false); -#ifdef PADDLE_WITH_CUDA - testBatchNormLayer("batch_norm", false, true); - if (hl_get_cudnn_lib_version() >= int(4000)) { - testBatchNormLayer("cudnn_batch_norm", false, true); - } -#endif -} - -void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) { - TestConfig config; - const int CHANNELS = 10; - const int IMG_SIZE = 16; - const int IMG_SIZE_Y = 8; - const int IMG_SIZE_Z = 8; - size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y * IMG_SIZE_Z; - config.layerConfig.set_type(type); - config.layerConfig.set_size(size); - config.layerConfig.set_active_type("sigmoid"); - config.biasSize = CHANNELS; - config.inputDefs.push_back({INPUT_DATA, - "layer_0", - /* dim= */ size, - /* paraSize= */ CHANNELS}); - - config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS}); - config.inputDefs.back().isStatic = true; - config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, CHANNELS}); - config.inputDefs.back().isStatic = true; - - LayerInputConfig* input = config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - ImageConfig* img_conf = input->mutable_image_conf(); - img_conf->set_channels(CHANNELS); - img_conf->set_img_size(IMG_SIZE); - img_conf->set_img_size_y(IMG_SIZE_Y); - img_conf->set_img_size_z(IMG_SIZE_Z); - - testLayerGrad(config, - "batch_norm", - 64, - /* trans= */ trans, - useGpu, - /* useWeight */ true); -} - -TEST(Layer, testBatchNorm3DLayer) { - testBatchNorm3DLayer("batch_norm", false, false); -#ifdef PADDLE_WITH_CUDA - testBatchNorm3DLayer("batch_norm", false, true); - if (hl_get_cudnn_lib_version() >= int(4000)) { - testBatchNorm3DLayer("cudnn_batch_norm", false, true); - } -#endif -} - -void testConvOperator(bool isDeconv) { - TestConfig config; - const int NUM_FILTERS = 16; - const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 3; - const int CHANNELS = 3; - const int IMAGE_SIZE = 16; - const int IMAGE_SIZE_Y = 9; - 
OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); - if (isDeconv) { - operatorConf.set_type("convt"); - } else { - operatorConf.set_type("conv"); - } - ConvConfig* conv = operatorConf.mutable_conv_conf(); - operatorConf.set_num_filters(NUM_FILTERS); - conv->set_filter_size(FILTER_SIZE); - conv->set_filter_size_y(FILTER_SIZE_Y); - conv->set_channels(CHANNELS); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(1); - conv->set_img_size(IMAGE_SIZE); - conv->set_img_size_y(IMAGE_SIZE_Y); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true)); - - if (isDeconv) { - conv->set_filter_channels(NUM_FILTERS / conv->groups()); - config.inputDefs.push_back({INPUT_DATA, - "layer_0", - conv->output_x() * conv->output_y() * CHANNELS, - 0}); - config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS); - } else { - conv->set_filter_channels(conv->channels() / conv->groups()); - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - NUM_FILTERS); - } - - config.inputDefs.push_back( - {INPUT_DATA, - "layer_1", - FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, - 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); -} - -TEST(Operator, conv) { - testConvOperator(/*isDeconv*/ true); - testConvOperator(/*isDeconv*/ false); -} - -TEST(Layer, FeatureMapExpandLayer) { - TestConfig config; - config.layerConfig.set_type("featmap_expand"); - const int CHANNELS = 10; - const int INPUT_SIZE = 100; - config.layerConfig.set_size(INPUT_SIZE * CHANNELS); - 
config.layerConfig.set_num_filters(CHANNELS); - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, - "layer_0", - /* dim= */ INPUT_SIZE, - /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - for (auto useGpu : {false, true}) { - for (auto asRowVec : {false, true}) { - config.layerConfig.set_user_arg(asRowVec ? "as_row_vec" : "as_col_vec"); - testLayerGrad(config, - "featmap_expand", - /*batch_size*/ 100, - /* trans= */ false, - useGpu, - /* useWeight */ true); - } - } -} - -TEST(Layer, MultiplexLayer) { - TestConfig config; - const int LAYER_SIZE = 100; - config.layerConfig.set_type("multiplex"); - config.layerConfig.set_size(LAYER_SIZE); - - config.inputDefs.push_back({INPUT_LABEL, "layer_0", 2, 0}); - config.inputDefs.push_back( - {INPUT_DATA, "layer_1", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); - config.inputDefs.push_back( - {INPUT_DATA, "layer_2", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "multiplex", 512, /* trans= */ false, useGpu); - } -} - -TEST(Layer, PadLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("pad"); - - int c = 4; - int h = 31; - int w = 36; - size_t size = c * h * w; - config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - PadConfig* pad = input->mutable_pad_conf(); - ImageConfig* image = pad->mutable_image_conf(); - - image->set_channels(c); - image->set_img_size(h); - image->set_img_size_y(w); - pad->add_pad_c(1); - pad->add_pad_c(2); - pad->add_pad_h(2); - pad->add_pad_h(3); - pad->add_pad_w(3); - pad->add_pad_w(5); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "pad", 10, false, useGpu); - } -} - -TEST(Layer, CrossChannelNormLayer) { - TestConfig config; - config.paramInitialMean = 1.; - config.paramInitialStd = 0.; - 
config.layerConfig.set_type("norm"); - config.layerConfig.set_size(100); - LayerInputConfig* input = config.layerConfig.add_inputs(); - NormConfig* norm = input->mutable_norm_conf(); - norm->set_norm_type("cross-channel-norm"); - norm->set_channels(10); - norm->set_size(100); - norm->set_scale(0); - norm->set_pow(0); - norm->set_blocked(0); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false); - } -} - -TEST(Layer, smooth_l1) { - TestConfig config; - config.layerConfig.set_type("smooth_l1"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 200, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 200, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "smooth_l1", 100, false, useGpu, false); - } -} - -TEST(Layer, multibox_loss) { - TestConfig config; - config.layerConfig.set_type("multibox_loss"); - config.biasSize = 0; - LayerInputConfig* input = config.layerConfig.add_inputs(); - MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf(); - multiboxLoss->set_num_classes(21); - multiboxLoss->set_input_num(1); - multiboxLoss->set_overlap_threshold(0.5); - multiboxLoss->set_neg_pos_ratio(3); - multiboxLoss->set_neg_overlap(0.5); - multiboxLoss->set_background_id(0); - multiboxLoss->set_height(3); - multiboxLoss->set_width(3); - - size_t gtNum = 1; - MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false); - labelValue->randomizeUniform(); - labelValue->add(-0.5); - labelValue->sigmoid(*labelValue); - real* labelData = labelValue->getData(); - size_t labelWidth = labelValue->getWidth(); - for (size_t i = 0; i < gtNum; ++i) { - *(labelData + i * labelWidth) = std::rand() % 20 + 1; - *(labelData + i * labelWidth + 1) = 0.400259; - *(labelData + i * labelWidth + 2) = 0.377857; - *(labelData + i * labelWidth + 3) = 0.525712; - 
*(labelData + i * labelWidth + 4) = 0.519368; - } - vector seqStartPositions(gtNum + 1, 0); - for (size_t i = 1; i <= gtNum; ++i) { - seqStartPositions[i] = i; - } - - // Ensure at lease one matched bbox - MatrixPtr priorValue = Matrix::create(1, 72, false, false); - priorValue->randomizeUniform(); - priorValue->add(-0.5); - priorValue->sigmoid(*priorValue); - real* priorData = priorValue->getData(); - *(priorData) = 0.424811; - *(priorData + 1) = 0.397059; - *(priorData + 2) = 0.538905; - *(priorData + 3) = 0.447091; - *(priorData + 4) = 0.425720; - *(priorData + 5) = 0.515228; - *(priorData + 6) = 0.519452; - *(priorData + 7) = 0.591065; - - config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}}); - config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions}); - config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0}); - config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "multibox_loss", 1, false, useGpu, false); - } -} - -TEST(Layer, TransLayer) { - TestConfig config; - const int height = 128; - const int width = 256; - config.layerConfig.set_type("trans"); - config.layerConfig.set_size(width); - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", /* dim= */ height * width, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "trans", height, /* trans= */ false, useGpu); - } -} - -TEST(Layer, RowConvLayer) { - const int context = 3; - const int size = 512; - - TestConfig config; - config.layerConfig.set_type("row_conv"); - config.layerConfig.set_size(size); - config.layerConfig.set_active_type("sigmoid"); - - config.inputDefs.push_back( - {INPUT_SEQUENCE_DATA, "layer_0", size, context * size}); - LayerInputConfig* input = 
config.layerConfig.add_inputs(); - RowConvConfig* conv = input->mutable_row_conv_conf(); - conv->set_context_length(context); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "row_conv", 100, false, useGpu, false); - } -} - -TEST(Layer, CropLayer) { - TestConfig config; - // config input_0 - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ImageConfig* img = input->mutable_image_conf(); - img->set_channels(4); - img->set_img_size(16); - config.layerConfig.set_axis(2); - config.layerConfig.add_offset(0); - config.layerConfig.add_offset(0); - - // config input_1 - config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); - input = config.layerConfig.add_inputs(); - img = input->mutable_image_conf(); - img->set_channels(2); - img->set_img_size(8); - - // config crop layer - config.layerConfig.set_type("crop"); - config.layerConfig.set_name("cropLayer"); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "crop", 100, false, useGpu, false); - } -} - -TEST(Layer, roi_pool) { - TestConfig config; - config.layerConfig.set_type("roi_pool"); - config.biasSize = 0; - LayerInputConfig* input = config.layerConfig.add_inputs(); - ROIPoolConfig* roiPoolConf = input->mutable_roi_pool_conf(); - roiPoolConf->set_pooled_width(7); - roiPoolConf->set_pooled_height(7); - roiPoolConf->set_spatial_scale(1. 
/ 16); - roiPoolConf->set_width(14); - roiPoolConf->set_height(14); - - const size_t roiNum = 10; - const size_t roiDim = 10; - const size_t batchSize = 5; - MatrixPtr roiValue = Matrix::create(roiNum, roiDim, false, false); - roiValue->zeroMem(); - real* roiData = roiValue->getData(); - for (size_t i = 0; i < roiNum; ++i) { - roiData[i * roiDim + 0] = std::rand() % batchSize; - roiData[i * roiDim + 1] = std::rand() % 224; // xMin - roiData[i * roiDim + 2] = std::rand() % 224; // yMin - size_t xMin = static_cast(roiData[i * roiDim + 1]); - size_t yMin = static_cast(roiData[i * roiDim + 2]); - roiData[i * roiDim + 3] = xMin + std::rand() % (224 - xMin); // xMax - roiData[i * roiDim + 4] = yMin + std::rand() % (224 - yMin); // yMax - } - - config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}}); - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "rois", roiValue, {}}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "roi_pool", batchSize, false, useGpu, false); - } -} - -TEST(Layer, SwitchOrderLayer) { - TestConfig config; - // config input_0 - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ImageConfig* img = input->mutable_image_conf(); - img->set_channels(4); - img->set_img_size(16); - img->set_img_size_y(16); - - ReshapeConfig* reshape = config.layerConfig.mutable_reshape_conf(); - reshape->add_height_axis(0); - reshape->add_height_axis(1); - reshape->add_height_axis(2); - reshape->add_width_axis(3); - - // config softmax layer - config.layerConfig.set_type("switch_order"); - config.layerConfig.set_name("switchOrderLayer"); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "switch_order", 100, false, useGpu, true); - } -} - -vector randSampling(real range, int n) { - CHECK_GE(range, n); - vector num(range); - iota(begin(num), end(num), 0.); - if (range == n) return num; - - random_shuffle(begin(num), 
end(num)); - num.resize(n); - sort(begin(num), end(num)); - return num; -} - -TEST(Layer, SubNestedSequenceLayer) { - // layer size is not crutial for this layer, - // so use a small layer size in unittest - const int layerSize = 4; - - const int maxSeqNum = 50; - const int maxSeqLen = 50; - const int maxBeamSize = 32; - - srand((size_t)(time(NULL))); - int beamSize = 1 + (rand() % maxBeamSize); - - TestConfig config; - config.layerConfig.set_type("sub_nested_seq"); - config.layerConfig.set_name("sub_nested_seq_layer"); - config.layerConfig.set_size(layerSize); - - int seqNum = 1 + (rand() % maxSeqNum); - - // sequence information for the first input, it is a nested sequence - vector seqStartPos(seqNum + 1, 0); - vector subSeqStartPos(1, 0); - - // selected indices - MatrixPtr selectedIndices = Matrix::create(seqNum, beamSize, false, false); - selectedIndices->one(); - selectedIndices->mulScalar(-1.); - real* indicesData = selectedIndices->getData(); - - for (int i = 0; i < seqNum; ++i) { - int subSeqNum = 1 + (rand() % maxSeqNum); - for (int j = 0; j < subSeqNum; ++j) { - subSeqStartPos.push_back(subSeqStartPos.back() + - (1 + (rand() % maxSeqLen))); - } - vector selSeqs = - randSampling(static_cast(subSeqNum), min(beamSize, subSeqNum)); - memcpy(indicesData + (i * beamSize), - selSeqs.data(), - selSeqs.size() * sizeof(real)); - seqStartPos[i + 1] = subSeqStartPos.back(); - } - - MatrixPtr seqInputPtr = - Matrix::create(seqStartPos.back(), layerSize, false, false); - seqInputPtr->randomizeUniform(); - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - "nested_seq_input", - seqInputPtr, - seqStartPos, - subSeqStartPos}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, "selected_indices", selectedIndices}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "sub_nested_seq", - /* batchSize */ seqNum, - /* trans */ false, - /* useGpu*/ useGpu, - /* useWeight */ false); - 
} -} - -TEST(Layer, ClipLayer) { - const size_t batchSize = 128; - const size_t size = 512; - TestConfig config; - config.layerConfig.set_type("clip"); - config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ClipConfig* layerConf = input->mutable_clip_conf(); - double p1 = std::rand() / (double)RAND_MAX; - double p2 = std::rand() / (double)RAND_MAX; - layerConf->set_min(std::min(p1, p2)); - layerConf->set_max(std::max(p1, p2)); - for (auto useGpu : {false, true}) { - testLayerGrad(config, "clip", batchSize, false, useGpu, false); - } -} - -TEST(Layer, RowL2NormLayer) { - const size_t batchSize = 128; - const size_t size = 512; - TestConfig config; - config.layerConfig.set_type("row_l2_norm"); - config.layerConfig.set_size(size); - config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); - config.layerConfig.add_inputs(); - for (auto useGpu : {false, true}) { - testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false); - } -} - -void test3DConvLayer(const string& type, bool trans, bool useGpu) { - // filter size - const int NUM_FILTERS = 6; - // const int CHANNELS = 3; - const int FILTER_SIZE = 3; - const int FILTER_SIZE_Y = 3; - const int FILTER_SIZE_Z = 3; - - // input image - const int CHANNELS = 3; - const int IMAGE_SIZE = 9; - const int IMAGE_SIZE_Y = 9; - const int IMAGE_SIZE_Z = 9; - - TestConfig config; - config.biasSize = NUM_FILTERS; - config.layerConfig.set_type(type); - config.layerConfig.set_num_filters(NUM_FILTERS); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - // Setting up conv3D-trans layer - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - - conv->set_channels(CHANNELS); - conv->set_filter_size(FILTER_SIZE); - conv->set_filter_size_y(FILTER_SIZE_Y); - conv->set_filter_size_z(FILTER_SIZE_Z); - conv->set_padding(0); - conv->set_padding_y(0); - 
conv->set_padding_z(0); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_stride_z(2); - conv->set_img_size(IMAGE_SIZE); - conv->set_img_size_y(IMAGE_SIZE_Y); - conv->set_img_size_z(IMAGE_SIZE_Z); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true)); - conv->set_output_z(outputSize(conv->img_size_z(), - conv->filter_size_z(), - conv->padding_z(), - conv->stride_z(), - /* caffeMode */ true)); - - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - conv->output_z() * NUM_FILTERS); - conv->set_groups(1); - conv->set_filter_channels(conv->channels() / conv->groups()); - config.inputDefs.push_back( - {INPUT_DATA, - "layer_0", - CHANNELS * IMAGE_SIZE * IMAGE_SIZE_Y * IMAGE_SIZE_Z, - conv->filter_channels() * FILTER_SIZE * FILTER_SIZE_Y * FILTER_SIZE_Z * - NUM_FILTERS}); - - testLayerGrad(config, "conv3D", 10, trans, useGpu); - // Use small batch_size and useWeight=true to test biasGrad - testLayerGrad(config, "conv3D", 2, trans, useGpu, true, 0.02); -} - -TEST(Layer, test3DConvLayer) { - test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ false); -#ifdef PADDLE_WITH_CUDA - test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ true); -#endif -} - -void test3DDeConvLayer(const string& type, bool trans, bool useGpu) { - // filter size - const int NUM_FILTERS = 6; - // const int CHANNELS = 3; - const int FILTER_SIZE = 3; - const int FILTER_SIZE_Y = 3; - const int FILTER_SIZE_Z = 3; - - // input image - const int CHANNELS = 3; - const int IMAGE_SIZE = 4; - const int IMAGE_SIZE_Y = 6; - const int IMAGE_SIZE_Z = 6; - - // Setting up conv-trans layer - TestConfig config; - config.biasSize = NUM_FILTERS; - config.layerConfig.set_type("deconv3d"); - config.layerConfig.set_num_filters(NUM_FILTERS); - 
config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - - conv->set_channels(CHANNELS); - conv->set_filter_size(FILTER_SIZE); - conv->set_filter_size_y(FILTER_SIZE_Y); - conv->set_filter_size_z(FILTER_SIZE_Z); - conv->set_padding(0); - conv->set_padding_y(0); - conv->set_padding_z(0); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_stride_z(2); - conv->set_output_x(IMAGE_SIZE); - conv->set_output_y(IMAGE_SIZE_Y); - conv->set_output_z(IMAGE_SIZE_Z); - - conv->set_img_size(imageSize(conv->output_x(), - conv->filter_size(), - conv->padding(), - conv->stride(), - true)); - conv->set_img_size_y(imageSize(conv->output_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - true)); - conv->set_img_size_z(imageSize(conv->output_z(), - conv->filter_size_z(), - conv->padding_z(), - conv->stride_z(), - true)); - config.layerConfig.set_size(conv->img_size() * conv->img_size_y() * - conv->img_size_z() * NUM_FILTERS); - conv->set_groups(1); - conv->set_filter_channels(conv->channels() / conv->groups()); - config.inputDefs.push_back( - {INPUT_DATA, - "layer_0", - CHANNELS * IMAGE_SIZE * IMAGE_SIZE_Y * IMAGE_SIZE_Z, - conv->filter_channels() * FILTER_SIZE * FILTER_SIZE_Y * FILTER_SIZE_Z * - NUM_FILTERS}); - - testLayerGrad(config, "deconv3D", 10, trans, useGpu); - // Use small batch_size and useWeight=true to test biasGrad - testLayerGrad(config, "deconv3D", 2, trans, useGpu, true, 0.02); -} - -TEST(Layer, test3DDeConvLayer) { - test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ false); -#ifdef PADDLE_WITH_CUDA - test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ true); -#endif -} - -TEST(Layer, ScaleShiftLayer) { - // FIXME: Disable ScaleShiftLayer because it is not stable. 
- // https://github.com/PaddlePaddle/Paddle/issues/7781 - return; - // const size_t batchSize = 16; - // const size_t size = 32; - // TestConfig config; - // config.layerConfig.set_type("scale_shift"); - // config.layerConfig.set_size(size); - // config.biasSize = 1; - // config.inputDefs.push_back( - // {INPUT_DATA, "input", /* dim= */ size, /* paraSize= */ 1}); - // config.layerConfig.add_inputs(); - // for (auto useGpu : {false, true}) { - // testLayerGrad(config, "scale_shift", batchSize, false, useGpu, false); - // } -} - -TEST(Layer, ScaleSubRegionLayer) { - const size_t batchSize = 64; - const size_t size = 4096; - TestConfig config; - config.layerConfig.set_type("scale_sub_region"); - config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); - MatrixPtr indicesV = Matrix::create(batchSize, 6, false, false); - auto* data = indicesV->getData(); - for (size_t i = 0; i < batchSize; ++i) { - data[i * 2] = 2; - data[i * 2 + 1] = 4; - data[i * 2 + 2] = 16; - data[i * 2 + 3] = 32; - data[i * 2 + 4] = 16; - data[i * 2 + 5] = 32; - } - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "indices", indicesV, {}}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ScaleSubRegionConfig* scaleSubRegionConf = - input->mutable_scale_sub_region_conf(); - ImageConfig* imgConf = scaleSubRegionConf->mutable_image_conf(); - imgConf->set_img_size(32); - imgConf->set_img_size_y(32); - imgConf->set_channels(4); - scaleSubRegionConf->set_value(2.0); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "scale_sub_region", batchSize, false, useGpu, false); - } -} - -TEST(Layer, L2DistanceLayer) { - TestConfig config; - config.layerConfig.set_type("l2_distance"); - config.layerConfig.set_size(1); - config.biasSize = 0; - - const size_t input_dim = 27; - const size_t batch_size = 11; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", input_dim, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", input_dim, 0}); - 
config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "l2_distance", batch_size, false, useGpu); - } -} - -void testFactorizationMachineLayer(InputType type, bool useGpu) { - const int FACTOR_SIZE = 10; - TestConfig config; - config.layerConfig.set_type("factorization_machine"); - config.layerConfig.set_factor_size(FACTOR_SIZE); - config.layerConfig.set_size(1); - config.biasSize = 0; - config.inputDefs.push_back({type, "layer_0", 128, 1280}); - config.layerConfig.add_inputs(); - testLayerGrad(config, "factorization_machine", 16, false, useGpu, false); -} - -TEST(Layer, FactorizationMachineLayer) { - for (auto useGpu : {false, true}) { - testFactorizationMachineLayer(INPUT_DATA, useGpu); - } - testFactorizationMachineLayer(INPUT_SPARSE_FLOAT_VALUE_DATA, false); -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_LinearChainCRF.cpp b/paddle/legacy/gserver/tests/test_LinearChainCRF.cpp deleted file mode 100644 index 7082c1363a4cdadfd0e4a4497c20ae5c513bc7f1..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_LinearChainCRF.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include "paddle/legacy/gserver/layers/LinearChainCRF.h" -#include "paddle/legacy/utils/Util.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -static inline bool getNextSequence(vector& seq, int numClasses) { - for (auto& v : seq) { - if (++v < numClasses) { - return true; - } - v = 0; - } - return false; -} - -TEST(LinearChainCRF, decoding) { - const int numClasses = 4; - CpuVector para(numClasses * (numClasses + 2)); - real* a = para.getData(); - real* b = para.getData() + numClasses; - real* w = para.getData() + 2 * numClasses; - LinearChainCRF crf(4, para.getData()); - for (int length : {1, 2, 3, 10}) { - for (int tries = 0; tries < 10; ++tries) { - CpuMatrix x(length, numClasses); - x.randomizeUniform(); - para.randnorm(0, 2); - vector decodingResult(length); - vector bestResult(length); - vector testResult(length, 0); - crf.decode(x.getData(), &decodingResult[0], length); - real bestScore = -std::numeric_limits::max(); - do { - real score = a[testResult.front()] + b[testResult.back()]; - score += x.getElement(0, testResult.front()); - for (int k = 1; k < length; ++k) { - score += x.getElement(k, testResult[k]) + - w[numClasses * testResult[k - 1] + testResult[k]]; - } - if (score > bestScore) { - bestScore = score; - bestResult = testResult; - } - } while (getNextSequence(testResult, numClasses)); - for (int k = 0; k < length; ++k) { - EXPECT_EQ(decodingResult[k], bestResult[k]); - } - } - } -} diff --git a/paddle/legacy/gserver/tests/test_MKLDNN.cpp b/paddle/legacy/gserver/tests/test_MKLDNN.cpp deleted file mode 100644 index c79ccd1956c5c68e5c97c2a185230b8ea9c3dea0..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_MKLDNN.cpp +++ /dev/null @@ -1,448 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include "MKLDNNTester.h" -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/activations/MKLDNNActivation.h" -#include "paddle/legacy/math/MathUtils.h" - -using namespace paddle; // NOLINT - -DECLARE_bool(thread_local_rand_use_global_seed); -DECLARE_bool(use_gpu); -DECLARE_bool(use_mkldnn); - -#define RUN_MKLDNN_TEST(DNN_CONFIG, REF_CONFIG, DESC) \ - MKLDNNTester tester; \ - for (auto bs : {DESC.bs, 1}) { \ - tester.run(DNN_CONFIG, REF_CONFIG, bs, DESC.ih, DESC.iw); \ - } - -#define RUN_MKLDNN_TEST_LAYER(DNN_CONFIG, REF_TYPE, DESC) \ - TestConfig ref = DNN_CONFIG; \ - ref.layerConfig.set_type(REF_TYPE); \ - RUN_MKLDNN_TEST(DNN_CONFIG, ref, DESC) - -struct testFcDesc { - int bs; - int ic; - int ih, iw; // oh == ow == 1 - int oc; -}; - -static void getMKLDNNFcConfig(TestConfig& cfg, const testFcDesc& pm) { - cfg.layerConfig.set_type("mkldnn_fc"); - cfg.layerConfig.set_active_type("relu"); - cfg.layerConfig.set_size(pm.oc); - cfg.inputDefs.push_back( - {INPUT_DATA, - "layer_0", - /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), - /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)}); - cfg.layerConfig.add_inputs(); -} - -void testFcLayer(const testFcDesc& pm) { - TestConfig dnnConfig; - getMKLDNNFcConfig(dnnConfig, pm); - for (auto biasSize : {pm.oc, 0}) { - dnnConfig.biasSize = biasSize; - RUN_MKLDNN_TEST_LAYER(dnnConfig, "fc", pm) - } -} - -TEST(MKLDNNLayer, FcLayer) { - /* bs, ic, ih, iw, oc */ - testFcLayer({2, 2, 1, 1, 3}); - testFcLayer({3, 7, 1, 1, 19}); - testFcLayer({8, 16, 13, 13, 32}); - 
testFcLayer({4, 12, 13, 13, 18}); - testFcLayer({2, 64, 16, 16, 32}); - testFcLayer({15, 3, 16, 16, 6}); -} - -struct testConvDesc { - int bs, gp; - int ic, ih, iw; - int oc, oh, ow; - int fh, fw; - int ph, pw; - int sh, sw; - int dh, dw; -}; - -static void getMKLDNNConvConfig(TestConfig& cfg, const testConvDesc& pm) { - cfg.layerConfig.set_type("mkldnn_conv"); - cfg.layerConfig.set_active_type("relu"); - cfg.layerConfig.set_num_filters(pm.oc); - cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow); - cfg.layerConfig.set_shared_biases(true); - cfg.inputDefs.push_back( - {INPUT_DATA, - "layer_0", - /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), - /* size of weight= */ size_t(pm.oc * pm.ic * pm.fh * pm.fw / pm.gp)}); - LayerInputConfig* input = cfg.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_groups(pm.gp); - conv->set_img_size(pm.iw); - conv->set_img_size_y(pm.ih); - conv->set_output_x(pm.ow); - conv->set_output_y(pm.oh); - conv->set_filter_size(pm.fw); - conv->set_filter_size_y(pm.fh); - conv->set_channels(pm.ic); - conv->set_padding(pm.pw); - conv->set_padding_y(pm.ph); - conv->set_stride(pm.sw); - conv->set_stride_y(pm.sh); - conv->set_dilation(pm.dw); - conv->set_dilation_y(pm.dh); - conv->set_caffe_mode(true); - conv->set_filter_channels(conv->channels() / conv->groups()); - CHECK_EQ(conv->filter_channels() * pm.gp, conv->channels()) - << "it is indivisible"; - - int fh = (pm.fh - 1) * pm.dh + 1; - int fw = (pm.fw - 1) * pm.dw + 1; - int ow = outputSize(pm.iw, fw, pm.pw, pm.sw, true); - int oh = outputSize(pm.ih, fh, pm.ph, pm.sh, true); - CHECK_EQ(ow, pm.ow) << "output size check failed"; - CHECK_EQ(oh, pm.oh) << "output size check failed"; -} - -void testConvLayer(const testConvDesc& pm) { - TestConfig dnnConfig; - getMKLDNNConvConfig(dnnConfig, pm); - for (auto biasSize : {pm.oc, 0}) { - dnnConfig.biasSize = biasSize; - RUN_MKLDNN_TEST_LAYER(dnnConfig, "exconv", pm) - } -} - -TEST(MKLDNNLayer, ConvLayer) { - /* 
bs, gp, ic, ih, iw, oc, oh, ow, fh, fw, ph, pw, sh, sw, dh, dw */ - testConvLayer({2, 1, 3, 32, 32, 16, 32, 32, 3, 3, 1, 1, 1, 1, 1, 1}); - testConvLayer({2, 1, 8, 16, 16, 8, 16, 16, 3, 3, 1, 1, 1, 1, 1, 1}); - testConvLayer({3, 1, 16, 32, 32, 3, 32, 32, 3, 3, 1, 1, 1, 1, 1, 1}); - testConvLayer({8, 1, 16, 18, 18, 32, 18, 18, 3, 3, 1, 1, 1, 1, 1, 1}); - testConvLayer({16, 1, 1, 42, 31, 32, 23, 11, 4, 5, 3, 2, 2, 3, 1, 1}); - testConvLayer({2, 1, 8, 16, 16, 8, 8, 8, 3, 3, 1, 1, 2, 2, 1, 1}); - testConvLayer({3, 1, 8, 13, 13, 8, 7, 7, 3, 3, 1, 1, 2, 2, 1, 1}); - // with groups - testConvLayer({2, 2, 4, 5, 5, 8, 5, 5, 3, 3, 1, 1, 1, 1, 1, 1}); - testConvLayer({2, 3, 3, 5, 5, 3, 5, 5, 3, 3, 1, 1, 1, 1, 1, 1}); - testConvLayer({4, 4, 16, 3, 3, 16, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1}); -} - -struct testPoolDesc { - int bs, ic; // input channel and output channel are the same - int ih, iw; - int oh, ow; - int fh, fw; - int ph, pw; - int sh, sw; -}; - -static void getMKLDNNPoolConfig(TestConfig& cfg, const testPoolDesc& pm) { - cfg.layerConfig.set_type("mkldnn_pool"); - cfg.layerConfig.set_active_type("relu"); - cfg.layerConfig.set_size(pm.ic * pm.oh * pm.ow); - cfg.inputDefs.push_back( - {INPUT_DATA, - "layer_0", - /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), - 0}); - LayerInputConfig* input = cfg.layerConfig.add_inputs(); - PoolConfig* pool = input->mutable_pool_conf(); - pool->set_pool_type("avg-projection"); - pool->set_channels(pm.ic); - pool->set_img_size(pm.iw); - pool->set_img_size_y(pm.ih); - pool->set_output_x(pm.ow); - pool->set_output_y(pm.oh); - pool->set_size_x(pm.fw); - pool->set_size_y(pm.fh); - pool->set_padding(pm.pw); - pool->set_padding_y(pm.ph); - pool->set_stride(pm.sw); - pool->set_stride_y(pm.sh); - - int oh = outputSize(pm.ih, pm.fh, pm.ph, pm.sh, false); - int ow = outputSize(pm.iw, pm.fw, pm.pw, pm.sw, false); - CHECK_EQ(ow, pm.ow) << "output size check failed"; - CHECK_EQ(oh, pm.oh) << "output size check failed"; -} - -void 
testPoolLayer(const testPoolDesc& pm) { - TestConfig dnnConfig; - getMKLDNNPoolConfig(dnnConfig, pm); - LayerInputConfig* input = dnnConfig.layerConfig.mutable_inputs(0); - PoolConfig* pool = input->mutable_pool_conf(); - for (auto type : {"max-projection", "avg-projection"}) { - pool->set_pool_type(type); - RUN_MKLDNN_TEST_LAYER(dnnConfig, "pool", pm) - } -} - -TEST(MKLDNNLayer, PoolLayer) { - /* bs, ch, ih, iw, oh, ow, fh, fw, ph, pw, sh, sw */ - testPoolLayer({2, 1, 4, 4, 2, 2, 3, 3, 0, 0, 2, 2}); - testPoolLayer({10, 8, 16, 16, 8, 8, 2, 2, 0, 0, 2, 2}); - testPoolLayer({4, 2, 5, 5, 3, 3, 3, 3, 1, 1, 2, 2}); - testPoolLayer({8, 16, 56, 56, 28, 28, 3, 3, 0, 0, 2, 2}); - testPoolLayer({8, 16, 14, 14, 7, 7, 3, 3, 0, 0, 2, 2}); - testPoolLayer({4, 16, 7, 7, 1, 1, 7, 7, 0, 0, 1, 1}); - testPoolLayer({4, 2, 5, 5, 3, 3, 5, 5, 1, 1, 1, 1}); - testPoolLayer({2, 8, 56, 56, 29, 29, 3, 3, 1, 1, 2, 2}); -} - -struct testBatchNormDesc { - int bs; - int ic; - int ih, iw; -}; - -static void getMKLDNNBatchNormConfig(TestConfig& cfg, - const testBatchNormDesc& pm) { - cfg.layerConfig.set_size(pm.ic * pm.ih * pm.iw); - cfg.layerConfig.set_type("mkldnn_batch_norm"); - cfg.biasSize = pm.ic; - cfg.inputDefs.push_back( - {INPUT_DATA, - "layer_0", - /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), - /* size of weight= */ size_t(pm.ic)}); - cfg.inputDefs.push_back( - {INPUT_DATA, "layer_1_moving_mean", 1, size_t(pm.ic)}); - cfg.inputDefs.back().isStatic = true; - cfg.inputDefs.push_back({INPUT_DATA, "layer_2_moving_var", 1, size_t(pm.ic)}); - cfg.inputDefs.back().isStatic = true; - LayerInputConfig* input = cfg.layerConfig.add_inputs(); - cfg.layerConfig.set_active_type("relu"); - cfg.layerConfig.add_inputs(); - cfg.layerConfig.add_inputs(); - ImageConfig* img_conf = input->mutable_image_conf(); - img_conf->set_channels(pm.ic); - img_conf->set_img_size_y(pm.ih); - img_conf->set_img_size(pm.iw); -} - -void testBatchNormLayer(const testBatchNormDesc& pm) { - TestConfig dnnConfig; 
- getMKLDNNBatchNormConfig(dnnConfig, pm); - TestConfig refConfig = dnnConfig; - refConfig.layerConfig.set_type("batch_norm"); - // for PASS_TRAIN, use_global_stats always should be false, and batchsize != 1 - VLOG(MKLDNN_TESTS) << "check train phase"; - dnnConfig.layerConfig.set_use_global_stats(false); - refConfig.layerConfig.set_use_global_stats(false); - MKLDNNTester tester; - tester.run(dnnConfig, refConfig, pm.bs, pm.ih, pm.iw, PASS_TRAIN); - // for PASS_TEST, check use_global_stats true and false, and batchsize 1 - VLOG(MKLDNN_TESTS) << "check test phase"; - for (auto useGS : {false, true}) { - dnnConfig.layerConfig.set_use_global_stats(useGS); - refConfig.layerConfig.set_use_global_stats(useGS); - MKLDNNTester tester; - for (auto bs : {pm.bs, 1}) { - tester.run(dnnConfig, refConfig, bs, pm.ih, pm.iw, PASS_TEST); - } - } -} - -TEST(MKLDNNLayer, BatchNormLayer) { - testBatchNormLayer({4, 10, 6, 6}); - testBatchNormLayer({16, 32, 16, 16}); - testBatchNormLayer({4, 16, 8, 10}); -} - -struct testLRNDesc { - int bs, ic, ih, iw; - float scale, pow; - int localSize; -}; - -void getMKLDNNLRNConfig(TestConfig& cfg, const testLRNDesc& pm) { - cfg.layerConfig.set_type("mkldnn_lrn"); - cfg.layerConfig.set_active_type("relu"); - size_t layerSize = pm.ic * pm.ih * pm.iw; - cfg.inputDefs.push_back({INPUT_DATA, "layer_0", layerSize, 0}); - LayerInputConfig* input = cfg.layerConfig.add_inputs(); - NormConfig* norm = input->mutable_norm_conf(); - norm->set_channels(pm.ic); - norm->set_size(pm.localSize); - norm->set_scale(pm.scale); - norm->set_pow(pm.pow); - norm->set_blocked(0); - norm->set_img_size(pm.iw); - norm->set_img_size_y(pm.ih); - norm->set_output_x(norm->img_size()); - norm->set_output_y(norm->img_size_y()); - cfg.layerConfig.set_size(layerSize); - cfg.biasSize = 0; -} - -void testLRNLayer(const testLRNDesc& pm) { - TestConfig dnnConfig; - getMKLDNNLRNConfig(dnnConfig, pm); - // mkldnn_lrn <==> norm with cmrnorm-projection type - TestConfig refConfig = dnnConfig; 
- refConfig.layerConfig.set_type("norm"); - LayerInputConfig* input = refConfig.layerConfig.mutable_inputs(0); - NormConfig* norm = input->mutable_norm_conf(); - norm->set_norm_type("cmrnorm-projection"); - norm->set_scale(norm->scale() / norm->size()); - RUN_MKLDNN_TEST(dnnConfig, refConfig, pm) -} - -TEST(MKLDNNLayer, LRNLayer) { - testLRNLayer({4, 10, 12, 12, 0.001f, 0.75f, 5}); - testLRNLayer({2, 32, 6, 6, 0.001f, 0.75f, 5}); - testLRNLayer({4, 16, 8, 10, 0.01f, 0.5f, 5}); -} - -struct testImageDesc { - int bs, ic, ih, iw; -}; - -static void getAddtoConfig(TestConfig& cfg, - const testImageDesc& pm, - const size_t nInputs = 1) { - cfg.biasSize = 0; - cfg.layerConfig.set_type("addto"); - size_t layerSize = pm.ic * pm.ih * pm.iw; - cfg.layerConfig.set_size(layerSize); - cfg.layerConfig.set_active_type("relu"); - for (size_t i = 0; i < nInputs; ++i) { - std::stringstream ss; - ss << "layer_" << i; - cfg.inputDefs.push_back({INPUT_DATA, ss.str(), layerSize, 0}); - LayerInputConfig* input = cfg.layerConfig.add_inputs(); - ImageConfig* img_conf = input->mutable_image_conf(); - img_conf->set_channels(pm.ic); - img_conf->set_img_size_y(pm.ih); - img_conf->set_img_size(pm.iw); - } -} - -void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) { - CHECK_GE(nInputs, 1UL); - TestConfig dnnConfig; - getAddtoConfig(dnnConfig, pm, nInputs); - dnnConfig.layerConfig.set_type("mkldnn_addto"); - for (auto withBias : {false, true}) { - dnnConfig.biasSize = withBias ? 
pm.ic * pm.ih * pm.iw : 0; - RUN_MKLDNN_TEST_LAYER(dnnConfig, "addto", pm) - } -} - -TEST(MKLDNNLayer, AddtoLayer) { - testAddtoLayer({16, 5, 14, 14}, 1); - testAddtoLayer({8, 10, 8, 8}, 2); - testAddtoLayer({4, 12, 1, 1}, 3); -} - -static void getMKLDNNConcatConfig(TestConfig& cfg, - const std::vector& inputs) { - CHECK_GE(inputs.size(), 2UL) << "at least two inputs"; - int oc = inputs[0].ic; - for (size_t i = 1; i < inputs.size(); ++i) { - CHECK_EQ(inputs[i].bs, inputs[0].bs); - CHECK_EQ(inputs[i].ih, inputs[0].ih); - CHECK_EQ(inputs[i].iw, inputs[0].iw); - oc += inputs[i].ic; - } - cfg.biasSize = 0; - cfg.layerConfig.set_type("mkldnn_concat"); - cfg.layerConfig.set_size(oc * inputs[0].ih * inputs[0].iw); - cfg.layerConfig.set_active_type("relu"); - for (size_t i = 0; i < inputs.size(); ++i) { - std::stringstream ss; - ss << "layer_" << i; - cfg.inputDefs.push_back( - {INPUT_DATA, - ss.str(), - (size_t)(inputs[i].ic) * inputs[i].ih * inputs[i].iw, - 0}); - LayerInputConfig* input = cfg.layerConfig.add_inputs(); - ImageConfig* img_conf = input->mutable_image_conf(); - img_conf->set_channels(inputs[i].ic); - img_conf->set_img_size_y(inputs[i].ih); - img_conf->set_img_size(inputs[i].iw); - } -} - -void testConcatLayer(const std::vector& inputs) { - TestConfig dnnConfig; - getMKLDNNConcatConfig(dnnConfig, inputs); - RUN_MKLDNN_TEST_LAYER(dnnConfig, "concat", inputs[0]) -} - -TEST(MKLDNNLayer, ConcatLayer) { - testConcatLayer({{64, 128, 1, 1}, {64, 32, 1, 1}, {64, 64, 1, 1}}); - testConcatLayer({{32, 100, 8, 8}, {32, 10, 8, 8}}); -} - -void testActivation(std::string actType, const testImageDesc& pm) { - // TODO(TJ): remove me when paddle support elu activation - if (actType == "mkldnn_elu") { - return; - } - const std::string compareTypes[] = {actType, actType.erase(0, 7)}; - TestConfig cfg; - getAddtoConfig(cfg, pm); - TestConfig ref = cfg; - cfg.layerConfig.set_active_type(compareTypes[0]); - ref.layerConfig.set_active_type(compareTypes[1]); - RUN_MKLDNN_TEST(cfg, 
ref, pm) -} - -TEST(MKLDNNActivation, Activations) { - auto types = MKLDNNActivation::getAllRegisteredTypes(); - for (auto type : types) { - /* bs, c, h, w*/ - testActivation(type, {16, 64, 32, 32}); - testActivation(type, {2, 8, 1, 1}); - } -} - -DECLARE_string(config_args); -TEST(MKLDNNNet, net) { - std::vector cases = {"simple", "branch"}; - for (auto name : cases) { - std::string config = "./legacy/gserver/tests/mkldnn_" + name + "_net.conf"; - for (auto channels : {2, 32}) { - std::ostringstream oss; - oss << "channels=" << channels; - FLAGS_config_args = oss.str(); - MKLDNNTester::runNetTest(config); - } - } -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - FLAGS_use_gpu = false; - FLAGS_use_mkldnn = true; - initMain(argc, argv); - initPython(argc, argv); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_MaxPoolingWithMaskOutput.cpp b/paddle/legacy/gserver/tests/test_MaxPoolingWithMaskOutput.cpp deleted file mode 100644 index 2bc261b4a87ce7f1f4ce1c936ee4151d75e17f3f..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_MaxPoolingWithMaskOutput.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include - -#include "LayerGradUtil.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; - -void setPoolConfig(TestConfig* config, - PoolConfig* pool, - const string& poolType) { - (*config).biasSize = 0; - (*config).layerConfig.set_type("pool"); - (*config).layerConfig.set_num_filters(1); - - int kw = 3, kh = 3; - int pw = 0, ph = 0; - int sw = 2, sh = 2; - pool->set_pool_type(poolType); - pool->set_channels(1); - pool->set_size_x(kw); - pool->set_size_y(kh); - pool->set_start(0); - pool->set_padding(pw); - pool->set_padding_y(ph); - pool->set_stride(sw); - pool->set_stride_y(sh); - - int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); - int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); - pool->set_output_x(ow); - pool->set_output_y(oh); -} - -void doOneMaxPoolingWithMaskOutputTest(MatrixPtr& inputMat, - const string& poolType, - bool use_gpu, - MatrixPtr& maskMat) { - TestConfig config; - config.inputDefs.push_back({INPUT_DATA, "layer_0", 25, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - PoolConfig* pool = input->mutable_pool_conf(); - - pool->set_img_size(5); - pool->set_img_size_y(5); - setPoolConfig(&config, pool, poolType); - config.layerConfig.set_size(pool->output_x() * pool->output_y() * - pool->channels()); - - config.layerConfig.set_name("MaxPoolWithMask"); - - std::vector dataLayers; - LayerMap layerMap; - vector datas; - - initDataLayer(config, - &dataLayers, - &datas, - &layerMap, - "MaxPoolWithMask", - 1, - false, - use_gpu); - - dataLayers[0]->getOutputValue()->copyFrom(*inputMat); - - FLAGS_use_gpu = use_gpu; - std::vector parameters; - LayerPtr maxPoolingWithMaskOutputLayer; - initTestLayer(config, &layerMap, ¶meters, &maxPoolingWithMaskOutputLayer); - maxPoolingWithMaskOutputLayer->forward(PASS_GC); - - checkMatrixEqual(maxPoolingWithMaskOutputLayer->getOutput("mask").value, - maskMat); -} - 
-TEST(Layer, maxPoolingWithMaskOutputLayerFwd) { - bool useGpu = false; - MatrixPtr inputMat; - MatrixPtr maskMat; - real inputData[] = {0.1, 0.1, 0.5, 0.5, 1.1, 0.2, 0.2, 0.6, 0.1, - 0.1, 0.3, 0.3, 0.7, 0.1, 0.1, 0.4, 0.4, 0.8, - 0.8, 0.1, 1.0, 2.0, 3.0, 0.0, 9.0}; - real maskData[] = {12, 4, 22, 24}; - - inputMat = Matrix::create(1, 25, false, useGpu); - maskMat = Matrix::create(1, 4, false, useGpu); - inputMat->setData(inputData); - maskMat->setData(maskData); - doOneMaxPoolingWithMaskOutputTest( - inputMat, "max-pool-with-mask", useGpu, maskMat); -#ifdef PADDLE_WITH_CUDA - useGpu = true; - inputMat = Matrix::create(1, 25, false, useGpu); - maskMat = Matrix::create(1, 4, false, useGpu); - inputMat->copyFrom(inputData, 25); - maskMat->copyFrom(maskData, 4); - doOneMaxPoolingWithMaskOutputTest( - inputMat, "max-pool-with-mask", useGpu, maskMat); -#endif -} diff --git a/paddle/legacy/gserver/tests/test_MultinomialSampler.cpp b/paddle/legacy/gserver/tests/test_MultinomialSampler.cpp deleted file mode 100644 index 25b1a1191d0100c8ee625d3f5f36d1513164b23b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_MultinomialSampler.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include -#include - -#undef PADDLE_DISABLE_TIMER -#include "paddle/legacy/utils/Stat.h" - -#include "paddle/legacy/gserver/layers/MultinomialSampler.h" -#include "paddle/legacy/utils/Util.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -class MultinomialSamplerTester : public MultinomialSampler { - public: - MultinomialSamplerTester(real* prob, int size) - : MultinomialSampler(prob, size) {} - - template - int testGen(Rand1 rand1) { - return gen1(rand1); - } -}; - -TEST(MultinomialSampler, gen) { - int numGrids = 1024 * 1024; - int size = 1024 * 4; - default_random_engine reng; - - for (size_t iter = 0; iter < 256; ++iter) { - uniform_int_distribution rand(1, numGrids / size * 1.8); - vector prob; - int sum = 0; - for (int i = 0; i < size; ++i) { - prob.push_back(rand(reng)); - sum += prob.back(); - } - - CHECK_LE(sum, numGrids); - prob.back() += numGrids - sum; - - vector counts(size); - MultinomialSamplerTester sampler(&prob[0], size); - counts.assign(size, 0); - { - double s = (double)size / (double)numGrids; - REGISTER_TIMER("MultinomialSampler"); - for (double i = 0; i < numGrids; ++i) { - int ret = sampler.testGen([i, s]() { return s * i; }); - if (ret < 0 || ret >= size) { - EXPECT_GE(ret, 0); - EXPECT_LT(ret, size); - break; - } - ++counts[ret]; - } - } - for (int i = 0; i < size; ++i) { - if (prob[i] != counts[i]) { - EXPECT_EQ(prob[i], counts[i]); - LOG(INFO) << iter; - break; - } - } - } -} - -void benchmarkRandom() { - int n = 1024 * 1024; - - int sum; - double sum1; - - sum = 0; - unsigned int seed = 1; - { - REGISTER_TIMER("crand"); - for (int i = 0; i < n; ++i) { - sum += rand_r(&seed) % 1000; - } - } - LOG(INFO) << "sum=" << sum; - - default_random_engine reng; - uniform_int_distribution rand(1, 1000); - sum = 0; - { - REGISTER_TIMER("stdrand"); - for (int i = 0; i < n; ++i) { - sum += rand(reng); - } - } - LOG(INFO) << "sum=" << sum; - - sum = 0; - { - REGISTER_TIMER("default_random_engine"); - for (int 
i = 0; i < n; ++i) { - sum += reng(); - } - } - LOG(INFO) << "sum=" << sum; - - uniform_real_distribution rand1(0, 1); - sum1 = 0; - { - REGISTER_TIMER("stdrand1"); - for (int i = 0; i < n; ++i) { - sum1 += rand1(reng); - } - } - LOG(INFO) << "sum1=" << sum1; - - sum1 = 0; - { - real a = 1.0f / (real)RAND_MAX; - REGISTER_TIMER("crand1"); - for (int i = 0; i < n; ++i) { - sum1 += a * rand_r(&seed); - } - } - LOG(INFO) << "sum1=" << sum1; -} - -int main(int argc, char** argv) { - initMain(argc, argv); - testing::InitGoogleTest(&argc, argv); - benchmarkRandom(); - int ret = RUN_ALL_TESTS(); - globalStat.printSegTimerStatus(); - return ret; -} diff --git a/paddle/legacy/gserver/tests/test_NetworkCompare.cpp b/paddle/legacy/gserver/tests/test_NetworkCompare.cpp deleted file mode 100644 index c9f9f3e61be11fa33ab37e27065fdf275f86453a..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_NetworkCompare.cpp +++ /dev/null @@ -1,294 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#undef PADDLE_DISABLE_TIMER -#include -#include -#include -#include - -#include "paddle/legacy/trainer/Trainer.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_int32(gpu_id); -DECLARE_double(checkgrad_eps); -DEFINE_bool(use_label, true, "input label or sequence label"); -DEFINE_bool(static_para, false, "static parameter"); - -struct DataIn { - std::vector inArgs; - std::vector outGrads; - std::vector paraValues; -}; - -struct DataOut { - std::vector outValues; - std::vector paraGrads; -}; - -void initArgument(DataIn& data, - const std::string& configPath, - bool useGpu = FLAGS_use_gpu) { - TrainerConfigHelper config(configPath); - size_t batchSize = config.getOptConfig().batch_size(); - - for (const auto& layer_name : config.getModelConfig().input_layer_names()) { - auto layer_config = std::find_if(config.getModelConfig().layers().begin(), - config.getModelConfig().layers().end(), - [=](const LayerConfig& layer_config) { - return layer_config.name() == layer_name; - }); - CHECK(layer_config != config.getModelConfig().layers().end()); - - size_t layerSize = layer_config->size(); - Argument arg; - arg.value = Matrix::create(batchSize, layerSize, false, useGpu); - arg.grad = Matrix::create(batchSize, layerSize, false, useGpu); - arg.value->randomizeUniform(); - arg.value->add(-0.5); - arg.value->sigmoid(*arg.value); - arg.grad->zeroMem(); - if (FLAGS_use_label) { - arg.ids = VectorT::create(batchSize, useGpu); - arg.ids->rand(layerSize); - } - generateSequenceStartPositions(batchSize, arg.sequenceStartPositions); - data.inArgs.push_back(arg); - } - - for (const auto& layer_name : config.getModelConfig().output_layer_names()) { - auto layer_config = std::find_if(config.getModelConfig().layers().begin(), - config.getModelConfig().layers().end(), - [=](const LayerConfig& layer_config) { - return layer_config.name() == layer_name; - }); - CHECK(layer_config 
!= config.getModelConfig().layers().end()); - - size_t layerSize = layer_config->size(); - MatrixPtr grad = Matrix::create(batchSize, layerSize, false, useGpu); - grad->randomizeUniform(); - data.outGrads.push_back(grad); - } - - for (const auto& para_config : config.getModelConfig().parameters()) { - VectorPtr value = Vector::create(para_config.size(), useGpu); - value->randnorm(0, 2); - data.paraValues.push_back(value); - } -} - -void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) { - *ThreadLocalRand::getSeed() = 0; - srand(0); - - Trainer trainer; - auto config = std::make_shared(configPath); - trainer.init(config, false); - - std::vector parameters; - vector outArgs; - - auto gradientMachine = trainer.getGradientMachine(); - parameters = gradientMachine->getParameters(); - if (FLAGS_static_para) { - for (size_t i = 0; i < parameters.size(); i++) { - parameters[i]->getBuf(PARAMETER_VALUE)->one(); - } - } else { - for (size_t i = 0; i < in.paraValues.size(); i++) { - parameters[i]->getBuf(PARAMETER_VALUE)->copyFrom(*in.paraValues[i]); - } - } - gradientMachine->start(); - gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN); - for (size_t i = 0; i < in.outGrads.size(); i++) { - // If the all the layers in the config have no parameters, also - // not set NeedGradient(), the outArgs[i] will be nullptr. 
- outArgs[i].grad->copyFrom(*in.outGrads[i]); - } - gradientMachine->backward(); - for (size_t i = 0; i < in.outGrads.size(); i++) { - MatrixPtr value = Matrix::create(outArgs[i].value->getHeight(), - outArgs[i].value->getWidth(), - false, - false); - value->copyFrom(*outArgs[i].value); - out.outValues.push_back(value); - } - for (size_t i = 0; i < in.paraValues.size(); i++) { - VectorPtr grad = Vector::create( - parameters[i]->getBuf(PARAMETER_GRADIENT)->getSize(), false); - grad->copyFrom(*parameters[i]->getBuf(PARAMETER_GRADIENT)); - out.paraGrads.push_back(grad); - } - - for (int i = 0; i < 20; i++) { - REGISTER_TIMER("forward"); - gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN); - } - for (int i = 0; i < 20; i++) { - REGISTER_TIMER("backward"); - gradientMachine->backward(); - } - - gradientMachine->finish(); -} - -void checkBuffer(real* A, - const char* desA, - real* B, - const char* desB, - size_t len, - size_t width = 1) { - int nNum = 0; - for (size_t i = 0; i < len; ++i) { - real diff = fabs(A[i] - B[i]); - if (diff > 0.0f && - diff / std::max(fabs(A[i]), fabs(B[i])) > FLAGS_checkgrad_eps) { - nNum++; - LOG(INFO) << "Row: " << i / width << ", " << desA << " : " << A[i] - << " " << desB << " : " << B[i]; - } - } - EXPECT_EQ(0, nNum); -} - -void compareGradient(DataOut& outA, DataOut& outB) { - LOG(INFO) << "------------------------------" - << " Check Network Output " - << "------------------------------"; - for (size_t i = 0; i < outA.outValues.size(); ++i) { - LOG(INFO) << "OUTPUT VALUE: " << i; - checkBuffer(outA.outValues[i]->getData(), - "network A output", - outB.outValues[i]->getData(), - "network B output", - outA.outValues[i]->getElementCnt(), - outA.outValues[i]->getWidth()); - } - - if (!FLAGS_static_para) { - LOG(INFO) << "------------------------------" - << " Check Parameters " - << "------------------------------"; - for (size_t i = 0; i < outA.paraGrads.size(); ++i) { - LOG(INFO) << "PARAMETER GRADIENT: " << i; - 
checkBuffer(outA.paraGrads[i]->getData(), - "Network A", - outB.paraGrads[i]->getData(), - "Network B", - outA.paraGrads[i]->getSize()); - } - } -} - -void compareNetwork(const std::string& config_file_a, - const std::string& config_file_b) { - DataIn in; - initArgument(in, config_file_a); - - DataOut dataA; - calcGradient(in, dataA, config_file_a); - LOG(INFO) << "forwardBackward of Network A is finished"; - globalStat.printSegTimerStatus(); - globalStat.reset(); - LOG(INFO) << "\n\n"; - - DataOut dataB; - calcGradient(in, dataB, config_file_b); - LOG(INFO) << "forwardBackward of the Network B is finished"; - globalStat.printSegTimerStatus(); - globalStat.reset(); - LOG(INFO) << "\n\n"; - - compareGradient(dataA, dataB); -} - -TEST(Compare, concat_dotmul) { - std::string config_file_a = "./legacy/gserver/tests/concat_dotmul_a.conf"; - std::string config_file_b = "./legacy/gserver/tests/concat_dotmul_b.conf"; - compareNetwork(config_file_a, config_file_b); -} - -TEST(Compare, concat_fullmatrix) { - std::string config_file_a = "./legacy/gserver/tests/concat_fullmatrix_a.conf"; - std::string config_file_b = "./legacy/gserver/tests/concat_fullmatrix_b.conf"; - compareNetwork(config_file_a, config_file_b); -} - -TEST(Compare, concat_table) { - std::string config_file_a = "./legacy/gserver/tests/concat_table_a.conf"; - std::string config_file_b = "./legacy/gserver/tests/concat_table_b.conf"; - compareNetwork(config_file_a, config_file_b); -} - -TEST(Compare, concat_slice) { - std::string config_file_a = "./legacy/gserver/tests/concat_slice_a.conf"; - std::string config_file_b = "./legacy/gserver/tests/concat_slice_b.conf"; - compareNetwork(config_file_a, config_file_b); -} - -#ifdef PADDLE_WITH_CUDA -TEST(Compare, img_pool) { - std::string config_file_a = "./legacy/gserver/tests/img_pool_a.conf"; - std::string config_file_b = "./legacy/gserver/tests/img_pool_b.conf"; - bool useGpu = FLAGS_use_gpu; - FLAGS_use_gpu = true; - compareNetwork(config_file_a, config_file_b); - 
FLAGS_use_gpu = useGpu; -} - -TEST(Compare, img_conv) { - std::string config_file_a = "./legacy/gserver/tests/img_conv_a.conf"; - std::string config_file_b = "./legacy/gserver/tests/img_conv_b.conf"; - bool useGpu = FLAGS_use_gpu; - FLAGS_use_gpu = true; - compareNetwork(config_file_a, config_file_b); - FLAGS_use_gpu = useGpu; -} - -// Test cudnn_conv and exconv give the same result -TEST(Compare, img_conv2) { - std::string config_file_a = "./legacy/gserver/tests/img_conv_cudnn.py"; - std::string config_file_b = "./legacy/gserver/tests/img_conv_exconv.py"; - bool useGpu = FLAGS_use_gpu; - double eps = FLAGS_checkgrad_eps; - FLAGS_use_gpu = true; - // Sometimes, this unit test will fail with 1e-2 - FLAGS_checkgrad_eps = 4e-2; - compareNetwork(config_file_a, config_file_b); - FLAGS_use_gpu = useGpu; - FLAGS_checkgrad_eps = eps; -} -#endif - -DEFINE_string(config_file_a, "", "config of one network to compare"); -DEFINE_string(config_file_b, "", "config of another network to compare"); -TEST(Compare, network) { - if (FLAGS_config_file_a != "" && FLAGS_config_file_b != "") { - compareNetwork(FLAGS_config_file_a, FLAGS_config_file_b); - } -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - paddle::initMain(argc, argv); - initPython(argc, argv); - int ret = RUN_ALL_TESTS(); - return ret; -} diff --git a/paddle/legacy/gserver/tests/test_PriorBox.cpp b/paddle/legacy/gserver/tests/test_PriorBox.cpp deleted file mode 100644 index 10d512ec45f8b7afaf21eaac98cfc13e84c85efc..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_PriorBox.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -// Do one forward pass of priorBox layer and check to see if its output -// matches the given result -void doOnePriorBoxTest(size_t feature_map_width, - size_t feature_map_height, - size_t image_width, - size_t image_height, - vector min_size, - vector max_size, - vector aspect_ratio, - vector variance, - bool use_gpu, - MatrixPtr& result) { - // Setting up the priorbox layer - TestConfig configt; - configt.layerConfig.set_type("priorbox"); - - configt.inputDefs.push_back({INPUT_DATA, "featureMap", 1, 0}); - LayerInputConfig* input = configt.layerConfig.add_inputs(); - configt.inputDefs.push_back({INPUT_DATA, "image", 1, 0}); - configt.layerConfig.add_inputs(); - PriorBoxConfig* pb = input->mutable_priorbox_conf(); - for (size_t i = 0; i < min_size.size(); i++) pb->add_min_size(min_size[i]); - for (size_t i = 0; i < max_size.size(); i++) pb->add_max_size(max_size[i]); - for (size_t i = 0; i < variance.size(); i++) pb->add_variance(variance[i]); - for (size_t i = 0; i < aspect_ratio.size(); i++) - pb->add_aspect_ratio(aspect_ratio[i]); - - // data layer initialize - std::vector dataLayers; - LayerMap layerMap; - vector datas; - initDataLayer( - configt, &dataLayers, &datas, &layerMap, "priorbox", 1, false, use_gpu); - dataLayers[0]->getOutput().setFrameHeight(feature_map_height); - dataLayers[0]->getOutput().setFrameWidth(feature_map_width); - dataLayers[1]->getOutput().setFrameHeight(image_height); - 
dataLayers[1]->getOutput().setFrameWidth(image_width); - - // test layer initialize - std::vector parameters; - LayerPtr priorboxLayer; - initTestLayer(configt, &layerMap, ¶meters, &priorboxLayer); - priorboxLayer->forward(PASS_GC); - checkMatrixEqual(priorboxLayer->getOutputValue(), result); -} - -TEST(Layer, priorBoxLayerFwd) { - vector minSize; - vector maxSize; - vector aspectRatio; - vector variance; - bool useGpu = false; - - minSize.push_back(276); - maxSize.push_back(330); - variance.push_back(0.1); - variance.push_back(0.1); - variance.push_back(0.2); - variance.push_back(0.2); - - // CPU case 1. - MatrixPtr result; - real resultData[] = {0.04, - 0.04, - 0.96, - 0.96, - 0.1, - 0.1, - 0.2, - 0.2, - 0, - 0, - 1, - 1, - 0.1, - 0.1, - 0.2, - 0.2}; - result = Matrix::create(1, 2 * 8, false, useGpu); - result->setData(resultData); - doOnePriorBoxTest(/* feature_map_width */ 1, - /* feature_map_height */ 1, - /* image_width */ 300, - /* image_height */ 300, - minSize, - maxSize, - aspectRatio, - variance, - useGpu, - result); - // CPU case 2. - variance[1] = 0.2; - variance[3] = 0.1; - maxSize.pop_back(); - real resultData2[] = {0, 0, 0.595, 0.595, 0.1, 0.2, 0.2, 0.1, - 0.405, 0, 1, 0.595, 0.1, 0.2, 0.2, 0.1, - 0, 0.405, 0.595, 1, 0.1, 0.2, 0.2, 0.1, - 0.405, 0.405, 1, 1, 0.1, 0.2, 0.2, 0.1}; - Matrix::resizeOrCreate(result, 1, 4 * 8, false, useGpu); - result->setData(resultData2); - doOnePriorBoxTest(/* feature_map_width */ 2, - /* feature_map_height */ 2, - /* image_width */ 400, - /* image_height */ 400, - minSize, - maxSize, - aspectRatio, - variance, - useGpu, - result); - // CPU case 3. 
- aspectRatio.push_back(2); - real resultData3[] = {0.04, 0.04, 0.96, 0.96, 0.1, 0.2, - 0.2, 0.1, 0, 0.17473088, 1, 0.825269, - 0.1, 0.2, 0.2, 0.1, 0.17473088, 0, - 0.825269, 1, 0.1, 0.2, 0.2, 0.1}; - Matrix::resizeOrCreate(result, 1, 3 * 8, false, useGpu); - result->setData(resultData3); - doOnePriorBoxTest(/* feature_map_width */ 1, - /* feature_map_height */ 1, - /* image_width */ 300, - /* image_height */ 300, - minSize, - maxSize, - aspectRatio, - variance, - useGpu, - result); - -#ifdef PADDLE_WITH_CUDA - // reset the input parameters - variance[1] = 0.1; - variance[3] = 0.2; - maxSize.push_back(330); - aspectRatio.pop_back(); - MatrixPtr resultGpu; - useGpu = true; - // GPU case 1. - resultGpu = Matrix::create(1, 2 * 8, false, useGpu); - resultGpu->copyFrom(resultData, 2 * 8); - doOnePriorBoxTest(/* feature_map_width */ 1, - /* feature_map_height */ 1, - /* image_width */ 300, - /* image_height */ 300, - minSize, - maxSize, - aspectRatio, - variance, - useGpu, - resultGpu); - // GPU case 2. - variance[1] = 0.2; - variance[3] = 0.1; - maxSize.pop_back(); - Matrix::resizeOrCreate(resultGpu, 1, 4 * 8, false, useGpu); - resultGpu->copyFrom(resultData2, 4 * 8); - doOnePriorBoxTest(/* feature_map_width */ 2, - /* feature_map_height */ 2, - /* image_width */ 400, - /* image_height */ 400, - minSize, - maxSize, - aspectRatio, - variance, - useGpu, - resultGpu); - // GPU case 3. 
- aspectRatio.push_back(2); - Matrix::resizeOrCreate(resultGpu, 1, 3 * 8, false, useGpu); - resultGpu->copyFrom(resultData3, 3 * 8); - doOnePriorBoxTest(/* feature_map_width */ 1, - /* feature_map_height */ 1, - /* image_width */ 300, - /* image_height */ 300, - minSize, - maxSize, - aspectRatio, - variance, - useGpu, - resultGpu); -#endif -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_PyDataProvider.cpp b/paddle/legacy/gserver/tests/test_PyDataProvider.cpp deleted file mode 100644 index 0209e6818a8340fe128146909b9e8ec610e310a3..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_PyDataProvider.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include - -#include - -#include "paddle/legacy/gserver/dataproviders/PyDataProvider.h" -#include "paddle/legacy/utils/Util.h" - -#include "paddle/testing/TestUtil.h" - -using namespace std; // NOLINT -using namespace paddle; // NOLINT - -void simpleValueCheck(const vector& argumentList, bool useGpu); -void simpleSequenceCheck(const vector& argumentList, int sample_num); - -TEST(PyDataProvider, py_fill_slots) { - DataConfig config; - config.set_type("py"); - config.set_async_load_data(false); - config.set_load_data_module(std::string("pyDataProvider")); - config.set_load_data_object(std::string("SimpleDataProvider")); - config.clear_files(); - std::string dataFile = - "legacy/gserver/tests/pyDataProvider/pyDataProviderList"; - config.set_files(dataFile); -#ifndef PADDLE_WITH_CUDA - bool useGpu = false; -#else - bool useGpu = true; -#endif - unique_ptr dataProvider(DataProvider::create(config, useGpu)); - DataBatch dataBatch; - dataProvider->getNextBatchInternal(2, &dataBatch); - const std::vector& argumentList = dataBatch.getStreams(); - // Check size - EXPECT_EQ(argumentList.size(), 3UL); - EXPECT_EQ(argumentList[0].value->getWidth(), 3UL); - EXPECT_EQ(argumentList[0].value->getHeight(), 2UL); - EXPECT_EQ(argumentList[0].value->getElementCnt(), 6UL); - EXPECT_EQ(argumentList[1].value->getWidth(), 7UL); - EXPECT_EQ(argumentList[1].value->getHeight(), 2UL); - EXPECT_EQ(argumentList[1].value->getElementCnt(), 4UL); - EXPECT_EQ(argumentList[2].ids->getSize(), 2UL); - // Check value - simpleValueCheck(argumentList, useGpu); - // Check sequenceStartPositions - simpleSequenceCheck(argumentList, 2); -} - -TEST(PyDataProvider, py_fill_nest_slots) { - DataConfig config; - config.set_type("py"); - config.set_async_load_data(false); - config.set_load_data_module(std::string("pyDataProvider")); - config.set_load_data_object(std::string("SimpleNestDataProvider")); - config.clear_files(); - std::string dataFile = - 
"legacy/gserver/tests/pyDataProvider/pyDataProviderList"; - config.set_files(dataFile); - EXPECT_EQ(config.IsInitialized(), true); -#ifndef PADDLE_WITH_CUDA - bool useGpu = false; -#else - bool useGpu = true; -#endif - unique_ptr dataProvider(DataProvider::create(config, useGpu)); - DataBatch dataBatch; - dataProvider->getNextBatchInternal(2, &dataBatch); - const std::vector& argumentList = dataBatch.getStreams(); - // Check size - EXPECT_EQ(argumentList.size(), 3UL); - EXPECT_EQ(argumentList[0].value->getWidth(), 3UL); - EXPECT_EQ(argumentList[0].value->getHeight(), 4UL); - EXPECT_EQ(argumentList[0].value->getElementCnt(), 12UL); - EXPECT_EQ(argumentList[1].value->getWidth(), 7UL); - EXPECT_EQ(argumentList[1].value->getHeight(), 4UL); - EXPECT_EQ(argumentList[1].value->getElementCnt(), 8UL); - EXPECT_EQ(argumentList[2].ids->getSize(), 4UL); - // Check value - simpleValueCheck(argumentList, useGpu); - // Check sequenceStartPositions - simpleSequenceCheck(argumentList, 4); - // Check subSequenceStartPositions - EXPECT_EQ(argumentList[0].subSequenceStartPositions->getSize(), 4UL); - EXPECT_EQ(argumentList[1].subSequenceStartPositions->getSize(), 3UL); - EXPECT_EQ(argumentList[2].subSequenceStartPositions->getSize(), 4UL); - for (size_t i = 0; i < argumentList.size(); i++) { - EXPECT_EQ(argumentList[i].subSequenceStartPositions->getElement(0), 0); - EXPECT_EQ(argumentList[i].subSequenceStartPositions->getElement(1), 1); - if (i != 1) { - EXPECT_EQ(argumentList[i].subSequenceStartPositions->getElement(2), 2); - EXPECT_EQ(argumentList[i].subSequenceStartPositions->getElement(3), 4); - } else { - EXPECT_EQ(argumentList[i].subSequenceStartPositions->getElement(2), 4); - } - } -} - -void simpleValueCheck(const vector& argumentList, bool useGpu) { - // Dense - real* data; - if (useGpu) { - MatrixPtr cpuMatrixPtr = Matrix::create(argumentList[0].value->getHeight(), - argumentList[0].value->getWidth(), - 0, - 0); - cpuMatrixPtr->copyFrom(*argumentList[0].value); - data = 
cpuMatrixPtr->getData(); - } else { - data = argumentList[0].value->getData(); - } - for (size_t i = 0; i < argumentList[0].value->getElementCnt(); ++i) { - EXPECT_EQ(*(data + i), (float)(i % 3 + 1)); - } - // Sparse without value - GpuSparseMatrixPtr matGpu; - CpuSparseMatrixPtr matCpu; - if (useGpu) { - matGpu = dynamic_pointer_cast(argumentList[1].value); - ASSERT_TRUE(matGpu != NULL); - } else { - data = argumentList[0].value->getData(); - matCpu = dynamic_pointer_cast(argumentList[1].value); - ASSERT_TRUE(matCpu != NULL); - } - for (size_t i = 0; i < argumentList[1].value->getHeight(); ++i) { - size_t colNum = useGpu ? matGpu->getColNum(i) : matCpu->getColNum(i); - EXPECT_EQ(colNum, (size_t)2); - const int* buf = useGpu ? matGpu->getRowCols(i) : matCpu->getRowCols(i); - for (size_t j = 0; j < colNum; ++j) { - EXPECT_EQ((size_t)buf[j], (size_t)(j + 1)); - } - } - // Index - for (size_t j = 0; j < argumentList[2].ids->getSize(); ++j) { - EXPECT_EQ((size_t)argumentList[2].ids->get(j), 0UL); - } -} - -void simpleSequenceCheck(const vector& argumentList, int sample_num) { - EXPECT_EQ(argumentList[0].sequenceStartPositions->getSize(), 3UL); - EXPECT_EQ(argumentList[1].sequenceStartPositions->getSize(), 2UL); - EXPECT_EQ(argumentList[2].sequenceStartPositions->getSize(), 3UL); - for (size_t i = 0; i < argumentList.size(); i++) { - EXPECT_EQ(argumentList[i].sequenceStartPositions->getElement(0), 0); - if (i != 1) { - EXPECT_EQ(argumentList[i].sequenceStartPositions->getElement(1), 1); - EXPECT_EQ(argumentList[i].sequenceStartPositions->getElement(2), - sample_num); - } else { - EXPECT_EQ(argumentList[i].sequenceStartPositions->getElement(1), - sample_num); - } - } -} - -int main(int argc, char** argv) { - initMain(argc, argv); - initPython(argc, argv); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_PyDataProvider2.cpp b/paddle/legacy/gserver/tests/test_PyDataProvider2.cpp deleted file mode 100644 
index de313ba82cf2697c13d6eae17056240b6272ca1c..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_PyDataProvider2.cpp +++ /dev/null @@ -1,409 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef PADDLE_NO_PYTHON -#include -#include -#include "paddle/legacy/gserver/dataproviders/DataProvider.h" -#include "paddle/legacy/utils/PythonUtil.h" -#include "paddle/legacy/utils/Util.h" - -DEFINE_string(train_list, "unittest.list", "file list for unittest"); - -namespace paddle { -namespace unittest { -namespace pydp2 { -extern void setOnPoolFilledHook(const std::function &func); -extern void clearOnPoolFilledHook(); - -} // namespace pydp2 -} // namespace unittest -} // namespace paddle - -const paddle::real epsilon = 1e-5; - -static inline int64_t readDataBatch(paddle::DataBatch *batch, - const std::string &funcName, - int64_t batchSize = 65535) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object(funcName); - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - provider->setSkipShuffle(); - provider->reset(); - return provider->getNextBatchInternal(batchSize, batch); -} - -TEST(PyDataProvider2, dense_no_seq) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - 
config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_dense_no_seq"); - - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - - provider->setSkipShuffle(); // skip shuffle for unittest. - - paddle::DataBatch batch; - for (size_t pass = 0; pass < 2; ++pass) { // read 2 passes - provider->reset(); - int64_t num = provider->getNextBatchInternal(100, &batch); - ASSERT_NE(num, 0); - ASSERT_EQ((size_t)batch.getStreams().size(), (size_t)1); - ASSERT_EQ((size_t)batch.getSize(), (size_t)100); - // Check batch data. - for (size_t i = 0; i < 100; ++i) { - for (size_t j = 0; j < 200; ++j) { - paddle::real tmp = (paddle::real)((j - 100.0) * (i + 1) / 200.0); - ASSERT_NEAR( - batch.getStreams()[0].value->getData()[i * 200 + j], tmp, epsilon); - } - } - - num = provider->getNextBatchInternal(100, &batch); - ASSERT_NE(num, 0); - ASSERT_EQ(batch.getStreams().size(), (size_t)1); - ASSERT_EQ((size_t)batch.getSize(), (size_t)100); - // Check batch data. - for (size_t i = 0; i < 100; ++i) { - size_t ii = i + 100; - for (size_t j = 0; j < 200; ++j) { - paddle::real tmp = (paddle::real)((j - 100.0) * (ii + 1) / 200.0); - ASSERT_NEAR( - batch.getStreams()[0].value->getData()[i * 200 + j], tmp, epsilon); - } - } - num = provider->getNextBatchInternal(100, &batch); - ASSERT_EQ(num, 0); - } -} - -TEST(PyDataProvider2, index_no_seq) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_index_no_seq"); - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - - provider->setSkipShuffle(); // skip shuffle for unittest. 
- paddle::DataBatch batch; - for (size_t pass = 0; pass < 2; ++pass) { - provider->reset(); - int64_t num = provider->getNextBatchInternal(10000, &batch); - CHECK_EQ(num, 200); - for (int i = 0; i < 200; ++i) { - CHECK_EQ(i, batch.getStreams()[0].ids->getData()[i]); - } - } -} - -TEST(PyDataProvider2, init_hook) { - paddle::PyObjectPtr pickle = paddle::py::import("pickle"); - paddle::PyObjectPtr globals(PyModule_GetDict(PyImport_AddModule("__main__"))); - PyDict_SetItemString(globals.get(), "pickle", pickle.get()); - paddle::PyObjectPtr locals(PyDict_New()); - paddle::PyObjectPtr mdl(PyRun_String( - "dumps = pickle.dumps({'value':[float(x) for x in xrange(20)]})", - Py_file_input, - globals.get(), - locals.get())); - CHECK_PY(mdl) << "Error!"; - paddle::PyObjectPtr dps(PyDict_GetItemString(locals.get(), "dumps")); - CHECK_PY(dps) << "Error!"; - - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_init_hook"); - config.set_load_data_args(PyString_AsString(dps.get())); - - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - provider->setSkipShuffle(); // skip shuffle for unittest. 
- provider->reset(); - paddle::DataBatch batch; - int64_t num = provider->getNextBatchInternal(100000, &batch); - ASSERT_EQ(num, 200); - auto &mat = batch.getStreams()[0].value; - ASSERT_EQ((size_t)mat->getWidth(), (size_t)20); - for (size_t i = 0; i < 200; ++i) { - for (size_t j = 0; j < 20; ++j) { - ASSERT_NEAR((paddle::real)j, mat->getData()[i * 20 + j], epsilon); - } - } -} - -TEST(PyDataProvider2, sparse_no_value_no_seq) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_sparse_non_value_no_seq"); - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - provider->setSkipShuffle(); - provider->reset(); - paddle::DataBatch batch; - int64_t num = provider->getNextBatchInternal(10000, &batch); - CHECK_EQ(num, 200); - auto csm = std::dynamic_pointer_cast( - batch.getStreams()[0].value); - CHECK(csm != nullptr); - for (int i = 0; i < 200; ++i) { - CHECK_EQ(csm->getColNum(i), (size_t)10); - int *cols = csm->getRowCols(i); - for (int j = 0; j < 10; ++j) { - CHECK_EQ(cols[j], (i + 1) * (j + 1)); - } - } -} - -TEST(PyDataProvider2, sparse_value_no_seq) { - paddle::DataBatch batch; - CHECK_EQ(readDataBatch(&batch, "test_sparse_value_no_seq"), 200); - auto csm = std::dynamic_pointer_cast( - batch.getStreams()[0].value); - CHECK(csm != nullptr); - for (int i = 0; i < 200; ++i) { - CHECK_EQ(csm->getColNum(i), (size_t)10); - int *cols = csm->getRowCols(i); - real *dat = csm->getRowValues(i); - for (int j = 0; j < 10; ++j) { - EXPECT_EQ(cols[j], (i + 1) * (j + 1)); - EXPECT_EQ(dat[j], real(j) / real(i + 1)); - } - } -} - -TEST(PyDataProvider2, index_seq) { - paddle::DataBatch batch; - CHECK_EQ(readDataBatch(&batch, "test_index_seq"), 200); - auto &arg = batch.getStreams()[0]; - CHECK_EQ((int)arg.ids->getSize(), (200 + 1) * 200 / 2); - size_t tmp = 0; - for (size_t i = 0; i < 200; ++i) { // CHECK DATA CORRECT - for 
(size_t j = 0; j < i + 1; ++j) { - ASSERT_EQ((size_t)arg.ids->getData()[tmp], j); - ++tmp; - } - } - ASSERT_EQ(arg.sequenceStartPositions->getSize(), (size_t)201); - tmp = 0; - for (size_t i = 0; i < 200; ++i) { - tmp += i; - ASSERT_EQ((size_t)arg.sequenceStartPositions->getData(false)[i], tmp); - } - tmp += 200; - ASSERT_EQ((size_t)arg.sequenceStartPositions->getData(false)[200], tmp); -} - -TEST(PyDataProvider2, index_sub_seq) { - paddle::DataBatch batch; - ASSERT_EQ(readDataBatch(&batch, "test_index_sub_seq"), 200); - auto &arg = batch.getStreams()[0]; - size_t tmp = 0; - for (size_t i = 0; i < 200; ++i) { - for (size_t j = 0; j < i + 1; ++j) { - for (size_t k = 0; k < j + 1; ++k) { - CHECK_EQ((size_t)arg.ids->getData()[tmp++], k); - } - } - } - - CHECK_EQ(tmp, arg.ids->getSize()); - - ASSERT_EQ((size_t)arg.sequenceStartPositions->getSize(), (size_t)201); - ASSERT_EQ(arg.subSequenceStartPositions->getData(false)[0], 0); - ASSERT_EQ(arg.sequenceStartPositions->getData(false)[0], 0); - size_t idx = 1; - tmp = 0; - for (size_t i = 0; i < 200; ++i) { - for (size_t j = 0; j < i + 1; ++j) { - tmp += j + 1; - ASSERT_EQ((size_t)arg.subSequenceStartPositions->getData(false)[idx], - (size_t)tmp); - ++idx; - } - ASSERT_EQ((size_t)arg.sequenceStartPositions->getData(false)[i + 1], tmp); - } -} - -TEST(PyDataProvider2, min_pool_size) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_min_pool_size"); - config.set_load_data_args(""); - size_t totalData = 1 << 14; - constexpr size_t batchSize = 100; - constexpr size_t minPoolSize = 1000; - paddle::DataBatch batch; - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - provider->reset(); - - paddle::unittest::pydp2::setOnPoolFilledHook([&](size_t poolSize) { - if (totalData > batchSize) { - CHECK_GE(poolSize, std::min(totalData - batchSize, minPoolSize)); - } - }); 
- while (true) { - int64_t realBatchSize = provider->getNextBatchInternal(batchSize, &batch); - if (realBatchSize) { - totalData -= realBatchSize; - } else { - break; - } - } - paddle::unittest::pydp2::clearOnPoolFilledHook(); -} - -TEST(PyDataProvider2, can_over_batch_size) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_can_over_batch_size"); - config.set_load_data_args(""); - paddle::DataBatch batch; - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - provider->reset(); - constexpr size_t batchSize = 100; - while (true) { - int64_t realBatchSize = provider->getNextBatchInternal(batchSize, &batch); - if (realBatchSize) { - CHECK_LE(static_cast(realBatchSize), batchSize); - } else { - break; - } - } -} - -TEST(PyDataProvider2, input_order) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_input_order"); - config.set_load_data_args(""); - - paddle::ModelConfig modelConfig; - *modelConfig.add_input_layer_names() = "input1"; - *modelConfig.add_input_layer_names() = "input2"; - paddle::DataBatch batch; - std::unique_ptr provider( - paddle::DataProvider::create(config, modelConfig, false)); - provider->reset(); - constexpr size_t batchSize = 100; - while (true) { - int64_t realBatchSize = provider->getNextBatchInternal(batchSize, &batch); - if (!realBatchSize) { - break; - } - ASSERT_EQ(batch.getStreams().size(), static_cast(2)); - for (int64_t i = 0; i < realBatchSize; ++i) { - ASSERT_EQ(batch.getStream(0).ids->getData()[i], 0); - ASSERT_EQ(batch.getStream(1).ids->getData()[i], 1); - } - } -} - -TEST(PyDataProvider2, test_check) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - 
config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_check"); - config.set_load_data_args(""); - paddle::DataBatch batch; - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - provider->reset(); - while (true) { - int64_t realBatchSize = provider->getNextBatchInternal(100, &batch); - if (!realBatchSize) { - break; - } else { - auto &ivec = batch.getStream(0).ids; - for (size_t i = 0; i < ivec->getSize(); ++i) { - CHECK_LT(ivec->getData()[i], 10); - } - } - } -} - -TEST(PyDataProvider2, multiThread) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_dense_no_seq"); - config.set_async_load_data(true); - - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - provider->reset(); - paddle::DataBatch batch; - provider->getNextBatch(100, &batch); - provider->reset(); - provider.reset(); -} - -TEST(PyDataProvider2, minPoolSizeWithCache) { - paddle::DataConfig config; - config.set_type("py2"); - config.set_files(FLAGS_train_list.c_str()); - config.set_load_data_module("test_PyDataProvider2"); - config.set_load_data_object("test_min_pool_size_with_cache"); - config.set_async_load_data(true); - - std::unique_ptr provider( - paddle::DataProvider::create(config, false)); - - paddle::DataBatch batch; - - for (int i = 0; i < 10; ++i) { - provider->reset(); - int64_t sum = 0; - while (int64_t actualNum = provider->getNextBatch(100, &batch)) { - sum += actualNum; - } - ASSERT_EQ(1 << 20, sum); - } -} - -int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - paddle::initMain(argc, argv); - paddle::initPython(argc, argv); - - std::ofstream fout(FLAGS_train_list); - CHECK(fout.is_open()); - fout << "stub file name" << std::endl; // in unittest, filename is not used. 
- fout.close(); - - return RUN_ALL_TESTS(); -} - -#endif diff --git a/paddle/legacy/gserver/tests/test_PyDataProvider2.py b/paddle/legacy/gserver/tests/test_PyDataProvider2.py deleted file mode 100644 index 461d80b9e681cabc20b2c44fdf7afa8dc9c9bf5b..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_PyDataProvider2.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import random - -from paddle.trainer.PyDataProvider2 import * - - -@provider(slots=[dense_vector(200, seq_type=SequenceType.NO_SEQUENCE)]) -def test_dense_no_seq(setting, filename): - for i in xrange(200): - yield [(float(j - 100) * float(i + 1)) / 200.0 for j in xrange(200)] - - -@provider(input_types=[integer_value(200, seq_type=SequenceType.NO_SEQUENCE)]) -def test_index_no_seq(setting, filename): - for i in xrange(200): - yield i - - -def test_init_hooker(setting, value, **kwargs): - setting.value = value - - -@provider( - input_types=[dense_vector( - 20, seq_type=SequenceType.NO_SEQUENCE)], - init_hook=test_init_hooker) -def test_init_hook(setting, filename): - for i in xrange(200): - yield setting.value - - -@provider(input_types=[ - sparse_binary_vector( - 30000, seq_type=SequenceType.NO_SEQUENCE) -]) -def test_sparse_non_value_no_seq(setting, filename): - for i in xrange(200): - yield [(i + 1) * (j + 1) for j in xrange(10)] - - -@provider(input_types=[ - sparse_float_vector( - 30000, 
seq_type=SequenceType.NO_SEQUENCE) -]) -def test_sparse_value_no_seq(setting, filename): - for i in xrange(200): - yield [((i + 1) * (j + 1), float(j) / float(i + 1)) for j in xrange(10)] - - -@provider(input_types=[integer_value(200, seq_type=SequenceType.SEQUENCE)]) -def test_index_seq(setting, filename): - for i in xrange(200): - yield range(i + 1) - - -@provider(input_types=[index_slot(200, seq_type=SequenceType.SUB_SEQUENCE)]) -def test_index_sub_seq(setting, filename): - def gen_sub_seq(l): - l += 1 - for j in xrange(l): - yield range(j + 1) - - for i in xrange(200): - yield list(gen_sub_seq(i)) - - -@provider(input_types=[index_slot(100)], min_pool_size=1000) -def test_min_pool_size(setting, filename): - for _ in xrange(1 << 14): - yield random.randint(0, 100 - 1) - - -@provider( - input_types=[index_slot( - 100, seq_type=SequenceType.SEQUENCE)], - can_over_batch_size=False, - calc_batch_size=lambda x: len(x[0])) -def test_can_over_batch_size(setting, filename): - for _ in xrange(1 << 10): - seq_len = random.randint(0, 99) - yield [random.randint(0, 100 - 1) for _ in xrange(seq_len)] - - -@provider(input_types={'input1': index_slot(10), 'input2': index_slot(10)}) -def test_input_order(setting, filename): - for _ in xrange(1000): - yield {'input1': 0, 'input2': 1} - - -@provider( - input_types=[index_slot(10)], - check=True, - check_fail_continue=True, - should_shuffle="123") # also test should shuffle -def test_check(settings, filename): - yield_good_value = False - - while not yield_good_value: - for _ in xrange(10000): - i = random.randint(0, 100) - if i < 10: - yield_good_value = True - yield i - - -@provider( - input_types=[index_slot(10)], - min_pool_size=1000, - cache=CacheType.CACHE_PASS_IN_MEM, ) -def test_min_pool_size_with_cache(settings, filename): - import random - for _ in xrange(2**20): - yield random.randint(0, 9) diff --git a/paddle/legacy/gserver/tests/test_RecurrentGradientMachine.cpp 
b/paddle/legacy/gserver/tests/test_RecurrentGradientMachine.cpp deleted file mode 100644 index 153c3e7f36a30a70d0c5870144a0091b1e5f7237..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_RecurrentGradientMachine.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include -#include -#include -#include - -DECLARE_int32(seed); - -using namespace paddle; // NOLINT -using namespace std; // NOLINT -class TrainerForTest : public paddle::Trainer { - public: - void startTrain() { - GradientMachine& gm = *this->trainerInternal_.getGradientMachine(); - gm.start(); - } - - void finishTrain() { - GradientMachine& gm = *this->trainerInternal_.getGradientMachine(); - gm.finish(); - } - - /** - * Get total dimension of all parameters. 
- * - * @return the total dimension of all parameters - */ - size_t getTotalParameterSize() const { - auto p = const_cast(this); - auto& params = p->getGradientMachine()->getParameters(); - return std::accumulate( - params.begin(), params.end(), 0UL, [](size_t a, const ParameterPtr& p) { - return a + p->getSize(); - }); - } -}; - -void CalCost(const string& conf, - const string& dir, - real* cost, - int num_passes) { - auto config = std::make_shared(conf); - TrainerForTest trainer; - trainer.init(config); - mkDir(dir.c_str()); - config->setSaveDir(dir); - auto dataProvider = trainer.getDataProvider(); - int32_t batchSize = config->getOptConfig().batch_size(); - real learningRate = config->getOptConfig().learning_rate(); - real momentum = 0; - real decayRate = 0; - int64_t dim = trainer.getTotalParameterSize(); - CpuVector vecW(dim); - CpuVector vecGradient(dim); - CpuVector vecMomentum(dim); - - // vecW needs to be assigned, otherwise the variable is an uncertain value. - - *ThreadLocalRand::getSeed() = FLAGS_seed; - vecW.randnorm(0, 0.1); - vecMomentum.randnorm(0, 0.1); - - trainer.startTrain(); - for (int i = 0; i < num_passes; ++i) { - real totalCost = 0; - dataProvider->reset(); - while (true) { - DataBatch dataBatch; - int num = dataProvider->getNextBatch(batchSize, &dataBatch); - if (num == 0) break; - totalCost += trainer.calcGradient(dataBatch, vecW, vecGradient); - sgdUpdate( - learningRate, momentum, decayRate, &vecW, &vecGradient, &vecMomentum); - } - cost[i] = totalCost; - } - trainer.finishTrain(); - rmDir(dir.c_str()); -} - -void test(const string& conf1, const string& conf2, double eps, bool useGpu) { - if (!paddle::version::isWithGpu() && useGpu) { - return; - } - FLAGS_use_gpu = useGpu; - int num_passes = 5; - real* cost1 = new real[num_passes]; - const string dir1 = "legacy/gserver/tests/t1"; - CalCost(conf1, dir1, cost1, num_passes); - - real* cost2 = new real[num_passes]; - const string dir2 = "legacy/gserver/tests/t2"; - CalCost(conf2, dir2, 
cost2, num_passes); - - for (int i = 0; i < num_passes; i++) { - LOG(INFO) << "num_passes: " << i << ", cost1=" << cost1[i] - << ", cost2=" << cost2[i] - << ", diff=" << std::abs(cost1[i] - cost2[i]); - ASSERT_NEAR(cost1[i], cost2[i], eps); - } - delete[] cost1; - delete[] cost2; -} - -TEST(RecurrentGradientMachine, HasSubSequence) { - for (bool useGpu : {false, true}) { - test("legacy/gserver/tests/sequence_layer_group.conf", - "legacy/gserver/tests/sequence_nest_layer_group.conf", - 1e-5, - useGpu); - } -} - -TEST(RecurrentGradientMachine, rnn) { - for (bool useGpu : {false, true}) { - test("legacy/gserver/tests/sequence_rnn.conf", - "legacy/gserver/tests/sequence_nest_rnn.conf", - 1e-6, - useGpu); - } -} - -TEST(RecurrentGradientMachine, rnn_multi_input) { - for (bool useGpu : {false, true}) { - test("legacy/gserver/tests/sequence_rnn_multi_input.conf", - "legacy/gserver/tests/sequence_nest_rnn_multi_input.conf", - 1e-6, - useGpu); - } -} - -TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) { - for (bool useGpu : {false, true}) { - test("legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py", - "legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py", - 1e-6, - useGpu); - } -} - -TEST(RecurrentGradientMachine, rnn_mixed_input) { - for (bool useGpu : {false, true}) { - test("legacy/gserver/tests/sequence_rnn_mixed_inputs.py", - "legacy/gserver/tests/sequence_rnn_matched_inputs.py", - 1e-6, - useGpu); - } -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - - if (paddle::version::isWithPyDataProvider()) { - if (!paddle::version::isWithGpu()) { - FLAGS_use_gpu = false; - } - initMain(argc, argv); - initPython(argc, argv); - return RUN_ALL_TESTS(); - } else { - return 0; - } -} diff --git a/paddle/legacy/gserver/tests/test_RecurrentLayer.cpp b/paddle/legacy/gserver/tests/test_RecurrentLayer.cpp deleted file mode 100644 index 
71198cb6a1d29433ed0e315378f5aee51b921766..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_RecurrentLayer.cpp +++ /dev/null @@ -1,571 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/gserver/layers/Layer.h" - -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT -DECLARE_bool(use_gpu); -DECLARE_bool(rnn_use_batch); -DECLARE_int32(fixed_seq_length); - -void checkError(const Matrix& matrix1, const Matrix& matrix2) { - CHECK(matrix1.getHeight() == matrix2.getHeight()); - CHECK(matrix1.getWidth() == matrix2.getWidth()); -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - - int height = matrix1.getHeight(); - int width = matrix1.getWidth(); - const real* data1 = matrix1.getData(); - const real* data2 = matrix2.getData(); - int count = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (fabs(data1[i * width + j] - data2[i * width + j]) > err) { - count++; - } - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -void checkError(const CpuVector& vector1, const CpuVector& vector2) { - CHECK(vector1.getSize() == vector2.getSize()); -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; 
-#endif - - int size = vector1.getSize(); - const real* data1 = vector1.getData(); - const real* data2 = vector2.getData(); - int count = 0; - for (int i = 0; i < size; i++) { - if (fabs(data1[i] - data2[i]) > err) { - count++; - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -LayerPtr creatDataLayer(string name, - size_t batchSize, - int layerSize, - bool useGpu) { - LayerConfig dataConfig; - dataConfig.set_name(name); - dataConfig.set_type("data"); - dataConfig.set_size(layerSize); - LayerPtr layer = LayerPtr(new DataLayer(dataConfig)); - - Argument data; - data.value = Matrix::create(batchSize, layer->getSize(), false, useGpu); - data.grad = Matrix::create(batchSize, layer->getSize(), false, useGpu); - data.value->randomizeUniform(); - data.value->add(-0.5); - data.value->sigmoid(*data.value); - data.grad->zeroMem(); - - generateSequenceStartPositions(batchSize, data.sequenceStartPositions); - - DataLayerPtr dataLayer = std::dynamic_pointer_cast(layer); - dataLayer->setData(data); - dataLayer->forward(PASS_GC); - - return layer; -} - -ParameterPtr creatParameter(string name, - int pid, - size_t paraSize, - bool useGpu) { - ParameterConfig paraConfig; - paraConfig.set_name(name); - paraConfig.set_size(paraSize); - - ParameterPtr parameter = - std::make_shared(paraConfig, useGpu, /*initialize */ false); - parameter->enableType(PARAMETER_VALUE); - parameter->enableType(PARAMETER_GRADIENT); - parameter->randomize(); - parameter->setID(pid); - - return parameter; -} - -ParameterPtr creatParameterBias(string name, - int pid, - size_t paraSize, - bool useGpu) { - ParameterConfig paraConfig; - paraConfig.set_name(name); - paraConfig.set_size(paraSize); - paraConfig.set_initial_std(1); - - ParameterPtr parameter = - std::make_shared(paraConfig, useGpu, /*initialize */ true); - parameter->randomize(); - parameter->setID(pid); - - return parameter; -} - -LayerPtr initRecurrentLayer(LayerConfig layerConfig, - size_t batchSize, - int 
layerSize, - bool useGpu) { - FLAGS_use_gpu = useGpu; - LayerMap layerMap; - ParameterMap parameterMap; - LayerPtr dataLayer = creatDataLayer("layer_0", batchSize, layerSize, useGpu); - layerMap[dataLayer->getName()] = dataLayer; - - ParameterPtr para = - creatParameter("para_0", 0, layerSize * layerSize, useGpu); - parameterMap[para->getName()] = para; - - layerConfig.add_inputs(); - LayerInputConfig& input = *(layerConfig.mutable_inputs(0)); - input.set_input_layer_name("layer_0"); - input.set_input_parameter_name("para_0"); - LayerPtr testLayer = Layer::create(layerConfig); - layerMap[testLayer->getName()] = testLayer; - - testLayer->init(layerMap, parameterMap); - testLayer->setNeedGradient(true); - - return testLayer; -} - -void checkRecurrentLayer(LayerPtr testLayer) { - const VectorPtr& weightGrad = - (testLayer->getParameters()[0])->getBuf(PARAMETER_GRADIENT); - const MatrixPtr& inputGrad = testLayer->getPrev(0)->getOutputGrad(); - CpuVector seqPara(weightGrad->getSize()); - CpuVector batPara(weightGrad->getSize()); - CpuMatrix seqInputGrad(inputGrad->getHeight(), inputGrad->getWidth()); - CpuMatrix batInputGrad(inputGrad->getHeight(), inputGrad->getWidth()); - - CpuMatrix outputGrad(inputGrad->getHeight(), inputGrad->getWidth()); - outputGrad.randomizeUniform(); - - /* use sequence calculate */ - FLAGS_rnn_use_batch = false; - weightGrad->zero(); - inputGrad->zero(); - testLayer->forward(PASS_GC); - testLayer->getOutputGrad()->copyFrom(outputGrad); - testLayer->backward(); - seqPara.copyFrom(*weightGrad); - seqInputGrad.copyFrom(*inputGrad); - - /* use batch calculate */ - FLAGS_rnn_use_batch = true; - weightGrad->zero(); - inputGrad->zero(); - testLayer->forward(PASS_GC); - testLayer->getOutputGrad()->copyFrom(outputGrad); - testLayer->backward(); - batPara.copyFrom(*weightGrad); - batInputGrad.copyFrom(*inputGrad); - - /* check */ - checkError(seqInputGrad, batInputGrad); - checkError(seqPara, batPara); -} - -TEST(Layer, RecurrentLayer) { - LayerConfig 
layerConfig; - layerConfig.set_name("rnn"); - layerConfig.set_type("recurrent"); - layerConfig.set_active_type("tanh"); - for (auto layerSize : {1, 10, 64, 128, 256, 512}) { - for (auto batchSize : {1, 5, 20, 100, 128}) { - for (auto useGpu : {false, true}) { - for (auto reversed : {false, true}) { - LOG(INFO) << " layerSize=" << layerSize << " batchSize=" << batchSize - << " useGpu=" << useGpu << " reversed=" << reversed; - layerConfig.set_size(layerSize); - layerConfig.set_reversed(reversed); - LayerPtr testLayer = - initRecurrentLayer(layerConfig, batchSize, layerSize, useGpu); - checkRecurrentLayer(testLayer); - } - } - } - } -} - -#define protected public -#include "paddle/legacy/gserver/layers/GatedRecurrentLayer.h" -#include "paddle/legacy/gserver/layers/LstmLayer.h" -#include "paddle/legacy/gserver/layers/RecurrentLayer.h" -template -class TestRecurrentLayer { - public: - LayerConfig config_; - bool useGpu_; - bool useBatch_; - LayerPtr testLayer_; - LayerPtr dataLayer_; - ParameterPtr para_; - ParameterPtr bias_; - LayerMap layerMap_; - ParameterMap parameterMap_; - TestRecurrentLayer(const LayerConfig& config, - bool useGpu, - bool useBatch = false) - : config_(config), useGpu_(useGpu), useBatch_(useBatch) {} - void init(size_t batchSize) { - FLAGS_use_gpu = useGpu_; - testLayer_ = Layer::create(config_); - if (typeid(T) == typeid(GatedRecurrentLayer)) { - dataLayer_ = creatDataLayer(config_.mutable_inputs(0)->input_layer_name(), - batchSize, - config_.size() * 3, - useGpu_); - para_ = creatParameter(config_.mutable_inputs(0)->input_parameter_name(), - 0, - config_.size() * config_.size() * 3, - useGpu_); - bias_ = creatParameterBias( - config_.bias_parameter_name(), 1, config_.size() * 3, useGpu_); - } else if (typeid(T) == typeid(LstmLayer)) { - dataLayer_ = creatDataLayer(config_.mutable_inputs(0)->input_layer_name(), - batchSize, - config_.size() * 4, - useGpu_); - para_ = creatParameter(config_.mutable_inputs(0)->input_parameter_name(), - 0, - 
config_.size() * config_.size() * 4, - useGpu_); - bias_ = creatParameterBias( - config_.bias_parameter_name(), 1, config_.size() * 7, useGpu_); - } - layerMap_[dataLayer_->getName()] = dataLayer_; - parameterMap_[para_->getName()] = para_; - parameterMap_[bias_->getName()] = bias_; - - layerMap_[testLayer_->getName()] = testLayer_; - testLayer_->init(layerMap_, parameterMap_); - testLayer_->setNeedGradient(true); - (dynamic_cast(testLayer_.get()))->useBatch_ = useBatch_; - } - void forward() { - FLAGS_use_gpu = useGpu_; - testLayer_->forward(PASS_GC); - } - void backward() { - FLAGS_use_gpu = useGpu_; - testLayer_->backward(nullptr); - } -}; - -template -void checkRecurrentLayer(LayerConfig layerConfig, - size_t batchSize, - bool cpuBatch, - bool gpuBatch) { - TestRecurrentLayer testCpu(layerConfig, false, cpuBatch); - TestRecurrentLayer testGpu(layerConfig, true, gpuBatch); - testCpu.init(batchSize); - testGpu.init(batchSize); - auto checkError = []( - MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) { - CpuMatrix check(gpu->getHeight(), gpu->getWidth()); - check.copyFrom(*gpu); - int height = cpu->getHeight(); - int width = cpu->getWidth(); - const real* data1 = cpu->getData(); - const real* data2 = check.getData(); - int count = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (fabs(data1[i * width + j] - data2[i * width + j]) / numSequences > - 1e-4) { - count++; - } - } - } - EXPECT_EQ(count, 0) << "[" << str << "]" - << "There are " << count << " different element."; - }; - T* cpuLayer = dynamic_cast(testCpu.testLayer_.get()); - T* gpuLayer = dynamic_cast(testGpu.testLayer_.get()); - - Argument& cpuInput = testCpu.dataLayer_->getOutput(); - Argument& gpuInput = testGpu.dataLayer_->getOutput(); - gpuInput.resizeAndCopyFrom(cpuInput, true); - - const VectorPtr& cpuVec = testCpu.para_->getBuf(PARAMETER_VALUE); - const VectorPtr& gpuVec = testGpu.para_->getBuf(PARAMETER_VALUE); - gpuVec->copyFrom(*cpuVec); - - 
const VectorPtr& cpuBiasVec = testCpu.bias_->getBuf(PARAMETER_VALUE); - const VectorPtr& gpuBiasVec = testGpu.bias_->getBuf(PARAMETER_VALUE); - gpuBiasVec->copyFrom(*cpuBiasVec); - - /* check forward */ - testCpu.forward(); - testGpu.forward(); - - checkError( - cpuLayer->getOutputValue(), gpuLayer->getOutputValue(), 1, "outputValue"); - - /* check backward */ - cpuLayer->getOutputGrad()->randomizeUniform(); - gpuLayer->getOutputGrad()->copyFrom(*cpuLayer->getOutputGrad()); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - - testCpu.backward(); - testGpu.backward(); - - // check input grad - checkError(cpuInput.grad, gpuInput.grad, 1, "inputGrad"); - // check weight grad - int numSequences = cpuInput.getNumSequences(); - checkError(cpuLayer->weight_->getWGrad(), - gpuLayer->weight_->getWGrad(), - numSequences, - "weightGrad"); - // check bias grad - checkError(cpuLayer->bias_->getWGrad(), - gpuLayer->bias_->getWGrad(), - numSequences, - "biasGrad"); -} - -TEST(Layer, GatedRecurrentLayer) { - LayerConfig layerConfig; - layerConfig.set_type("gated_recurrent"); - layerConfig.set_active_type("sigmoid"); - layerConfig.set_active_gate_type("sigmoid"); - - layerConfig.add_inputs(); - LayerInputConfig& input = *(layerConfig.mutable_inputs(0)); - input.set_input_layer_name("layer_0"); - input.set_input_parameter_name("para_0"); - layerConfig.set_bias_parameter_name("bias"); - - for (auto frameSize : {32, 64, 128, 256, 512}) { - for (auto batchSize : {1, 5, 100, 500}) { - for (auto reversed : {false, true}) { - for (auto cpuBatch : {false, true}) { - for (auto gpuBatch : {false, true}) { - LOG(INFO) << " batchSize=" << batchSize - << " frameSize=" << frameSize << " reversed=" << reversed - << " cpuBatch=" << cpuBatch << " gpuBatch=" << gpuBatch; - layerConfig.set_size(frameSize); - layerConfig.set_reversed(reversed); - checkRecurrentLayer( - layerConfig, batchSize, cpuBatch, gpuBatch); - } - } - } - } - } -} - -TEST(Layer, LstmLayer) { - LayerConfig layerConfig; - 
layerConfig.set_type("lstmemory"); - layerConfig.set_active_type("relu"); - layerConfig.set_active_state_type("tanh"); - layerConfig.set_active_gate_type("sigmoid"); - - layerConfig.add_inputs(); - LayerInputConfig& input = *(layerConfig.mutable_inputs(0)); - input.set_input_layer_name("layer_0"); - input.set_input_parameter_name("para_0"); - layerConfig.set_bias_parameter_name("bias"); - - for (auto frameSize : {32, 64, 128, 256, 512}) { - for (auto batchSize : {1, 5, 100, 500}) { - for (auto reversed : {false, true}) { - for (auto cpuBatch : {false, true}) { - for (auto gpuBatch : {false, true}) { - LOG(INFO) << " batchSize=" << batchSize - << " frameSize=" << frameSize << " reversed=" << reversed - << " cpuBatch=" << cpuBatch << " gpuBatch=" << gpuBatch; - layerConfig.set_size(frameSize); - layerConfig.set_reversed(reversed); - checkRecurrentLayer( - layerConfig, batchSize, cpuBatch, gpuBatch); - } - } - } - } - } -} - -#ifdef PADDLE_WITH_MKLML - -#include "paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.h" - -LayerPtr initMKLPackedLayer(LayerConfig layerConfig, - bool reversed, - int layerSize, - LayerPtr dataLayer, - ParameterPtr para, - ParameterPtr bias = nullptr) { - LayerMap layerMap; - ParameterMap parameterMap; - layerMap[dataLayer->getName()] = dataLayer; - parameterMap[para->getName()] = para; - if (bias) { - parameterMap[bias->getName()] = bias; - layerConfig.set_bias_parameter_name("bias_0"); - } - - layerConfig.set_size(layerSize); - layerConfig.set_reversed(reversed); - layerConfig.add_inputs(); - LayerInputConfig& input = *(layerConfig.mutable_inputs(0)); - input.set_input_layer_name("layer_0"); - input.set_input_parameter_name("para_0"); - - LayerPtr testLayer = Layer::create(layerConfig); - layerMap[testLayer->getName()] = testLayer; - - testLayer->init(layerMap, parameterMap); - testLayer->setNeedGradient(true); - - return testLayer; -} - -void checkMKLPackedLayer(LayerConfig layerConfig1, - LayerConfig layerConfig2, - bool reversed, - int 
layerSize, - int batchSize, - bool useBatch1, - bool useBatch2) { - LayerPtr dataLayer; - ParameterPtr para, bias; - - if (layerConfig1.type() == "recurrent") { - dataLayer = creatDataLayer("layer_0", batchSize, layerSize, false); - para = creatParameter("para_0", 0, layerSize * layerSize, false); - bias = nullptr; - } else if (layerConfig1.type() == "gated_recurrent") { - dataLayer = creatDataLayer("layer_0", batchSize, layerSize * 3, false); - para = creatParameter("para_0", 0, layerSize * layerSize * 3, false); - bias = creatParameterBias("bias_0", 1, layerSize * 3, false); - } - - LayerPtr testLayer1 = initMKLPackedLayer( - layerConfig1, reversed, layerSize, dataLayer, para, bias); - LayerPtr testLayer2 = initMKLPackedLayer( - layerConfig2, reversed, layerSize, dataLayer, para, bias); - - const VectorPtr& weightGrad = - (testLayer1->getParameters()[0])->getBuf(PARAMETER_GRADIENT); - const MatrixPtr& inputGrad = testLayer1->getPrev(0)->getOutputGrad(); - CpuVector wgt_grad1(weightGrad->getSize()); - CpuVector wgt_grad2(weightGrad->getSize()); - CpuMatrix input_grad1(inputGrad->getHeight(), inputGrad->getWidth()); - CpuMatrix input_grad2(inputGrad->getHeight(), inputGrad->getWidth()); - - for (int i = 0; i < 2; i++) { - FLAGS_rnn_use_batch = useBatch1; - - testLayer1->forward(PASS_GC); - - FLAGS_rnn_use_batch = useBatch2; - testLayer2->forward(PASS_GC); - - testLayer1->getOutputGrad()->randomizeUniform(); - testLayer2->getOutputGrad()->copyFrom(*testLayer1->getOutputGrad()); - - weightGrad->zero(); - inputGrad->zero(); - FLAGS_rnn_use_batch = useBatch1; - testLayer1->backward(nullptr); - - wgt_grad1.copyFrom(*weightGrad); - input_grad1.copyFrom(*inputGrad); - - weightGrad->zero(); - inputGrad->zero(); - FLAGS_rnn_use_batch = useBatch2; - testLayer2->backward(nullptr); - - wgt_grad2.copyFrom(*weightGrad); - input_grad2.copyFrom(*inputGrad); - - checkError(*testLayer1->getOutputValue(), *testLayer2->getOutputValue()); - checkError(wgt_grad1, wgt_grad2); - 
checkError(input_grad1, input_grad2); - } -} - -TEST(MKLPackedLayer, RecurrentLayer) { - LayerConfig layerConfig1; - LayerConfig layerConfig2; - - layerConfig1.set_name("paddle-rnn"); - layerConfig1.set_type("recurrent"); - layerConfig1.set_active_type("relu"); - - layerConfig2.set_name("mkl-packed-rnn"); - layerConfig2.set_type("mkl_packed_recurrent"); - layerConfig2.set_active_type("relu"); - - FLAGS_use_gpu = false; - - for (auto layerSize : {32, 64, 128, 256, 512}) { - for (auto batchSize : {1, 5, 100, 500}) { - for (auto reversed : {true, false}) { - for (auto paddle_use_batch : {true, false}) { - for (auto MKLPacked_use_batch : {true, false}) { - LOG(INFO) << " layerSize=" << layerSize - << " batchSize=" << batchSize << " reversed=" << reversed - << " paddle_use_batch=" << paddle_use_batch - << " MKLPacked_use_batch=" << MKLPacked_use_batch; - - checkMKLPackedLayer(layerConfig1, - layerConfig2, - reversed, - layerSize, - batchSize, - paddle_use_batch, - MKLPacked_use_batch); - } - } - } - } - } -} -#endif - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - if (!version::isWithGpu()) { - testing::GTEST_FLAG(filter) = "-Layer.*"; - } - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_SelectiveFCLayer.cpp b/paddle/legacy/gserver/tests/test_SelectiveFCLayer.cpp deleted file mode 100644 index 1975d9196d61dbb80667b2ba86c09d56bc568064..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_SelectiveFCLayer.cpp +++ /dev/null @@ -1,471 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/gserver/layers/FullyConnectedLayer.h" -#include "paddle/legacy/gserver/layers/Layer.h" -#include "paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.h" -#include "paddle/legacy/math/CpuSparseMatrix.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); -DECLARE_int32(num_passes); -DECLARE_string(config); -DECLARE_string(init_model_path); -DECLARE_string(config_args); - -size_t fcLayerWidth = 1024; - -struct ComData { - vector outArgs; - vector parameters; -}; - -int randint(int* data, size_t int_max, size_t size) { - srand((size_t)(time(NULL))); - if (int_max < size) { - return -1; - } - size_t count = 0; - std::map tmp; - int this_int = 0; - - while (count < size) { - this_int = std::rand() % int_max; // NOLINT - if (tmp.find(this_int) == tmp.end()) { - tmp[this_int] = 0; - count += 1; - } - } - - if (tmp.size() != size) { - return -1; - } - count = 0; - for (auto itr = tmp.begin(); itr != tmp.end(); ++itr) { - data[count] = itr->first; - count += 1; - } - return 0; -} - -void calcOutput(ComData& comData, - const string configFile, - const string configArgs, - bool useGpu) { - FLAGS_config = configFile; - FLAGS_config_args = configArgs; - FLAGS_use_gpu = useGpu; - FLAGS_init_model_path = "legacy/gserver/tests/SelectiveFcTest/model"; - *ThreadLocalRand::getSeed() = 0; - srand(0); - - Trainer trainer; - trainer.init(TrainerConfigHelper::createFromFlags(), 
false); - - comData.parameters = trainer.getGradientMachine()->getParameters(); - - auto dataProvider = trainer.getDataProvider(); - int32_t batchSize = trainer.getConfig().opt_config().batch_size(); - DataBatch dataBatch; - dataProvider->setSkipShuffle(); - dataProvider->reset(); - dataProvider->getNextBatch(batchSize, &dataBatch); - CHECK(dataBatch.getSize()) << "No data from data provider"; - - vector& inArgs = dataBatch.getStreams(); - trainer.getGradientMachine()->start(trainer.getConfig(), nullptr); - trainer.getGradientMachine()->forwardBackward( - inArgs, &comData.outArgs, PASS_TRAIN); - trainer.getGradientMachine()->finish(); -} - -void checkMatrix(real* A, real* B, size_t matSize) { -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - int diffNum = 0; - for (size_t i = 0; i < matSize; ++i) { - if (std::isinf(A[i]) || std::isnan(A[i]) || std::isinf(B[i]) || - std::isnan(B[i])) { - } else if (fabs(A[i] - B[i]) > err) { - diffNum++; - } - } - EXPECT_EQ(0, diffNum); -} - -void checkTranspose(real* matrix, - real* transpose, - size_t width, - size_t matSize) { -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - size_t height = matSize / width; - int diffNum = 0; - size_t rowId = 0; - size_t colId = 0; - for (size_t i = 0; i < matSize; ++i) { - if (i % width == 0 && i) { - rowId++; - } - colId = i % width; - if (fabs(matrix[i] - transpose[colId * height + rowId]) > err) { - diffNum++; - LOG(INFO) << i << " diff : " << matrix[i] << "\t" - << transpose[colId * height + rowId]; - } - } - EXPECT_EQ(0, diffNum); -} - -void compareOutput(ComData& fcData, ComData& selFcData) { - vector outArgsFc = fcData.outArgs; - vector outArgsSelfc = selFcData.outArgs; - - // check cost - LOG(INFO) << "Check cost"; - CpuMatrix fcCost(outArgsFc[0].value->getHeight(), - outArgsFc[0].value->getWidth()); - CpuMatrix selfcCost(outArgsSelfc[0].value->getHeight(), - outArgsSelfc[0].value->getWidth()); - 
fcCost.copyFrom(*outArgsFc[0].value); - selfcCost.copyFrom(*outArgsSelfc[0].value); - checkMatrix(fcCost.getData(), selfcCost.getData(), fcCost.getElementCnt()); - - // check selective fc output and fc output - LOG(INFO) << "Compare output of SelectiveFullyConectedLayer " - << "with FullyConectedLayer"; - CpuMatrix fcOut(outArgsFc[1].value->getHeight(), - outArgsFc[1].value->getWidth()); - CpuMatrix selfcOut(outArgsSelfc[1].value->getHeight(), - outArgsSelfc[1].value->getWidth()); - - fcOut.copyFrom(*outArgsFc[1].value); - selfcOut.copyFrom(*outArgsSelfc[1].value); - checkMatrix(fcOut.getData(), selfcOut.getData(), fcOut.getElementCnt()); - - // check gradient math - vector& fcParam = fcData.parameters; - vector& selfcParam = selFcData.parameters; - for (size_t i = 0; i < fcParam.size(); ++i) { - ParameterPtr p1, p2; - p1 = fcParam[i]; - p2 = selfcParam[i]; - - string paramName = p1->getName(); - LOG(INFO) << "check parameter : " << paramName; - - // check parameter value - CpuVector paraValue1(p1->getSize()); - CpuVector paraValue2(p2->getSize()); - paraValue1.copyFrom(*p1->getBuf(PARAMETER_VALUE)); - paraValue2.copyFrom(*p2->getBuf(PARAMETER_VALUE)); - - // check gradient - CpuVector paraGrad1(*p1->getBuf(PARAMETER_GRADIENT)); - CpuVector paraGrad2(*p2->getBuf(PARAMETER_GRADIENT)); - if (paramName == "rand_fc_param.bias") { - checkMatrix( - paraValue1.getData(), paraValue2.getData(), paraValue1.getSize()); - checkMatrix( - paraGrad1.getData(), paraGrad2.getData(), paraGrad1.getSize()); - } else { - checkTranspose(paraValue1.getData(), - paraValue2.getData(), - fcLayerWidth, - paraValue1.getSize()); - checkTranspose(paraGrad1.getData(), - paraGrad2.getData(), - fcLayerWidth, - paraGrad1.getSize()); - } - } -} - -void compareSparseMulOutput( - real* fcOutput, - real* selOutput, - size_t nnz, - const std::shared_ptr>>& selCols) { -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - size_t nnzCount = - std::accumulate(selCols->begin(), 
- selCols->end(), - 0UL, - [](size_t a, const std::pair& arr) { - return a + arr.second; - }); - EXPECT_EQ(nnz, nnzCount); - - size_t sampleNum = selCols->size(); - int diffNum = 0; - size_t count = 0; - for (size_t i = 0; i < sampleNum; ++i) { - for (size_t j = 0; j < (*selCols)[i].second; ++j) { - size_t selIdx = (*selCols)[i].first[j]; - if (fabs(fcOutput[i * fcLayerWidth + selIdx] - selOutput[count]) > err) { - diffNum++; - LOG(INFO) << count << " diff : " << fcOutput[i * fcLayerWidth + selIdx] - << "\t" << selOutput[count]; - } - count++; - } - } - EXPECT_EQ(0, diffNum); -} - -LayerPtr creatDataLayer(string name, - size_t batchSize, - size_t layerSize, - std::vector& values, - bool useGpu) { - LayerConfig dataConfig; - dataConfig.set_name(name); - dataConfig.set_type("data"); - dataConfig.set_size(layerSize); - LayerPtr layer = LayerPtr(new DataLayer(dataConfig)); - - Argument data; - data.value = Matrix::create(batchSize, layerSize, false, useGpu); - data.value->copyFrom(values.data(), batchSize * layerSize); - - DataLayerPtr dataLayer = std::dynamic_pointer_cast(layer); - dataLayer->setData(data); - dataLayer->forward(PASS_TEST); - return layer; -} - -ParameterPtr creatParameter( - string name, int pid, size_t paraSize, string paramFile, bool useGpu) { - ParameterConfig paraConfig; - paraConfig.set_name(name); - paraConfig.set_size(paraSize); - - ParameterPtr parameter = - std::make_shared(paraConfig, useGpu, /*initialize */ false); - parameter->enableType(PARAMETER_VALUE); - parameter->randomize(); - parameter->setID(pid); - parameter->load(paramFile); - return parameter; -} - -LayerPtr initFcLayer(LayerPtr dataLayer, - LayerConfig layerConfig, - int dataLayerSize, - int fcLayerSize, - string paraName, - string paraFile, - bool useGpu) { - LayerMap layerMap; - ParameterMap parameterMap; - - layerMap[dataLayer->getName()] = dataLayer; - ParameterPtr para = creatParameter( - paraName, 0, dataLayerSize * fcLayerSize, paraFile, useGpu); - 
parameterMap[para->getName()] = para; - - layerConfig.add_inputs(); - LayerInputConfig& input = *(layerConfig.mutable_inputs(0)); - input.set_input_layer_name(dataLayer->getName()); - input.set_input_parameter_name(paraName); - - LayerPtr testLayer = Layer::create(layerConfig); - layerMap[testLayer->getName()] = testLayer; - - testLayer->setNeedGradient(false); - testLayer->init(layerMap, parameterMap); - return testLayer; -} - -#ifndef PADDLE_TYPE_DOUBLE -// The parameter file used in fc.conf and selective_fc.conf is float -TEST(Layer, SelectiveFcLayer_train_dense_mul) { - const string& fcConfig = "legacy/gserver/tests/SelectiveFcTest/conf/fc.conf"; - const string& fcConfigArgs = - "filelist=legacy/gserver/tests/SelectiveFcTest/dense_mul_list"; - const string& selFcConfig = - "legacy/gserver/tests/SelectiveFcTest/conf/selective_fc.conf"; - const string& selConfigArgs = - "filelist=legacy/gserver/tests/SelectiveFcTest/dense_mul_list"; - - for (auto useGpu : {false, true}) { -#ifndef PADDLE_WITH_CUDA - if (useGpu) { - break; - } -#endif - LOG(INFO) << "FullyConnectedLayer forwardBackward()"; - ComData fcData; - calcOutput(fcData, fcConfig, fcConfigArgs, useGpu); - - LOG(INFO) << "SelectiveFullyConnectedLayer forwardBackward()"; - ComData selFcData; - calcOutput(selFcData, selFcConfig, selConfigArgs, useGpu); - compareOutput(fcData, selFcData); - } -} -#endif // PADDLE_TYPE_DOUBLE - -void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config, - bool useGpu) { - FLAGS_use_gpu = useGpu; - size_t batchSize = 100; - size_t dataLayerSize = 512; - std::vector values(batchSize * dataLayerSize); - for (size_t j = 0; j < batchSize * dataLayerSize; ++j) { - values[j] = std::rand() / real(RAND_MAX); - } - LayerPtr dataLayer = - creatDataLayer("data", batchSize, dataLayerSize, values, useGpu); - - const string& selfcParaFile = - "legacy/gserver/tests/SelectiveFcTest/model/rand_fc_param.w.transpose"; - const string& selfcParaName = "rand_fc_param.w.transpose"; - - 
std::shared_ptr selfcLayer = - std::dynamic_pointer_cast( - initFcLayer(dataLayer, - config, - dataLayerSize, - fcLayerWidth, - selfcParaName, - selfcParaFile, - useGpu)); - - // create selected columns - std::shared_ptr>> selCols( - new std::vector>(batchSize)); - size_t maxNNZ = 30; - srand((size_t)(time(NULL))); - int total = 0; - while (total == 0) { - for (size_t i = 0; i < batchSize; ++i) { - size_t num = std::rand() % maxNNZ; - int* data = new int[num]; - randint(data, fcLayerWidth, num); - (*selCols)[i] = std::make_pair(data, num); - total += num; - } - } - selfcLayer->fillSelectiveData(selCols); - selfcLayer->forward(PASS_TEST); - - MatrixPtr outMatSelfc = selfcLayer->getOutputValue(); - CpuSparseMatrixPtr cpuOutMatSelfc( - new CpuSparseMatrix(outMatSelfc->getHeight(), - outMatSelfc->getWidth(), - outMatSelfc->getElementCnt())); - cpuOutMatSelfc->copyFrom(*outMatSelfc, HPPL_STREAM_DEFAULT); -#ifdef PADDLE_WITH_CUDA - if (useGpu) { - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - } -#endif - real* outValueSelfc = cpuOutMatSelfc->getValue(); - size_t nnz = cpuOutMatSelfc->getElementCnt(); - - const string& fcParaFile = - "legacy/gserver/tests/SelectiveFcTest/model/rand_fc_param.w"; - const string& fcParaName = "rand_fc_param.w"; - LayerConfig fcLayerConfig; - fcLayerConfig.set_name("fc_layer"); - fcLayerConfig.set_type("fc"); - fcLayerConfig.set_active_type("linear"); - fcLayerConfig.set_size(fcLayerWidth); - - LayerPtr fcLayer = initFcLayer(dataLayer, - fcLayerConfig, - dataLayerSize, - fcLayerWidth, - fcParaName, - fcParaFile, - useGpu); - fcLayer->forward(PASS_TEST); - - MatrixPtr outMatFc = fcLayer->getOutputValue(); - MatrixPtr cpuOutMatFc( - new CpuMatrix(outMatFc->getHeight(), outMatFc->getWidth())); - cpuOutMatFc->copyFrom(*outMatFc, HPPL_STREAM_DEFAULT); -#ifdef PADDLE_WITH_CUDA - if (useGpu) { - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - } -#endif - real* outValueFc = cpuOutMatFc->getData(); - - compareSparseMulOutput(outValueFc, outValueSelfc, 
nnz, selCols); - for (size_t i = 0; i < batchSize; ++i) { - delete[](*selCols)[i].first; - } -} - -#ifndef PADDLE_TYPE_DOUBLE -// The parameter file used in testSelectiveFcLayerTrainSparseMul is float -TEST(Layer, SelectiveFcLayer_train_sparse_mul) { - LayerConfig selLayerConfig; - selLayerConfig.set_name("sel_fc"); - selLayerConfig.set_type("selective_fc"); - selLayerConfig.set_active_type("linear"); - selLayerConfig.set_has_selected_colums(false); - selLayerConfig.set_selective_fc_pass_generation(true); - selLayerConfig.set_size(fcLayerWidth); - - testSelectiveFcLayerTrainSparseMul(selLayerConfig, false); -#ifdef PADDLE_WITH_CUDA - testSelectiveFcLayerTrainSparseMul(selLayerConfig, true); -#endif -} -#endif // PADDLE_TYPE_DOUBLE - -// TODO(dangqingqing) test multi threads after support in matrix -// TEST(Layer, SelectiveFcLayer_train_sparse_mul_parallel) { -// LayerConfig selLayerConfig; -// selLayerConfig.set_name("sel_fc"); -// selLayerConfig.set_type("selective_fc"); -// selLayerConfig.set_active_type("linear"); -// selLayerConfig.set_has_selected_colums(false); -// selLayerConfig.set_selective_fc_pass_generation(true); -// selLayerConfig.set_selective_fc_parallel_plain_mul_thread_num(10); -// selLayerConfig.set_selective_fc_full_mul_ratio(1000); -// selLayerConfig.set_size(fcLayerWidth); -// SelectiveFcLayer_test(selLayerConfig, false); -// } - -int main(int argc, char** argv) { - paddle::initMain(argc, argv); - testing::InitGoogleTest(&argc, argv); - initPython(argc, argv); - int ret = RUN_ALL_TESTS(); - return ret; -} diff --git a/paddle/legacy/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/legacy/gserver/tests/test_SeqSliceLayerGrad.cpp deleted file mode 100644 index 05acd714219fa5964b5b3595543682825ea67d84..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_SeqSliceLayerGrad.cpp +++ /dev/null @@ -1,224 +0,0 @@ -/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" - -#include "LayerGradUtil.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_int32(gpu_id); -DECLARE_bool(thread_local_rand_use_global_seed); - -const int MAX_SEQ_NUM = 17; -const int MAX_SEQ_LEN = 23; -const int MAX_BEAM_SIZE = 13; - -const size_t SEED = (size_t)(time(NULL)); - -vector randSampling(real range, int n) { - CHECK_GE(range, n); - vector num(range); - iota(begin(num), end(num), 0.); - if (range == n) return num; - - random_shuffle(begin(num), end(num)); - num.resize(n); - sort(begin(num), end(num)); - return num; -} - -void genSeqInfo(vector& seqStartPos, vector& subSeqStartPos) { - seqStartPos.resize(1, 0); - subSeqStartPos.resize(1, 0); - - srand(SEED); - int seqNum = 1 + (rand() % MAX_SEQ_NUM); - for (int i = 0; i < seqNum; ++i) { - int subSeqNum = 1 + (rand() % MAX_SEQ_NUM); - for (int j = 0; j < subSeqNum; ++j) - subSeqStartPos.push_back(subSeqStartPos.back() + - (1 + (rand() % MAX_SEQ_LEN))); - seqStartPos.push_back(subSeqStartPos.back()); - } -} - -/* - generate start indices according to sequence start positions. 
- */ -void genStarts(vector& seqStartPos, - vector>& starts, - size_t beamSize) { - starts.clear(); - starts.resize(seqStartPos.size() - 1, vector(beamSize, -1.)); - - for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { - int seqLen = seqStartPos[i + 1] - seqStartPos[i]; - vector randStarts = - randSampling(seqLen, min(seqLen, static_cast(beamSize))); - copy(begin(randStarts), end(randStarts), begin(starts[i])); - } -} - -/* - generate end indices according to sequence start positions and start indices. - */ -void genEnds(vector& seqStartPos, - vector>& starts, - vector>& ends, - size_t beamSize) { - CHECK_EQ(seqStartPos.size() - 1, starts.size()); - ends.clear(); - ends.resize(seqStartPos.size() - 1, vector(beamSize, -1.)); - - for (size_t i = 0; i < starts.size(); ++i) { - for (size_t j = 0; j < starts[i].size(); ++j) { - int seqLen = seqStartPos[i + 1] - seqStartPos[i]; - CHECK_GE(seqLen - 1, starts[i][j]); - if (starts[i][j] == -1.) break; - if (starts[i][j] == (seqLen - 1)) { - ends[i][j] = starts[i][j]; - } else { - ends[i][j] = starts[i][j] + randSampling(seqLen - starts[i][j], 1)[0]; - } - } - } -} - -void genTestData(vector& seqStartPos, - vector& subSeqStartPos, - vector>& starts, - vector>& ends, - bool hasSubseq) { - size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE); - genSeqInfo(seqStartPos, subSeqStartPos); - - genStarts(hasSubseq ? subSeqStartPos : seqStartPos, starts, beamSize); - genEnds(hasSubseq ? 
subSeqStartPos : seqStartPos, starts, ends, beamSize); -} - -template -void flatten2dVector(vector>& inVec, vector& outVec) { - size_t totalSize{0}; - for (auto const& items : inVec) totalSize += items.size(); - outVec.reserve(totalSize); - - for (auto& items : inVec) - move(items.begin(), items.end(), back_inserter(outVec)); -} - -void testSeqSliceLayer(bool hasSubseq, - bool useGpu, - vector& seqStartPos, - vector& subSeqStartPos, - vector>& starts, - vector>& ends) { - // layer size is not crutial for this layer, - // so here use a small layer size in the unittest. - const size_t layerSize{4}; - TestConfig config; - config.layerConfig.set_type("seq_slice"); - config.layerConfig.set_size(layerSize); - - // add the first input - MatrixPtr seqInputPtr = - Matrix::create(hasSubseq ? subSeqStartPos.back() : seqStartPos.back(), - layerSize, - false, - false); - seqInputPtr->randomizeUniform(); - - if (hasSubseq) { - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - "seq_input", - seqInputPtr, - seqStartPos, - subSeqStartPos}); - } else { - config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, "seq_input", seqInputPtr, seqStartPos}); - } - config.layerConfig.add_inputs(); - - // add start indices - if (starts.size()) { - vector startsToVec; - flatten2dVector(starts, startsToVec); - - MatrixPtr startMatrixPtr = - Matrix::create(starts.size(), starts[0].size(), false, false); - startMatrixPtr->copyFrom(startsToVec.data(), startsToVec.size()); - - config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr}); - config.layerConfig.add_inputs(); - config.layerConfig.set_select_first(true); - } - - // add end indices - if (ends.size()) { - vector endsToVec; - flatten2dVector(ends, endsToVec); - - MatrixPtr endMatrixPtr = - Matrix::create(ends.size(), ends[0].size(), false, false); - endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size()); - - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr}); - 
config.layerConfig.add_inputs(); - config.layerConfig.set_select_first(false); - } - - testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false); -} - -TEST(Layer, SeqSliceLayer) { - vector seqStartPos; - vector subSeqStartPos; - vector> starts; - vector> ends; - - std::vector mode = {false}; -#ifdef PADDLE_WITH_CUDA - mode.push_back(true); -#endif - genSeqInfo(seqStartPos, subSeqStartPos); - for (bool hasSubseq : {true, false}) { - LOG(INFO) << "hasSubSeq : " << hasSubseq; - genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq); - for (bool useGpu : mode) { - vector> tmp; - testSeqSliceLayer( - hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends); - testSeqSliceLayer( - hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, tmp); - testSeqSliceLayer( - hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, ends); - } - } -} - -int main(int argc, char** argv) { - initMain(argc, argv); - hl_start(); - hl_init(FLAGS_gpu_id); - FLAGS_thread_local_rand_use_global_seed = true; - srand(1); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/gserver/tests/test_Upsample.cpp b/paddle/legacy/gserver/tests/test_Upsample.cpp deleted file mode 100644 index 940d46baf73f2d600cff6edc37c29a3a36bf5d90..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_Upsample.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include - -#include "LayerGradUtil.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/testing/TestUtil.h" - -void setPoolConfig(paddle::TestConfig* config, - paddle::PoolConfig* pool, - const string& poolType) { - (*config).biasSize = 0; - (*config).layerConfig.set_type("pool"); - (*config).layerConfig.set_num_filters(1); - - int kw = 2, kh = 2; - int pw = 0, ph = 0; - int sw = 2, sh = 2; - pool->set_pool_type(poolType); - pool->set_channels(2); - pool->set_size_x(kw); - pool->set_size_y(kh); - pool->set_start(0); - pool->set_padding(pw); - pool->set_padding_y(ph); - pool->set_stride(sw); - pool->set_stride_y(sh); - - int ow = - paddle::outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); - int oh = - paddle::outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); - pool->set_output_x(ow); - pool->set_output_y(oh); -} - -paddle::LayerPtr doOneUpsampleTest(const paddle::MatrixPtr& inputMat, - const string& poolType, - bool use_gpu, - real* tempGradData) { - /* prepare maxPoolWithMaskLayer */ - paddle::TestConfig config; - config.inputDefs.push_back({paddle::INPUT_DATA, "layer_0", 128, 0}); - paddle::LayerInputConfig* input = config.layerConfig.add_inputs(); - paddle::PoolConfig* pool = input->mutable_pool_conf(); - - pool->set_img_size(8); - pool->set_img_size_y(8); - setPoolConfig(&config, pool, "max-pool-with-mask"); - config.layerConfig.set_size(pool->output_x() * pool->output_y() * - pool->channels()); - - config.layerConfig.set_name("MaxPoolWithMask"); - - std::vector dataLayers; - paddle::LayerMap layerMap; - vector datas; - - initDataLayer(config, - &dataLayers, - &datas, - &layerMap, - "MaxPoolWithMask", - 1, - false, - use_gpu); - - dataLayers[0]->getOutputValue()->copyFrom(*inputMat); - - FLAGS_use_gpu = use_gpu; - std::vector parameters; - paddle::LayerPtr maxPoolingWithMaskOutputLayer; - 
initTestLayer(config, &layerMap, ¶meters, &maxPoolingWithMaskOutputLayer); - maxPoolingWithMaskOutputLayer->forward(paddle::PASS_GC); - - /* prepare the upsample layer */ - paddle::LayerConfig upsampleLayerConfig; - upsampleLayerConfig.set_type("upsample"); - paddle::LayerInputConfig* input1 = upsampleLayerConfig.add_inputs(); - upsampleLayerConfig.add_inputs(); - - paddle::UpsampleConfig* upsampleConfig = input1->mutable_upsample_conf(); - upsampleConfig->set_scale(2); - paddle::ImageConfig* imageConfig = upsampleConfig->mutable_image_conf(); - imageConfig->set_channels(2); - imageConfig->set_img_size(4); - imageConfig->set_img_size_y(4); - upsampleLayerConfig.set_size(2 * 8 * 8); - upsampleLayerConfig.set_name("upsample"); - - for (size_t i = 0; i < 2; i++) { - paddle::LayerInputConfig& inputTemp = - *(upsampleLayerConfig.mutable_inputs(i)); - inputTemp.set_input_layer_name("MaxPoolWithMask"); - } - - paddle::LayerPtr upsampleLayer; - paddle::ParameterMap parameterMap; - upsampleLayer = paddle::Layer::create(upsampleLayerConfig); - layerMap[upsampleLayerConfig.name()] = upsampleLayer; - upsampleLayer->init(layerMap, parameterMap); - upsampleLayer->setNeedGradient(true); - upsampleLayer->forward(paddle::PASS_GC); - upsampleLayer->getOutputGrad()->copyFrom(tempGradData, 128); - upsampleLayer->backward(); - - return upsampleLayer; -} - -TEST(Layer, maxPoolingWithMaskOutputLayerFwd) { - bool useGpu = false; - paddle::MatrixPtr inputMat; - paddle::MatrixPtr inputGPUMat; - paddle::MatrixPtr tempGradMat; - - inputMat = paddle::Matrix::create(1, 128, false, useGpu); - inputMat->randomizeUniform(); - - tempGradMat = paddle::Matrix::create(1, 128, false, useGpu); - tempGradMat->randomizeUniform(); - real* tempGradData = tempGradMat->getData(); - - paddle::LayerPtr upsampleLayerCPU = - doOneUpsampleTest(inputMat, "max-pool-with-mask", useGpu, tempGradData); - -#ifdef PADDLE_WITH_CUDA - useGpu = true; - real* data = inputMat->getData(); - inputGPUMat = 
paddle::Matrix::create(1, 128, false, useGpu); - inputGPUMat->copyFrom(data, 128); - paddle::LayerPtr upsampleLayerGPU = doOneUpsampleTest( - inputGPUMat, "max-pool-with-mask", useGpu, tempGradData); - paddle::checkMatrixEqual(upsampleLayerCPU->getOutput("").value, - upsampleLayerGPU->getOutput("").value); - - paddle::checkMatrixEqual(upsampleLayerCPU->getPrev(0)->getOutputGrad(), - upsampleLayerGPU->getPrev(0)->getOutputGrad()); -#endif -} diff --git a/paddle/legacy/gserver/tests/test_WarpCTCLayer.cpp b/paddle/legacy/gserver/tests/test_WarpCTCLayer.cpp deleted file mode 100644 index b1697e1616484ec5389cdb5b59ba413a9615cf2e..0000000000000000000000000000000000000000 --- a/paddle/legacy/gserver/tests/test_WarpCTCLayer.cpp +++ /dev/null @@ -1,244 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include "ModelConfig.pb.h" -#include "paddle/legacy/gserver/layers/CTCLayer.h" -#include "paddle/legacy/gserver/layers/DataLayer.h" -#include "paddle/legacy/gserver/layers/Layer.h" -#include "paddle/legacy/gserver/layers/WarpCTCLayer.h" - -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_bool(use_gpu); - -const real* getData(const Matrix& matrix) { - if (matrix.useGpu()) { - MatrixPtr cpuMatrix = Matrix::create( - matrix.getHeight(), matrix.getWidth(), matrix.isTransposed(), false); - cpuMatrix->copyFrom(matrix); - return cpuMatrix->getData(); - } else { - return matrix.getData(); - } -} - -int checkError(const Matrix& matrix1, const Matrix& matrix2) { - CHECK_EQ(matrix1.getHeight(), matrix2.getHeight()); - CHECK_EQ(matrix1.getWidth(), matrix2.getWidth()); - CHECK_EQ(matrix1.isTransposed(), matrix2.isTransposed()); -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - - int height = matrix1.getHeight(); - int width = matrix1.getWidth(); - - const real* data1 = getData(matrix1); - const real* data2 = getData(matrix2); - int count = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (fabs(data1[i * width + j] - data2[i * width + j]) > err) { - count++; - } - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; - return count; -} - -void initArgument(size_t batchSize, - int layerSize, - bool useGpu, - Argument& data) { - data.value = Matrix::create(batchSize, layerSize, false, useGpu); - data.grad = Matrix::create(batchSize, layerSize, false, useGpu); - data.value->randomizeUniform(); - data.value->add(-0.5); - data.grad->zeroMem(); - - generateSequenceStartPositions(batchSize, data.sequenceStartPositions); -} - -LayerPtr createDataLayer( - string name, size_t batchSize, int layerSize, bool useGpu, Argument& data) { - LayerConfig layerConfig; - layerConfig.set_name(name); - 
layerConfig.set_type("data"); - layerConfig.set_size(layerSize); - LayerPtr layer = LayerPtr(new DataLayer(layerConfig)); - - DataLayerPtr dataLayer = std::dynamic_pointer_cast(layer); - dataLayer->setData(data); - dataLayer->forward(PASS_GC); - - return layer; -} - -LayerPtr createLabelLayer(string name, - size_t batchSize, - size_t numClasses, - bool useGpu) { - LayerConfig layerConfig; - layerConfig.set_name(name); - layerConfig.set_type("data"); - layerConfig.set_size(1); - LayerPtr layer = LayerPtr(new DataLayer(layerConfig)); - - Argument data; - data.ids = IVector::create(batchSize, useGpu); - data.ids->rand(numClasses - 1); - - generateSequenceStartPositions(batchSize, data.sequenceStartPositions); - - DataLayerPtr labelLayer = std::dynamic_pointer_cast(layer); - labelLayer->setData(data); - labelLayer->forward(PASS_GC); - - return layer; -} - -LayerPtr createCTCLayer(string name, - size_t numClasses, - bool useGpu, - bool normByTimes, - LayerPtr dataLayer, - LayerPtr labelLayer) { - LayerMap layerMap; - layerMap[dataLayer->getName()] = dataLayer; - layerMap[labelLayer->getName()] = labelLayer; - - ParameterMap parameterMap; - - LayerConfig layerConfig; - layerConfig.set_name(name); - layerConfig.set_type("ctc"); - layerConfig.set_size(numClasses); - layerConfig.set_norm_by_times(normByTimes); - - layerConfig.add_inputs(); - LayerInputConfig& input0 = *(layerConfig.mutable_inputs(0)); - input0.set_input_layer_name(dataLayer->getName()); - - layerConfig.add_inputs(); - LayerInputConfig& input1 = *(layerConfig.mutable_inputs(1)); - input1.set_input_layer_name(labelLayer->getName()); - - LayerPtr layer = LayerPtr(new CTCLayer(layerConfig)); - layerMap[layer->getName()] = layer; - layer->init(layerMap, parameterMap); - - ActivationFunction* softmaxActivation = ActivationFunction::create("softmax"); - - softmaxActivation->forward(dataLayer->getOutput()).check(); - layer->forward(PASS_GC); - - layer->backward(); - 
softmaxActivation->backward(dataLayer->getOutput()).check(); - - return layer; -} - -LayerPtr createWarpCTCLayer(string name, - size_t numClasses, - bool useGpu, - bool normByTimes, - LayerPtr dataLayer, - LayerPtr labelLayer) { - LayerMap layerMap; - layerMap[dataLayer->getName()] = dataLayer; - layerMap[labelLayer->getName()] = labelLayer; - - ParameterMap parameterMap; - - LayerConfig layerConfig; - layerConfig.set_name(name); - layerConfig.set_type("warp_ctc"); - layerConfig.set_size(numClasses); - layerConfig.set_blank(numClasses - 1); - layerConfig.set_norm_by_times(normByTimes); - - layerConfig.add_inputs(); - LayerInputConfig& input0 = *(layerConfig.mutable_inputs(0)); - input0.set_input_layer_name(dataLayer->getName()); - - layerConfig.add_inputs(); - LayerInputConfig& input1 = *(layerConfig.mutable_inputs(1)); - input1.set_input_layer_name(labelLayer->getName()); - - LayerPtr layer = LayerPtr(new WarpCTCLayer(layerConfig)); - layerMap[layer->getName()] = layer; - layer->init(layerMap, parameterMap); - - layer->forward(PASS_GC); - layer->backward(); - - return layer; -} - -TEST(Layer, WarpCTCLayer) { - for (auto layerSize : {10, 64}) { - for (auto batchSize : {1, 10, 32}) { - for (auto normByTimes : {false, true}) { - for (auto useGpu : {false, true}) { -#ifndef PADDLE_WITH_CUDA - if (useGpu) continue; -#endif - LOG(INFO) << "layerSize=" << layerSize << " batchSize=" << batchSize - << " normByTimes = " << normByTimes << " useGpu=" << useGpu; - - FLAGS_use_gpu = useGpu; - - Argument data0; - initArgument(batchSize, layerSize, useGpu, data0); - - Argument data1; - data1.resizeAndCopyFrom(data0); - - LayerPtr dataLayer0 = - createDataLayer("data", batchSize, layerSize, useGpu, data0); - LayerPtr dataLayer1 = - createDataLayer("data", batchSize, layerSize, useGpu, data1); - - LayerPtr labelLayer = - createLabelLayer("label", batchSize, layerSize, useGpu); - - LayerPtr warpctcLayer = createWarpCTCLayer( - "cost", layerSize, useGpu, normByTimes, dataLayer0, 
labelLayer); - LayerPtr ctcLayer = createCTCLayer( - "cost", layerSize, useGpu, normByTimes, dataLayer1, labelLayer); - - /// Check cost - LOG(INFO) << "Check cost: " - << checkError(*(warpctcLayer->getOutput().value), - *(ctcLayer->getOutput().value)) - << " different elements."; - - /// Check gradients - LOG(INFO) << "Check gradients: " - << checkError(*(dataLayer0->getOutput().grad), - *(dataLayer1->getOutput().grad)) - << " different elements"; - } - } - } - } -} diff --git a/paddle/legacy/math/Allocator.h b/paddle/legacy/math/Allocator.h deleted file mode 100644 index ffb5ec1cad4113c2035daad8c385bbe57a161079..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/Allocator.h +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "hl_gpu.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -/** - * @brief Allocator base class. - * - * This is the base class of all Allocator class. - */ -class Allocator { - public: - virtual ~Allocator() {} - virtual void* alloc(size_t size) = 0; - virtual void free(void* ptr) = 0; - virtual std::string getName() = 0; -}; - -/** - * @brief CPU allocator implementation. - */ -class CpuAllocator : public Allocator { - public: - ~CpuAllocator() {} - - /** - * @brief Aligned allocation on CPU. - * @param size Size to be allocated. 
- * @return Pointer to the allocated memory - */ - virtual void* alloc(size_t size) { - void* ptr; -#ifdef PADDLE_WITH_MKLDNN - // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp - // memory alignment - CHECK_EQ(posix_memalign(&ptr, 4096ul, size), 0); -#else - CHECK_EQ(posix_memalign(&ptr, 32ul, size), 0); -#endif - CHECK(ptr) << "Fail to allocate CPU memory: size=" << size; - return ptr; - } - - /** - * @brief Free the memory space. - * @param ptr Pointer to be free. - */ - virtual void free(void* ptr) { - if (ptr) { - ::free(ptr); - } - } - - virtual std::string getName() { return "cpu_alloc"; } -}; - -/** - * @brief GPU allocator implementation. - */ -class GpuAllocator : public Allocator { - public: - ~GpuAllocator() {} - - /** - * @brief Allocate GPU memory. - * @param size Size to be allocated. - * @return Pointer to the allocated memory - */ - virtual void* alloc(size_t size) { - void* ptr = hl_malloc_device(size); - CHECK(ptr) << "Fail to allocate GPU memory " << size << " bytes"; - return ptr; - } - - /** - * @brief Free the GPU memory. - * @param ptr Pointer to be free. - */ - virtual void free(void* ptr) { - if (ptr) { - hl_free_mem_device(ptr); - } - } - - virtual std::string getName() { return "gpu_alloc"; } -}; - -/** - * @brief CPU pinned memory allocator implementation. - */ -class CudaHostAllocator : public Allocator { - public: - ~CudaHostAllocator() {} - - /** - * @brief Allocate pinned memory. - * @param size Size to be allocated. - * @return Pointer to the allocated memory - */ - virtual void* alloc(size_t size) { - void* ptr = hl_malloc_host(size); - CHECK(ptr) << "Fail to allocate pinned memory " << size << " bytes"; - return ptr; - } - - /** - * @brief Free the pinned memory. - * @param ptr Pointer to be free. 
- */ - virtual void free(void* ptr) { - if (ptr) { - hl_free_mem_host(ptr); - } - } - - virtual std::string getName() { return "cuda_host_alloc"; } -}; - -} // namespace paddle diff --git a/paddle/legacy/math/BaseMatrix.cu b/paddle/legacy/math/BaseMatrix.cu deleted file mode 100644 index 7e7cdc57a9887152ecd9e0bbd9fe14fcba56799d..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/BaseMatrix.cu +++ /dev/null @@ -1,1953 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include -#include "BaseMatrix.h" -#include "MathFunctions.h" -#include "NEONFunctions.h" -#include "SIMDFunctions.h" -#include "hl_matrix_apply.cuh" -#include "hl_matrix_base.cuh" -#include "hl_matrix_ops.cuh" - -namespace paddle { - -const char* SPARSE_SUPPORT_ERROR = "Sparse Matrix/Vector is not supported."; - -template -template -int BaseMatrixT::applyUnary(Op op) { - MatrixOffset offset(0, 0); - applyUnary(op, height_, width_, offset); - return 0; -} - -template -template -int BaseMatrixT::applyUnary(Op op, - int numRows, - int numCols, - MatrixOffset& offset) { - CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; - int dimM = numRows; - int dimN = numCols; - int lda = stride_; - - T* A = data_; - CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - - CHECK_LE(dimM + offset.aRow_, this->height_); - CHECK_LE(dimN + offset.aCol_, this->width_); - if (true == useGpu_) { - hl_gpu_apply_unary_op(op, A, dimM, dimN, lda); - } else { - hl_cpu_apply_unary_op(op, A, dimM, dimN, lda); - } - return 0; -} - -template -template -int BaseMatrixT::applyBinary(Op op, BaseMatrixT& b) { - CHECK(height_ == b.height_ && width_ == b.width_) - << "Matrix dimensions are not equal"; - - MatrixOffset offset(0, 0, 0, 0); - applyBinary(op, b, height_, width_, offset); - return 0; -} - -template -template -int BaseMatrixT::applyBinary( - Op op, BaseMatrixT& b, int numRows, int numCols, MatrixOffset& offset) { - applyBinary(op, b, numRows, numCols, offset, false_type(), false_type()); - return 0; -} - -template -template -int BaseMatrixT::applyBinary(Op op, - BaseMatrixT& b, - int numRows, - int numCols, - MatrixOffset& offset, - bAsRowVector, - bAsColVector) { - CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK(useGpu_ == b.useGpu_) << "Matrix type mismatch"; - - int dimM = numRows; - int dimN = numCols; - int lda = stride_; - int ldb = b.stride_; - - T* A = data_; - T* B = 
b.data_; - CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); - CHECK_LE(dimM + offset.aRow_, this->height_); - CHECK_LE(dimN + offset.aCol_, this->width_); - if (!bAsRowVector::value && !bAsColVector::value) { - CHECK_LE(dimM + offset.bRow_, b.height_); - CHECK_LE(dimN + offset.bCol_, b.width_); - } else if (bAsRowVector::value && !bAsColVector::value) { - CHECK_LE(dimN + offset.bCol_, b.width_); - } else if (!bAsRowVector::value && bAsColVector::value) { - CHECK_LE(dimM + offset.bRow_, b.height_); - } else { - } - if (true == useGpu_) { - hl_gpu_apply_binary_op( - op, A, B, dimM, dimN, lda, ldb); - } else { - hl_cpu_apply_binary_op( - op, A, B, dimM, dimN, lda, ldb); - } - - return 0; -} - -template -template -int BaseMatrixT::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) { - CHECK_EQ(height_, b.height_); - CHECK_EQ(width_, b.width_); - CHECK_EQ(height_, c.height_); - CHECK_EQ(width_, c.width_); - - MatrixOffset offset(0, 0, 0, 0, 0, 0); - applyTernary(op, b, c, height_, width_, offset); - - return 0; -} - -template -template -int BaseMatrixT::applyTernary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - int numRows, - int numCols, - MatrixOffset& offset) { - applyTernary(op, b, c, numRows, numCols, offset, false_type(), false_type()); - - return 0; -} - -template -template -int BaseMatrixT::applyTernary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - int numRows, - int numCols, - MatrixOffset& offset, - cAsRowVector, - cAsColVector) { - CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK(!c.isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK_EQ(useGpu_, b.useGpu_); - CHECK_EQ(useGpu_, c.useGpu_); - - int dimM = numRows; - int dimN = numCols; - int lda = stride_; - int ldb = b.stride_; - int ldc = c.stride_; - - T* A = data_; - T* B = b.data_; - T* C = c.data_; - CAL_MATRIX_START_ADDRESS(A, height_, 
width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); - CAL_MATRIX_START_ADDRESS( - C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); - - CHECK_LE(dimM + offset.aRow_, this->height_); - CHECK_LE(dimN + offset.aCol_, this->width_); - CHECK_LE(dimM + offset.bRow_, b.height_); - CHECK_LE(dimN + offset.bCol_, b.width_); - if (!cAsRowVector::value && !cAsColVector::value) { - CHECK_LE(dimM + offset.cRow_, c.height_); - CHECK_LE(dimN + offset.cCol_, c.width_); - } else if (cAsRowVector::value && !cAsColVector::value) { - CHECK_LE(dimN + offset.cCol_, c.width_); - } else if (!cAsRowVector::value && cAsColVector::value) { - CHECK_LE(dimM + offset.cRow_, c.height_); - } else { - } - - if (true == useGpu_) { - hl_gpu_apply_ternary_op( - op, A, B, C, dimM, dimN, lda, ldb, ldc); - } else { - hl_cpu_apply_ternary_op( - op, A, B, C, dimM, dimN, lda, ldb, ldc); - } - - return 0; -} - -template -template -int BaseMatrixT::applyQuaternary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - BaseMatrixT& d) { - CHECK_EQ(height_, b.height_); - CHECK_EQ(width_, b.width_); - CHECK_EQ(height_, c.height_); - CHECK_EQ(width_, c.width_); - CHECK_EQ(height_, d.height_); - CHECK_EQ(width_, d.width_); - - MatrixOffset offset(0, 0, 0, 0, 0, 0, 0, 0); - applyQuaternary(op, b, c, d, height_, width_, offset); - - return 0; -} - -template -template -int BaseMatrixT::applyQuaternary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - BaseMatrixT& d, - int numRows, - int numCols, - MatrixOffset& offset) { - CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK(!c.isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK(!d.isSparse()) << SPARSE_SUPPORT_ERROR; - CHECK_EQ(useGpu_, b.useGpu_); - CHECK_EQ(useGpu_, c.useGpu_); - CHECK_EQ(useGpu_, d.useGpu_); - - int dimM = numRows; - int dimN = numCols; - int lda = stride_; - int ldb = b.stride_; - int ldc = c.stride_; - int ldd = d.stride_; - - T* 
A = data_; - T* B = b.data_; - T* C = c.data_; - T* D = d.data_; - CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); - CAL_MATRIX_START_ADDRESS( - C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); - CAL_MATRIX_START_ADDRESS( - D, d.height_, d.width_, ldd, offset.dCol_, offset.dRow_); - - CHECK_LE(dimM + offset.aRow_, this->height_); - CHECK_LE(dimN + offset.aCol_, this->width_); - CHECK_LE(dimM + offset.bRow_, b.height_); - CHECK_LE(dimN + offset.bCol_, b.width_); - CHECK_LE(dimM + offset.cRow_, c.height_); - CHECK_LE(dimN + offset.cCol_, c.width_); - CHECK_LE(dimM + offset.dRow_, d.height_); - CHECK_LE(dimN + offset.dCol_, d.width_); - if (true == useGpu_) { - hl_gpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd); - } else { - hl_cpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd); - } - - return 0; -} - -template -template -int BaseMatrixT::aggregate(Agg agg, - Op op, - Saver sv, - BaseMatrixT& b, - int numRows, - int numCols, - MatrixOffset& offset, - aAsRowVector, - aAsColVector) { - CHECK_EQ(useGpu_, b.useGpu_); - - int ld = stride_; - int ldb = b.stride_; - - T* dst = data_; - T* B = b.data_; - CAL_MATRIX_START_ADDRESS( - dst, height_, width_, ld, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); - - if (aAsRowVector::value && !aAsColVector::value) { - if (useGpu_) { - hl_gpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B, ldb); - } else { - hl_cpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B, ldb); - } - } else if (!aAsRowVector::value && aAsColVector::value) { - if (useGpu_) { - hl_gpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B, ldb); - } else { - hl_cpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B, ldb); - } - } else { - LOG(FATAL) << "not supported"; - } - - return 0; -} - -template 
-template -int BaseMatrixT::aggregate(Agg agg, - Op op, - Saver sv, - BaseMatrixT& b, - BaseMatrixT& c, - int numRows, - int numCols, - MatrixOffset& offset, - aAsRowVector, - aAsColVector) { - CHECK_EQ(useGpu_, b.useGpu_); - CHECK_EQ(useGpu_, c.useGpu_); - - int ld = stride_; - int ldb = b.stride_; - int ldc = c.stride_; - - T* dst = data_; - T* B = b.data_; - T* C = c.data_; - CAL_MATRIX_START_ADDRESS( - dst, height_, width_, ld, offset.aCol_, offset.aRow_); - CAL_MATRIX_START_ADDRESS( - B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_); - CAL_MATRIX_START_ADDRESS( - C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_); - - if (aAsRowVector::value && !aAsColVector::value) { - if (useGpu_) { - hl_gpu_matrix_column_op( - agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc); - } else { - hl_cpu_matrix_column_op( - agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc); - } - } else if (!aAsRowVector::value && aAsColVector::value) { - if (useGpu_) { - hl_gpu_matrix_row_op( - agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc); - } else { - hl_cpu_matrix_row_op( - agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc); - } - } else { - LOG(FATAL) << "not supported"; - } - - return 0; -} - -/** - * @brief unary operator. 
- * - */ - -DEFINE_MATRIX_UNARY_OP(Neg, a = -a); -template -void BaseMatrixT::neg() { - applyUnary(unary::Neg()); -} - -DEFINE_MATRIX_UNARY_OP(Exp, a = exp(a)); -template <> -void BaseMatrixT::exp2() { - applyUnary(unary::Exp()); -} - -DEFINE_MATRIX_UNARY_OP(Log, a = log(a)); -template <> -void BaseMatrixT::log2() { - if (useGpu_) { - applyUnary(unary::Log()); - } else { - vLog(height_ * width_, data_, data_); - } -} - -DEFINE_MATRIX_UNARY_OP(Sqrt, a = sqrt(a)); -template <> -void BaseMatrixT::sqrt2() { - applyUnary(unary::Sqrt()); -} - -DEFINE_MATRIX_UNARY_OP(Square, a = a * a); -template -void BaseMatrixT::square2() { - applyUnary(unary::Square()); -} - -DEFINE_MATRIX_UNARY_OP(Reciprocal, a = 1.0f / a); -template -void BaseMatrixT::reciprocal2() { - applyUnary(unary::Reciprocal()); -} - -DEFINE_MATRIX_UNARY_OP(Abs, a = a > 0 ? a : -a); -template -void BaseMatrixT::abs2() { - applyUnary(unary::Abs()); -} - -DEFINE_MATRIX_UNARY_OP(Sign, a = (a > 0) - (a < 0)); -template -void BaseMatrixT::sign2() { - applyUnary(unary::Sign()); -} - -DEFINE_MATRIX_UNARY_OP(Zero, a = 0); -template -void BaseMatrixT::zero() { - applyUnary(unary::Zero()); -} - -template -void BaseMatrixT::zeroAtOffset(int64_t columnOffset, int64_t numColumns) { - int numRows = height_; - int numCols = numColumns; - MatrixOffset offset(columnOffset, 0); - applyUnary(unary::Zero(), numRows, numCols, offset); -} - -DEFINE_MATRIX_UNARY_OP(One, a = 1); -template -void BaseMatrixT::one() { - applyUnary(unary::One()); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(a, p)); -template <> -void BaseMatrixT::pow2(real p) { - if (useGpu_) { - applyUnary(unary::Pow(p)); - } else { - vPow(height_ * width_, data_, p, data_); - } -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a -= p); -template -void BaseMatrixT::subScalar(T p) { - applyUnary(unary::SubScalar(p)); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a *= p); -template -void BaseMatrixT::mulScalar(T p) { 
- applyUnary(unary::MulScalar(p)); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a /= p); -template -void BaseMatrixT::divScalar(T p) { - applyUnary(unary::DivScalar(p)); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(Assign, ONE_PARAMETER, a = p); -template -void BaseMatrixT::assign(T p) { - applyUnary(unary::Assign(p)); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(Add, ONE_PARAMETER, a += p); -template -void BaseMatrixT::add(T p) { - applyUnary(unary::Add(p)); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = a * p1 + p2); -template -void BaseMatrixT::add(T p1, T p2) { - applyUnary(unary::Add2(p1, p2)); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, - TWO_PARAMETER, - a = a < p1 ? p1 : (a > p2 ? p2 : a)); -template -void BaseMatrixT::clip(T p1, T p2) { - applyUnary(unary::Clip(p1, p2)); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, - TWO_PARAMETER, - a = b < p1 ? 0 : (b > p2 ? 0 : 1)); -template -void BaseMatrixT::clipDerivative(BaseMatrixT& b, T p1, T p2) { - applyBinary(binary::ClipDerivative(p1, p2), b); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, - ONE_PARAMETER, - a = a > p ? 1.0f : 0.0f); -template -void BaseMatrixT::biggerThanScalar(T p) { - applyUnary(unary::BiggerThanScalar(p)); -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(DownClip, ONE_PARAMETER, a = a > p ? a : p); -template -void BaseMatrixT::downClip(T p) { - applyUnary(unary::DownClip(p)); -} - -/** - * @brief binary operator. 
- * - */ - -DEFINE_MATRIX_BINARY_OP(Add, a += b); -template -void BaseMatrixT::add(BaseMatrixT& b) { - applyBinary(binary::Add(), b); -} - -template <> -void BaseMatrixT::add(BaseMatrixT& b) { - if (useGpu_) { - applyBinary(binary::Add(), b); - } else { // cpu branch - CHECK_EQ(height_, b.height_); - CHECK_EQ(width_, b.width_); - vAdd(height_ * width_, data_, b.data_, data_); - } -} - -template -void BaseMatrixT::addAtOffset(BaseMatrixT& b, int64_t columnOffset) { - if (columnOffset + b.width_ <= width_) { - int numRows = height_; - int numCols = b.width_; - MatrixOffset offset(columnOffset, 0, 0, 0); - applyBinary(binary::Add(), b, numRows, numCols, offset); - } else if (columnOffset + width_ <= b.width_) { - int numRows = height_; - int numCols = width_; - MatrixOffset offset(0, 0, columnOffset, 0); - applyBinary(binary::Add(), b, numRows, numCols, offset); - } else { - LOG(FATAL) << "Wrong argument " - << " a.width=" << width_ << " b.width=" << b.width_ - << " columnOffset=" << columnOffset; - } -} - -template -void BaseMatrixT::addP2P(BaseMatrixT& b) { - T* A = data_; - T* B = b.data_; - int dimM = height_; - int dimN = width_; - - hl_gpu_apply_binary_op, 0, 0>( - binary::Add(), A, B, dimM, dimN, dimN, dimN); -} - -template -void BaseMatrixT::addColVector(BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0); - int numRows = height_; - int numCols = width_; - applyBinary(binary::Add(), - b, - numRows, - numCols, - offset, - false_type(), - true_type() /* bAsColVector */); -} - -template -void BaseMatrixT::addRowVector(BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0); - int numRows = height_; - int numCols = width_; - applyBinary(binary::Add(), - b, - numRows, - numCols, - offset, - true_type() /* bAsRowVector */, - false_type()); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(Add1, ONE_PARAMETER, a += b * p); -template -void BaseMatrixT::add(BaseMatrixT& b, T p) { - applyBinary(binary::Add1(p), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = 
pow(b, p)); -template <> -void BaseMatrixT::pow2(BaseMatrixT& b, real p) { - if (useGpu_) { - applyBinary(binary::Pow(p), b); - } else { - vPow(height_ * width_, b.data_, p, data_); - } -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = p1 * a + p2 * b); -template -void BaseMatrixT::add(BaseMatrixT& b, T p1, T p2) { - applyBinary(binary::Add2(p1, p2), b); -} - -template -void BaseMatrixT::addBias(BaseMatrixT& b, T scale) { - MatrixOffset offset(0, 0, 0, 0); - int numRows = height_; - int numCols = width_; - applyBinary(binary::Add1(scale), - b, - numRows, - numCols, - offset, - true_type() /* bAsRowVector */, - false_type()); -} - -DEFINE_MATRIX_BINARY_OP(Sub, a -= b); -template -void BaseMatrixT::sub(BaseMatrixT& b) { - applyBinary(binary::Sub(), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(Sub1, ONE_PARAMETER, a -= b * p); -template -void BaseMatrixT::sub(BaseMatrixT& b, T p) { - applyBinary(binary::Sub1(p), b); -} - -DEFINE_MATRIX_BINARY_OP(Relu, b = a > 0.0f ? a : 0.0f); -template -void BaseMatrixT::relu(BaseMatrixT& b) { - applyBinary(binary::Relu(), b); -} - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) -template <> -void BaseMatrixT::relu(BaseMatrixT& b) { - neon::relu(data_, b.data_, height_ * width_); -} -#endif - -DEFINE_MATRIX_BINARY_OP(ReluDerivative, a *= (b > 0.0f ? 1.0f : 0.0f)); -template -void BaseMatrixT::reluDerivative(BaseMatrixT& b) { - applyBinary(binary::ReluDerivative(), b); -} - -DEFINE_MATRIX_BINARY_OP(Softrelu, const T THRESHOLD = 40.0; - b = log(1.0 + exp((a > THRESHOLD) - ? THRESHOLD - : ((a < -THRESHOLD) ? (-THRESHOLD) - : a)))); -template <> -void BaseMatrixT::softrelu(BaseMatrixT& b) { - applyBinary(binary::Softrelu(), b); -} - -DEFINE_MATRIX_BINARY_OP( - SoftreluDerivative, const T THRESHOLD = 40.0; - a *= (1.0 - exp(-1.0 * ((b > THRESHOLD) - ? THRESHOLD - : ((b < -THRESHOLD) ? 
(-THRESHOLD) : b))))); -template <> -void BaseMatrixT::softreluDerivative(BaseMatrixT& b) { - applyBinary(binary::SoftreluDerivative(), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(Brelu, TWO_PARAMETER, b = a > p1 ? a : p1; - b = b < p2 ? b : p2); -template -void BaseMatrixT::brelu(BaseMatrixT& b) { - int p1 = 0, p2 = 24; //! TODO(yuyang18): Make p1,p2 configuable. - applyBinary(binary::Brelu(p1, p2), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(BreluDerivative, - TWO_PARAMETER, - a *= (b > p1 && b < p2) ? 1.0 : 0.0); -template -void BaseMatrixT::breluDerivative(BaseMatrixT& b) { - int p1 = 0, p2 = 24; - applyBinary(binary::BreluDerivative(p1, p2), b); -} - -DEFINE_MATRIX_BINARY_OP(Square, b = a * a); -template -void BaseMatrixT::square2(BaseMatrixT& b) { - applyBinary(binary::Square(), b); -} - -DEFINE_MATRIX_BINARY_OP(SquareDerivative, a *= 2.0 * b); -template -void BaseMatrixT::squareDerivative(BaseMatrixT& b) { - applyBinary(binary::SquareDerivative(), b); -} - -DEFINE_MATRIX_BINARY_OP(Tanh, T tmp = -2.0 * a; - tmp = (tmp > EXP_MAX_INPUT) ? 
EXP_MAX_INPUT : tmp; - b = 2.0 / (1.0 + std::exp(tmp)) - 1.0); -template <> -void BaseMatrixT::tanh(BaseMatrixT& b) { - applyBinary(binary::Tanh(), b); -} - -DEFINE_MATRIX_BINARY_OP(TanhDerivative, a *= 1 - b * b); -template -void BaseMatrixT::tanhDerivative(BaseMatrixT& b) { - applyBinary(binary::TanhDerivative(), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP( - ScaledTanh, TWO_PARAMETER, b = p1 * (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0)); -template <> -void BaseMatrixT::scaledTanh(BaseMatrixT& b, real p1, real p2) { - applyBinary(binary::ScaledTanh(p1, p2), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanhDerivative, - TWO_PARAMETER, - a *= p2 * (p1 - b * b)); -template -void BaseMatrixT::scaledTanhDerivative(BaseMatrixT& b, T p1, T p2) { - applyBinary(binary::ScaledTanhDerivative(p1 * p1, p2 / p1), b); -} - -DEFINE_MATRIX_BINARY_OP(Reciprocal, b = 1.0f / a); -template -void BaseMatrixT::reciprocal2(BaseMatrixT& b) { - applyBinary(binary::Reciprocal(), b); -} - -DEFINE_MATRIX_BINARY_OP(ReciprocalDerivative, a *= -b * b); -template -void BaseMatrixT::reciprocalDerivative(BaseMatrixT& b) { - applyBinary(binary::ReciprocalDerivative(), b); -} - -DEFINE_MATRIX_BINARY_OP(Abs, b = a > 0.0f ? a : -a); -template -void BaseMatrixT::abs2(BaseMatrixT& b) { - applyBinary(binary::Abs(), b); -} - -DEFINE_MATRIX_BINARY_OP(AbsDerivative, a = (b > 0) ? a : (b < 0) ? -a : 0); -template -void BaseMatrixT::absDerivative(BaseMatrixT& b) { - applyBinary(binary::AbsDerivative(), b); -} - -DEFINE_MATRIX_BINARY_OP(Sigmoid, const T THRESHOLD_MIN = -40.0; - const T THRESHOLD_MAX = 13.0; - T tmp = (a < THRESHOLD_MIN) - ? THRESHOLD_MIN - : ((a > THRESHOLD_MAX) ? 
THRESHOLD_MAX : a); - b = 1.0f / (1.0f + exp(-tmp))); -template <> -void BaseMatrixT::sigmoid(BaseMatrixT& b) { - if (useGpu_) { - applyBinary(binary::Sigmoid(), b); - } else { // cpu versioni - size_t numSamples = this->height_; - size_t dim = this->width_; - CHECK_EQ(b.height_, numSamples); - CHECK_EQ(b.width_, dim); - const real* in = this->data_; - real* out = b.data_; - - // out = - in - const float THRESHOLD_MIN = -40.0; // make sure sigmoid(x) > 0 - const float THRESHOLD_MAX = 13.0; // make sure sigmoid(x) < 1 - for (size_t i = 0; i < numSamples * dim; ++i) { - real tmp = in[i]; - tmp = (tmp < THRESHOLD_MIN) - ? THRESHOLD_MIN - : ((tmp > THRESHOLD_MAX) ? THRESHOLD_MAX : tmp); - out[i] = -tmp; - } - - // out = exp(out) - vExp(numSamples * dim, out, out); - - // out = 1 / (1 + out) - for (size_t i = 0; i < numSamples * dim; ++i) { - out[i] = 1 / (1 + out[i]); - } - } -} - -DEFINE_MATRIX_BINARY_OP(SigmoidDerivative, a *= b * (1 - b)); -template -void BaseMatrixT::sigmoidDerivative(BaseMatrixT& b) { - applyBinary(binary::SigmoidDerivative(), b); -} - -DEFINE_MATRIX_BINARY_OP(ExpDerivative, a *= b); -template -void BaseMatrixT::expDerivative(BaseMatrixT& b) { - applyBinary(binary::ExpDerivative(), b); -} - -DEFINE_MATRIX_BINARY_OP(Sign, b = a > 0.0f ? 
1.0f : -1.0f); -template -void BaseMatrixT::sign2(BaseMatrixT& b) { - applyBinary(binary::Sign(), b); -} - -DEFINE_MATRIX_BINARY_OP(Exp, a = exp(b)); -template <> -void BaseMatrixT::exp2(BaseMatrixT& b) { - applyBinary(binary::Exp(), b); -} - -DEFINE_MATRIX_BINARY_OP(Log, a = log(b)); -template <> -void BaseMatrixT::log2(BaseMatrixT& b) { - if (useGpu_) { - applyBinary(binary::Log(), b); - } else { - vLog(height_ * width_, b.data_, data_); - } -} - -DEFINE_MATRIX_BINARY_OP(Sqrt, a = sqrt(b)); -template <> -void BaseMatrixT::sqrt2(BaseMatrixT& b) { - applyBinary(binary::Sqrt(), b); -} - -DEFINE_MATRIX_BINARY_OP(InvSqrt, a = 1.0f / sqrt(b)); -template <> -void BaseMatrixT::invSqrt(BaseMatrixT& b) { - if (useGpu_) { - applyBinary(binary::InvSqrt(), b); - } else { // cpu branch - CHECK_EQ(height_, b.height_); - CHECK_EQ(width_, b.width_); - vInvSqrt(height_ * width_, b.data_, data_); - } -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(IsEqual, ONE_PARAMETER, a = (b == p)); -template -void BaseMatrixT::isEqualTo(BaseMatrixT& b, T value) { - applyBinary(binary::IsEqual(value), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(AddScalar, ONE_PARAMETER, a = b + p); -template -void BaseMatrixT::addScalar(BaseMatrixT& b, T p) { - applyBinary(binary::AddScalar(p), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a = b - p); -template -void BaseMatrixT::subScalar(BaseMatrixT& b, T p) { - applyBinary(binary::SubScalar(p), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a = b * p); -template -void BaseMatrixT::mulScalar(BaseMatrixT& b, T p) { - applyBinary(binary::MulScalar(p), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a = b / p); -template -void BaseMatrixT::divScalar(BaseMatrixT& b, T p) { - applyBinary(binary::DivScalar(p), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(ScalarDiv, ONE_PARAMETER, a = p / b); -template -void BaseMatrixT::scalarDiv(BaseMatrixT& b, T p) { - applyBinary(binary::ScalarDiv(p), b); -} - -/** - * 
@brief ternary operator. - * - */ - -DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropy, - a = -c * log(b) - (1 - c) * log(1 - b)); -template <> -void BaseMatrixT::softCrossEntropy(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::SoftCrossEntropy(), b, c); -} - -DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropyBp, a += (b - c) / (b * (1 - b))); -template -void BaseMatrixT::softCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::SoftCrossEntropyBp(), b, c); -} - -DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropy, - a = c > 0.5 ? -log(b) : -log(1.0 - b)); -template <> -void BaseMatrixT::binaryLabelCrossEntropy(BaseMatrixT& b, - BaseMatrixT& c) { - if (useGpu_) { - applyTernary(ternary::BinaryCrossEntropy(), b, c); - } else { - CHECK_EQ(height_, b.height_); - CHECK_EQ(height_, c.height_); - CHECK_EQ(width_, b.width_); - CHECK_EQ(width_, c.width_); - - size_t size = height_ * width_; - real* out = b.data_; - real* label = c.data_; - real* cost = data_; - - for (size_t i = 0; i < size; ++i) { - cost[i] = label[i] > 0.5 ? out[i] : 1.0 - out[i]; - } - vLog(size, cost, cost); - for (size_t i = 0; i < size; ++i) { - cost[i] *= -1.0; - } - } -} - -DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropyBp, - a += c > 0.5 ? 
-1.0 / b : 1.0 / (1.0 - b)); -template -void BaseMatrixT::binaryLabelCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::BinaryCrossEntropyBp(), b, c); -} - -DEFINE_MATRIX_TERNARY_OP(Add, a = b + c); -template -void BaseMatrixT::add(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::Add(), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add1, TWO_PARAMETER, a = p1 * b + p2 * c); -template -void BaseMatrixT::add(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) { - applyTernary(ternary::Add1(p1, p2), b, c); -} - -DEFINE_MATRIX_TERNARY_OP(Sub, a = b - c); -template -void BaseMatrixT::sub(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::Sub(), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(Sub1, TWO_PARAMETER, a = p1 * b - p2 * c); -template -void BaseMatrixT::sub(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) { - applyTernary(ternary::Sub1(p1, p2), b, c); -} - -DEFINE_MATRIX_TERNARY_OP(Add2, a = a + b + c); -template -void BaseMatrixT::add2(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::Add2(), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add3, - THREE_PARAMETER, - a = p1 * a + p2 * b + p3 * c); -template -void BaseMatrixT::add2(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) { - applyTernary(ternary::Add3(p1, p2, p3), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(SgdUpdate, - THREE_PARAMETER, - c = p2 * c - p1 * (b + p3 * a); - a = a + c); -template -void BaseMatrixT::sgdUpdate(BaseMatrixT& b, // grad - BaseMatrixT& c, // mom - T p1, // learningRate, - T p2, // momentum, - T p3) { // decayRate - applyTernary(ternary::SgdUpdate(p1, p2, p3), b, c); -} - -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(SgdUpdate, - THREE_PARAMETER, - c = p2 * c - p1 * d * (b + p3 * a); - a += c); -template -void BaseMatrixT::sgdUpdate(BaseMatrixT& b, // grad, - BaseMatrixT& c, // mom, - BaseMatrixT& d, // lr, - T p1, // learningRate, - T p2, // momentum, - T p3) { // decayRate - applyQuaternary(quaternary::SgdUpdate(p1, p2, p3), b, c, d); -} - 
-DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p * b; - a = (a > lambda) - ? (a - lambda) - : (a < -lambda) ? (a + lambda) : 0); -template -void BaseMatrixT::applyL1(BaseMatrixT& lr, T learningRate, T decayRate) { - applyBinary(binary::ApplyL1(learningRate * decayRate), lr); -} - -template <> -void BaseMatrixT::applyL1(BaseMatrixT& lr, - real learningRate, - real decayRate) { - if (useGpu_) { - applyBinary(binary::ApplyL1(learningRate * decayRate), lr); - } else { - simd::decayL1(this->data_, - this->data_, - lr.data_, - learningRate * decayRate, - height_ * width_); - } -} - -DEFINE_MATRIX_UNARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p; - a = (a > lambda) - ? (a - lambda) - : (a < -lambda) ? (a + lambda) : 0); -template -void BaseMatrixT::applyL1(T learningRate, T decayRate) { - applyUnary(unary::ApplyL1(learningRate * decayRate)); -} - -template <> -void BaseMatrixT::applyL1(real learningRate, real decayRate) { - if (useGpu_) { - applyUnary(unary::ApplyL1(learningRate * decayRate)); - } else { - simd::decayL1( - this->data_, this->data_, learningRate * decayRate, height_ * width_); - } -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL2, - ONE_PARAMETER, - a *= (1.0f / (1.0f + p * b))); -template -void BaseMatrixT::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) { - if (useGpu_) { - applyBinary(binary::ApplyL2(learningRate * decayRate), lr); - } else { - size_t size = this->height_ * this->width_; - T decay = learningRate * decayRate; - for (size_t j = 0; j < size; ++j) { - this->data_[j] *= 1.0f / (1.0f + decay * lr.data_[j]); - } - } -} - -template -void BaseMatrixT::applyL2(T learningRate, T decayRate) { - BaseMatrixT::mulScalar(1.0f / (1.0f + learningRate * decayRate)); -} - -DEFINE_MATRIX_BINARY_OP(DotMul, a *= b); -template -void BaseMatrixT::dotMul(BaseMatrixT& b) { - applyBinary(binary::DotMul(), b); -} - -DEFINE_MATRIX_TERNARY_OP(DotMul, a = b * c); -template -void BaseMatrixT::dotMul(BaseMatrixT& b, BaseMatrixT& c) { - 
applyTernary(ternary::DotMul(), b, c); -} - -DEFINE_MATRIX_TERNARY_OP(DotDiv, a = (b == 0.0) ? 0.0 : b / c); -template -void BaseMatrixT::dotDiv(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::DotDiv(), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotDiv2P, - TWO_PARAMETER, - a = (b + p1) / (c + p2)); -template -void BaseMatrixT::dotDiv(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { - applyTernary(ternary::DotDiv2P(p1, p2), b, c); -} - -DEFINE_MATRIX_QUATERNARY_OP(RankLoss, const T THRESHOLD = 40.0; a = b - c; - a = (a > THRESHOLD) - ? THRESHOLD - : ((a < -THRESHOLD) ? (-THRESHOLD) : a); - a = log(1 + exp(a)) - a * d); -template <> -void BaseMatrixT::rankLoss(BaseMatrixT& b, - BaseMatrixT& c, - BaseMatrixT& d) { - applyQuaternary(quaternary::RankLoss(), b, c, d); -} - -DEFINE_MATRIX_QUATERNARY_OP(RankLossBp, const T THRESHOLD = 40.0; a = b - c; - a = (a > THRESHOLD) - ? THRESHOLD - : ((a < -THRESHOLD) ? (-THRESHOLD) : a); - a = exp(a); - a = (a / (1 + a) - d)); -template <> -void BaseMatrixT::rankLossBp(BaseMatrixT& b, - BaseMatrixT& c, - BaseMatrixT& d) { - applyQuaternary(quaternary::RankLossBp(), b, c, d); -} - -/* this = log(1 + exp(b)) - c * b */ -DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLoss, const T THRESHOLD = 40.0; - T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD) - ? -THRESHOLD - : b; - a = log(1 + exp(x)) - c * x); -template <> -void BaseMatrixT::logisticRegressionLoss(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::LogisticRegressionLoss(), b, c); -} - -/* this = exp(b)/(1+exp(b)) - c */ -DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLossBp, const T THRESHOLD = 40.0; - T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD) - ? -THRESHOLD - : b; - x = exp(x); - a = x / (1 + x) - c); -template <> -void BaseMatrixT::logisticRegressionLossBp(BaseMatrixT& b, - BaseMatrixT& c) { - applyTernary(ternary::LogisticRegressionLossBp(), b, c); -} - -DEFINE_MATRIX_TERNARY_OP(BiggerThan, a = (b > c) ? 
1.0f : 0.0f); -template -void BaseMatrixT::biggerThan(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::BiggerThan(), b, c); -} - -DEFINE_MATRIX_QUATERNARY_OP( - BiggerThan, a = ((b > c && d > 0.5f) || (b < c && d < 0.5f)) ? 1.0f : 0.0f); -template -void BaseMatrixT::biggerThan(BaseMatrixT& b, - BaseMatrixT& c, - BaseMatrixT& d) { - applyQuaternary(quaternary::BiggerThan(), b, c, d); -} - -DEFINE_MATRIX_TERNARY_OP(Max, a = (b > c) ? b : c); -template -void BaseMatrixT::max2(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::Max(), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(BinaryClassificationError, - ONE_PARAMETER, - c += ((a > p) == (b > p)) ? 0.0f : 1.0f); -template -void BaseMatrixT::binaryClassificationError2(size_t destCol, - BaseMatrixT& b, - BaseMatrixT& c, - T p) { - CHECK(!useGpu_) << "do not support gpu"; - MatrixOffset offset(0, 0, 0, 0, destCol, 0); - int numRows = b.height_; - int numCols = b.width_; - b.applyTernary(ternary::BinaryClassificationError(p), - c, - *this, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); -} - -template <> -void BaseMatrixT::binaryClassificationError(size_t destCol, - BaseMatrixT& b, - BaseMatrixT& c, - real p) { - MatrixOffset offset(destCol, 0, 0, 0, 0, 0); - int numRows = b.height_; - int numCols = b.width_; - aggregate(aggregate::sum(), - base::binary::classificationError(p), - base::binary::add(), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*aAsColVector*/); -} - -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(Add3, - THREE_PARAMETER, - a = p1 * b + p2 * c + p3 * d); -template -void BaseMatrixT::add3( - BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1, T p2, T p3) { - applyQuaternary(quaternary::Add3(p1, p2, p3), b, c, d); -} - -DEFINE_MATRIX_TERNARY_OP(DotMulSquare, a = b * c * c); -template -void BaseMatrixT::dotMulSquare(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::DotMulSquare(), b, c); -} - 
-DEFINE_MATRIX_TERNARY_OP(DotSquareSquare, a = b * b * c * c); -template -void BaseMatrixT::dotSquareSquare(BaseMatrixT& b, BaseMatrixT& c) { - applyTernary(ternary::DotSquareSquare(), b, c); -} - -DEFINE_MATRIX_BINARY_OP(DotMulSquare, a *= b * b); -template -void BaseMatrixT::dotMulSquare(BaseMatrixT& b) { - applyBinary(binary::DotMulSquare(), b); -} - -DEFINE_MATRIX_BINARY_OP(DotSquareMul, a = a * a * b); -template -void BaseMatrixT::dotSquareMul(BaseMatrixT& b) { - applyBinary(binary::DotSquareMul(), b); -} - -DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(AddSquareSum, - THREE_PARAMETER, - T tmp = p1 * b + p2 * c + p3 * d; - a += tmp * tmp); -template -void BaseMatrixT::addSquareSum( - BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d, T p1, T p2, T p3) { - applyQuaternary(quaternary::AddSquareSum(p1, p2, p3), b, c, d); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(AddSquare, ONE_PARAMETER, a += p * b * b); -template -void BaseMatrixT::addSquare(BaseMatrixT& b, T p) { - applyBinary(binary::AddSquare(p), b); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(DecayAddSquare, - TWO_PARAMETER, - a = p1 * a + p2 * b * b); -template -void BaseMatrixT::decayAddSquare(BaseMatrixT& b, T p1, T p2) { - applyBinary(binary::DecayAddSquare(p1, p2), b); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DecayAddSquareMul, - TWO_PARAMETER, - a = p1 * a + p2 * b * b * c * c); -template -void BaseMatrixT::decayAddSquareMul(BaseMatrixT& b, - BaseMatrixT& c, - T p1, - T p2) { - applyTernary(ternary::DecayAddSquareMul(p1, p2), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(ReciprocalSum, - THREE_PARAMETER, - a = 1 / (p1 * b + p2 * c + p3)); -template -void BaseMatrixT::reciprocalSum( - BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) { - applyTernary(ternary::ReciprocalSum(p1, p2, p3), b, c); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(Reciprocal2, - TWO_PARAMETER, - a = 1 / (p1 * b + p2)); -template -void BaseMatrixT::reciprocal2(BaseMatrixT& b, T p1, T p2) { - applyBinary(binary::Reciprocal2(p1, p2), b); -} - 
-DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSquareSum, - TWO_PARAMETER, - T tmp = p1 * b + p2 * c; - a *= tmp * tmp); -template -void BaseMatrixT::dotMulSquareSum(BaseMatrixT& b, - BaseMatrixT& c, - T p1, - T p2) { - applyTernary(ternary::DotMulSquareSum(p1, p2), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotSquareSum, - TWO_PARAMETER, - T tmp = p1 * b + p2 * c; - a = tmp * tmp); -template -void BaseMatrixT::dotSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { - applyTernary(ternary::DotSquareSum(p1, p2), b, c); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSum, - TWO_PARAMETER, - a *= p1 * b + p2 * c); -template -void BaseMatrixT::dotMulSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { - applyTernary(ternary::DotMulSum(p1, p2), b, c); -} - -DEFINE_MATRIX_BINARY_OP(CopyAndClear, b = a; a = 0); -template -void BaseMatrixT::copyAndClear(BaseMatrixT& b) { - applyBinary(binary::CopyAndClear(), b); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(AddDotMul, - TWO_PARAMETER, - a = p1 * a + p2 * b * c); -template -void BaseMatrixT::addDotMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) { - applyTernary(ternary::AddDotMul(p1, p2), b, c); -} - -DEFINE_MATRIX_BINARY_OP(Assign, a = b;); -template -void BaseMatrixT::assign(BaseMatrixT& b) { - if (useGpu_) { - applyBinary(binary::Assign(), b); - } else { // cpu version - CHECK_EQ(this->height_, b.height_); - CHECK_EQ(this->width_, b.width_); - memcpy(data_, b.data_, sizeof(T) * height_ * width_); - } -} - -template -void BaseMatrixT::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) { - if (columnOffset + b.width_ <= width_) { - int numRows = height_; - int numCols = b.width_; - MatrixOffset offset(columnOffset, 0, 0, 0); - applyBinary(binary::Assign(), b, numRows, numCols, offset); - } else if (columnOffset + width_ <= b.width_) { - int numRows = height_; - int numCols = width_; - MatrixOffset offset(0, 0, columnOffset, 0); - applyBinary(binary::Assign(), b, numRows, numCols, offset); - } else { - LOG(FATAL) << "Wrong 
argument " - << " a.width=" << width_ << " b.width=" << b.width_ - << " columnOffset=" << columnOffset; - } -} - -DEFINE_MATRIX_BINARY_OP(DeepSwap, T tmp = a; a = b; b = tmp); -template -void BaseMatrixT::deepSwap(BaseMatrixT& b) { - applyBinary(binary::DeepSwap(), b); -} - -template <> -void BaseMatrixT::rowDotMul(size_t destCol, - BaseMatrixT& b, - BaseMatrixT& c) { - int numRows = b.height_; - int numCols = b.width_; - MatrixOffset offset(destCol, 0, 0, 0, 0, 0); - aggregate(aggregate::sum(), - base::binary::mul(), - base::binary::add(), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*aAsColVector*/); -} - -template -void BaseMatrixT::rowDotMul2(size_t destCol, - BaseMatrixT& b, - BaseMatrixT& c) { - CHECK(!useGpu_) << "do not support gpu"; - - size_t height = this->height_; - CHECK_LT(destCol, this->width_); - CHECK_EQ(height, b.height_); - CHECK_EQ(height, c.height_); - CHECK_EQ(b.width_, c.width_); - size_t width = b.width_; - T* A = this->data_; - const T* B = b.data_; - const T* C = c.data_; - for (size_t i = 0; i < height; - ++i, A += this->width_, B += width, C += width) { - for (size_t j = 0; j < width; ++j) { - A[destCol] += B[j] * C[j]; - } - } -} - -template <> -void BaseMatrixT::addDotMulVMM(BaseMatrixT& b, BaseMatrixT& c) { - MatrixOffset offset(0, 0, 0, 0, 0, 0); - int numRows = b.height_; - int numCols = b.width_; - aggregate(aggregate::sum(), - base::binary::mul(), - base::binary::add(), - b, - c, - numRows, - numCols, - offset, - true_type() /*aAsRowVector*/, - false_type()); -} - -template -void BaseMatrixT::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) { - CHECK(!useGpu_) << "do not support gpu"; - - CHECK_EQ(height_, 1LU); - CHECK_EQ(b.height_, c.height_); - CHECK_EQ(width_, b.width_); - CHECK_EQ(width_, c.width_); - size_t height = b.height_; - size_t width = b.width_; - T* A = this->data_; - const T* B = b.data_; - const T* C = c.data_; - for (size_t i = 0; i < height; ++i, B += width, C += width) { - for (size_t j 
= 0; j < width; ++j) { - A[j] += B[j] * C[j]; - } - } -} - -DEFINE_MATRIX_TERNARY_OP(addDotMulMMV, a += b * c); -template -void BaseMatrixT::addDotMulMMV(BaseMatrixT& b, BaseMatrixT& c) { - MatrixOffset offset(0, 0, 0, 0, 0, 0); - int numRows = height_; - int numCols = width_; - applyTernary(ternary::addDotMulMMV(), - b, - c, - numRows, - numCols, - offset, - true_type() /*cAsRowVector*/, - false_type()); -} - -template -void BaseMatrixT::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) { - CHECK(!useGpu_) << "do not support gpu"; - - CHECK_EQ(c.height_, 1LU); - CHECK_EQ(height_, b.height_); - CHECK_EQ(width_, b.width_); - CHECK_EQ(width_, c.width_); - size_t height = height_; - size_t width = width_; - T* A = this->data_; - const T* B = b.data_; - const T* C = c.data_; - for (size_t i = 0; i < height; ++i, A += width, B += width) { - for (size_t j = 0; j < width; ++j) { - A[j] += B[j] * C[j]; - } - } -} - -template -void BaseMatrixT::rowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { - MatrixOffset offset(0, 0, 0, 0, cCol, 0); - int numRows = height_; - int numCols = width_; - applyTernary(ternary::DotMul(), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); -} - -template -void BaseMatrixT::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { - CHECK(!useGpu_) << "do not support gpu"; - - size_t height = this->height_; - size_t width = this->width_; - CHECK_EQ(height, b.height_); - CHECK_EQ(width, b.width_); - CHECK_LT(cCol, c.width_); - CHECK_EQ(height, c.height_); - T* A = this->data_; - const T* B = b.data_; - const T* C = c.data_; - for (size_t i = 0; i < height; ++i, A += width, B += width, C += c.width_) { - for (size_t j = 0; j < width; ++j) { - A[j] = B[j] * C[cCol]; - } - } -} - -template -void BaseMatrixT::colScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) { - MatrixOffset offset(0, 0, 0, 0, 0, cRow); - int numRows = height_; - int numCols = width_; - applyTernary(ternary::DotMul(), - b, - c, - numRows, - 
numCols, - offset, - true_type() /* cAsRowVector */, - false_type() /* cAsColVector */); -} - -template -void BaseMatrixT::addColScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) { - MatrixOffset offset(0, 0, 0, 0, 0, cRow); - int numRows = height_; - int numCols = width_; - applyTernary(ternary::addDotMulMMV(), - b, - c, - numRows, - numCols, - offset, - true_type() /* cAsRowVector */, - false_type() /* cAsColVector */); -} - -template -void BaseMatrixT::addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { - MatrixOffset offset(0, 0, 0, 0, cCol, 0); - int numRows = height_; - int numCols = width_; - applyTernary(ternary::addDotMulMMV(), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); -} - -DEFINE_MATRIX_TERNARY_PARAMETER_OP(RowAdd, ONE_PARAMETER, a = b + p * c); -template -void BaseMatrixT::rowAdd(size_t cCol, BaseMatrixT& b, BaseMatrixT& c, T p) { - MatrixOffset offset(0, 0, 0, 0, cCol, 0); - int numRows = height_; - int numCols = width_; - applyTernary(ternary::RowAdd(p), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); -} - -DEFINE_MATRIX_TERNARY_OP(RowPow, a = pow(b, c)); -template <> -void BaseMatrixT::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) { - if (useGpu_) { - MatrixOffset offset(0, 0, 0, 0, cCol, 0); - int numRows = height_; - int numCols = width_; - applyTernary(ternary::RowPow(), - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*cAsColVector*/); - } else { - size_t height = this->height_; - size_t width = this->width_; - CHECK_EQ(height, b.height_); - CHECK_EQ(width, b.width_); - CHECK_LT(cCol, c.width_); - CHECK_EQ(height, c.height_); - real* A = this->data_; - const real* B = b.data_; - const real* C = c.data_; - for (size_t i = 0; i < height; ++i, A += width, B += width, C += c.width_) { - vPow(width, B, C[cCol], A); - } - } -} - -template -void BaseMatrixT::mulRowVector(BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0); - 
int numRows = height_; - int numCols = width_; - applyBinary(binary::DotMul(), - b, - numRows, - numCols, - offset, - true_type() /* bAsRowVector */, - false_type()); -} - -DEFINE_MATRIX_BINARY_OP(DotDiv, a /= b); -template -void BaseMatrixT::divRowVector(BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0); - int numRows = height_; - int numCols = width_; - applyBinary(binary::DotDiv(), - b, - numRows, - numCols, - offset, - true_type() /* bAsRowVector */, - false_type()); -} - -template -void BaseMatrixT::mulColVector(BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0); - int numRows = height_; - int numCols = width_; - applyBinary(binary::DotMul(), - b, - numRows, - numCols, - offset, - false_type(), - true_type() /* bAsColVector */); -} - -template -void BaseMatrixT::divColVector(BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0); - int numRows = height_; - int numCols = width_; - applyBinary(binary::DotDiv(), - b, - numRows, - numCols, - offset, - false_type(), - true_type() /* bAsColVector */); -} - -template <> -template -int BaseMatrixT::applyRow(Agg agg, BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0, 0, 0); - size_t numRows = b.height_; - size_t numCols = b.width_; - CHECK_EQ(height_, numRows); - CHECK_EQ(width_, 1UL); - aggregate(agg, - base::unary::identity(), - base::binary::second(), - b, - numRows, - numCols, - offset, - false_type(), - true_type() /*aAsColVector*/); - - return 0; -} - -template <> -template -int BaseMatrixT::applyRow(Agg agg, Saver sv, BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0, 0, 0); - size_t numRows = b.height_; - size_t numCols = b.width_; - CHECK_EQ(height_, numRows); - CHECK_EQ(width_, 1UL); - aggregate(agg, - base::unary::identity(), - sv, - b, - numRows, - numCols, - offset, - false_type(), - true_type() /*aAsColVector*/); - - return 0; -} - -template <> -template -int BaseMatrixT::applyRow(Agg agg, - real scaleDest, - real scaleAgg, - BaseMatrixT& b) { - if (scaleDest != 0) { - applyRow(agg, 
base::binary::add2(scaleDest, scaleAgg), b); - } else { - applyRow(agg, base::binary::second(), b); - if (scaleAgg != 1) { - mulScalar(scaleAgg); - } - } - return 0; -} - -template <> -template -int BaseMatrixT::applyRow( - Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c) { - MatrixOffset offset(0, 0, 0, 0, 0, 0); - size_t numRows = b.height_; - size_t numCols = b.width_; - CHECK_EQ(height_, numRows); - CHECK_EQ(width_, 1UL); - CHECK_EQ(c.height_, numRows); - CHECK_EQ(c.width_, numCols); - aggregate(agg, - op, - sv, - b, - c, - numRows, - numCols, - offset, - false_type(), - true_type() /*aAsColVector*/); - return 0; -} - -template <> -template -int BaseMatrixT::applyRow(Agg agg, - Op op, - real scaleDest, - real scaleAgg, - BaseMatrixT& b, - BaseMatrixT& c) { - if (scaleDest != 0) { - applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c); - } else { - applyRow(agg, op, base::binary::second(), b, c); - if (scaleAgg != 1) { - mulScalar(scaleAgg); - } - } - return 0; -} - -template <> -template -int BaseMatrixT::applyCol(Agg agg, BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0, 0, 0); - size_t numRows = b.height_; - size_t numCols = b.width_; - CHECK_EQ(width_, numCols); - CHECK_EQ(height_, 1UL); - aggregate(agg, - base::unary::identity(), - base::binary::second(), - b, - numRows, - numCols, - offset, - true_type() /*aAsRowVector*/, - false_type()); - - return 0; -} - -template <> -template -int BaseMatrixT::applyCol(Agg agg, Saver sv, BaseMatrixT& b) { - MatrixOffset offset(0, 0, 0, 0, 0, 0); - size_t numRows = b.height_; - size_t numCols = b.width_; - CHECK_EQ(width_, numCols); - CHECK_EQ(height_, 1UL); - aggregate(agg, - base::unary::identity(), - sv, - b, - numRows, - numCols, - offset, - true_type() /*aAsRowVector*/, - false_type()); - - return 0; -} - -template <> -template -int BaseMatrixT::applyCol(Agg agg, - real scaleDest, - real scaleAgg, - BaseMatrixT& b) { - if (scaleDest != 0) { - applyCol(agg, base::binary::add2(scaleDest, 
scaleAgg), b); - } else { - applyCol(agg, base::binary::second(), b); - if (scaleAgg != 1) { - mulScalar(scaleAgg); - } - } - return 0; -} - -template <> -void BaseMatrixT::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) { - applyRow(aggregate::sum(), scaleDest, scaleSum, b); -} - -template <> -void BaseMatrixT::maxRows(BaseMatrixT& b) { - applyRow(aggregate::max(), b); -} - -template <> -void BaseMatrixT::minRows(BaseMatrixT& b) { - applyRow(aggregate::min(), b); -} - -template <> -void BaseMatrixT::maxCols(BaseMatrixT& b) { - applyCol(aggregate::max(), b); -} - -template <> -void BaseMatrixT::minCols(BaseMatrixT& b) { - applyCol(aggregate::min(), b); -} - -template <> -void BaseMatrixT::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) { - applyCol(aggregate::sum(), scaleDest, scaleSum, b); -} - -template <> -void BaseMatrixT::sumOfSquaredDiffs(BaseMatrixT& b, - BaseMatrixT& c, - real scaleSum, - real scaleDest) { - applyRow( - aggregate::sum(), base::binary::squaredDiff(), scaleDest, scaleSum, b, c); -} - -template <> -void BaseMatrixT::sumOfProducts(BaseMatrixT& b, - BaseMatrixT& c, - real scaleSum, - real scaleDest) { - applyRow(aggregate::sum(), base::binary::mul(), scaleDest, scaleSum, b, c); -} - -template class BaseMatrixT; - -#ifndef PADDLE_MOBILE_INFERENCE - -template class BaseMatrixT; - -#else - -template <> -void BaseMatrixT::zero() { - applyUnary(unary::Zero()); -} - -template <> -void BaseMatrixT::assign(int p) { - applyUnary(unary::Assign(p)); -} - -template <> -void BaseMatrixT::isEqualTo(BaseMatrixT& b, int value) { - applyBinary(binary::IsEqual(value), b); -} - -template <> -void BaseMatrixT::neg() { - applyUnary(unary::Neg()); -} - -template <> -void BaseMatrixT::abs2() { - applyUnary(unary::Abs()); -} - -template <> -void BaseMatrixT::add(int p) { - applyUnary(unary::Add(p)); -} - -template <> -void BaseMatrixT::add(int p1, int p2) { - applyUnary(unary::Add2(p1, p2)); -} - -template <> -void BaseMatrixT::applyL1(int learningRate, 
int decayRate) { - applyUnary(unary::ApplyL1(learningRate * decayRate)); -} - -#endif -} // namespace paddle diff --git a/paddle/legacy/math/BaseMatrix.h b/paddle/legacy/math/BaseMatrix.h deleted file mode 100644 index 4627f847d356f07600edae8cadcb02302e19381c..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/BaseMatrix.h +++ /dev/null @@ -1,1095 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include "TensorExpression.h" -#include "paddle/legacy/utils/Common.h" - -namespace paddle { - -/* - * nvcc currently does not support C++11, - * so I realized false_type and true_type. - */ -template -struct bool_constant { - static const T value = v; -}; -typedef bool_constant false_type; -typedef bool_constant true_type; - -/** - * @brief Calculate matrix element address. - * - * For instance, address of A[i][j] = i * ld + j. 
- * - */ -#define CAL_MATRIX_START_ADDRESS(address, height, width, ld, col, row) \ - CHECK_LE(col, width); \ - CHECK_LE(row, height); \ - address += row * ld + col; - -class MatrixOffset { - public: - size_t aCol_; - size_t aRow_; - size_t bCol_; - size_t bRow_; - size_t cCol_; - size_t cRow_; - size_t dCol_; - size_t dRow_; - MatrixOffset(size_t aCol = 0, - size_t aRow = 0, - size_t bCol = 0, - size_t bRow = 0, - size_t cCol = 0, - size_t cRow = 0, - size_t dCol = 0, - size_t dRow = 0) - : aCol_(aCol), - aRow_(aRow), - bCol_(bCol), - bRow_(bRow), - cCol_(cCol), - cRow_(cRow), - dCol_(dCol), - dRow_(dRow) {} -}; - -template -class BaseMatrixT : public TensorExpression, T> { - public: - size_t height_, width_; - size_t stride_; - T* data_; - bool trans_; - bool useGpu_; - - public: - virtual ~BaseMatrixT() {} - BaseMatrixT(size_t height, size_t width, T* data, bool trans, bool useGpu) - : height_(height), - width_(width), - stride_(width), - data_(data), - trans_(trans), - useGpu_(useGpu) {} - - /** - * @note This constructor is for temporarily making a matrix with different - * useGpu flag as the original matrix so that mixed gpu/cpu operations - * can be performed successfully. - */ - BaseMatrixT(BaseMatrixT& mat, bool useGpu) - : height_(mat.height_), - width_(mat.width_), - stride_(mat.stride_), - data_(mat.data_), - trans_(mat.trans_), - useGpu_(useGpu) {} - - BaseMatrixT(size_t height, - size_t width, - size_t stride, - T* data, - bool trans, - bool use_gpu) - : height_(height), - width_(width), - stride_(stride), - data_(data), - trans_(trans), - useGpu_(use_gpu) { - /* CHECK_LE(width_, stride_); */ - } - - /// caller should make sure that the size of data is at least height*width - void setData(T* data) { data_ = data; } - - /** - * unary operator: element wise op(a). - * - * @code - * for 0 <= i < this->height_ & for 0 <= j < this->width_. - * @endcode - */ - template - int applyUnary(Op op); - - /** - * unary operator: element wise op(a). 
- * - * @code - * for 0 <= i < numRows & for 0 <= j < numCols. - * While matrix start address is: - * A = this->data_ + offset.aRow_*ld + offset.aCol_; - * @endcode - */ - template - int applyUnary(Op op, int numRows, int numCols, MatrixOffset& offset); - - /** - * binary operator: element wise op(a, b). - * - * @code - * for 0 <= i < this->height_ & for 0 <= j < this->width_. - * While this->height_ == b.height_ && this->width_ == b.width_. - * @endcode - */ - template - int applyBinary(Op op, BaseMatrixT& b); - - /** - * binary operator: element wise op(a, b) - * - * @code - * for 0 <= i < numRows & for 0 <= j < numCols. - * While matrix start address is: - * A = this->data_ + offset.aRow_*lda + offset.aCol_; - * B = b->data_ + offset.bRow_*ldb + offset.bCol_; - * - * if (bAsRowVector == false_type && bAsColVector == false_type) - * op(A[i * lda + j], B[i * ldb + j]) - * - * if (bAsRowVector == true_type && bAsColVector == false_type) - * op(A[i * lda + j], B[j]) - * - * if (bAsRowVector == false_type && bAsColVector == true_type) - * op(A[i * lda + j], B[i * ldb]) - * - * if (bAsRowVector == true_type && bAsColVector == true_type) - * op(A[i * lda + j], B[0]) - * @endcode - */ - template - int applyBinary(Op op, - BaseMatrixT& b, - int numRows, - int numCols, - MatrixOffset& offset, - bAsRowVector, - bAsColVector); - - template - int applyBinary( - Op op, BaseMatrixT& b, int numRows, int numCols, MatrixOffset& offset); - - /** - * ternary operator: element wise op(a, b, c). - * - * @code - * for 0 <= i < this->height_ & for 0 <= j < this->width_. - * - * While this->height_ == b.height_ && this->width_ == b.width_ - * && this->height_ == c.height_ && this->width_ == c.width_ - * @endcode - */ - template - int applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c); - - /** - * ternary operator: element wise op(a, b, c). - * - * @code - * for 0 <= i < numRows & for 0 <= j < numCols. 
- * While matrix start address is: - * - * A = this->data_ + offset.aRow_*lda + offset.aCol_; - * B = b->data_ + offset.bRow_*ldb + offset.bCol_; - * C = c->data_ + offset.cRow_*ldc + offset.cCol_; - * - * if (cAsRowVector == false_type && cAsColVector == false_type) - * op(A[i*lda + j], B[i*ldb + j], C[i*ldc + j]) - * - * if (cAsRowVector == true_type && cAsColVector == false_type) - * op(A[i*lda + j], B[i*ldb + j], C[j]) - * - * if (cAsRowVector == false_type && cAsColVector == true_type) - * op(A[i*lda + j], B[i*ldb + j], C[i*ldc]) - * - * if (cAsRowVector == 1 && cAsColVector == 1) - * op(A[i*lda + j], B[i*ldb + j], C[0]) - * @endcode - */ - template - int applyTernary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - int numRows, - int numCols, - MatrixOffset& offset, - cAsRowVector, - cAsColVector); - - template - int applyTernary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - int numRows, - int numCols, - MatrixOffset& offset); - - /** - * quaternary operator: element wise op(a, b, c, d). - * - * @code - * for 0 <= i < this->height_ & for 0 <= j < this->width_. - * - * While this->height_ == b.height_ && this->width_ == b.width_ - * && this->height_ == c.height_ && this->width_ == c.width_ - * && this->height_ == d.height_ && this->width_ == d.width_ - * @endcode - */ - template - int applyQuaternary(Op op, BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d); - - /** - * quaternary operator: element wise op(a, b, c, d). - * - * @code - * for 0 <= i < numRows & for 0 <= j < numCols. - * While matrix start address is: - * A = this->data_ + offset.aRow_*lda + offset.aCol_; - * B = b->data_ + offset.bRow_*ldb + offset.bCol_; - * C = c->data_ + offset.cRow_*ldc + offset.cCol_; - * D = d->data_ + offset.dRow_*ldd + offset.dCol_; - * @endcode - */ - template - int applyQuaternary(Op op, - BaseMatrixT& b, - BaseMatrixT& c, - BaseMatrixT& d, - int numRows, - int numCols, - MatrixOffset& offset); - - /** - * a aggregate expression that apply each row(or column) of matrix b. 
- * op and sv is element wise operator. - * - * @code - * if (aAsRowVector == true_type && aAsColVector == false_type) - * for each column j & 0 <= i < numRows, do: - * dst = agg(op(b[i*ldb + j])) - * a[j] = sv(a[j], dst) - * - * if (aAsRowVector == false_type && aAsColVector == true_type) - * for each row i & 0 <= j < numCols, do: - * dst = agg(op(b[i*ldb + j])) - * a[i] = sv(a[i], dst) - * @endcode - */ - template - int aggregate(Agg agg, - Op op, - Saver sv, - BaseMatrixT& b, - int numRows, - int numCols, - MatrixOffset& offset, - aAsRowVector, - aAsColVector); - - /** - * a aggregate expression that apply each row(or column) of matrix b and c. - * - * op and sv is element wise operator. - * - * @code - * if (aAsRowVector == true_type && aAsColVector == false_type) - * for each column j & 0 <= i < numRows, do: - * dst = agg(op(b[i*ldb + j], c[i*ldc + j])) - * a[j] = sv(a[j], dst) - * - * if (aAsRowVector == false_type && aAsColVector == true_type) - * for each row i & 0 <= j < numCols, do: - * dst = agg(op(b[i*ldb + j], c[i*ldc + j])) - * a[i] = sv(a[i], dst) - * @endcode - */ - template - int aggregate(Agg agg, - Op op, - Saver sv, - BaseMatrixT& b, - BaseMatrixT& c, - int numRows, - int numCols, - MatrixOffset& offset, - aAsRowVector, - aAsColVector); - - /** - * a aggregate expression that apply each row of matrix b. - * - * @code - * for each row i & 0 <= j < b.width_, do: - * this[i] = agg(b[i*ldb + j]) - * @endcode - */ - template - int applyRow(Agg agg, BaseMatrixT& b); - - /** - * a aggregate expression that apply each row of matrix b. 
- * - * @code - * for each row i & 0 <= j < b.width_, do: - * dst = agg(op(b[i*ldb + j], c[i*ldc + j]) - * this[i] = sv(this[i], dst) - * @endcode - */ - template - int applyRow(Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c); - - // Same as the above with the special handing of sv=add2(scaleDest, scaleAgg) - template - int applyRow(Agg agg, - Op op, - real scaleDest, - real scaleAgg, - BaseMatrixT& b, - BaseMatrixT& c); - - /** - * a aggregate expression that apply each row of matrix b. - * - * @code - * for each row i & 0 <= j < b.width_, do: - * dst = agg(b[i*ldb + j]) - * this[i] = sv(this[i], dst) - * @endcode - */ - template - int applyRow(Agg agg, Saver sv, BaseMatrixT& b); - - // Same as the above with the special handing of sv=add2(scaleDest, scaleAgg) - template - int applyRow(Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b); - - /** - * a aggregate expression that apply each column of matrix b. - * - * @code - * for each column j & 0 <= i < b.height_, do: - * this[j] = agg(b[i*ldb + j]) - * @endcode - */ - template - int applyCol(Agg agg, BaseMatrixT& b); - - /** - * a aggregate expression that apply each column of matrix b. - * - * @code - * for each column j & 0 <= i < b.height_, do: - * dst = agg(b[i*ldb + j]) - * this[j] = sv(this[j], dst) - * @endcode - */ - template - int applyCol(Agg agg, Saver sv, BaseMatrixT& b); - - // Same as the above with the special handing of sv=add2(scaleDest, scaleAgg) - template - int applyCol(Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b); - - bool useGpu() const { return useGpu_; } - - const T* rowBuf(size_t row) const { return data_ + width_ * row; } - - T* rowBuf(size_t row) { return data_ + width_ * row; } - - /** - * @brief unary operator. 
- * - */ - void neg(); - void exp2(); - void pow2(T p); - void log2(); - void sqrt2(); - void square2(); - void reciprocal2(); - void abs2(); - void sign2(); - void zero(); - - /** - * @code - * this(row, col + columnOffset) = 0 for 0 <= col < numColumns - * @endcode - */ - void zeroAtOffset(int64_t columnOffset, int64_t numColumns); - void one(); - void subScalar(T p); - void mulScalar(T p); - void divScalar(T p); - - /** - * @code - * this = p - * @endcode - */ - void assign(T p); - - /** - * @code - * swap(this, b) - * example: swap two Matrices - * MatrixPtr cpuA = std::make_shared(height, width); - * MatrixPtr cpuB = std::make_shared(height, width); - * cpuA->deepSwap(*cpuB); - * @endcode - */ - void deepSwap(BaseMatrixT& b); - - /** - * @code - * this = this + p - * @endcode - */ - void add(T p); - - /** - * @code - * this = this*p1 + p2 - * @endcode - */ - void add(T p1, T p2); - - /** - * this = this < low ? low : this - * - * this = this > high ? high : this - */ - void clip(T p1, T p2); - - /** - * this = b < low ? 0 : 1 - * - * this = b > high ? 0 : 1 - */ - void clipDerivative(BaseMatrixT& b, T p1, T p2); - - /** - * @code - * a = a > p ? 1.0f : 0.0f - * @endcode - */ - void biggerThanScalar(T p); - - /** - * @code - * a = a > p ? 
a : p - * @endcode - */ - void downClip(T p); - - /** - * @code - * this = b - * @endcode - */ - void assign(BaseMatrixT& b); - - /** - * @code - * If b.width + columOffset <= this.width - * this(row, col + columnOffset) = b(row, col) for 0 <= col < b.width - * - * If this.width + columnOffset <= b.width - * this(row, col) = b(row, col + columnOffset) for 0 <= col < this.width - * - * Otherwise, FATAL - * @endcode - */ - void assignAtOffset(BaseMatrixT& b, int64_t columnOffset); - - /// this = this + b - void add(BaseMatrixT& b); - - /** - * @code - * If b.width + columOffset <= this.width - * this(row, col + columnOffset) += b(row, col) for 0 <= col < b.width - * - * If this.width + columnOffset <= b.width - * this(row, col) += b(row, col + columnOffset) for 0 <= col < this.width - * - * Otherwise, FATAL - * @endcode - */ - void addAtOffset(BaseMatrixT& b, int64_t columnOffset); - - void addColVector(BaseMatrixT& b); - void addRowVector(BaseMatrixT& b); - void addBias(BaseMatrixT& b, T scale); - - void mulRowVector(BaseMatrixT& b); - void divRowVector(BaseMatrixT& b); - - void mulColVector(BaseMatrixT& b); - void divColVector(BaseMatrixT& b); - - void addP2P(BaseMatrixT& b); - - /** - * @code - * this = this + b*p - * @endcode - */ - void add(BaseMatrixT& b, T p); - - /** - * @code - * this = p1*this + p2*b - * @endcode - */ - void add(BaseMatrixT& b, T p1, T p2); - - /** - * @code - * this = this - b - * @endcode - */ - void sub(BaseMatrixT& b); - - /** - * @code - * this = this - b*p - * @endcode - */ - void sub(BaseMatrixT& b, T p); - - /** - * @code - * b = max(0, this) - * @endcode - */ - void relu(BaseMatrixT& b); - void reluDerivative(BaseMatrixT& b); - - /** - * @code - * b = log(1.0 + exp(this)) - * @endcode - */ - void softrelu(BaseMatrixT& b); - void softreluDerivative(BaseMatrixT& b); - - /** - * @code - * b = min(max(this, p1), p2) - * @endcode - */ - void brelu(BaseMatrixT& b); - void breluDerivative(BaseMatrixT& b); - - /** - * @code - * b = this * 
this - * @endcode - */ - void square2(BaseMatrixT& b); - void squareDerivative(BaseMatrixT& b); - - /** - * @code - * b = tanh(this) - * @endcode - */ - void tanh(BaseMatrixT& b); - void tanhDerivative(BaseMatrixT& b); - - /** - * @code - * b = p1 * tanh(p2 * this) - * @endcode - */ - void scaledTanh(BaseMatrixT& b, T p1, T p2); - void scaledTanhDerivative(BaseMatrixT& b, T p1, T p2); - - /** - * @code - * b = 1.0f / this - * @endcode - */ - void reciprocal2(BaseMatrixT& b); - void reciprocalDerivative(BaseMatrixT& b); - - /** - * @code - * b = this > 0.0f ? this : -this - * @endcode - */ - void abs2(BaseMatrixT& b); - void absDerivative(BaseMatrixT& b); - - /** - * @code - * b = 1.0f / (1.0f + exp(-this)) - * @endcode - */ - void sigmoid(BaseMatrixT& b); - void sigmoidDerivative(BaseMatrixT& b); - - /** - * @code - * b = a - * @endcode - */ - void expDerivative(BaseMatrixT& b); - - void sign2(BaseMatrixT& b); - - void exp2(BaseMatrixT& b); - void pow2(BaseMatrixT& b, T p); - void log2(BaseMatrixT& b); - void sqrt2(BaseMatrixT& b); - void addScalar(BaseMatrixT& b, T p); - void subScalar(BaseMatrixT& b, T p); - void mulScalar(BaseMatrixT& b, T p); - void divScalar(BaseMatrixT& b, T p); - void scalarDiv(BaseMatrixT& b, T p); - - /** - * @code - * this = 1.0f / sqrt(b) - * @endcode - */ - void invSqrt(BaseMatrixT& b); - - /// this = (b == value) - void isEqualTo(BaseMatrixT& b, T value); - - /** - * @brief ternary operator. 
- */ - void softCrossEntropy(BaseMatrixT& b, BaseMatrixT& c); - void softCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c); - void binaryLabelCrossEntropy(BaseMatrixT& b, BaseMatrixT& c); - void binaryLabelCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this = b + c - * @endcode - */ - void add(BaseMatrixT& b, BaseMatrixT& c); - /** - * @code - * this = b*p1 + c*p2 - * @endcode - */ - void add(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2); - /** - * @code - * this = b - c - * @endcode - */ - void sub(BaseMatrixT& b, BaseMatrixT& c); - /** - * @code - * this = b*p1 - c*p2 - * @endcode - */ - void sub(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2); - - /** - * @code - * this = this + b + c - * @endcode - */ - void add2(BaseMatrixT& b, BaseMatrixT& c); - /** - * @code - * this = this*p1 + b*p2 + c*p3 - * @endcode - */ - void add2(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3); - - /** - * @code - * this = a*p1 + b*p2 + c*p3 - * @endcode - */ - void add3(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1, T p2, T p3); - - /** - * @code - * c = p2 * c - p1 * (b + p3 * this) - * this += mom - * @endcode - */ - void sgdUpdate(BaseMatrixT& b, // grad - BaseMatrixT& c, // mom - T p1, // learningRate, - T p2, // momentum, - T p3); // decayRate - - /** - * @code - * c = p2 * c - p1 * d * (b + p3 * this) - * this += mom - * @endcode - */ - void sgdUpdate(BaseMatrixT& b, // grad, - BaseMatrixT& c, // mom, - BaseMatrixT& d, // lr, - T p1, // learningRate, - T p2, // momentum, - T p3); // decayRate - - /// apply L1/L2 to *this* - virtual void applyL1(T learningRate, T decayRate); - void applyL1(BaseMatrixT& lr, T learningRate, T decayRate); - void applyL2(T learningRate, T decayRate); - void applyL2(BaseMatrixT& lr, T learningRate, T decayRate); - - /** - * @code - * this *= b - * @endcode - */ - void dotMul(BaseMatrixT& b); - - /** - * @code - * this = b * c - * @endcode - */ - void dotMul(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this = b / c - * 
@endcode - */ - void dotDiv(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this = (b + p1) / (c + p2) - * @endcode - */ - void dotDiv(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2); - - /** - * @code - * this = log(1 + exp(b - c)) - d * (b - c) - * @endcode - */ - void rankLoss(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d); - void rankLossBp(BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d); - - /** - * @code - * this = log(1 + exp(b)) - c * b - * @endcode - */ - void logisticRegressionLoss(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this += exp(b)/(1+exp(b)) - c - * @endcode - */ - void logisticRegressionLossBp(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this = b > c ? 1.0 : 0.0 - * @endcode - */ - void biggerThan(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this = ((b>c && d>0.5) || (bc ? b : c - * @endcode - */ - void max2(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this[destCol] += (b>p1 == c>p1) ? 0 : 1) - * @endcode - */ - void binaryClassificationError(size_t destCol, - BaseMatrixT& b, - BaseMatrixT& c, - T p); - void binaryClassificationError2(size_t destCol, - BaseMatrixT& b, - BaseMatrixT& c, - T p); - - /** - * @code - * this = this * b * b - * @endcode - */ - void dotMulSquare(BaseMatrixT& b); - - /** - * @code - * this = this * this * b - * @endcode - */ - void dotSquareMul(BaseMatrixT& b); - - /** - * @code - * this = b * c * c - * @endcode - */ - void dotMulSquare(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this = b * b * c * c - * @endcode - */ - void dotSquareSquare(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this = this * (p1*b + p2*c)^2 - * @endcode - */ - void dotMulSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2); - - /** - * @code - * this = (p1*b + p2*c)^2 - * @endcode - */ - void dotSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2); - - /** - * @code - * this= this * (p1*b + p2*c) - * @endcode - */ - void dotMulSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2); - - 
/** - * @code - * this += sqr(p1*b + p2*c + p3*d) - * @endcode - */ - void addSquareSum( - BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d, T p1, T p2, T p3); - - /** - * @code - * this += p * sqr(b) - * @endcode - */ - void addSquare(BaseMatrixT& b, T p); - - /** - * @code - * this = p1 * this + p2 * sqr(b) - * @endcode - */ - void decayAddSquare(BaseMatrixT& b, T p1, T p2); - - /** - * @code - * this = p1 * this + p2 * sqr(b * c) - * @endcode - */ - void decayAddSquareMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2); - - /** - * @code - * this = 1 / (p1 * b + p2) - * @endcode - */ - void reciprocal2(BaseMatrixT& b, T p1, T p2); - - /** - * @code - * this = 1 / (p1 * b + p2 * c + p3) - * @endcode - */ - void reciprocalSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3); - - /** - * @code - * b = this; this = 0 - * @endcode - */ - void copyAndClear(BaseMatrixT& b); - - /** - * @code - * this_row[destCol] += dotprod(b_row, c_row) - * @endcode - */ - void rowDotMul(size_t destCol, BaseMatrixT& b, BaseMatrixT& c); - void rowDotMul2(size_t destCol, BaseMatrixT& b, BaseMatrixT& c); - - /** - * this is vector (one row matrix) - * - * @code - * for each row i, do: - * this_row += dotmul(b_row_i, c_row_i) - * @endcode - */ - void addDotMulVMM(BaseMatrixT& b, BaseMatrixT& c); - void addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c); - - /** - * c is vector (one row matrix) - * - * @code - * for each row i, do: - * this_row_i += dotmul(b_row_i, c_row) - * @endcode - */ - void addDotMulMMV(BaseMatrixT& b, BaseMatrixT& c); - void addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this = p1 * this + p2 * b * c - * @endcode - */ - void addDotMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2); - - /** - * @code - * this_row = b_row * c_row[cCol] - * @endcode - */ - void rowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c); - void rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this_col = b_col * c_col[cRow] - * @endcode - */ - void 
colScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this_col += b_col * c_col[cRow] - * @endcode - */ - void addColScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c); - - /** - * @code - * this_row += b_row * c_row[cCol] - * @endcode - */ - void addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c); - - /// calculate the sum of each row of the matrix b. - /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} - void sumRows(BaseMatrixT& b, T scaleSum, T scaleDest); - - /// calculate the maximum value of each row of the matrix b. - void maxRows(BaseMatrixT& b); - /// calculate the minimum value of each row of the matrix b. - void minRows(BaseMatrixT& b); - - /// calculate the maximum value of each column of the matrix b. - void maxCols(BaseMatrixT& b); - /// calculate the minimum value of each column of the matrix b. - void minCols(BaseMatrixT& b); - - /// calculate the sum of each column of the matrix b. - /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji} - void sumCols(BaseMatrixT& b, T scaleSum, T scaleDest); - - /// this_i = scaleDest * this_i + scaleSum * \sum_j (b_{ij} - c_{ij})^2 - void sumOfSquaredDiffs(BaseMatrixT& b, - BaseMatrixT& c, - T scaleSum, - T scaleDest); - - /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij} - void sumOfProducts(BaseMatrixT& b, BaseMatrixT& c, T scaleSum, T scaleDest); - - /** - * @code - * this_row = b_row + p * ones * c_row[cCol] - * @endcode - */ - void rowAdd(size_t cCol, BaseMatrixT& b, BaseMatrixT& c, T p); - /** - * @code - * this_row = pow(b_row, c_row[cCol]) - * @endcode - */ - void rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c); - - virtual bool isSparse() const { return false; } - - template - void operator=(const ExpressionType& expr) { - if (useGpu_) { - TensorGpuApply(*this, expr); - } else { - TensorCpuApply(*this, expr); - } - } - - template - void operator+=(const ExpressionType& expr) { - (*this) = (*this) + expr; - } - template - void 
operator-=(const ExpressionType& expr) { - (*this) = (*this) - expr; - } - template - void operator*=(const ExpressionType& expr) { - (*this) = (*this) * expr; - } - template - void operator/=(const ExpressionType& expr) { - (*this) = (*this) / expr; - } -}; - -typedef BaseMatrixT BaseMatrix; -typedef BaseMatrixT IBaseMatrix; - -} // namespace paddle diff --git a/paddle/legacy/math/CMakeLists.txt b/paddle/legacy/math/CMakeLists.txt deleted file mode 100644 index 9992ec71f45b592e0a73e1cc9c655e773fa18e86..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/CMakeLists.txt +++ /dev/null @@ -1,57 +0,0 @@ -# common package contains: -# * the utilities: -# * Thread Libs -# * Memory Manage libs -# * CommandLine Parser -# * Logging -# * Timer/Stats -# * the math libraries: -# * Matrix/Vector -# * the parameter optimizers. -# * the parameter updater functions. -# -# TODO(yuyang18): separate libs. -# -file(GLOB MATH_HEADERS . *.h) -file(GLOB MATH_SOURCES . *.cpp) - -if(NOT WITH_MKLDNN) - set(DNN_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.h") - set(DNN_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/MKLDNNMatrix.cpp") - list(REMOVE_ITEM MATH_HEADERS "${DNN_HEADER}") - list(REMOVE_ITEM MATH_SOURCES "${DNN_SOURCE}") - message(STATUS "Skip compiling with MKLDNNMatrix") -else() - message(STATUS "Compile with MKLDNNMatrix") -endif() - -if(MOBILE_INFERENCE) - # Remove sparse - list(REMOVE_ITEM MATH_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.h - ${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.h - ${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.h) - list(REMOVE_ITEM MATH_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.cpp) -endif() -set(MATH_SOURCES - "${PADDLE_SOURCE_DIR}/paddle/legacy/math/BaseMatrix.cu" - "${PADDLE_SOURCE_DIR}/paddle/legacy/math/TrainingAlgorithmOp.cu" - ${MATH_SOURCES}) -if(NOT WITH_GPU) - # then compile BaseMatrix.cu as c++ file - 
compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/legacy/math/BaseMatrix.cu") - compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/legacy/math/TrainingAlgorithmOp.cu") - add_library(paddle_math STATIC - ${MATH_SOURCES}) -else() - cuda_add_library(paddle_math ${MATH_SOURCES}) -endif() - - -add_dependencies(paddle_math paddle_proto ${external_project_dependencies}) # depends -if(WITH_TESTING) - add_subdirectory(tests) -endif() diff --git a/paddle/legacy/math/CpuSparseMatrix.cpp b/paddle/legacy/math/CpuSparseMatrix.cpp deleted file mode 100644 index 20c65a3a1d7099a73d8b3c490cd42e721e60823b..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/CpuSparseMatrix.cpp +++ /dev/null @@ -1,787 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CpuSparseMatrix.h" -#include "SparseMatrix.h" -#include "float.h" -#include "hl_gpu.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -const size_t CpuSparseMatrix::DEFAULT_AVG_WIDTH; - -CpuSparseMatrix::CpuSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans) - : Matrix(NULL, height, width, trans, false) { - resize(height, width, nnz, valueType, format); -} - -CpuSparseMatrix::CpuSparseMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans) - : Matrix(dataHandle, height, width, trans, false) { - resize(height, width, nnz, valueType, format); -} - -CpuSparseMatrix::CpuSparseMatrix(real* data, - int* rows, - int* cols, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans) - : Matrix(NULL, height, width, trans, false) { - cols_ = cols; - rows_ = rows; - value_ = data; - height_ = height; - width_ = width; - elementCnt_ = nnz; - valueType_ = valueType; - format_ = format; -} - -void CpuSparseMatrix::resize(size_t newHeight, - size_t newWidth, - size_t newNnz, - SparseValueType valueType, - SparseFormat format) { - CHECK_LE(newNnz, newHeight * newWidth); - size_t newSize = 0; - if (format == SPARSE_CSR) { - newSize = (newHeight + 1) * sizeof(int) + newNnz * sizeof(int); - } else { - newSize = (newWidth + 1) * sizeof(int) + newNnz * sizeof(int); - } - - if (NO_VALUE != valueType) { - newSize += newNnz * sizeof(real); - } - - if (NULL == memoryHandle_.get() || newSize > memoryHandle_->getSize()) { - memoryHandle_ = std::make_shared(newSize); - } - - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newNnz; - valueType_ = valueType; - format_ = format; - sparseResize(); -} -void CpuSparseMatrix::sparseResize() { - if (format_ == SPARSE_CSR) { - rows_ = reinterpret_cast( - 
reinterpret_cast(memoryHandle_->getBuf())); - cols_ = reinterpret_cast( - reinterpret_cast(memoryHandle_->getBuf()) + - (height_ + 1) * sizeof(int)); - if (NO_VALUE != valueType_) { - value_ = reinterpret_cast( - reinterpret_cast(memoryHandle_->getBuf()) + - (height_ + 1) * sizeof(int) + elementCnt_ * sizeof(int)); - } else { - value_ = NULL; - } - } else { - cols_ = reinterpret_cast( - reinterpret_cast(memoryHandle_->getBuf())); - rows_ = reinterpret_cast( - reinterpret_cast(memoryHandle_->getBuf()) + - (width_ + 1) * sizeof(int)); - if (NO_VALUE != valueType_) { - value_ = reinterpret_cast( - reinterpret_cast(memoryHandle_->getBuf()) + - (width_ + 1) * sizeof(int) + elementCnt_ * sizeof(int)); - } else { - value_ = NULL; - } - } -} - -void CpuSparseMatrix::resize(size_t newHeight, size_t newWidth) { - resize(newHeight, - newWidth, - newHeight * std::min(DEFAULT_AVG_WIDTH, newWidth), - valueType_, - format_); -} - -MatrixPtr CpuSparseMatrix::getTranspose() { - if (!memoryHandle_ && !value_) { - MatrixPtr dest(new CpuSparseMatrix( - height_, width_, elementCnt_, valueType_, format_, true)); - return dest; - } else if (memoryHandle_) { - MatrixPtr dest(new CpuSparseMatrix( - std::dynamic_pointer_cast(memoryHandle_), - height_, - width_, - elementCnt_, - valueType_, - format_, - true)); - return dest; - } else if (value_) { - MatrixPtr dest(new CpuSparseMatrix(value_, - rows_, - cols_, - height_, - width_, - elementCnt_, - valueType_, - format_, - true)); - return dest; - } else { - return NULL; - } -} - -SparseValueType CpuSparseMatrix::getValueType() { return valueType_; } - -void CpuSparseMatrix::mul(const Matrix& a, - const Matrix& b, - real scaleAB, - real scaleT) { - CHECK(!isTransposed()) << "Not supported"; - const auto a_ptr = dynamic_cast(&a); - const auto b_ptr = dynamic_cast(&b); - - if (a_ptr && b_ptr) { - CpuMatrix::mul((CpuMatrix*)a_ptr, (CpuMatrix*)b_ptr, this, scaleAB, scaleT); - } else { - LOG(FATAL) << "not supported"; - } -} - -void 
CpuSparseMatrix::add3(CpuMatrix* b) { - CHECK(getFormat() != SPARSE_CSC) << "Not supported"; - CHECK(height_ == b->getHeight()); - CHECK(width_ == b->getWidth()); - real* A = getValue(); - real* B = b->getData(); - int* cols = getCols(); - for (size_t i = 0; i < height_; i++) { - size_t start = getRowStartIdx(i); - size_t end = getRowStartIdx(i + 1); - for (size_t j = start; j < end; j++) { - A[j] = B[i * width_ + cols[j]]; - } - } -} - -void CpuSparseMatrix::add3(MatrixPtr b) { - if (dynamic_cast(b.get())) { - add3(dynamic_cast(b.get())); - } else { - LOG(FATAL) << "not supported"; - } -} - -void CpuSparseMatrix::addBias(Matrix& b, real scale) { - CHECK_EQ(b.getHeight(), (size_t)1); - CHECK_EQ(width_, b.getWidth()); - real* A = getValue(); - real* B = b.getData(); - int* cols = getCols(); - size_t nnz = getElementCnt(); - for (size_t i = 0; i < nnz; i++) { - A[i] += scale * B[cols[i]]; - } -} - -template -void printBuf(std::ostream& os, T* a, size_t len, const char* name) { - os << "\n: " << name << " ["; - for (size_t i = 0; i < len; i++) { - os << a[i] << " "; - } - os << "]\n"; -} - -void CpuSparseMatrix::print(std::ostream& os) const { - size_t rowSize = format_ == SPARSE_CSC ? elementCnt_ : height_ + 1; - size_t colSize = format_ == SPARSE_CSC ? 
width_ + 1 : elementCnt_; - printBuf(os, rows_, rowSize, "row"); - printBuf(os, cols_, colSize, "col"); - if (valueType_ == FLOAT_VALUE) { - printBuf(os, value_, elementCnt_, "value"); - } - return; -} - -void CpuSparseMatrix::printOneRow(std::ostream& os, size_t idx) const { - CHECK_LT(idx, height_); - if (format_ == SPARSE_CSC) { - LOG(FATAL) << "SPARSE_CSC not supported"; - return; - } - - const int* col = getRowCols(idx); - size_t num = getColNum(idx); - if (num > 0) { - if (valueType_ == FLOAT_VALUE) { - const real* data = getRowValues(idx); - os << col[0] << ":" << data[0]; - for (size_t i = 1; i < num; ++i) { - os << " " << col[i] << ":" << data[i]; - } - } else { - os << col[0]; - for (size_t i = 1; i < num; ++i) { - os << " " << col[i]; - } - } - } - os << ";"; -} - -void CpuSparseMatrix::rowScale(size_t cCol, CpuSparseMatrix& b, Matrix& c) { - CHECK(getFormat() != SPARSE_CSC) << "Not supported"; - CHECK_EQ(height_, b.getHeight()); - CHECK_EQ(width_, b.getWidth()); - real* A = getValue(); - real* B = b.getValue(); - if (b.getValueType() == FLOAT_VALUE) { - for (size_t i = 0; i < height_; i++) { - size_t start = getRowStartIdx(i); - size_t end = getRowStartIdx(i + 1); - CHECK_EQ(start, b.getRowStartIdx(i)); - CHECK_EQ(end, b.getRowStartIdx(i + 1)); - for (size_t j = start; j < end; j++) { - A[j] = B[j] * c.getElement(i, cCol); - } - } - } else if (b.getValueType() == NO_VALUE) { - for (size_t i = 0; i < height_; i++) { - size_t start = getRowStartIdx(i); - size_t end = getRowStartIdx(i + 1); - CHECK_EQ(start, b.getRowStartIdx(i)); - CHECK_EQ(end, b.getRowStartIdx(i + 1)); - for (size_t j = start; j < end; j++) { - A[j] = c.getElement(i, cCol); - } - } - } -} - -void CpuSparseMatrix::randomizeUniform() { - CHECK_LE(elementCnt_, height_ * width_); - if (valueType_ == FLOAT_VALUE) { - real* data = getValue(); - for (size_t i = 0; i < elementCnt_; ++i) { - *data++ = rand() / static_cast(RAND_MAX); // NOLINT - } - } - if (format_ == SPARSE_CSR) { - 
sparseRand(rows_, cols_, elementCnt_, height_ + 1, width_, false); - } else { - sparseRand(cols_, rows_, elementCnt_, width_ + 1, height_, false); - } -} - -void CpuSparseMatrix::copyFrom(std::vector& rows, - std::vector& cols, - std::vector& values) { - size_t size = format_ == SPARSE_CSR ? cols.size() : rows.size(); - resize(height_, width_, size, valueType_, format_); - if (valueType_ == FLOAT_VALUE) { - memcpy(&value_[0], &values[0], sizeof(real) * values.size()); - } - memcpy(&cols_[0], &cols[0], sizeof(int) * cols.size()); - memcpy(&rows_[0], &rows[0], sizeof(int) * rows.size()); -} - -// Copy from a CpuMatrix, only supported in sparse_float_value_t -// SparseMatrix. -void CpuSparseMatrix::copyFrom(const CpuMatrix& src) { - CHECK_EQ(getHeight(), src.getHeight()); - CHECK_EQ(getWidth(), src.getWidth()); - CHECK(!src.trans_ && !trans_); - if (format_ == SPARSE_CSR) { - std::vector rows(getHeight() + 1); - std::vector cols; - std::vector values; - rows[0] = 0; - for (size_t r = 0; r < getHeight(); ++r) { - for (size_t c = 0; c < getWidth(); ++c) { - real v = src.getElement(r, c); - if (fabs(v) > FLT_EPSILON) { - cols.push_back(c); - values.push_back(v); - } - } - rows[r + 1] = values.size(); - } - copyFrom(rows, cols, values); - } else { - std::vector cols(getWidth() + 1); - std::vector rows; - std::vector values; - cols[0] = 0; - for (size_t r = 0; r < getWidth(); ++r) { - for (size_t c = 0; c < getHeight(); ++c) { - real v = src.getElement(c, r); - if (fabs(v) > FLT_EPSILON) { - rows.push_back(c); - values.push_back(v); - } - } - cols[r + 1] = values.size(); - } - copyFrom(rows, cols, values); - } -} - -MatrixPtr CpuSparseMatrix::clone(size_t height, size_t width, bool useGpu) { - if (height == 0 && width == 0) { - height = height_; - width = width_; - } - CHECK(width && height); - if (!useGpu) { - return std::make_shared( - height, width, 0, valueType_, format_); - } else { - return std::make_shared( - height, width, elementCnt_, valueType_, format_); - } -} 
- -MatrixPtr CpuSparseMatrix::subMatrix(size_t startRow, size_t numRows) { - CHECK_LE(startRow + numRows, height_); - CHECK_EQ(format_, SPARSE_CSR); - if (valueType_ == NO_VALUE) { - return std::make_shared( - nullptr, - rows_ + startRow, - cols_, - numRows, - width_, - rows_[startRow + numRows] - rows_[startRow], - valueType_, - format_, - trans_); - } else { - return std::make_shared( - value_, - rows_ + startRow, - cols_, - numRows, - width_, - rows_[startRow + numRows] - rows_[startRow], - valueType_, - format_, - trans_); - } -} - -/* mem MUST be alloced outside (memAlloc=false) */ -void CpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { - CHECK(!memAlloc); - CpuSparseMatrix* mat = dynamic_cast(matTrans.get()); - if (format_ == SPARSE_CSR) { - /*statistic element number in each col*/ - int* colCounters = mat->getRows() + 1; - memset(colCounters, 0, sizeof(int) * width_); - for (size_t i = 0; i < elementCnt_; ++i) { - int col = cols_[i]; - colCounters[col]++; - } - /*fill mat rows */ - mat->getRows()[0] = 0; - for (size_t i = 1; i < width_ + 1; i++) { - mat->getRows()[i] = mat->getRows()[i - 1] + mat->getRows()[i]; - } - /*fill mat values and cols*/ - std::vector colNumVec(width_, 0); - if (valueType_ == FLOAT_VALUE) { - for (size_t i = 0; i < height_; i++) { - for (int j = rows_[i]; j < rows_[i + 1]; j++) { - int colIdx = cols_[j]; - int index = mat->getRows()[colIdx] + colNumVec[colIdx]; - mat->getCols()[index] = i; - mat->getValue()[index] = value_[j]; - colNumVec[colIdx]++; - } - } - } else { - for (size_t i = 0; i < height_; i++) { - for (int j = rows_[i]; j < rows_[i + 1]; j++) { - int colIdx = cols_[j]; - int index = mat->getRows()[colIdx] + colNumVec[colIdx]; - mat->getCols()[index] = i; - colNumVec[colIdx]++; - } - } - } - } else { - /*statistic element number in each row*/ - int* rowCounters = mat->getCols() + 1; - memset(rowCounters, 0, sizeof(int) * height_); - for (size_t i = 0; i < elementCnt_; ++i) { - int row = rows_[i]; - 
rowCounters[row]++; - } - - /*fill mat cols */ - mat->getCols()[0] = 0; - for (size_t i = 1; i < height_ + 1; i++) { - mat->getCols()[i] = mat->getCols()[i - 1] + mat->getCols()[i]; - } - /*fill mat values and rows*/ - std::vector rowNumVec(height_, 0); - if (valueType_ == FLOAT_VALUE) { - for (size_t i = 0; i < width_; i++) { - for (int j = cols_[i]; j < cols_[i + 1]; j++) { - int rowIdx = rows_[j]; - int index = mat->getCols()[rowIdx] + rowNumVec[rowIdx]; - mat->getRows()[index] = i; - mat->getValue()[index] = value_[j]; - rowNumVec[rowIdx]++; - } - } - } else { - for (size_t i = 0; i < width_; i++) { - for (int j = cols_[i]; j < cols_[i + 1]; j++) { - int rowIdx = rows_[j]; - int index = mat->getCols()[rowIdx] + rowNumVec[rowIdx]; - mat->getRows()[index] = i; - rowNumVec[rowIdx]++; - } - } - } - } -} - -void CpuSparseMatrix::setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) { - if (format_ == SPARSE_CSR) { - CHECK_LT(row, height_); - CHECK(NULL != cols); - if (0 == row) { - rows_[row] = 0; - } - rows_[row + 1] = rows_[row] + colNum; - for (size_t i = 0; i < colNum; ++i) { - cols_[rows_[row] + i] = cols[i]; - } - if (valueType_ == NO_VALUE) { - CHECK(!values); - } else { - for (size_t i = 0; i < colNum; ++i) { - value_[rows_[row] + i] = values[i]; - } - } - } else { - LOG(FATAL) << "not supported"; - } -} - -void CpuSparseMatrix::fillRowIndices(IVectorPtr& outVec) const { - if (format_ == SPARSE_CSR) { - auto nnz = getElementCnt(); - IVector::resizeOrCreate(outVec, nnz, false); - auto out = outVec->getData(); - int* rows = getRows(); - for (size_t i = 0; i < height_; i++) { - for (int j = rows[i]; j < rows[i + 1]; j++) { - out[j] = i; - } - } - } else { - LOG(FATAL) << "SPARSE_CSC not supported"; - } -} - -ThreadLocal> CpuSparseMatrix::cpuLocalMats_; - -CpuSparseMatrixPtr CpuSparseMatrix::getTmpSparseMatrix(size_t height, - size_t width) { - std::vector* localMats = cpuLocalMats_.get(); - auto it = localMats->begin(); - while 
(it != localMats->end()) { - if (it->unique()) { - (*it)->resize(height, width, elementCnt_, valueType_, format_); - return *it; - } - } - localMats->emplace_back(std::make_shared( - height, width, elementCnt_, valueType_, format_, false)); - return localMats->back(); -} - -void CpuSparseMatrix::copyFrom(const Matrix& src, hl_stream_t stream) { - if (dynamic_cast(&src)) { - auto tmpSrc = dynamic_cast(&src); - copyFrom(*tmpSrc, stream); - } else if (dynamic_cast(&src)) { - auto tmpSrc = dynamic_cast(&src); - copyFrom(*tmpSrc); - } else if (dynamic_cast(&src)) { - auto tmpSrc = dynamic_cast(&src); - copyFrom(*tmpSrc); - } else { - LOG(FATAL) << "not implemented"; - } -} - -void CpuSparseMatrix::copyFrom(const Matrix& src) { - if (dynamic_cast(&src)) { - auto tmpSrc = dynamic_cast(&src); - copyFrom(*tmpSrc); - } else if (dynamic_cast(&src)) { - auto tmpSrc = dynamic_cast(&src); - copyFrom(*tmpSrc); - } else { - LOG(FATAL) << "not implemented"; - } -} - -void CpuSparseMatrix::copyFrom(const GpuSparseMatrix& src, hl_stream_t stream) { - CHECK_EQ(height_, src.getHeight()); - CHECK_EQ(width_, src.getWidth()); - CHECK_EQ(size_t(elementCnt_), src.getElementCnt()); - size_t valSize = valueType_ == NO_VALUE ? 0 : elementCnt_; - if (format_ == SPARSE_CSC) - hl_memcpy_from_csc_matrix(value_, - valSize, - rows_, - elementCnt_, - cols_, - width_ + 1, - src.sMatrix_.get(), - stream); - else - hl_memcpy_from_csr_matrix(value_, - valSize, - rows_, - height_ + 1, - cols_, - elementCnt_, - src.sMatrix_.get(), - stream); -} - -void CpuSparseMatrix::copyFrom(const CpuSparseMatrix& src) { - CHECK_EQ(height_, src.getHeight()); - CHECK_EQ(width_, src.getWidth()); - CHECK_EQ(format_, src.getFormat()); - int start = format_ == SPARSE_CSR ? 
src.getRows()[0] : src.getCols()[0]; - if (format_ == SPARSE_CSR) { - size_t totalColNum = 0; - for (size_t i = 0; i < height_; ++i) { - totalColNum += src.getColNum(i); - } - resize(height_, width_, totalColNum, valueType_, format_); - rows_[0] = 0; - for (size_t i = 0; i < height_; ++i) { - rows_[i + 1] = rows_[i] + src.getColNum(i); - } - memcpy(cols_, src.getCols() + start, totalColNum * sizeof(int)); - } else { - size_t totalColNum = 0; - for (size_t i = 0; i < width_; ++i) { - totalColNum += src.getRowNum(i); - } - resize(height_, width_, totalColNum, valueType_, format_); - cols_[0] = 0; - for (size_t i = 0; i < width_; ++i) { - cols_[i + 1] = cols_[i] + src.getRowNum(i); - } - memcpy(rows_, src.getRows() + start, totalColNum * sizeof(int)); - } - - // if have different value type, only copy rows and cols - if (valueType_ == FLOAT_VALUE && src.getValueType() == FLOAT_VALUE) { - memcpy(value_, src.getValue() + start, elementCnt_ * sizeof(real)); - } -} - -void CpuSparseMatrix::copyRow(int offsets, - size_t colNum, - const sparse_non_value_t* row) { - for (size_t j = 0; j < colNum; j++) { - cols_[offsets + j] = row[j].col; - } -} - -void CpuSparseMatrix::copyRow(int offsets, - size_t colNum, - const sparse_float_value_t* row) { - for (size_t j = 0; j < colNum; j++) { - cols_[offsets + j] = row[j].col; - value_[offsets + j] = row[j].value; - } -} - -template -void CpuSparseMatrix::copyFrom(int64_t* ids, int64_t* indices, T* data) { - size_t totalColNum = 0; - for (size_t i = 0; i < height_; ++i) { - int64_t id = ids[i]; - totalColNum += indices[id + 1] - indices[id]; - } - valueType_ = typeid(T) == typeid(sparse_non_value_t) ? 
NO_VALUE : FLOAT_VALUE; - - resize(height_, width_, totalColNum, valueType_, format_); - - rows_[0] = 0; - for (size_t i = 0; i < height_; ++i) { - int64_t id = ids[i]; - T* row = data + indices[id]; - size_t colNum = indices[id + 1] - indices[id]; - rows_[i + 1] = rows_[i] + colNum; - copyRow(rows_[i], colNum, row); - } -} - -template -void CpuSparseMatrix::copyFrom(int64_t* indices, T* data) { - CHECK(format_ == SPARSE_CSR); - size_t totalColNum = indices[height_] - indices[0]; - valueType_ = typeid(T) == typeid(sparse_non_value_t) ? NO_VALUE : FLOAT_VALUE; - resize(height_, width_, totalColNum, valueType_, format_); - - rows_[0] = 0; - for (size_t i = 0; i < height_; ++i) { - T* row = data + indices[i]; - size_t colNum = indices[i + 1] - indices[i]; - rows_[i + 1] = rows_[i] + colNum; - copyRow(rows_[i], colNum, row); - } -} - -void CpuSparseMatrix::trimFrom(const CpuSparseMatrix& src) { - CHECK_EQ(height_, src.getHeight()); - CHECK_LE(width_, src.getWidth()); - CHECK_EQ(format_, src.getFormat()); - CHECK_EQ(valueType_, src.getValueType()); - if (format_ == SPARSE_CSR) { - int* srcCols = src.getCols(); - size_t numLessWidth = - std::count_if(srcCols, srcCols + src.getElementCnt(), [this](size_t n) { - return n < this->width_; - }); - resize(height_, width_, numLessWidth, valueType_, format_); - rows_[0] = 0; - size_t index = 0; - for (size_t r = 0; r < height_; ++r) { - for (int i = src.getRows()[r]; i < src.getRows()[r + 1]; ++i) { - if (srcCols[i] < static_cast(width_)) { - cols_[index] = srcCols[i]; - if (valueType_ == FLOAT_VALUE) { - value_[index] = src.getValue()[i]; - } - ++index; - } - } - rows_[r + 1] = index; - } - CHECK_EQ(index, numLessWidth); - } else { - size_t numLessWidth = src.getCols()[width_] - src.getCols()[0]; - resize(height_, width_, numLessWidth, valueType_, format_); - cols_[0] = 0; - size_t index = 0; - // note: c < width_, not src.getWidth(); - for (size_t c = 0; c < width_; ++c) { - for (int i = src.getCols()[c]; i < src.getCols()[c + 
1]; ++i) { - rows_[index] = src.getRows()[i]; - if (valueType_ == FLOAT_VALUE) { - value_[index] = src.getValue()[i]; - } - ++index; - } - cols_[c + 1] = index; - } - CHECK_EQ(index, numLessWidth); - } -} - -void CpuSparseMatrix::zeroMem() { - CHECK(valueType_ == FLOAT_VALUE); - memset(value_, 0, elementCnt_ * sizeof(real)); -} - -template void CpuSparseMatrix::copyFrom(int64_t* ids, - int64_t* indices, - sparse_non_value_t* data); - -template void CpuSparseMatrix::copyFrom(int64_t* ids, - int64_t* indices, - sparse_float_value_t* data); - -template void CpuSparseMatrix::copyFrom(int64_t* indices, - sparse_non_value_t* data); - -template void CpuSparseMatrix::copyFrom(int64_t* indices, - sparse_float_value_t* data); - -void CpuSparseMatrix::rowMax(IVector& maxIds, Matrix& maxVal) { - size_t numSamples = getHeight(); - size_t beam = maxVal.getWidth(); - CHECK_EQ(maxIds.getSize(), numSamples * beam); - CHECK_EQ(maxVal.getHeight(), numSamples); - maxVal.zeroMem(); - int* outids = maxIds.getData(); - real* outvalues = maxVal.getData(); - - typedef std::pair valuepair; - std::vector vec; - for (size_t i = 0; i < numSamples; i++) { - vec.clear(); - - auto num = getColNum(i); - auto ids = getRowCols(i); - auto values = getRowValues(i); - for (size_t j = 0; j < num; j++) { - vec.push_back(std::make_pair(values[j], ids[j])); - } - - size_t outsize = std::min(num, beam); - std::partial_sort(vec.begin(), - vec.begin() + outsize, - vec.end(), - [](const valuepair& a, const valuepair& b) { - return a.first > b.first; - }); - for (size_t j = 0; j < outsize; j++) { - outids[i * beam + j] = vec[j].second; - outvalues[i * beam + j] = vec[j].first; - } - if (outsize < beam) { - // if the number of values to sort are less than the output size, - // use -1 to indicate the end of valid sorted values. 
- outids[i * beam + outsize] = -1; - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/math/CpuSparseMatrix.h b/paddle/legacy/math/CpuSparseMatrix.h deleted file mode 100644 index 172792c2950ce56281715cb7f3eb076da252d77e..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/CpuSparseMatrix.h +++ /dev/null @@ -1,377 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#ifndef PADDLE_MOBILE_INFERENCE - -#include -#include "Matrix.h" - -namespace paddle { - -class CpuSparseMatrix : public Matrix { - public: - CpuSparseMatrix(size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType = FLOAT_VALUE, - SparseFormat format = SPARSE_CSR, - bool trans = false); - - CpuSparseMatrix(CpuMemHandlePtr memHandle, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans); - - CpuSparseMatrix(real* data, - int* rows, - int* cols, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans); - - ~CpuSparseMatrix() {} - - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format); - void resize(size_t newHeight, size_t newWidth); - - MatrixPtr getTranspose(); - - SparseValueType getValueType(); - - real* getRowValues(size_t i) const { - if (format_ == 
SPARSE_CSR) { - return value_ + rows_[i]; - } else { - LOG(FATAL) << "SPARSE_CSC not supported"; - return 0; - } - } - - int* getRowCols(size_t i) const { - if (format_ == SPARSE_CSR) { - return cols_ + rows_[i]; - } else { - LOG(FATAL) << "SPARSE_CSC not supported"; - return 0; - } - } - - /// fill row indices of each value in CSR matrix - void fillRowIndices(IVectorPtr& outVec) const; - - size_t getColNum(size_t i) const { - if (format_ == SPARSE_CSR) { - return rows_[i + 1] - rows_[i]; - } else { - LOG(FATAL) << "SPARSE_CSC not supported"; - return 0; - } - } - - real* getColumn(size_t i) const { - if (format_ == SPARSE_CSC) { - return value_ + cols_[i]; - } else { - LOG(FATAL) << "SPARSE_CSR not supported"; - return 0; - } - } - - size_t getColStartIdx(size_t i) const { - if (format_ == SPARSE_CSC) { - return cols_[i]; - } else { - LOG(FATAL) << "SPARSE_CSR not supported"; - return 0; - } - } - - size_t getRowStartIdx(size_t i) const { - if (format_ == SPARSE_CSR) { - return rows_[i]; - } else { - LOG(FATAL) << "SPARSE_CSC not supported"; - return 0; - } - } - - size_t getRowNum(size_t i) const { - if (format_ == SPARSE_CSC) { - return cols_[i + 1] - cols_[i]; - } else { - LOG(FATAL) << "SPARSE_CSR not supported"; - return 0; - } - } - - virtual real getSum() { - CHECK(isContiguous()); - if (valueType_ == NO_VALUE) { - return elementCnt_; - } - double sum = 0; - for (size_t i = 0; i < elementCnt_; ++i) { - sum += value_[i]; - } - return sum; - } - - virtual void square2() { - CHECK(isContiguous()); - if (valueType_ == NO_VALUE) { - return; - } - for (size_t i = 0; i < elementCnt_; ++i) { - value_[i] = value_[i] * value_[i]; - } - } - - /** - * only consider nonzero values. - * the actual min value should compare with 0.0. - */ - virtual real getMin() { - CHECK(isContiguous()); - if (valueType_ == NO_VALUE) { - return (elementCnt_ > 0 ? 1.0 : 0.0); - } - real min = value_[0]; - for (size_t i = 1; i < elementCnt_; ++i) { - min = value_[i] < min ? 
value_[i] : min; - } - return min; - } - - /** - * only consider nonzero values. - * the actual max value should compare with 0.0. - */ - virtual real getMax() { - CHECK(isContiguous()); - if (valueType_ == NO_VALUE) { - return (elementCnt_ > 0 ? 1.0 : 0.0); - } - real max = value_[0]; - for (size_t i = 1; i < elementCnt_; ++i) { - max = value_[i] > max ? value_[i] : max; - } - return max; - } - - void rowMax(IVector& maxIds, Matrix& maxVal); - int* getRows() const { return rows_; } - int* getCols() const { return cols_; } - real* getValue() const { return value_; } - SparseFormat getFormat() const { return format_; } - SparseValueType getValueType() const { return valueType_; } - - /** - * @brief return value_ of sparse matrix - * - * Some times CpuSparseMatrix maybe Matrix, - * if getValue, must dynamic_cast to CpuSparseMatrix, - * getData is convenient to get value - */ - real* getData() { return getValue(); } - const real* getData() const { return getValue(); } - - /** - * @brief only set value_ of FLOAT_VALUE sparse matrix to zero - */ - void zeroMem(); - - /// mem MUST be alloced outside (memAlloc=false) - void transpose(MatrixPtr& matTrans, bool memAlloc); - - void mul(const Matrix& A, const Matrix& B, real alpha, real beta); - - /** - * @brief sparseMatrix += denseMatrix - * - * Named add3 just because add/add2 has been used in BaseMatrix.cu - * and they are not virtual function. - * - * Only add value of same (row, col) index in dense matrix - * and do not use others values whoes postions are not in sparse matirx. 
- * - * @param[in] b dense matrix - */ - void add3(CpuMatrix* b); - void add3(MatrixPtr b); - - /** - * @brief sparseMatrix[i,j] += bias[j], (j is the col index of sparse matrix) - * - * @param[in] b bias, dense matrix and height = 1 - * @param[in] scale scale of b - */ - void addBias(Matrix& b, real scale); - - void print(std::ostream& os) const; - - void printOneRow(std::ostream& os, size_t idx) const; - - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values); - - /** - * @brief this_row = b_row * c_row[cCol] - * - * @param[in] cCol the column of matrix c used to scale each row of b - * @param[in] b CpuSparseMatrix - * @param[in] c Matrix - */ - void rowScale(size_t cCol, CpuSparseMatrix& b, Matrix& c); - - void randomizeUniform(); - - void copyFrom(const GpuSparseMatrix& src, hl_stream_t stream); - - void copyFrom(const Matrix& src, hl_stream_t stream = HPPL_STREAM_DEFAULT); - - void copyFrom(const Matrix& src); - - /** - * Get a temporary matrix. This is threadsafe. It should be only used - * temporarily, i.e. do not store it or use it as return value. - * - * @note Do NOT use large amount of tmp matrix. 
- */ - CpuSparseMatrixPtr getTmpSparseMatrix(size_t height, size_t width); - - virtual MatrixPtr subMatrix(size_t startRow, size_t numRows); - - void copyFrom(std::vector& rows, - std::vector& cols, - std::vector& values); - - void copyFrom(const CpuMatrix& src); - - void copyFrom(const CpuSparseMatrix& src); - - // trim the large size - void trimFrom(const CpuSparseMatrix& src); - - void copyRow(int offsets, size_t colNum, const sparse_non_value_t* row); - - void copyRow(int offsets, size_t colNum, const sparse_float_value_t* row); - - template - void copyFrom(int64_t* ids, int64_t* indices, T* data); - - template - void copyFrom(int64_t* indices, T* data); - - void copyFrom(const real* data, size_t len) { - LOG(FATAL) << "not supported!"; - } - - private: - MatrixPtr clone(size_t height = 0, size_t width = 0, bool useGpu = false); - - protected: - void sparseResize(); - /*for csr , record row start position, for csc, record row index for every no - * zero value*/ - int* rows_; - /*for csc , record col start position, for csr, record col index for every no - * zero value*/ - int* cols_; - real* value_; /*nonzero value*/ - SparseFormat format_; /* matrix format */ - SparseValueType valueType_; /*with value or not */ - static const size_t DEFAULT_AVG_WIDTH = 20; - - static ThreadLocal> cpuLocalMats_; - - // BaseMatrixT interface - public: - bool isSparse() const { return true; } - - private: - using Matrix::mul; - using Matrix::copyFrom; - using Matrix::rowMax; - using Matrix::print; - using Matrix::subMatrix; -}; -} // namespace paddle - -#else - -#include "Matrix.h" - -namespace paddle { - -class CpuSparseMatrix : public Matrix { - public: - CpuSparseMatrix(size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType = FLOAT_VALUE, - SparseFormat format = SPARSE_CSR, - bool trans = false) - : Matrix(NULL, height, width, trans, false) {} - - CpuSparseMatrix(real* data, - int* rows, - int* cols, - size_t height, - size_t 
width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans) - : Matrix(NULL, height, width, trans, false) {} - - real* getValue() const { return nullptr; } - size_t getColStartIdx(size_t i) const { return 0; } - size_t getRowStartIdx(size_t i) const { return 0; } - size_t getColNum(size_t i) const { return 0; } - int* getRowCols(size_t i) const { return nullptr; } - - CpuSparseMatrixPtr getTmpSparseMatrix(size_t height, size_t width) { - return nullptr; - } - - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format) {} - void resize(size_t newHeight, size_t newWidth) {} - MatrixPtr getTranspose() { return nullptr; } - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) {} -}; - -} // namespace paddle - -#endif diff --git a/paddle/legacy/math/ExecViaCpu.h b/paddle/legacy/math/ExecViaCpu.h deleted file mode 100644 index ec2337545e9e3efdf31d3d786a096a67283715f2..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/ExecViaCpu.h +++ /dev/null @@ -1,195 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - execViaCpu is used to do operations on GpuMatirx and/or GpuIVector through - cpu functions. It can automatically make a temporary CPU copy for the - gpu matrix/vector, and copy back after executing the CPU function. - - Examples: - 1. 
For a function, functor or lambda: - r = execViaCpu(&f, mat, vec) - - 2. For member function of CpuMatirx, execViaCpu2 should be used: - execViaCpu2(&CpuMatrix::selectElements, *this, table, ids) -*/ - -#pragma once - -namespace paddle { - -template -class CopyToCpu { - public: - explicit CopyToCpu(Arg& arg) : arg_(arg) {} - Arg& copiedArg() const { return arg_; } - - private: - Arg& arg_; -}; - -template <> -class CopyToCpu { - public: - explicit CopyToCpu(Matrix& arg) : arg_(arg) { - if (arg.useGpu()) { - CHECK(!arg.isTransposed()) << "Not supported"; - copied_ = Matrix::create(arg.getHeight(), - arg.getWidth(), - /* trans= */ false, - /* useGpu= */ false); - copied_->copyFrom(arg); - } - } - ~CopyToCpu() { - if (copied_) { - arg_.copyFrom(*copied_); - } - } - Matrix& copiedArg() const { return copied_ ? *copied_ : arg_; } - - private: - Matrix& arg_; - MatrixPtr copied_; -}; - -template <> -class CopyToCpu { - public: - explicit CopyToCpu(const Matrix& arg) : arg_(arg) { - if (arg.useGpu()) { - CHECK(!arg.isTransposed()) << "Not supported"; - copied_ = Matrix::create(arg.getHeight(), - arg.getWidth(), - /* trans= */ false, - /* useGpu= */ false); - copied_->copyFrom(arg); - } - } - const Matrix& copiedArg() const { return copied_ ? *copied_ : arg_; } - - private: - const Matrix& arg_; - MatrixPtr copied_; -}; - -template <> -class CopyToCpu { - public: - explicit CopyToCpu(IVector& arg) : arg_(arg) { - if (arg.useGpu()) { - copied_ = IVector::create(arg.getSize(), /* useGpu= */ false); - copied_->copyFrom(arg); - } - } - ~CopyToCpu() { - if (copied_) { - arg_.copyFrom(*copied_); - } - } - IVector& copiedArg() const { return copied_ ? 
*copied_ : arg_; } - - private: - IVector& arg_; - IVectorPtr copied_; -}; - -template <> -class CopyToCpu { - public: - explicit CopyToCpu(const IVector& arg) : arg_(arg) { - if (arg.useGpu()) { - copied_ = IVector::create(arg.getSize(), /* useGpu= */ false); - copied_->copyFrom(arg); - } - } - const IVector& copiedArg() const { return copied_ ? *copied_ : arg_; } - - private: - const IVector& arg_; - IVectorPtr copied_; -}; - -namespace detail { - -template -class GpuFuncWrapperImp; - -template -class GpuFuncWrapperBase { - public: - typedef R ResultType; - R operator()(F&& f, Args... args) { - return f(CopyToCpu::type>(args) - .copiedArg()...); - } -}; - -// function -template -class GpuFuncWrapperImp - : public GpuFuncWrapperBase {}; - -// function pointer -template -class GpuFuncWrapperImp - : public GpuFuncWrapperBase {}; - -template -class GpuFuncWrapperImp2; - -template -class GpuFuncWrapperImp2 - : public GpuFuncWrapperBase {}; - -template -class GpuFuncWrapperImp2 - : public GpuFuncWrapperBase {}; - -// functor or lambda -template -class GpuFuncWrapperImp - : public GpuFuncWrapperImp2 {}; - -template -class GpuFuncWrapper2 - : public GpuFuncWrapperImp< - std::is_function::value, - std::is_pointer::value && - std::is_function::type>::value, - std::is_class::value, - F> {}; - -template -class GpuFuncWrapper - : public GpuFuncWrapper2::type> {}; - -} // namespace detail - -template -typename detail::GpuFuncWrapper::ResultType execViaCpu(F&& f, - Args&&... args) { - return detail::GpuFuncWrapper()(std::move(f), args...); -} - -// The second version is for F as member function of CpuMatrix -template -R execViaCpu2(R (CpuMatrix::*f)(FArgs...), Args&&... args) { - auto lambda = [](R (CpuMatrix::*f)(FArgs...), Matrix& ths, FArgs... 
args) { - return (((CpuMatrix&)ths).*f)(args...); - }; - return execViaCpu(lambda, f, args...); -} - -} // namespace paddle diff --git a/paddle/legacy/math/MKLDNNMatrix.cpp b/paddle/legacy/math/MKLDNNMatrix.cpp deleted file mode 100644 index 52036c5f80313cf624bcebb6bd9aded53a78277d..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MKLDNNMatrix.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MKLDNNMatrix.h" - -using namespace mkldnn; // NOLINT - -namespace paddle { - -MKLDNNMatrixPtr MKLDNNMatrix::create(memory::primitive_desc pd, MatrixPtr m) { - memory::desc md = pd.desc(); - size_t ndims = md.data.ndims; - int* dims = md.data.dims; - CHECK(ndims > 0) << "Input dims should not be empty"; - size_t cnts = 1; - for (size_t i = 0; i < ndims; ++i) { - cnts *= dims[i]; - } - - if (m == nullptr) { - size_t height = dims[0]; - size_t width = cnts / dims[0]; - m = Matrix::create(height, width, false, false); - } - CHECK(m) << " Matrix should not be empty"; - - CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast(m); - CHECK(cpuMatrix) << "Only support create from CPU matrix yet"; - CHECK_EQ(cpuMatrix->getElementCnt(), cnts) << "Count size does not match"; - return std::make_shared(cpuMatrix, pd); -} - -MKLDNNMatrixPtr MKLDNNMatrix::create(memory::dims dims, - memory::format fmt, - engine& eg, - MatrixPtr m, - mkldnn::memory::data_type dtype) { - return 
create(createPrimitiveDesc(dims, fmt, eg, dtype), m); -} - -std::shared_ptr MKLDNNMatrix::createReorder(const MKLDNNMatrixPtr& src, - const MKLDNNMatrixPtr& dst, - bool checkData) { - if (src == dst || src->getPrimitiveDesc() == dst->getPrimitiveDesc()) { - return nullptr; - } - - if (checkData && (src->getData() == dst->getData())) { - LOG(FATAL) << "can not create reorder with inplace data"; - return nullptr; - } - - memory::dims srcDims = src->getDims(); - memory::dims dstDims = dst->getDims(); - CHECK_EQ(srcDims.size(), dstDims.size()); - for (size_t i = 0; i < srcDims.size(); ++i) { - CHECK_EQ(srcDims[i], dstDims[i]); - } - return std::make_shared(*src, *dst); -} - -void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, - memory::format srcFmt, - memory::dims targetDim) { - memory::format dstFmt = getFormat(); - if (srcFmt == dstFmt) { - return; - } - CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; - reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim); -} - -void MKLDNNMatrix::reorderDataTo(const MKLDNNMatrixPtr& m, - memory::format dstFmt, - memory::dims targetDim) { - memory::format srcFmt = getFormat(); - if (srcFmt == dstFmt) { - return; - } - CHECK_EQ(getElementCnt(), m->getElementCnt()) << "size should equal"; - reorderOnce(getData(), m->getData(), srcFmt, dstFmt, targetDim); -} - -void MKLDNNMatrix::reorderOnce(void* srcData, - void* dstData, - memory::format srcFmt, - memory::format dstFmt, - memory::dims dm) { - CHECK(srcData); - CHECK(dstData); - MatrixPtr tmpSrc; - if (dstData == srcData) { - // inplace data - size_t sz = 1; - for (size_t i = 0; i < dm.size(); ++i) { - sz *= dm[i]; - } - tmpSrc = Matrix::create(sz, 1, false, false); - tmpSrc->copyFrom((real*)srcData, sz); - srcData = tmpSrc->getData(); - } - - auto dtype = this->getDtype(); - auto srcMD = memory::desc(dm, dtype, srcFmt); - auto dstMD = memory::desc(dm, dtype, dstFmt); - - auto eg = this->getEngine(); - auto src = 
memory(memory::primitive_desc(srcMD, eg), srcData); - auto dst = memory(memory::primitive_desc(dstMD, eg), dstData); - - auto r = reorder(src, dst); - stream(stream::kind::eager).submit({r}).wait(); -} - -void MKLDNNMatrix::downSpatial() { - int fmt = getFormat(); - if (!(fmt == memory::format::nchw || fmt == memory::format::oihw)) { - // only support nchw and oihw yet, later can support more like nhwc, ihwo - return; - } - - // TODO(TJ): change H(height) and W(width) if support nhwc or more - const int H = 2, W = 3; - memory::dims srcDims = getDims(); - if (srcDims[H] != 1 || srcDims[W] != 1) { - // can not down spatial - return; - } - - memory::dims dstDims = memory::dims{srcDims[0], srcDims[1]}; - memory::format dstFmt; - switch (fmt) { - case memory::format::nchw: - dstFmt = memory::format::nc; - break; - case memory::format::oihw: - dstFmt = memory::format::oi; - break; - default: - LOG(FATAL) << "unsupported format"; - } - memory::desc md = memory::desc(dstDims, getDtype(), dstFmt); - memory::primitive_desc pd = memory::primitive_desc(md, getEngine()); - resetMKLDNNMemory(pd, data_); -} - -} // namespace paddle diff --git a/paddle/legacy/math/MKLDNNMatrix.h b/paddle/legacy/math/MKLDNNMatrix.h deleted file mode 100644 index 5a0e5f85923dfd822dad4c63679acde63719f217..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MKLDNNMatrix.h +++ /dev/null @@ -1,256 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "Matrix.h" -#include "mkldnn.hpp" -#include "paddle/legacy/parameter/Parameter.h" - -namespace paddle { - -class MKLDNNMatrix; -typedef std::shared_ptr MKLDNNMatrixPtr; - -#define CHECK_PRIMITIVE_DESC_EQ(MAT, PD, ...) \ - CHECK(MAT) << " can not be empty."; \ - CHECK(MAT->getPrimitiveDesc() == PD) \ - << #MAT "->getPrimitiveDesc() and " #PD " should be equal.\n " \ - << "" __VA_ARGS__; - -/** - * @brief MKLDNN Matrix. - * - */ -class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { - public: - MKLDNNMatrix(CpuMatrixPtr m, mkldnn::memory::primitive_desc pd) - : CpuMatrix(m->getData(), m->getHeight(), m->getWidth(), false), - mkldnn::memory(pd, m->getData()), - m_(m) {} - - ~MKLDNNMatrix() {} - - /** - * Create MKLDNNMatrix from a MatrixPtr and memory primitive_desc - */ - static MKLDNNMatrixPtr create(mkldnn::memory::primitive_desc pd, - MatrixPtr m = nullptr); - - /** - * Create MKLDNNMatrix from a MatrixPtr and memory details info - */ - static MKLDNNMatrixPtr create( - mkldnn::memory::dims dims, - mkldnn::memory::format fmt, - mkldnn::engine& eg, - MatrixPtr m = nullptr, - mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); - - /** - * Create primitive descriptor. - * default with f32 dtype - */ - static mkldnn::memory::primitive_desc createPrimitiveDesc( - const mkldnn::memory::dims dims, - const mkldnn::memory::format& fmt, - const mkldnn::engine& eg, - const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) { - return mkldnn::memory::primitive_desc(memory::desc(dims, dtype, fmt), eg); - } - - /** - * Create Memory descriptor. 
- * default with any format and f32 dtype - */ - static mkldnn::memory::desc createMemoryDesc( - const mkldnn::memory::dims dims, - const mkldnn::memory::format& fmt = mkldnn::memory::format::any, - const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) { - return mkldnn::memory::desc(dims, dtype, fmt); - } - - /** - * Create reorder primitive. - * Create a mkldnn::reorder handle for converting src MKLDNNMatrix to dst. - * checkData: whether to check the data handle of src and dst. - * if true, it will check the data and do not allow them equal; - * otherwise, it will not check them, then the reorder created - * may have inplace buffer. - * Do not set false, if you can not guarantee the inplace logical - * would work with your reorder. - */ - static std::shared_ptr createReorder( - const MKLDNNMatrixPtr& src, - const MKLDNNMatrixPtr& dst, - bool checkData = true); - - void copyFrom(const Matrix& src) { - // TODO(TJ): reorder data if this format is not nchw or x - m_->copyFrom(src); - } - - void copyTo(Matrix& dst) { - // TODO(TJ): reorder data if this format is not nchw or x - dst.copyFrom(*m_); - } - - public: - /** - * Reorder this MKLDNNMatrix from other format. - * Support inplace reorder. - * @note: this function would only reorder the data layout. - * will NOT change this original dim or format info - */ - void reorderDataFrom(const MKLDNNMatrixPtr& m, - memory::format srcFmt, - memory::dims targetDim); - - /** - * Reorder this MKLDNNMatrix to other format. - * Support inplace reorder. - * @note: this function would only reorder the data layout. - * will NOT change the dst dim or format info - */ - void reorderDataTo(const MKLDNNMatrixPtr& m, - memory::format dstFmt, - memory::dims targetDim); - - /** - * Dimensionality reduction. - * Change format "nchw --> nc" or "oihw --> oi" if the h and w are both 1 - */ - void downSpatial(); - - /** - * set the memory data handle. 
- * Caution: This will not check the buffer size of the data, - * it should be coverd by user. - */ - void setData(real* data) { - set_data_handle(data); - CpuMatrix::setData(data); - m_.reset(); - } - - /** - * override the CpuMatrix::resize - */ - void resize(size_t newHeight, size_t newWidth) override { - m_->resize(newHeight, newWidth); - if (data_ == m_->getData() && elementCnt_ == newHeight * newWidth) { - return; - } - CpuMatrix::setData(data_); - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newHeight * newWidth; - stride_ = width_; - auto pd = mkldnn::memory::primitive_desc( - mkldnn::memory::desc({(int)newHeight, (int)newWidth}, - getDtype(), - mkldnn::memory::format::nc), - getEngine()); - resetMKLDNNMemory(pd, data_); - } - - /** - * override Matrix::getData - * check data before return - */ - real* getData() override { - CHECK_EQ((void*)data_, get_data_handle()); - return data_; - } - - const real* getData() const override { - CHECK_EQ((void*)data_, get_data_handle()); - return data_; - } - - /** - * Get primitive descriptor. - */ - mkldnn::memory::primitive_desc getPrimitiveDesc() { - return this->get_primitive_desc(); - } - - /** - * Get memory descriptor. - */ - mkldnn::memory::desc getMemoryDesc() { return getPrimitiveDesc().desc(); } - - /** - * Get dimensions. - */ - mkldnn::memory::dims getDims() { - mkldnn::memory::desc md = getMemoryDesc(); - const int* src = md.data.dims; - int ndims = md.data.ndims; - mkldnn::memory::dims dst; - dst.resize(ndims); - for (int i = 0; i < ndims; ++i) { - dst[i] = src[i]; - } - return dst; - } - - /** - * Get format. - */ - mkldnn::memory::format getFormat() { - return (mkldnn::memory::format)(getMemoryDesc().data.format); - } - - /** - * Get memory data type. - */ - mkldnn::memory::data_type getDtype() { - return (mkldnn::memory::data_type)(getMemoryDesc().data.data_type); - } - - /** - * Get engine. 
- */ - mkldnn::engine getEngine() { return getPrimitiveDesc().get_engine(); } - - protected: - /** - * Do reorder once. - * Can support inplace. - */ - void reorderOnce(void* srcData, - void* dstData, - memory::format srcFmt, - memory::format dstFmt, - memory::dims dm); - /** - * reset this MKLDNN Memory from primitve desc - */ - void resetMKLDNNMemory(memory::primitive_desc pd, real* data) { - mkldnn_primitive_t result; - mkldnn::error::wrap_c_api( - mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), - "could not create a memory primitive"); - reset(result); - set_data_handle(data); - } - - private: - // save the CpuMatrixPtr in case the buffer released outside - CpuMatrixPtr m_; -}; - -} // namespace paddle diff --git a/paddle/legacy/math/MathFunctions.cpp b/paddle/legacy/math/MathFunctions.cpp deleted file mode 100644 index bbf34a32f36fa7988058f8d3bb7f91eaf2bc1ba0..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MathFunctions.cpp +++ /dev/null @@ -1,348 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/math/MathFunctions.h" -#include "hl_matrix_apply.cuh" -#include "hl_matrix_ops.cuh" -#include "paddle/legacy/utils/DynamicLoader.h" - -namespace dynload { - -std::once_flag lapack_dso_flag; -void* lapack_dso_handle = nullptr; - -/** - * The following macro definition can generate structs - * (for each function) to dynamic load lapack routine - * via operator overloading. - * - * note: default dynamic linked libs - */ - -// The argument for stringizing operator is not macro-expanded first. -// We have to use two levels of macro to do the expansion. -// See https://gcc.gnu.org/onlinedocs/cpp/Stringizing.html -#define STR(x) #x - -// clang-format off -#ifndef LAPACK_FOUND -#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using lapack_func = decltype(__name(args...)) (*)(Args...); \ - std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \ - void* p_##__name = dlsym(lapack_dso_handle, STR(__name)); \ - CHECK(p_##__name) << "Cannot find symbol " << STR(__name) \ - << " in liblapack.so"; \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - } __name; // struct DynLoad__##__name -#else -#define DYNAMIC_LOAD_LAPACK_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... 
args) -> decltype(__name(args...)) { \ - return __name(args...); \ - } \ - } __name; // struct DynLoad__##__name -#endif - -#define PADDLE_SGETRF LAPACKE_sgetrf -#define PADDLE_DGETRF LAPACKE_dgetrf -#define PADDLE_SGETRI LAPACKE_sgetri -#define PADDLE_DGETRI LAPACKE_dgetri - -#define LAPACK_ROUTINE_EACH(__macro) \ - __macro(PADDLE_SGETRF) \ - __macro(PADDLE_DGETRF) \ - __macro(PADDLE_SGETRI) \ - __macro(PADDLE_DGETRI) -// clang-format on - -LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP) - -} // namespace dynload - -namespace paddle { - -#ifndef PADDLE_USE_EIGEN_FOR_BLAS -template <> -void gemm(const CBLAS_TRANSPOSE transA, - const CBLAS_TRANSPOSE transB, - const int M, - const int N, - const int K, - const float alpha, - const float* A, - const int lda, - const float* B, - const int ldb, - const float beta, - float* C, - const int ldc) { - cblas_sgemm(CblasRowMajor, - transA, - transB, - M, - N, - K, - alpha, - A, - lda, - B, - ldb, - beta, - C, - ldc); -} - -template <> -void gemm(const CBLAS_TRANSPOSE transA, - const CBLAS_TRANSPOSE transB, - const int M, - const int N, - const int K, - const double alpha, - const double* A, - const int lda, - const double* B, - const int ldb, - const double beta, - double* C, - const int ldc) { - cblas_dgemm(CblasRowMajor, - transA, - transB, - M, - N, - K, - alpha, - A, - lda, - B, - ldb, - beta, - C, - ldc); -} -#endif - -template <> -int getrf(const CBLAS_ORDER order, - const int M, - const int N, - float* A, - const int lda, - int* ipiv) { - return dynload::PADDLE_SGETRF(order, M, N, A, lda, ipiv); -} - -template <> -int getrf(const CBLAS_ORDER order, - const int M, - const int N, - double* A, - const int lda, - int* ipiv) { - return dynload::PADDLE_DGETRF(order, M, N, A, lda, ipiv); -} - -template <> -int getri(const CBLAS_ORDER order, - const int N, - float* A, - const int lda, - const int* ipiv) { - return dynload::PADDLE_SGETRI(order, N, A, lda, ipiv); -} - -template <> -int getri(const CBLAS_ORDER order, - const int N, 
- double* A, - const int lda, - const int* ipiv) { - return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv); -} - -#ifndef PADDLE_USE_EIGEN_FOR_BLAS -template <> -void axpy(const int n, const float alpha, const float* x, float* y) { - cblas_saxpy(n, alpha, x, 1, y, 1); -} - -template <> -void axpy(const int n, const double alpha, const double* x, double* y) { - cblas_daxpy(n, alpha, x, 1, y, 1); -} - -template <> -float dotProduct(const int n, const float* x, const float* y) { - return cblas_sdot(n, x, 1, y, 1); -} - -template <> -double dotProduct(const int n, const double* x, const double* y) { - return cblas_ddot(n, x, 1, y, 1); -} -#endif - -#if defined(PADDLE_WITH_MKLML) - -template <> -void vExp(const int n, const float* a, float* r) { - vsExp(n, a, r); -} - -template <> -void vExp(const int n, const double* a, double* r) { - vdExp(n, a, r); -} - -template <> -void vPow(const int n, const float* a, const float b, float* r) { - vsPowx(n, a, b, r); -} - -template <> -void vPow(const int n, const double* a, const double b, double* r) { - vdPowx(n, a, b, r); -} - -template <> -void vLog(const int n, const float* a, float* r) { - vsLn(n, a, r); -} - -template <> -void vLog(const int n, const double* a, double* r) { - vdLn(n, a, r); -} - -template <> -void vAdd(const int n, const float* a, const float* b, float* r) { - vsAdd(n, a, b, r); -} - -template <> -void vAdd(const int n, const double* a, const double* b, double* r) { - vdAdd(n, a, b, r); -} - -template <> -void vTanh(const int n, const float* a, float* r) { - vsTanh(n, a, r); -} - -template <> -void vTanh(const int n, const double* a, double* r) { - vdTanh(n, a, r); -} - -template <> -void vInvSqrt(const int n, const float* a, float* r) { - vsInvSqrt(n, a, r); -} - -template <> -void vInvSqrt(const int n, const double* a, double* r) { - vdInvSqrt(n, a, r); -} - -template <> -void vLog1p(const int n, const float* a, float* r) { - vsLog1p(n, a, r); -} - -template <> -void vLog1p(const int n, const double* a, 
double* r) { - vdLog1p(n, a, r); -} -#else - -DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a)); -template -void vExp(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vExp(), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a)); -template -void vLog(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vLog(), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p)); -template -void vPow(const int n, const T* a, const T b, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vPow(b), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b); -template -void vAdd(const int n, const T* a, const T* b, T* r) { - hl_cpu_apply_ternary_op, 0, 0>(ternary::vAdd(), - const_cast(a), - const_cast(b), - r, - 1, - n, - n, - n, - n); -} - -DEFINE_MATRIX_BINARY_OP(vInvSqrt, b = 1.0f / std::sqrt(a)); -template -void vInvSqrt(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vInvSqrt(), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_BINARY_OP(vLog1p, b = std::log(1.0f + a)); -template -void vLog1p(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vLog1p(), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_BINARY_OP(vTanh, T tmp = -2.0 * a; - tmp = (tmp > EXP_MAX_INPUT) ? 
EXP_MAX_INPUT : tmp; - b = 2.0 / (1.0 + std::exp(tmp)) - 1.0); -template -void vTanh(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vTanh(), const_cast(a), r, 1, n, n, n); -} - -template void vExp(const int n, const float* a, float* r); -template void vExp(const int n, const double* a, double* r); -template void vLog(const int n, const float* a, float* r); -template void vLog(const int n, const double* a, double* r); -template void vPow(const int n, const float* a, const float b, float* r); -template void vPow(const int n, const double* a, const double b, double* r); -template void vAdd(const int n, const float* a, const float* b, float* r); -template void vAdd(const int n, const double* a, const double* b, double* r); -template void vInvSqrt(const int n, const double* a, double* r); -template void vInvSqrt(const int n, const float* a, float* r); -template void vLog1p(const int n, const float* a, float* r); -template void vLog1p(const int n, const double* a, double* r); -template void vTanh(const int n, const float* a, float* r); -template void vTanh(const int n, const double* a, double* r); -#endif -} // namespace paddle diff --git a/paddle/legacy/math/MathFunctions.h b/paddle/legacy/math/MathFunctions.h deleted file mode 100644 index 854e4baa3987f61353038c7b26acf43943c89636..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MathFunctions.h +++ /dev/null @@ -1,129 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#ifdef PADDLE_WITH_MKLML -#include -#include -#include -#endif - -#ifdef PADDLE_USE_VECLIB -extern "C" { -#include -#include -} -#endif - -#ifdef PADDLE_USE_OPENBLAS -#include -#ifdef LAPACK_FOUND -#include -#endif -#endif - -#ifndef LAPACK_FOUND -extern "C" { -#ifndef PADDLE_USE_EIGEN_FOR_BLAS -#include -#else -typedef enum CBLAS_ORDER { - CblasRowMajor = 101, - CblasColMajor = 102 -} CBLAS_ORDER; -#endif -int LAPACKE_sgetrf( - int matrix_layout, int m, int n, float* a, int lda, int* ipiv); -int LAPACKE_dgetrf( - int matrix_layout, int m, int n, double* a, int lda, int* ipiv); -int LAPACKE_sgetri( - int matrix_layout, int n, float* a, int lda, const int* ipiv); -int LAPACKE_dgetri( - int matrix_layout, int n, double* a, int lda, const int* ipiv); -} -#endif - -#include - -namespace paddle { - -#ifndef PADDLE_USE_EIGEN_FOR_BLAS -template -void gemm(const CBLAS_TRANSPOSE transA, - const CBLAS_TRANSPOSE transB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc); -#endif - -template -int getrf(const CBLAS_ORDER Order, - const int M, - const int N, - T* A, - const int lda, - int* ipiv); - -template -int getri( - const CBLAS_ORDER Order, const int N, T* A, const int lda, const int* ipiv); - -template -void axpy(const int n, const T alpha, const T* x, T* y) { - /// y = y + alpha * x - for (int i = 0; i < n; i++) { - y[i] = y[i] + alpha * x[i]; - } -} - -template -T dotProduct(const int n, const T* x, const T* y) { - T result = static_cast(0); - for (int i = 0; i < n; i++) { - result += x[i] * y[i]; - } - return result; -} - -template -void vExp(const int n, const T* a, T* r); - -template -void vPow(const int n, const T* a, const T b, T* r); - -template -void vLog(const int n, const T* a, T* r); - -template -void vAdd(const int n, const T* 
a, const T* b, T* r); - -template -void vInvSqrt(const int n, const T* a, T* r); - -template -void vLog1p(const int n, const T* a, T* r); - -template -void vTanh(const int n, const T* a, T* r); - -} // namespace paddle diff --git a/paddle/legacy/math/MathUtils.cpp b/paddle/legacy/math/MathUtils.cpp deleted file mode 100644 index 47ac9c187ca731c98c755501ff3633eabf095186..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MathUtils.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "MathUtils.h" -#include -#include "Vector.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -/*if csc, major is cols and minor is rows, else - * major is rows and minor is cols, according to - * major value to initialize minor value" - */ -void sparseRand( - int* major, int* minor, int nnz, int majorLen, int minorMax, bool useGpu) { - CHECK(size_t(nnz) >= size_t(1)); - int* cpuMajor; - int* cpuMinor; - CpuIVector cpuMinorVec(nnz); - CpuIVector cpuMajorVec(majorLen); - if (useGpu) { - cpuMajor = cpuMajorVec.getData(); - cpuMinor = cpuMinorVec.getData(); - } else { - cpuMajor = major; - cpuMinor = minor; - } - - /*major value init*/ - for (int i = 0; i < majorLen - 1; i++) { - cpuMajor[i] = 1.0 * i * nnz / (majorLen - 1); - } - cpuMajor[majorLen - 1] = nnz; - - /*minor value init according to major value*/ - std::vector used(minorMax, 0); - for (int i = 0; i < majorLen - 1; i++) { - CHECK_LE(cpuMajor[i + 1] - cpuMajor[i], minorMax); - used.assign(minorMax, 0); - for (int j = cpuMajor[i]; j < cpuMajor[i + 1]; j++) { - int idx = ::rand() % minorMax; - while (used[idx]) { - idx = ::rand() % minorMax; - } - cpuMinor[j] = idx; - used[idx] = 1; - } - std::sort(cpuMinor + cpuMajor[i], - cpuMinor + cpuMajor[i + 1], - [](int a, int b) { return a < b; }); - } - /*memcpy result to gpu*/ - if (useGpu) { - hl_memcpy_host2device(major, cpuMajor, sizeof(int) * majorLen); - hl_memcpy_host2device(minor, cpuMinor, sizeof(int) * nnz); - } -} - -int outputSize( - int imageSize, int filterSize, int padding, int stride, bool caffeMode) { - int outputSize; - if (!caffeMode) { - outputSize = - (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1; - } else { - outputSize = (imageSize - filterSize + 2 * padding) / stride + 1; - } - CHECK_GE(outputSize, 1); - return outputSize; -} - -int imageSize( - int outputSize, int filterSize, int padding, int stride, bool caffeMode) { - int imageSize; - if (!caffeMode) { - imageSize = - (outputSize - 1) * 
stride + filterSize - 2 * padding - stride + 1; - } else { - imageSize = (outputSize - 1) * stride + filterSize - 2 * padding; - } - CHECK_GE(imageSize, 1); - return imageSize; -} - -} // namespace paddle diff --git a/paddle/legacy/math/MathUtils.h b/paddle/legacy/math/MathUtils.h deleted file mode 100644 index 597485d9c54a2942134f58d308b387ff0bdf061b..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MathUtils.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -namespace paddle { - -/** - * this function is for SparseMatrix initialization except data. - * It generates a random non-zero pattern for a sparse matrix. - * - * if format is SPARSE_CSC, - * major is column start index and minor is row index - * for each non zero value. - * else - * major is row start index and minor is col - * index for each non zero value. - * - * Initialize minor value according to major value. - * - * For example, A is 5*3 CSC matrix, nnz is 10, then - * - * @code - * cols[i] = i * nnz / 3 - * cols=[0, 3, 6, 10] - * @endcode - * - * for column i, we randomly select cols[i+1] - cols[i] rows - * as non zero number row index. - * - * rows is [1, 3, 4, 0, 2, 4, 1, 2, 3, 4] - */ -void sparseRand( - int* major, int* minor, int nnz, int majorLen, int minorMax, bool useGpu); - -/** - * Calculate output size based on caffeMode_. 
- * - input(+padding): 0123456789 - * - imageSize(+padding) = 10; - * - filterSize = 3; - * - stride = 2; - * - caffeMode is true: - - output: (012), (234), (456), (678) - - outputSize = 4; - * - caffeMode is false: - * - output: (012), (234), (456), (678), (9) - * - outputSize = 5; - */ -int outputSize( - int imageSize, int filterSize, int padding, int stride, bool caffeMode); - -/** - * Calculate image size based on output size and caffeMode_. - * It is the reverse function of outputSize() - */ -int imageSize( - int outputSize, int filterSize, int padding, int stride, bool caffeMode); - -} // namespace paddle diff --git a/paddle/legacy/math/Matrix.cpp b/paddle/legacy/math/Matrix.cpp deleted file mode 100644 index e53f95006c36bfce5df8e57e9efc249f56098b70..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/Matrix.cpp +++ /dev/null @@ -1,4787 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Matrix.h" -#include "MathFunctions.h" -#include "SparseMatrix.h" -#include "SparseRowMatrix.h" - -#include -#include -#include - -#include -#include "hl_cnn.h" -#include "hl_gpu.h" -#include "hl_table_apply.h" -#include "hl_top_k.h" -#include "paddle/legacy/utils/Logging.h" - -#include "NEONFunctions.h" -#include "paddle/legacy/function/GemmFunctor.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -#include "SIMDFunctions.h" - -namespace paddle { - -inline real _pow(real a, real beta) { return std::pow(a, beta); } - -inline real _square(real a) { return a * a; } - -inline real _safelog(real a) { return a > 0.0f ? std::log(a) : -40.0f; } - -Matrix::Matrix(MemoryHandlePtr memHandle, - size_t height, - size_t width, - bool trans, - bool use_gpu) - : BaseMatrix( - height, - width, - memHandle ? (reinterpret_cast(memHandle->getBuf())) : nullptr, - trans, - use_gpu) { - elementCnt_ = width * height; - memoryHandle_ = memHandle; -} - -Matrix::Matrix( - real* data, size_t height, size_t width, bool trans, bool use_gpu) - : BaseMatrix(height, width, data, trans, use_gpu) { - elementCnt_ = width * height; -} - -Matrix::Matrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans, - bool use_gpu) - : BaseMatrix(height, width, stride, data, trans, use_gpu) { - elementCnt_ = width * height; -} - -MatrixPtr Matrix::createSparseMatrix(real* data, - int* row, - int* col, - size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType, /*value type*/ - SparseFormat format, - bool trans, - bool useGpu) { - if (useGpu) { - return std::make_shared( - data, row, col, height, width, nnz, valueType, format, trans); - } else { - return std::make_shared( - data, row, col, height, width, nnz, valueType, format, trans); - } -} - -MatrixPtr Matrix::createSparseMatrix(size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType, /*value type*/ - SparseFormat format, - bool 
trans, - bool useGpu) { - if (useGpu) { - return std::make_shared( - height, width, nnz, valueType, format, trans); - } else { - return std::make_shared( - height, width, nnz, valueType, format, trans); - } -} - -MatrixPtr Matrix::create(MemoryHandlePtr memHandle, - size_t height, - size_t width, - bool trans) { - if (auto gpuHandle = std::dynamic_pointer_cast(memHandle)) { - return std::make_shared(gpuHandle, height, width, trans); - } else if (auto cpuHandle = - std::dynamic_pointer_cast(memHandle)) { - return std::make_shared(cpuHandle, height, width, trans); - } else { - LOG(FATAL) << "Wrong"; - return nullptr; - } -} - -MatrixPtr Matrix::create(size_t height, size_t width, bool trans, bool useGpu) { - if (useGpu) { - return std::make_shared(height, width, trans); - } else { - return std::make_shared(height, width, trans); - } -} - -MatrixPtr Matrix::create( - real* data, size_t height, size_t width, bool trans, bool useGpu) { - if (useGpu) { - return std::make_shared(data, height, width, trans); - } else { - return std::make_shared(data, height, width, trans); - } -} - -MatrixPtr Matrix::create(real* data, - size_t height, - size_t width, - size_t stride, - bool trans, - bool useGpu) { - if (useGpu) { - return std::make_shared(data, height, width, stride, trans); - } else { - return std::make_shared(data, height, width, stride, trans); - } -} - -MatrixPtr Matrix::createSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - bool trans, - bool useGpu) { - if (useGpu) { - return std::make_shared( - height, width, nnz, valueType, SPARSE_CSR, trans); - } else { - return std::make_shared( - height, width, nnz, valueType, SPARSE_CSR, trans); - } -} - -void Matrix::resizeOrCreate( - MatrixPtr& matrix, size_t height, size_t width, bool trans, bool useGpu) { - if (!matrix) { - matrix = Matrix::create(height, width, trans, useGpu); - } else { - CHECK_EQ(matrix->useGpu(), useGpu); - matrix->resize(height, width); - } -} - -void 
Matrix::resizeOrCreateSparseMatrix(MatrixPtr& matrix, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans, - bool useGpu) { - if (!matrix) { - matrix = Matrix::createSparseMatrix( - height, width, nnz, valueType, format, trans, useGpu); - } else { - CHECK(dynamic_cast(matrix.get()) || - dynamic_cast(matrix.get())); - CHECK_EQ(matrix->useGpu(), useGpu); - matrix->resize(height, width, nnz, valueType, format); - } -} - -void Matrix::reshape(size_t height, size_t width) { - CHECK(isContiguous()); - CHECK(height_ * width_ == height * width); - height_ = height; - width_ = width; - stride_ = width_; -} - -MatrixPtr Matrix::subMatrix(size_t startRow, - size_t endRow, - size_t startCol, - size_t endCol) { - CHECK_LE(startRow, endRow); - CHECK_LE(endRow, getHeight()); - CHECK_LE(startCol, endCol); - CHECK_LE(endCol, getWidth()); - - return Matrix::create(getData() + startRow * getStride() + startCol, - endRow - startRow, - endCol - startCol, - getStride(), - trans_, - useGpu_); -} - -void Matrix::setDiag(real value) { - CHECK(data_ != NULL); - CHECK_EQ(height_, width_); - - zeroMem(); - BaseMatrix diag(height_, 1, stride_ + 1, data_, false, useGpu_); - diag.assign(value); -} - -GpuMatrix::GpuMatrix(size_t height, size_t width, bool trans) - : Matrix(std::make_shared(height * width * sizeof(real)), - height, - width, - trans, - true) {} - -GpuMatrix::~GpuMatrix() {} - -void GpuMatrix::zeroMem() { - CHECK(data_ != NULL); - zero(); -} - -void GpuMatrix::resetOne() { - CHECK(data_ != NULL); - one(); -} - -void GpuMatrix::resize(size_t newHeight, size_t newWidth) { - size_t newSize = newHeight * newWidth; - if (NULL == memoryHandle_.get() || - newSize * sizeof(real) > memoryHandle_->getAllocSize()) { - memoryHandle_ = std::make_shared(newSize * sizeof(real)); - data_ = reinterpret_cast(memoryHandle_->getBuf()); - } - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newSize; - stride_ = width_; -} - -real 
GpuMatrix::getElement(size_t x, size_t y) const { - real elem = 0; - hl_memcpy_device2host(&elem, &data_[x * stride_ + y], sizeof(real)); - return elem; -} - -real GpuMatrix::getSum() { - CHECK(isContiguous()); - real sum = 0.0f; - hl_vector_sum(data_, &sum, height_ * width_); - return sum; -} - -real GpuMatrix::getMin() { - CHECK(isContiguous()); - auto vec = GpuVector(height_ * width_, data_); - return vec.getMin(); -} - -real GpuMatrix::getMax() { - CHECK(isContiguous()); - auto vec = GpuVector(height_ * width_, data_); - return vec.getMax(); -} - -void GpuMatrix::accumulateColSum(Matrix& src) { - CHECK_EQ(getWidth(), src.getWidth()); - CHECK_EQ(getHeight(), (size_t)1); - sumCols(src, 1.0, 1.0); -} - -real GpuMatrix::getAbsSum() { - CHECK(isContiguous()); - real sum = 0.0f; - hl_vector_abs_sum(data_, &sum, height_ * width_); - return sum; -} - -void GpuMatrix::copyFrom(const Matrix& src) { - CHECK(isContiguous()); - CHECK(src.isContiguous()); - CHECK(elementCnt_ == src.getElementCnt()); - - if (typeid(src) == typeid(CpuMatrix)) { - hl_memcpy_host2device( - data_, const_cast(src.getData()), sizeof(real) * elementCnt_); - } else if (typeid(src) == typeid(GpuMatrix)) { - hl_memcpy_device2device( - data_, const_cast(src.getData()), sizeof(real) * elementCnt_); - } else { - LOG(FATAL) << "Wrong"; - } -} - -void GpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) { - CHECK(isContiguous()); - CHECK(src.isContiguous()); - CHECK(elementCnt_ == src.getElementCnt()); - hl_memcpy_async(this->getData(), - const_cast(src.getData()), - sizeof(real) * elementCnt_, - stream); -} - -void GpuMatrix::copyFrom(const real* hostSrc, size_t size) { - CHECK(isContiguous()); - CHECK(size <= elementCnt_); - hl_memcpy_host2device(data_, const_cast(hostSrc), sizeof(real) * size); -} - -void GpuMatrix::copyFrom(const real* hostSrc, const int64_t* seq) { - LOG(FATAL) << "not implemented"; -} - -void GpuMatrix::copyFrom(const IVector& src) { - CHECK(isContiguous()); - CpuMatrix 
matrix(src.getSize(), 1, false); - matrix.copyFrom(src); - copyFrom(matrix); -} - -void GpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) { - size_t height = getHeight(); - size_t width = getWidth(); - CHECK_EQ(b.getWidth(), width); - real* dst = getData(); - real* src = b.getData(); - const int* index = rowIndex.getData(); - hl_sequence2batch_copy(dst, src, index, width, height, true); -} - -MatrixPtr GpuMatrix::clone(size_t height, size_t width, bool useGpu) { - CHECK(isContiguous()); - - if (height == 0 && width == 0) { - height = height_; - width = width_; - } - - CHECK(width && height); - - if (useGpu) { - return std::make_shared(height, width); - } else { - return std::make_shared(height, width); - } -} - -MatrixPtr GpuMatrix::getTranspose() { - if (memoryHandle_.get() != NULL) { - MatrixPtr copy_T( - new GpuMatrix(std::dynamic_pointer_cast(memoryHandle_), - height_, - width_, - true)); - return copy_T; - } else { - MatrixPtr copy_T(new GpuMatrix(data_, height_, width_, true)); - return copy_T; - } -} - -void GpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { - if (memAlloc) { - matTrans = std::make_shared(width_, height_); - } else { - CHECK(matTrans != NULL); - CHECK_EQ(matTrans->getHeight(), width_); - CHECK_EQ(matTrans->getWidth(), height_); - } - real* dataTrans = matTrans->getData(); - real* data = getData(); - int lda = getStride(); - int ldc = matTrans->getStride(); - - hl_matrix_transpose(data, dataTrans, height_, width_, lda, ldc); -} - -void GpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { - if (memAlloc) { - matRot = std::make_shared(width_, height_); - } else { - CHECK(matRot != NULL); - CHECK_EQ(matRot->getHeight(), width_); - CHECK_EQ(matRot->getWidth(), height_); - } - - real* dataRot = matRot->getData(); - real* data = getData(); - hl_matrix_rotate(data, dataRot, height_, width_, clockWise); -} - -MatrixPtr GpuMatrix::getInverse() { - MatrixPtr matInv; - inverse(matInv, true); - return matInv; -} - 
-void GpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) { - CHECK_EQ(height_, width_); - - if (memAlloc) { - matInv = std::make_shared(height_, width_); - } else { - CHECK(matInv != NULL); - } - - real* data = getData(); - real* dataInv = matInv->getData(); - int lda = getStride(); - int ldc = matInv->getStride(); - - hl_matrix_inverse(data, dataInv, height_, lda, ldc); -} - -void GpuMatrix::addBias(Matrix& b, real scale) { - CHECK(b.getHeight() == 1) << "the Bias should be a vector"; - BaseMatrix::addBias(b, scale); -} - -void GpuMatrix::addSharedBias(Matrix& b, real scale) { - CHECK(b.getHeight() == 1) << "the Bias should be a vector"; - CHECK_LE(b.getWidth(), getWidth()); - CHECK_EQ(getWidth() % b.getWidth(), 0UL); - hl_matrix_add_shared_bias( - getData(), b.getData(), b.getWidth(), getHeight(), getWidth(), scale); -} - -void GpuMatrix::collectBias(Matrix& a, real scale) { -#ifdef PADDLE_WITH_CUDA - CHECK_EQ(getHeight(), (size_t)1); - CHECK_EQ(width_, a.getWidth()); - GpuSparseMatrix* sMatPtr = dynamic_cast(&a); - if (!sMatPtr) { - sumCols(a, /* scaleSum= */ scale, /* scaleDest= */ 1); - } else { - real* data = getData(); - hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get(); - hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale); - } -#endif -} - -void GpuMatrix::collectSharedBias(Matrix& a, real scale) { - CHECK_EQ(getHeight(), (size_t)1); - CHECK_EQ(a.getWidth() % getWidth(), 0UL); - hl_matrix_collect_shared_bias( - getData(), a.getData(), getWidth(), a.getHeight(), a.getWidth(), scale); -} - -void GpuMatrix::sequenceAvgForward(Matrix& a, - const IVector& startsPos, - int mode) { - size_t height = getHeight(); - size_t width = getWidth(); - CHECK_EQ(height, startsPos.getSize() - 1); - CHECK_EQ(width, a.getWidth()); - real* dst = getData(); - real* src = a.getData(); - const int* starts = startsPos.getData(); - - hl_sequence_avg_forward(dst, src, starts, height, width, mode); -} - -void GpuMatrix::sequenceAvgBackward(Matrix& a, - const 
IVector& startsPos, - int mode) { - size_t height = a.getHeight(); - size_t width = getWidth(); - CHECK_EQ(height, startsPos.getSize() - 1); - CHECK_EQ(width, a.getWidth()); - real* dst = getData(); - real* src = a.getData(); - const int* starts = startsPos.getData(); - - hl_sequence_avg_backward(dst, src, starts, height, width, mode); -} - -/* this = scaleAB*(a*b) + scaleT*this */ -void GpuMatrix::mul(const GpuMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT) { - CHECK(!isTransposed()) << "Not supported"; - - if (!a.isTransposed() && !b.isTransposed()) { - CHECK_EQ(width_, b.width_); - CHECK_EQ(height_, a.height_); - CHECK_EQ(a.width_, b.height_); - } else if (a.isTransposed() && !b.isTransposed()) { - CHECK_EQ(width_, b.width_); - CHECK_EQ(height_, a.width_); - CHECK_EQ(a.height_, b.height_); - } else if (!a.isTransposed() && b.isTransposed()) { - CHECK_EQ(width_, b.height_); - CHECK_EQ(height_, a.height_); - CHECK_EQ(a.width_, b.width_); - } else { - LOG(FATAL) << "Is not supported"; - } - - real* A_d = a.data_; - real* B_d = b.data_; - real* C_d = data_; - int dimM = getHeight(); - int dimN = getWidth(); - int dimK = !a.isTransposed() ? a.width_ : a.height_; - int lda = a.getStride(); - int ldb = b.getStride(); - int ldc = getStride(); - hl_trans_op_t transa = !a.isTransposed() ? HPPL_OP_N : HPPL_OP_T; - hl_trans_op_t transb = !b.isTransposed() ? 
HPPL_OP_N : HPPL_OP_T; - - hl_matrix_mul(A_d, - transa, - B_d, - transb, - C_d, - dimM, - dimN, - dimK, - scaleAB, - scaleT, - lda, - ldb, - ldc); -} - -void GpuMatrix::mul(const GpuSparseMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT) { -#ifdef PADDLE_WITH_CUDA - CHECK(isContiguous()); - CHECK(b.isContiguous()); - CHECK(b.useGpu_ == true) << "Matrix type are not equal"; - CHECK(!trans_ && !b.trans_) << "not supported"; - - if (!a.trans_) { - CHECK(width_ == b.width_ && height_ == a.height_ && a.width_ == b.height_) - << "Matrix dimensions are not equal"; - } else { - CHECK(width_ == b.width_ && height_ == a.width_ && a.height_ == b.height_) - << "Matrix dimensions are not equal"; - } - hl_trans_op_t transA = a.trans_ ? HPPL_OP_T : HPPL_OP_N; - hl_sparse_matrix_s A_d = a.sMatrix_.get(); - real* B_d = b.data_; - real* C_d = data_; - hl_matrix_csr_mul_dense(A_d, - transA, - B_d, - HPPL_OP_N, - C_d, - height_, - width_, - b.height_, - scaleAB, - scaleT); -#endif -} - -void GpuMatrix::mul(const GpuMatrix& a, - const GpuSparseMatrix& b, - real scaleAB, - real scaleT) { -#ifdef PADDLE_WITH_CUDA - CHECK(isContiguous()); - CHECK(a.isContiguous()); - CHECK(a.useGpu_ == true) << "Matrix type are not equal"; - - hl_sparse_matrix_s B_d = b.sMatrix_.get(); - real* A_d = a.data_; - real* C_d = data_; - hl_trans_op_t transB = b.trans_ ? 
HPPL_OP_T : HPPL_OP_N; - if (!b.trans_) { - CHECK(width_ == b.width_ && height_ == a.height_ && a.width_ == b.height_) - << "Matrix dimensions are not equal"; - } else { - CHECK(width_ == b.height_ && height_ == a.height_ && a.width_ == b.width_) - << "Matrix dimensions are not equal"; - } - if (b.format_ == SPARSE_CSC) { - hl_matrix_dense_mul_csc(A_d, - HPPL_OP_N, - B_d, - transB, - C_d, - height_, - width_, - a.width_, - scaleAB, - scaleT); - } else { - hl_matrix_dense_mul_csr(A_d, - HPPL_OP_N, - B_d, - transB, - C_d, - height_, - width_, - a.width_, - scaleAB, - scaleT); - } -#endif -} - -/* this = a*b */ -void GpuMatrix::mul(const Matrix& a, const Matrix& b) { mul(a, b, 1.0, 0.0); } - -void GpuMatrix::mul(const Matrix& a, - const Matrix& b, - real scaleAB, - real scaleT) { - const auto a_ptr = dynamic_cast(&a); - const auto b_ptr = dynamic_cast(&b); - const auto a_ptr_s = dynamic_cast(&a); - const auto b_ptr_s = dynamic_cast(&b); - - if (a_ptr && b_ptr) { - mul(*a_ptr, *b_ptr, scaleAB, scaleT); - } else if (a_ptr_s && b_ptr) { - mul(*a_ptr_s, *b_ptr, scaleAB, scaleT); - } else if (a_ptr && b_ptr_s) { - mul(*a_ptr, *b_ptr_s, scaleAB, scaleT); - } else { - LOG(FATAL) << "Not supported"; - } -} - -/* this = this* b */ -void GpuMatrix::rightMul(Matrix& b) { rightMul(b, 1.0, 0.0); } - -/* this = scaleAB*(this*b) + scaleT*this */ -void GpuMatrix::rightMul(Matrix& b, real scaleAB, real scaleT) { - CHECK(dynamic_cast(&b)); - CHECK(!isTransposed()) << "Not supported"; - CHECK(!b.isTransposed()) << "Not supported"; - mul(*this, *dynamic_cast(&b), scaleAB, scaleT); -} - -/* this = a*this */ -void GpuMatrix::leftMul(Matrix& a) { leftMul(a, 1.0, 0.0); } - -/* this = scaleAB*(a*this) + scaleT*this */ -void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) { - CHECK(dynamic_cast(&a)); - CHECK(!isTransposed()) << "Not supported"; - CHECK(!a.isTransposed()) << "Not supported"; - mul(*dynamic_cast(&a), *this, scaleAB, scaleT); -} - -void GpuMatrix::selectRows(Matrix& 
table, IVector& ids) { -#ifdef PADDLE_WITH_CUDA - CHECK(dynamic_cast(&table)); - CHECK(table.useGpu()); - CHECK(ids.useGpu()); - CHECK_EQ(getHeight(), ids.getSize()); - CHECK_EQ(getWidth(), table.getWidth()); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - real* a = getData(); - size_t tableSize = table.getHeight(); - int* index = ids.getData(); - - hl_matrix_select_rows(a, - stride_, - table.getData(), - table.stride_, - index, - numSamples, - tableSize, - dim); -#endif -} - -void GpuMatrix::addToRows(Matrix& table, IVector& ids) { -#ifdef PADDLE_WITH_CUDA - CHECK(dynamic_cast(&table)); - CHECK(table.useGpu()); - CHECK(ids.useGpu()); - CHECK_EQ(getHeight(), ids.getSize()); - CHECK_EQ(getWidth(), table.getWidth()); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - real* a = getData(); - size_t tableSize = table.getHeight(); - int* index = ids.getData(); - - hl_matrix_add_to_rows(table.getData(), - table.stride_, - a, - stride_, - index, - numSamples, - tableSize, - dim); -#endif -} - -void GpuMatrix::colMerge(Matrix& src) { - CHECK(src.height_ == height_); - if (!trans_ && !src.trans_) { - sumRows(src, /* scaleSum= */ 1, /* scaleDest= */ 0); - } else { - LOG(FATAL) << "Is not supported"; - } -} - -void GpuMatrix::rowSum(Matrix& sum) { - CHECK_EQ(sum.getHeight(), getHeight()); - CHECK_EQ(sum.getWidth(), (size_t)1); - - sum.sumRows(*this, /* scaleSum= */ 1, /* scaleDest= */ 0); -} - -void GpuMatrix::rowMax(Matrix& max) { - CHECK_EQ(max.getHeight(), getHeight()); - CHECK_EQ(max.getWidth(), (size_t)1); - - max.maxRows(*this); -} - -void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) { -#ifdef PADDLE_WITH_CUDA - CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal"; - size_t numSamples = getHeight(); - size_t beam = maxVal.getWidth(); - CHECK_EQ(maxIds.getSize(), numSamples * beam); - CHECK_EQ(maxVal.getHeight(), numSamples); - CHECK_EQ(maxVal.getWidth(), beam); - - hl_matrix_top_k(maxVal.getData(), - 
maxVal.getStride(), - maxIds.getData(), - this->getData(), - this->getStride(), - this->getWidth(), - beam, - numSamples); -#endif -} - -void GpuMatrix::colMax(Matrix& max) { - CHECK_EQ(max.getWidth(), getWidth()); - CHECK_EQ(max.getHeight(), (size_t)1); - - max.maxCols(*this); -} - -void GpuMatrix::colMax(IVector& maxIds, Matrix& maxVal) { - LOG(FATAL) << "Is not supported"; -} - -void GpuMatrix::maxoutForward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - CHECK(dynamic_cast(&a)); - CHECK(dynamic_cast(&id)); - CHECK_EQ(a.getHeight(), getHeight()); - - size_t size = getWidth(); - size_t batchSize = getHeight(); - const real* input = a.getData(); - real* output = getData(); - int* idForGpu = id.getData(); - - hl_maxout_forward( - input, output, idForGpu, batchSize, size, size / channels, groups); -} - -void GpuMatrix::maxoutBackward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - CHECK(dynamic_cast(&a)); - CHECK(dynamic_cast(&id)); - CHECK_EQ(a.getHeight(), getHeight()); - - size_t size = a.getWidth(); - size_t batchSize = getHeight(); - real* input = getData(); - const real* output = a.getData(); - const int* idForGpu = id.getData(); - - hl_maxout_backward( - input, output, idForGpu, batchSize, size, size / channels, groups); -} - -/*calulate the error of classification */ -void GpuMatrix::classificationError(Matrix& output, - IVector& label, - size_t topkSize) { - auto gpuOutput = dynamic_cast(&output); - auto gpuLabel = dynamic_cast(&label); - size_t numSamples = this->getHeight(); - GpuMatrixPtr gpuTopVal = std::make_shared(numSamples, topkSize); - GpuIVectorPtr gpuTopIds = std::make_shared(numSamples * topkSize); - - CHECK(gpuOutput && gpuLabel) << "Invalid argument pointer"; - CHECK(gpuTopVal && gpuTopIds) << "Allocate GPU memory failed"; - CHECK(gpuLabel->getSize() == numSamples) << "Vector size is not equal"; - CHECK(numSamples == gpuOutput->getHeight() && this->getWidth() == 1) - << "Matrix dimensions are not equal"; - - 
size_t dim = gpuOutput->getWidth(); - hl_matrix_classification_error(gpuTopVal->getData(), - gpuTopVal->getStride(), - gpuTopIds->getData(), - gpuOutput->getData(), - gpuOutput->getStride(), - dim, - topkSize, - numSamples, - gpuLabel->getData(), - this->getData()); -} - -/* copy -log(output[i * width + label]) to this->data[i] */ -void GpuMatrix::oneHotCrossEntropy(Matrix& output, IVector& label) { - GpuMatrix* output_ptr = dynamic_cast(&output); - GpuIVector* label_ptr = dynamic_cast(&label); - - CHECK(output_ptr && label_ptr) << "Invalid argument pointer"; - - CHECK(height_ == label.getSize() && width_ == 1 && height_ == output.height_) - << "Matrix dimensions are not equal"; - - real* A_d = output_ptr->data_; - real* C_d = data_; - int* label_d = label_ptr->getData(); - - hl_matrix_cross_entropy(A_d, C_d, label_d, height_, output.width_); -} - -/* calculate the error of outputV according to label */ -void GpuMatrix::oneHotCrossEntropyBp(Matrix& outputV, IVector& label) { - GpuMatrix* output_ptr = dynamic_cast(&outputV); - GpuIVector* label_ptr = dynamic_cast(&label); - - CHECK(output_ptr && label_ptr) << "Invalid argument pointer"; - - CHECK(height_ == output_ptr->height_ && width_ == output_ptr->width_) - << "Matrix dimensions are not equal"; - - real* output_d = output_ptr->data_; - real* grad_d = data_; - int* label_d = label_ptr->getData(); - - hl_matrix_cross_entropy_bp(grad_d, output_d, label_d, height_, width_); -} - -void GpuMatrix::oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha) { - LOG(FATAL) << "Not implemented"; -} - -void GpuMatrix::oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha) { - LOG(FATAL) << "Not implemented"; -} - -void GpuMatrix::softmax(Matrix& output) { - CHECK(output.useGpu()) << "Matrix type are not equal"; - - size_t height = getHeight(); - size_t width = getWidth(); - CHECK(height == output.getHeight() && width == output.getWidth()) - << "Matrix dimensions are not 
equal"; - - real* inputData = getData(); - real* outputData = output.getData(); - hl_matrix_softmax(inputData, outputData, height, width); -} - -void GpuMatrix::sequenceSoftmax(Matrix& output, const IVector& index) { - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output.getWidth(), 1UL); - CHECK(isContiguous()); - - real* inputData = getData(); - real* outputData = output.getData(); - auto starts = index.getData(); - int numSequences = index.getSize() - 1; - hl_sequence_softmax_forward(inputData, outputData, starts, numSequences); -} - -void GpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { - CHECK(output.useGpu_ == true && sftmaxSum.useGpu_ == true) - << "Matrix type are not equal"; - - CHECK(height_ == output.height_ && width_ == output.width_ && - height_ == sftmaxSum.height_) - << "Matrix dimensions are not equal"; - - real* output_d = output.data_; - real* sftmaxSum_d = sftmaxSum.data_; - real* grad_d = data_; - hl_matrix_softmax_derivative(grad_d, output_d, sftmaxSum_d, height_, width_); -} - -void GpuMatrix::softmaxBackward(Matrix& outputV) { - CHECK(outputV.useGpu()) << "Matrix type are not equal"; - - size_t height = getHeight(); - size_t width = getWidth(); - CHECK(height == outputV.getHeight() && width == outputV.getWidth()) - << "Matrix dimensions are not equal"; - - real* output_grad = getData(); - real* output_value = outputV.getData(); - hl_softmax_backward(output_value, output_grad, height, width); -} - -void GpuMatrix::sumOfSquares(Matrix& output, Matrix& label) { - CHECK_EQ(label.getHeight(), height_); - CHECK_EQ(output.getHeight(), height_); - CHECK_EQ(label.getWidth(), output.getWidth()); - CHECK_EQ((size_t)1, width_); - - auto labelptr = dynamic_cast(&label); - if (labelptr) { - LOG(FATAL) << "not supported: GpuSparseMatrix as label"; - } - - BaseMatrix::sumOfSquaredDiffs(output, - label, - /* scaleSum= */ 1, - /* scaleDest= */ 1); -} - -void GpuMatrix::sumOfSquaresBp(Matrix& outputV, Matrix& label) { - add2(outputV, label, 1, 2, -2); 
-} - -void GpuMatrix::tanh(Matrix& output) { BaseMatrix::tanh(output); } - -void GpuMatrix::tanhDerivative(Matrix& output) { - BaseMatrix::tanhDerivative(output); -} - -void GpuMatrix::softrelu(Matrix& output) { BaseMatrix::softrelu(output); } - -void GpuMatrix::softreluDerivative(Matrix& output) { - BaseMatrix::softreluDerivative(output); -} - -void GpuMatrix::scaledTanh(Matrix& output, real p1, real p2) { - BaseMatrix::scaledTanh(output, p1, p2); -} - -void GpuMatrix::randomizeUniform() { - CHECK(isContiguous()); - real* data = data_; - size_t size = height_ * width_; - - hl_rand(data, size); -} - -void GpuMatrix::print(std::ostream& os) const { - CHECK(isContiguous()); - CpuMatrix cpuMat(getHeight(), getWidth()); - cpuMat.copyFrom(*this); - cpuMat.print(os); -} - -void GpuMatrix::print(std::ostream& os, size_t height, size_t width) const { - CHECK(isContiguous()); - CpuMatrix cpuMat(getHeight(), getWidth()); - cpuMat.copyFrom(*this); - cpuMat.print(os, height, width); -} - -void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) { - CHECK(isContiguous()); - CHECK(height_ == refMat.getHeight()); - CHECK(width_ == refMat.getWidth()); - CpuMatrix cpuRef(height_, width_); - GpuMatrix gpuRef(height_, width_); - cpuRef.copyFrom(refMat); - gpuRef.copyFrom(*this); - size_t diffCnt = 0; - for (size_t i = 0; i < height_; ++i) { - for (size_t j = 0; j < width_; ++j) { - real a = gpuRef.getElement(i, j); - real b = cpuRef.getElement(i, j); - if (fabs(a - b) > 0.00001) { - ++diffCnt; - if (printDiff) { - os << "ref= " << a << " check= " << b << std::endl; - } - } - } - } - LOG(INFO) << "the diffCnt is " << diffCnt; -} - -void GpuMatrix::upsampleForward(Matrix& input, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW) { - CHECK(input.useGpu_ == true) << "Matrix type are not equal"; - CHECK(mask.useGpu_ == true) << "Matrix type are not equal"; - - real* inputData = input.getData(); - real* maskData = 
mask.getData(); - real* outData = data_; - - size_t batch = input.getHeight(); - - CHECK(imgSizeH * imgSizeW * channels == input.getWidth()); - CHECK(imgSizeH * imgSizeW * channels == mask.getWidth()); - CHECK_EQ(batch, this->getHeight()); - CHECK(width_ == outputH * outputW * channels); - hl_upsample_forward(inputData, - maskData, - batch, - imgSizeH, - imgSizeW, - channels, - outputH, - outputW, - outData); -} - -void GpuMatrix::upsampleBackward(Matrix& outputGrad, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW) { - CHECK(outputGrad.useGpu_ == true) << "Matrix type are not equal"; - CHECK(mask.useGpu_ == true) << "Matrix type are not equal"; - - real* outputGradData = outputGrad.getData(); - real* maskData = mask.getData(); - real* inputGradData = data_; - size_t batch = outputGrad.getHeight(); - - CHECK(imgSizeH * imgSizeW == this->getWidth() / channels); - CHECK_EQ(batch, this->getHeight()); - CHECK_EQ(channels * outputH * outputW, outputGrad.getWidth()); - hl_upsample_backward(outputGradData, - maskData, - batch, - imgSizeH, - imgSizeW, - channels, - outputH, - outputW, - inputGradData); -} - -void GpuMatrix::maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - MatrixPtr maskMatP) { - CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; - - real* inputData = inputMat.getData(); - real* maskData = NULL; - size_t frameNum = inputMat.getHeight(); - CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth()); - CHECK(height_ == inputMat.getHeight()); - CHECK(width_ == outputH * outputW * channels); - - if (maskMatP != NULL) { - CHECK(maskMatP->useGpu_ == true) << "Matrix type are not equal"; - CHECK(outputH * outputW * channels == maskMatP->getWidth()); - maskData = maskMatP->getData(); - } - - 
hl_maxpool_forward(frameNum, - inputData, - channels, - imgSizeH, - imgSizeW, - outputH, - outputW, - sizeX, - sizeY, - strideH, - strideW, - paddingH, - paddingW, - data_, - getStride(), - maskData); -} - -void GpuMatrix::maxPoolBackward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW) { - CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true && - outV.useGpu_ == true) - << "Matrix type are not equal"; - - real* inputData = inputMat.getData(); - real* outData = outV.getData(); - real* outDiff = outGrad.getData(); - size_t frameNum = inputMat.getHeight(); - size_t channels = outV.getWidth() / outputH / outputW; - CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth()); - CHECK(height_ == inputMat.getHeight()); - CHECK(outGrad.getHeight() == outV.getHeight() && - outGrad.getWidth() == outV.getWidth()); - - hl_maxpool_backward(frameNum, - inputData, - outData, - outDiff, - channels, - imgSizeH, - imgSizeW, - outputH, - outputW, - sizeX, - sizeY, - strideH, - strideW, - paddingH, - paddingW, - scaleTargets, - scaleOutput, - data_, - outGrad.getStride()); -} - -void GpuMatrix::avgPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - bool excludeMode) { - CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; - - real* inputData = inputMat.getData(); - size_t frameNum = inputMat.getHeight(); - CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth()); - CHECK(height_ == inputMat.getHeight()); - CHECK(width_ == outputH * outputW * channels); - - hl_avgpool_forward(frameNum, - inputData, - channels, - imgSizeH, - imgSizeW, - outputH, - outputW, - sizeX, - 
sizeY, - strideH, - strideW, - paddingH, - paddingW, - data_, - getStride(), - excludeMode); -} - -void GpuMatrix::avgPoolBackward(Matrix& outGrad, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW, - bool excludeMode) { - CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal"; - - real* outDiff = outGrad.getData(); - size_t frameNum = outGrad.getHeight(); - size_t channels = outGrad.getWidth() / outputH / outputW; - CHECK(imgSizeH * imgSizeW * channels == width_); - CHECK(height_ == outGrad.getHeight()); - CHECK(outGrad.getWidth() == outputH * outputW * channels); - - hl_avgpool_backward(frameNum, - outDiff, - channels, - imgSizeH, - imgSizeW, - outputH, - outputW, - sizeX, - sizeY, - strideH, - strideW, - paddingH, - paddingW, - scaleTargets, - scaleOutput, - data_, - outGrad.getStride(), - excludeMode); -} - -void GpuMatrix::maxPool3DForward(Matrix& inputMat, - Matrix& maxPoolIdx, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW) { - CHECK(inputMat.useGpu_) << "Matrix type are not correct"; - - real* inputData = inputMat.getData(); - real* maxPoolIdxData = maxPoolIdx.getData(); - size_t num = inputMat.getHeight(); - CHECK(imgSizeD * imgSizeH * imgSizeW * channels == inputMat.getWidth()); - CHECK(height_ == inputMat.getHeight()); - CHECK(width_ == outputD * outputH * outputW * channels); - - hl_maxpool3D_forward(num, - inputData, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - outputD, - outputH, - outputW, - sizeZ, - sizeY, - sizeX, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - getData(), - maxPoolIdxData, - getStride()); -} - 
-void GpuMatrix::maxPool3DBackward(Matrix& outGrad, - Matrix& maxPoolIdx, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput) { - CHECK(outGrad.useGpu_ && maxPoolIdx.useGpu_) << "Matrix type are not equal"; - - real* outDiff = outGrad.getData(); - real* maxPoolIdxData = maxPoolIdx.getData(); - size_t frameNum = getHeight(); - size_t channels = outGrad.getWidth() / outputD / outputH / outputW; - CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth()); - CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() && - outGrad.getWidth() == maxPoolIdx.getWidth()); - - hl_maxpool3D_backward(frameNum, - outDiff, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - outputD, - outputH, - outputW, - sizeZ, - sizeY, - sizeX, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - scaleTargets, - scaleOutput, - getData(), - maxPoolIdxData, - outGrad.getStride()); -} - -void GpuMatrix::avgPool3DForward(Matrix& inputMat, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW) { - CHECK(inputMat.useGpu_) << "Matrix type are not equal"; - - real* inputData = inputMat.getData(); - size_t frameNum = inputMat.getHeight(); - CHECK(imgSizeD * imgSizeH * imgSizeW * channels == inputMat.getWidth()); - CHECK(height_ == inputMat.getHeight()); - CHECK(width_ == outputD * outputH * outputW * channels); - - hl_avgpool3D_forward(frameNum, - inputData, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - outputD, - outputH, - outputW, - sizeZ, - sizeY, - sizeX, - strideD, - strideH, - strideW, - 
paddingD, - paddingH, - paddingW, - getData(), - getStride()); -} - -void GpuMatrix::avgPool3DBackward(Matrix& outGrad, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput) { - CHECK(outGrad.useGpu_) << "Matrix type are not equal"; - - real* outDiff = outGrad.getData(); - size_t frameNum = outGrad.getHeight(); - size_t channels = outGrad.getWidth() / outputD / outputH / outputW; - CHECK(imgSizeD * imgSizeH * imgSizeW * channels == width_); - CHECK(height_ == outGrad.getHeight()); - CHECK(outGrad.getWidth() == outputD * outputH * outputW * channels); - - hl_avgpool3D_backward(frameNum, - outDiff, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - outputD, - outputH, - outputW, - sizeZ, - sizeY, - sizeX, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - scaleTargets, - scaleOutput, - getData(), - outGrad.getStride()); -} - -void GpuMatrix::maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index) { - CHECK(dynamic_cast(&input)); - CHECK(dynamic_cast(&sequence)); - CHECK(dynamic_cast(&index)); - - real* outData = getData(); - real* inputData = input.getData(); - const int* starts = sequence.getData(); - int* maxIndex = index.getData(); - size_t numSequences = getHeight(); - size_t dim = getWidth(); - - CHECK_EQ(dim, input.getWidth()); - CHECK_EQ(numSequences, sequence.getSize() - 1); - CHECK_EQ(numSequences * dim, index.getSize()); - - hl_max_sequence_forward( - inputData, starts, outData, maxIndex, numSequences, dim); -} - -void GpuMatrix::maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index) { - CHECK(dynamic_cast(&outputGrad)); - CHECK(dynamic_cast(&sequence)); - CHECK(dynamic_cast(&index)); - - real* inputGrad = getData(); - real* outGrad = 
outputGrad.getData(); - int* maxIndex = index.getData(); - size_t dim = getWidth(); - size_t numSequences = sequence.getSize() - 1; - - CHECK_EQ(dim, outputGrad.getWidth()); - CHECK_EQ(numSequences, outputGrad.getHeight()); - CHECK_EQ(numSequences * dim, index.getSize()); - - hl_max_sequence_backward(outGrad, maxIndex, inputGrad, numSequences, dim); -} - -void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) { - CHECK(data.useGpu_ == true && W.useGpu_ == true) - << "Matrix type are not equal"; - real* input = data.getData(); - real* w = W.getData(); - size_t numElements = data.getWidth(); - size_t numSamples = data.getHeight(); - size_t paraSize = W.getHeight() * W.getWidth(); - CHECK(!(numElements % paraSize)); // this check from ParameterReluLayer::init - size_t partial_sum = numElements / paraSize; - real* output = getData(); - hl_param_relu_forward(output, input, w, numElements, numSamples, partial_sum); -} - -void GpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) { - CHECK(oGrad.useGpu_ == true && data.useGpu_ == true) - << "Matrix type are not equal"; - real* ograd = oGrad.getData(); - real* input = data.getData(); - real* wgrad = data_; - size_t numElements = data.getWidth(); - size_t numSamples = data.getHeight(); - size_t paraSize = this->getHeight() * this->getWidth(); - CHECK(!(numElements % paraSize)); // this check from ParameterReluLayer::init - size_t partial_sum = numElements / paraSize; - hl_param_relu_backward_w( - wgrad, ograd, input, numElements, numSamples, partial_sum); -} - -void GpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) { - real* diff = data_; - real* input = data.getData(); - real* ograd = oGrad.getData(); - real* w = W.getData(); - size_t numElements = data.getWidth(); - size_t numSamples = data.getHeight(); - size_t paraSize = W.getHeight() * W.getWidth(); - CHECK(!(numElements % paraSize)); // this check from ParameterReluLayer::init - size_t partial_sum = numElements / paraSize; - 
hl_param_relu_backward_diff( - ograd, input, w, diff, numElements, numSamples, partial_sum); -} - -void GpuMatrix::addColumnVector(const Matrix& b) { - BaseMatrix::addColVector(const_cast(b)); -} - -void GpuMatrix::bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - CHECK(dynamic_cast(&in)); - - const size_t outputW = getWidth(); - const size_t outputH = getHeight(); - const size_t inputW = in.getWidth(); - const size_t inputH = in.getHeight(); - - real* outData = getData(); - const real* inData = in.getData(); - - if (inImgH == outImgW && inImgW == outImgW) { - this->copyFrom(in); - } else { - hl_bilinear_forward(inData, - inImgH, - inImgW, - inputH, - inputW, - outData, - outImgH, - outImgW, - outputH, - outputW, - numChannels, - ratioH, - ratioW); - } -} - -void GpuMatrix::bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - CHECK(dynamic_cast(&out)); - - const size_t inputW = getWidth(); - const size_t inputH = getHeight(); - const size_t outputW = out.getWidth(); - const size_t outputH = out.getHeight(); - - real* inGrad = getData(); - const real* outGrad = out.getData(); - - if (outImgH == inImgH && outImgW == inImgW) { - this->add(const_cast(out)); - } else { - hl_bilinear_backward(inGrad, - inImgH, - inImgW, - inputH, - inputW, - outGrad, - outImgH, - outImgW, - outputH, - outputW, - numChannels, - ratioH, - ratioW); - } -} - -void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { -#ifdef PADDLE_WITH_CUDA - GpuMatrix* outputPtr = dynamic_cast(&output); - auto labelPtr = dynamic_cast(&label); - - CHECK(outputPtr && labelPtr) << "Invalid argument pointer"; - CHECK(labelPtr->format_ == SPARSE_CSR) << "Matrix format not supported"; - 
CHECK(height_ == outputPtr->height_ && width_ == 1 && - outputPtr->width_ == labelPtr->getWidth() && - outputPtr->height_ == labelPtr->getHeight()) - << "Matrix dimensions are not equal"; - - real* output_d = outputPtr->data_; - real* entropy_d = data_; - hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get(); - hl_matrix_multi_binary_cross_entropy( - output_d, entropy_d, mat_d, height_, outputPtr->width_); -#endif -} - -void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { -#ifdef PADDLE_WITH_CUDA - GpuMatrix* outputPtr = dynamic_cast(&output); - auto labelPtr = dynamic_cast(&label); - - CHECK(outputPtr && labelPtr) << "Invalid argument pointer"; - CHECK(labelPtr->format_ == SPARSE_CSR) << "Matrix format not supported"; - CHECK(height_ == outputPtr->height_ && width_ == outputPtr->width_ && - outputPtr->width_ == labelPtr->getWidth() && - outputPtr->height_ == labelPtr->getHeight()) - << "Matrix dimensions are not equal"; - - real* output_d = outputPtr->data_; - real* grad_d = data_; - hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get(); - hl_matrix_multi_binary_cross_entropy_bp( - output_d, grad_d, mat_d, height_, width_); -#endif -} - -void GpuMatrix::vol2Col(real* dataSrc, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW) { - hl_matrix_vol2Col(dataSrc, - channels, - depth, - height, - width, - filterD, - filterH, - filterW, - strideD, - strideH, - strideW, - paddingD, - paddingH, - paddingW, - getData()); -} - -void GpuMatrix::col2Vol(real* dataDst, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta) { - hl_matrix_col2Vol(dataDst, - channels, - depth, - height, - width, - filterD, - filterH, - filterW, - strideD, - strideH, - 
strideW, - paddingD, - paddingH, - paddingW, - getData(), - alpha, - beta); -} - -/** - * CpuMatrix - */ - -CpuMatrix::CpuMatrix(size_t height, size_t width, bool trans) - : Matrix(std::make_shared(height * width * sizeof(real)), - height, - width, - trans, - false) {} - -CpuMatrix::~CpuMatrix() {} - -void CpuMatrix::zeroMem() { - CHECK(data_ != NULL); - if (isContiguous()) { - memset(data_, 0, height_ * width_ * sizeof(real)); - } else { - BaseMatrix::zero(); - } -} -void CpuMatrix::resetOne() { - CHECK(data_ != NULL); - BaseMatrix::one(); -} - -void CpuMatrix::copyFrom(const Matrix& src) { - CHECK(isContiguous()); - if (typeid(src) == typeid(GpuMatrix)) { - CHECK(src.isContiguous()); - CHECK(elementCnt_ == src.getElementCnt()); - hl_memcpy_device2host( - data_, const_cast(src.getData()), sizeof(real) * elementCnt_); - } else if (typeid(src) == typeid(CpuMatrix) || - typeid(src) == typeid(SharedCpuMatrix)) { - CHECK(src.isContiguous()); - CHECK(elementCnt_ == src.getElementCnt()); - memcpy(data_, src.getData(), sizeof(real) * elementCnt_); - } else if (typeid(src) == typeid(CpuSparseMatrix)) { - CHECK_GE(elementCnt_, src.getElementCnt()); - copyFrom(dynamic_cast(const_cast(src))); - } else { - LOG(FATAL) << "Wrong"; - } -} - -void CpuMatrix::copyFrom(CpuSparseMatrix& src) { - CHECK(isContiguous()); - CHECK(height_ == src.getHeight()); - CHECK(width_ == src.getWidth()); - memset(data_, 0, sizeof(real) * height_ * width_); - if (src.getValueType() == FLOAT_VALUE) { - if (src.getFormat() == SPARSE_CSC) { - int* rows = src.getRows(); - real* vals = src.getValue(); - for (size_t i = 0; i < width_; i++) { - for (size_t j = src.getColStartIdx(i); j < src.getColStartIdx(i + 1); - j++) { - data_[rows[j] * width_ + i] = vals[j]; - } - } - } else { - int* cols = src.getCols(); - real* vals = src.getValue(); - for (size_t i = 0; i < height_; i++) { - for (size_t j = src.getRowStartIdx(i); j < src.getRowStartIdx(i + 1); - j++) { - data_[i * width_ + cols[j]] = vals[j]; - } - } 
- } - } else { - if (src.getFormat() == SPARSE_CSC) { - int* rows = src.getRows(); - for (size_t i = 0; i < width_; i++) { - for (size_t j = src.getColStartIdx(i); j < src.getColStartIdx(i + 1); - j++) { - data_[rows[j] * width_ + i] = 1.0; - } - } - } else { - int* cols = src.getCols(); - for (size_t i = 0; i < height_; i++) { - for (size_t j = src.getRowStartIdx(i); j < src.getRowStartIdx(i + 1); - j++) { - data_[i * width_ + cols[j]] = 1.0; - } - } - } - } -} - -void CpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) { - CHECK(isContiguous()); - CHECK(src.isContiguous()); - CHECK(elementCnt_ == src.getElementCnt()); - if (typeid(src) == typeid(GpuMatrix)) { - hl_memcpy_async(this->getData(), - const_cast(src.getData()), - sizeof(real) * elementCnt_, - stream); - // There is a need to add synchronization to ensure that the data is copied. - hl_stream_synchronize(stream); - } else if (typeid(src) == typeid(CpuMatrix)) { - memcpy(data_, src.getData(), sizeof(real) * elementCnt_); - } else { - LOG(FATAL) << "Wrong"; - } -} - -void CpuMatrix::copyFrom(const real* cpuSrc, size_t size) { - CHECK(isContiguous()); - CHECK(size <= elementCnt_); - memcpy(data_, cpuSrc, sizeof(real) * size); -} - -void CpuMatrix::copyFrom(const real* cpuSrc, const int64_t* seq) { - CHECK(isContiguous()); - for (size_t i = 0; i < height_; i++) { - memcpy(data_ + i * width_, cpuSrc + seq[i] * width_, sizeof(real) * width_); - } -} - -void CpuMatrix::copyFrom(const IVector& src) { - CHECK(isContiguous()); - CHECK(elementCnt_ == src.getSize()) - << "the src and dst should have same size."; - const int* cpuSrc = NULL; - IVectorPtr tmp; - if (src.useGpu()) { - CpuIVector tmp(src.getSize()); - tmp.copyFrom(src); - cpuSrc = tmp.getData(); - } else { - cpuSrc = src.getData(); - } - for (size_t i = 0; i < elementCnt_; ++i) { - data_[i] = cpuSrc[i]; - } -} - -void CpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) { - size_t height = getHeight(); - size_t width = getWidth(); - 
CHECK_EQ(b.getWidth(), width); - const int* index = rowIndex.getData(); - for (size_t i = 0; i < height; i++) { - CHECK_LT(static_cast(index[i]), b.getHeight()); - real* src = b.getData() + index[i] * width; - real* dst = getData() + i * width; - memcpy(dst, src, sizeof(real) * width); - } -} - -MatrixPtr CpuMatrix::clone(size_t height, size_t width, bool useGpu) { - CHECK(isContiguous()); - - if (height == 0 && width == 0) { - height = height_; - width = width_; - } - - CHECK(width && height); - - if (useGpu) { - return std::make_shared(height, width); - } else { - return std::make_shared(height, width); - } -} - -void CpuMatrix::resize(size_t newHeight, size_t newWidth) { - size_t newSize = newHeight * newWidth; - if (NULL == memoryHandle_.get() || - newSize * sizeof(real) > memoryHandle_->getAllocSize()) { - memoryHandle_ = std::make_shared(newSize * sizeof(real)); - data_ = reinterpret_cast(memoryHandle_->getBuf()); - } - - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newSize; - stride_ = width_; -} - -real CpuMatrix::getElement(size_t x, size_t y) const { - return data_[x * stride_ + y]; -} - -real CpuMatrix::getSum() { - CHECK(isContiguous()); - double sum = 0; - for (size_t i = 0; i < height_; ++i) { - for (size_t j = 0; j < width_; ++j) { - sum += data_[i * width_ + j]; - } - } - return sum; -} - -void CpuMatrix::accumulateColSum(Matrix& src) { - CHECK_EQ(getWidth(), src.getWidth()); - CHECK_EQ(getHeight(), (size_t)1); - - sumCols(src, /* scaleSum= */ 1, /* scaleDest= */ 1); -} - -real CpuMatrix::getAbsSum() { - CHECK(isContiguous()); - double sum = 0; - for (size_t i = 0; i < height_; ++i) { - for (size_t j = 0; j < width_; ++j) { - sum += fabs(data_[i * width_ + j]); - } - } - return sum; -} - -MatrixPtr CpuMatrix::getTranspose() { - if (memoryHandle_.get() != NULL) { - return std::make_shared( - std::dynamic_pointer_cast(memoryHandle_), - height_, - width_, - true); - } else { - MatrixPtr copy_T(new CpuMatrix(data_, height_, width_, true)); 
- return copy_T; - } -} - -void CpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { - if (memAlloc) { - matTrans = std::make_shared(width_, height_); - } else { - CHECK(matTrans != NULL); - CHECK_EQ(matTrans->getHeight(), width_); - CHECK_EQ(matTrans->getWidth(), height_); - } - real* dataTrans = matTrans->getData(); - real* data = getData(); - int lda = getStride(); - int ldc = matTrans->getStride(); - - for (size_t i = 0; i < height_; i++) { - for (size_t j = 0; j < width_; j++) { - dataTrans[j * ldc + i] = data[i * lda + j]; - } - } -} - -void CpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { - if (memAlloc) { - matRot = std::make_shared(width_, height_); - } else { - CHECK(matRot != NULL); - CHECK_EQ(matRot->getHeight(), width_); - CHECK_EQ(matRot->getWidth(), height_); - } - real* dataRot = matRot->getData(); - real* data = getData(); - - for (size_t i = 0; i < height_; i++) { - for (size_t j = 0; j < width_; j++) { - if (clockWise) { - dataRot[j * height_ + i] = data[(height_ - i - 1) * width_ + j]; - } else { - dataRot[j * height_ + i] = data[i * width_ + (width_ - j - 1)]; - } - } - } -} - -MatrixPtr CpuMatrix::getInverse() { - MatrixPtr matInv; - inverse(matInv, true); - return matInv; -} - -void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) { - CHECK_EQ(height_, width_); - - if (memAlloc) { - matInv = std::make_shared(height_, width_); - } else { - CHECK(matInv != NULL); - } - - CHECK_EQ(height_, matInv->getHeight()); - CHECK_EQ(width_, matInv->getWidth()); - matInv->copyFrom(*this); - - real* data = getData(); - real* dataInv = matInv->getData(); - int ldc = matInv->getStride(); - - if (height_ == 1) { - CHECK_NE(*data, 0); - *dataInv = 1.0 / (*data); - return; - } - - /* Compute the LU decomposition of the matrix */ - std::vector ipiv(height_); - CBLAS_ORDER order = (matInv->isTransposed() ? 
CblasColMajor : CblasRowMajor); - int info = getrf(order, height_, height_, dataInv, ldc, ipiv.data()); - CHECK_EQ(info, 0); - - /* Compute the inverse of the matrix given its LU decompsotion */ - info = getri(order, height_, dataInv, ldc, ipiv.data()); - CHECK_EQ(info, 0); -} - -void CpuMatrix::upsampleForward(Matrix& input, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW) { - real* inputData = input.getData(); - real* maskData = mask.getData(); - real* outData = data_; - size_t inLength = imgSizeH * imgSizeW; - size_t outLength = outputH * outputW; - size_t batch = input.getHeight(); - CHECK(inLength == input.getWidth() / channels); - CHECK_EQ(batch, this->getHeight()); - CHECK_EQ(channels * outLength, this->getWidth()); - - for (size_t k = 0; k < batch; k++) { - for (size_t c = 0; c < channels; c++) { - for (size_t i = 0; i < inLength; i++) { - size_t out_index = static_cast(maskData[i]); - if (out_index >= outLength) { - LOG(FATAL) << "upsample index " << out_index << " out of range."; - } - outData[out_index] = inputData[i]; - } - inputData += inLength; - maskData += inLength; - outData += outLength; - } - } -} - -void CpuMatrix::upsampleBackward(Matrix& outputGrad, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW) { - real* outputGradData = outputGrad.getData(); - real* maskData = mask.getData(); - real* inputGradData = data_; - size_t inLength = imgSizeH * imgSizeW; - size_t outLength = outputH * outputW; - size_t batch = outputGrad.getHeight(); - CHECK(inLength == this->getWidth() / channels); - CHECK_EQ(batch, this->getHeight()); - CHECK_EQ(channels * outLength, outputGrad.getWidth()); - - for (size_t k = 0; k < batch; k++) { - for (size_t c = 0; c < channels; c++) { - for (size_t i = 0; i < inLength; i++) { - size_t out_index = static_cast(maskData[i]); - if (out_index >= outLength) { - LOG(FATAL) << "upsample index " << out_index << " 
out of range."; - } - inputGradData[i] = outputGradData[out_index]; - } - inputGradData += inLength; - maskData += inLength; - outputGradData += outLength; - } - } -} - -void CpuMatrix::maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - MatrixPtr maskMatP) { - real* inputData = inputMat.getData(); - real* outData = data_; - real* maskData = NULL; - size_t num = inputMat.getHeight(); - size_t inLength = imgSizeH * imgSizeW; - size_t outLength = outputH * outputW; - CHECK(inLength == inputMat.getWidth() / channels); - CHECK_EQ(num, this->getHeight()); - CHECK_EQ(channels * outLength, this->getWidth()); - size_t outStride = getStride(); - - if (maskMatP != NULL) { - maskData = maskMatP->getData(); - CHECK_EQ(channels * outLength, maskMatP->getWidth()); - } - - /* pool max one by one */ - for (size_t n = 0; n < num; ++n) { // frame by frame - if (!isContiguous()) { - outData = data_ + n * outStride; - } - for (size_t c = 0; c < channels; ++c) { // channel by channel - for (size_t ph = 0; ph < outputH; ++ph) { - int hstart = ph * strideH - paddingH; - int hend = hstart + sizeY; - hstart = hstart < 0 ? 0 : hstart; - hend = hend < (int)imgSizeH ? hend : (int)imgSizeH; - for (size_t pw = 0; pw < outputW; ++pw) { - int wstart = pw * strideW - paddingW; - int wend = wstart + sizeX; - wstart = wstart < 0 ? 0 : wstart; - wend = wend < (int)imgSizeW ? 
wend : (int)imgSizeW; - - real maxval = -(real)FLT_MAX; - int max_index = -1; - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - if (maxval < inputData[h * imgSizeW + w]) { - maxval = inputData[h * imgSizeW + w]; - max_index = h * imgSizeW + w; - } - } - } - - outData[ph * outputW + pw] = maxval; - if (maskData != NULL) maskData[ph * outputW + pw] = max_index; - } - } - // compute offset - inputData += inLength; - outData += outLength; - - if (maskData != NULL) maskData += outLength; - } - } -} - -void CpuMatrix::maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW) { - size_t num = image.getHeight(); - size_t inLength = imgSizeH * imgSizeW; - size_t outLength = outputH * outputW; - size_t channels = size_t(width_ / inLength); - CHECK(image.getWidth() == inLength * channels); - CHECK(image.getHeight() == height_ && image.getWidth() == width_); - CHECK(outV.getHeight() == outGrad.getHeight() && - outV.getWidth() == outGrad.getWidth()); - - real* tgtGrad = data_; - real* inData = image.getData(); - real* otData = outV.getData(); - real* otGrad = outGrad.getData(); - - size_t outStride = outV.getStride(); - real* origOutData = otData; - real* origOutGrad = otGrad; - - for (size_t n = 0; n < num; ++n) { - if (!outV.isContiguous()) { - otData = origOutData + n * outStride; - otGrad = origOutGrad + n * outStride; - } - for (size_t c = 0; c < channels; ++c) { - for (size_t ph = 0; ph < outputH; ++ph) { - int hstart = ph * strideH - paddingH; - int hend = std::min(hstart + sizeY, imgSizeH); - hstart = std::max(hstart, 0); - for (size_t pw = 0; pw < outputW; ++pw) { - int wstart = pw * strideW - paddingW; - int wend = std::min(wstart + sizeX, imgSizeW); - wstart = std::max(wstart, 0); - for (int h = hstart; h < 
hend; ++h) { - for (int w = wstart; w < wend; ++w) { - tgtGrad[h * imgSizeW + w] = - scaleTargets * tgtGrad[h * imgSizeW + w] + - scaleOutput * otGrad[ph * outputW + pw] * - (inData[h * imgSizeW + w] == otData[ph * outputW + pw]); - } - } - } - } - // offset - inData += inLength; - tgtGrad += inLength; - otData += outLength; - otGrad += outLength; - } - } -} - -void CpuMatrix::avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - bool excludeMode) { - // The main loop - size_t num = input.getHeight(); - size_t inLength = imgSizeH * imgSizeW; - size_t outLength = outputH * outputW; - CHECK(inLength * channels == input.getWidth()); - CHECK(outLength * channels * num == height_ * width_); - real* tgtData = data_; - real* inData = input.getData(); - - for (size_t n = 0; n < num; ++n) { - if (!isContiguous()) { - tgtData = data_ + n * getStride(); - } - for (size_t c = 0; c < channels; ++c) { - for (size_t ph = 0; ph < outputH; ++ph) { - int hstart = ph * strideH - paddingH; - int hend = std::min(hstart + sizeY, imgSizeH); - hstart = std::max(hstart, 0); - for (size_t pw = 0; pw < outputW; ++pw) { - int wstart = pw * strideW - paddingW; - int wend = std::min(wstart + sizeX, imgSizeW); - wstart = std::max(wstart, 0); - tgtData[ph * outputW + pw] = 0; // clear - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - tgtData[ph * outputW + pw] += inData[h * imgSizeW + w]; - } - } - int poolSize = - excludeMode ? 
(hend - hstart) * (wend - wstart) : sizeY * sizeX; - CHECK(poolSize); - tgtData[ph * outputW + pw] /= poolSize; - } - } - // compute offset - inData += inLength; - tgtData += outLength; - } - } -} - -void CpuMatrix::avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW, - bool excludeMode) { - size_t num = input.getHeight(); - size_t channels = input.getWidth() / outputH / outputW; - size_t inLength = imgSizeH * imgSizeW; - size_t outLength = outputH * outputW; - CHECK(inLength * channels == getWidth()); - real* inData = input.getData(); - real* outData = getData(); - - for (size_t n = 0; n < num; ++n) { - if (!input.isContiguous()) { - inData = input.getData() + n * input.getStride(); - } - for (size_t c = 0; c < channels; ++c) { - for (size_t ph = 0; ph < outputH; ++ph) { - int hstart = ph * strideH - paddingH; - int hend = std::min(hstart + sizeY, imgSizeH); - hstart = std::max(hstart, 0); - for (size_t pw = 0; pw < outputW; ++pw) { - int wstart = pw * strideW - paddingW; - int wend = std::min(wstart + sizeX, imgSizeW); - wstart = std::max(wstart, 0); - int poolSize = - excludeMode ? 
(hend - hstart) * (wend - wstart) : sizeY * sizeX; - CHECK(poolSize); - - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - outData[h * imgSizeW + w] += inData[ph * outputW + pw] / poolSize; - } - } - } - } - // offset - outData += inLength; - inData += outLength; - } - } -} - -void CpuMatrix::maxPool3DForward(Matrix& inputMat, - Matrix& maxPoolIdx, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW) { - real* inputData = inputMat.getData(); - real* outData = getData(); - real* maxPoolIdxData = maxPoolIdx.getData(); - size_t num = inputMat.getHeight(); - size_t inLength = imgSizeH * imgSizeW * imgSizeD; - size_t outLength = outputH * outputW * outputD; - CHECK(inLength == inputMat.getWidth() / channels); - CHECK_EQ(num, this->getHeight()); - CHECK_EQ(channels * outLength, this->getWidth()); - size_t outStride = getStride(); - - /* initialize the data_ */ - for (size_t i = 0; i < height_; i++) { - for (size_t j = 0; j < width_; j++) { - outData[(i)*outStride + j] = -(real)FLT_MAX; - maxPoolIdxData[(i)*outStride + j] = -1; - } - } - - /* pool max one by one */ - for (size_t n = 0; n < num; ++n) { // frame by frame - if (!isContiguous()) { - outData = getData() + n * outStride; - maxPoolIdxData = maxPoolIdx.getData() + n * outStride; - } - for (size_t c = 0; c < channels; ++c) { // channel by channel - for (size_t pd = 0; pd < outputD; ++pd) { - int dstart = pd * strideD - paddingD; - int dend = std::min(dstart + sizeZ, imgSizeD); - dstart = std::max(dstart, 0); - for (size_t ph = 0; ph < outputH; ++ph) { - int hstart = ph * strideH - paddingH; - int hend = std::min(hstart + sizeY, imgSizeH); - hstart = std::max(hstart, 0); - for (size_t pw = 0; pw < outputW; ++pw) { - int wstart = pw * strideW - paddingW; 
- int wend = std::min(wstart + sizeX, imgSizeW); - wstart = std::max(wstart, 0); - int maxIdx = -1; - real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; - for (int d = dstart; d < dend; ++d) { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - if (maxOutData < - inputData[(d * imgSizeH + h) * imgSizeW + w]) { - maxOutData = inputData[(d * imgSizeH + h) * imgSizeW + w]; - maxIdx = (d * imgSizeH + h) * imgSizeW + w; - } - } - } - } - outData[(pd * outputH + ph) * outputW + pw] = maxOutData; - maxPoolIdxData[(pd * outputH + ph) * outputW + pw] = maxIdx; - } - } - } - // compute offset - inputData += inLength; - outData += outLength; - maxPoolIdxData += outLength; - } - } -} - -void CpuMatrix::maxPool3DBackward(Matrix& outGrad, - Matrix& maxPoolIdx, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput) { - size_t num = getHeight(); - size_t inLength = imgSizeH * imgSizeW * imgSizeD; - size_t outLength = outputH * outputW * outputD; - size_t channels = size_t(width_ / inLength); - CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() && - maxPoolIdx.getWidth() == outGrad.getWidth()); - - real* tgtGrad = getData(); - real* otGrad = outGrad.getData(); - real* maxPoolIdxData = maxPoolIdx.getData(); - size_t outStride = outGrad.getStride(); - - for (size_t n = 0; n < num; ++n) { - if (!outGrad.isContiguous()) { - otGrad = outGrad.getData() + n * outStride; - maxPoolIdxData = maxPoolIdx.getData() + n * outStride; - } - for (size_t c = 0; c < channels; ++c) { - for (size_t pd = 0; pd < outputD; ++pd) { - for (size_t ph = 0; ph < outputH; ++ph) { - for (size_t pw = 0; pw < outputW; ++pw) { - const size_t index = (pd * outputH + ph) * outputW + pw; - const size_t tgtIdx = 
static_cast(maxPoolIdxData[index]); - tgtGrad[tgtIdx] = - scaleTargets * tgtGrad[tgtIdx] + scaleOutput * otGrad[index]; - } - } - } - // offset - tgtGrad += inLength; - otGrad += outLength; - maxPoolIdxData += outLength; - } - } -} - -void CpuMatrix::avgPool3DForward(Matrix& input, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW) { - // The main loop - size_t num = input.getHeight(); - size_t inLength = imgSizeH * imgSizeW * imgSizeD; - size_t outLength = outputH * outputW * outputD; - CHECK(inLength * channels == input.getWidth()); - CHECK(outLength * channels * num == height_ * width_); - real* tgtData = getData(); - real* inData = input.getData(); - - for (size_t n = 0; n < num; ++n) { - if (!isContiguous()) { - tgtData = data_ + n * getStride(); - } - for (size_t c = 0; c < channels; ++c) { - for (size_t pd = 0; pd < outputD; ++pd) { - int dstart = pd * strideD - paddingD; - int dend = std::min(dstart + sizeZ, imgSizeD); - dstart = std::max(dstart, 0); - for (size_t ph = 0; ph < outputH; ++ph) { - int hstart = ph * strideH - paddingH; - int hend = std::min(hstart + sizeY, imgSizeH); - hstart = std::max(hstart, 0); - for (size_t pw = 0; pw < outputW; ++pw) { - int wstart = pw * strideW - paddingW; - int wend = std::min(wstart + sizeX, imgSizeW); - wstart = std::max(wstart, 0); - - tgtData[(pd * outputH + ph) * outputW + pw] = 0; // clear - for (int d = dstart; d < dend; ++d) { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - tgtData[(pd * outputH + ph) * outputW + pw] += - inData[(d * imgSizeH + h) * imgSizeW + w]; - } - } - } - int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart); - CHECK(poolSize); - tgtData[(pd * outputH + ph) * outputW + pw] /= poolSize; - } - } - } - // 
compute offset - inData += inLength; - tgtData += outLength; - } - } -} - -void CpuMatrix::avgPool3DBackward(Matrix& input, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput) { - size_t num = input.getHeight(); - size_t inLength = imgSizeH * imgSizeW * imgSizeD; - size_t outLength = outputH * outputW * outputD; - size_t channels = input.getWidth() / outLength; - CHECK(inLength * channels == getWidth()); - real* inData = input.getData(); - real* outData = getData(); - - for (size_t n = 0; n < num; ++n) { - if (!input.isContiguous()) { - inData = input.getData() + n * input.getStride(); - } - for (size_t c = 0; c < channels; ++c) { - for (size_t pd = 0; pd < outputD; ++pd) { - int dstart = pd * strideD - paddingD; - int dend = std::min(dstart + sizeZ, imgSizeD); - dstart = std::max(dstart, 0); - for (size_t ph = 0; ph < outputH; ++ph) { - int hstart = ph * strideH - paddingH; - int hend = std::min(hstart + sizeY, imgSizeH); - hstart = std::max(hstart, 0); - for (size_t pw = 0; pw < outputW; ++pw) { - int wstart = pw * strideW - paddingW; - int wend = std::min(wstart + sizeX, imgSizeW); - wstart = std::max(wstart, 0); - int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart); - CHECK(poolSize); - for (int d = dstart; d < dend; ++d) { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - outData[(d * imgSizeH + h) * imgSizeW + w] += - inData[(pd * outputH + ph) * outputW + pw] / poolSize; - } - } - } - } - } - } - // offset - outData += inLength; - inData += outLength; - } - } -} - -/** - * Input: one or more sequences. Each sequence contains some instances. - * Output: output size is the number of input sequences (NOT input instances). 
- * output[i] is set to max_{for each instance in this sequence}{input[i]} - */ -void CpuMatrix::maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index) { - CHECK(dynamic_cast(&input)); - CHECK(dynamic_cast(&sequence)); - CHECK(dynamic_cast(&index)); - - real* outData = getData(); - real* inputData = input.getData(); - const int* starts = sequence.getData(); - int* maxIndex = index.getData(); - size_t numSequences = getHeight(); - size_t dim = getWidth(); - - CHECK_EQ(dim, input.getWidth()); - CHECK_EQ(numSequences, sequence.getSize() - 1); - CHECK_EQ(starts[numSequences], (int)input.getHeight()); - CHECK_EQ(numSequences * dim, index.getSize()); - - for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) { - // current sequence, loop for each input instance - // (1) first instance: do not need compare, copy value to outV directly - for (size_t k = 0; k < dim; ++k) { - outData[sequenceId * dim + k] = inputData[starts[sequenceId] * dim + k]; - maxIndex[sequenceId * dim + k] = starts[sequenceId]; - } - // (2) other instance in same sequence - for (int insId = starts[sequenceId] + 1; insId < starts[sequenceId + 1]; - ++insId) { - // insId is the index on all instances - for (size_t k = 0; k < dim; ++k) { - // for each dim - if (inputData[insId * dim + k] > outData[sequenceId * dim + k]) { - // update max value and record index - outData[sequenceId * dim + k] = inputData[insId * dim + k]; - maxIndex[sequenceId * dim + k] = insId; - } - } - } - } -} - -void CpuMatrix::maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index) { - CHECK(dynamic_cast(&outputGrad)); - CHECK(dynamic_cast(&sequence)); - CHECK(dynamic_cast(&index)); - - real* inputGrad = getData(); - real* outGrad = outputGrad.getData(); - int* maxIndex = index.getData(); - size_t dim = getWidth(); - size_t numSequences = sequence.getSize() - 1; - - CHECK_EQ(dim, outputGrad.getWidth()); - CHECK_EQ(numSequences, outputGrad.getHeight()); - 
CHECK_EQ(numSequences * dim, index.getSize()); - - for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) { - // current sequence - for (size_t j = 0; j < dim; ++j) { - // each dim - int insId = maxIndex[sequenceId * dim + j]; - inputGrad[insId * dim + j] += outGrad[sequenceId * dim + j]; - } - } -} - -inline void vecAddTo(real* a, const real* b, size_t len) { - for (unsigned int i = 0; i < len; ++i) { - a[i] += b[i]; - } -} - -inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) { - for (unsigned int i = 0; i < len; ++i) { - a[i] += scaleB * b[i]; - } -} - -inline void colVecAddTo( - real* a, const real* b, size_t len, size_t aWidth, size_t bWidth) { - for (unsigned int i = 0; i < len; ++i) { - a[i * aWidth] += b[i * bWidth]; - } -} - -inline void colVecAddTo( - real* a, real* b, real c, size_t len, size_t aWidth, size_t bWidth) { - for (unsigned int i = 0; i < len; ++i) { - a[i * aWidth] += b[i * bWidth] * c; - } -} - -void CpuMatrix::addBias(Matrix& b, real scale) { - CHECK(b.useGpu_ == false) << "Matrix type are not equal"; - - CHECK_EQ(b.getHeight(), (size_t)1); - CHECK_EQ(width_, b.getWidth()); - real* aData = getData(); - real* bData = b.getData(); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - - if (scale == 1 && getStride() % 32 == 0) { // use libaddto - // @TODO(yuyang18) Make input addr can be unaligned. 
- // So merge this if and else - CHECK_EQ((size_t)aData % 32, 0UL); - CHECK_EQ((size_t)bData % 32, 0UL); - for (size_t i = 0; i < numSamples; i++) { - simd::addTo(aData + i * getStride(), bData, dim); - } - } else { - for (size_t i = 0; i < numSamples; i++) { - for (size_t j = 0; j < dim; j++) { - aData[i * getStride() + j] += scale * bData[j]; - } - } - } -} - -void CpuMatrix::addSharedBias(Matrix& b, real scale) { - CHECK_EQ(b.getHeight(), (size_t)1); - real* aData = getData(); - real* bData = b.getData(); - size_t numSamples = getHeight(); - size_t channel = b.getWidth(); - CHECK_EQ(getWidth() % channel, 0UL); - size_t dim = getWidth() / channel; - - for (size_t i = 0; i < numSamples; i++) { - for (size_t c = 0; c < channel; c++) { - for (size_t j = 0; j < dim; j++) { - aData[i * getStride() + c * dim + j] += scale * bData[c]; - } - } - } -} - -void CpuMatrix::collectBias(Matrix& a, real scale) { - CHECK_EQ(getHeight(), (size_t)1); - CHECK_EQ(width_, a.getWidth()); - CpuSparseMatrix* aptr = dynamic_cast(&a); - if (!aptr) { - sumCols(a, /* scaleSum= */ scale, /* scaleDest= */ 1); - } else { - size_t nnz = aptr->getElementCnt(); - int* cols = aptr->getCols(); - real* A = aptr->getValue(); - real* B = getData(); - for (size_t i = 0; i < nnz; i++) { - B[cols[i]] += scale * A[i]; - } - } -} - -void CpuMatrix::collectSharedBias(Matrix& a, real scale) { - CHECK_EQ(getHeight(), (size_t)1); - real* B = getData(); - real* A = a.getData(); - size_t numSamples = a.getHeight(); - size_t channel = getWidth(); - CHECK_EQ(a.getWidth() % channel, 0UL); - size_t dim = a.getWidth() / channel; - for (size_t i = 0; i < numSamples; i++) { - for (size_t c = 0; c < channel; c++) { - for (size_t j = 0; j < dim; j++) { - B[c] += scale * A[i * channel * dim + c * dim + j]; - } - } - } -} - -void CpuMatrix::sequenceAvgForward(Matrix& a, - const IVector& startsPos, - int mode) { - size_t height = getHeight(); - size_t width = getWidth(); - CHECK_EQ(height, startsPos.getSize() - 1); - 
CHECK_EQ(width, a.getWidth()); - real* dst = getData(); - real* src = a.getData(); - const int* starts = startsPos.getData(); - MatrixPtr outMtx = Matrix::create(nullptr, 1, width, false, false); - MatrixPtr dataMtx = Matrix::create(nullptr, 1, width, false, false); - for (size_t i = 0; i < height; i++) { - int sequenceLength = starts[i + 1] - starts[i]; - if (0 == sequenceLength) { - // empty sequence - continue; - } - outMtx->setData(dst + i * width); - dataMtx->setData(src + starts[i] * width, sequenceLength, width); - if (mode == 0) { - // plain average - outMtx->sumCols(*dataMtx, - (real)1 / (real)sequenceLength, - /* scaleDest= */ 1); - } else if (mode == 1) { - // sum instead of average - outMtx->sumCols(*dataMtx, /* scaleSum= */ 1, /* scaleDest= */ 1); - } else if (mode == 2) { - // divide by square root of sequenceLength - outMtx->sumCols(*dataMtx, - (real)1 / std::sqrt(sequenceLength), - /* scaleDest= */ 1); - } else { - LOG(FATAL) << "should not reach here"; - } - } -} - -void CpuMatrix::sequenceAvgBackward(Matrix& a, - const IVector& startsPos, - int mode) { - size_t height = a.getHeight(); - size_t width = getWidth(); - CHECK_EQ(height, startsPos.getSize() - 1); - CHECK_EQ(width, a.getWidth()); - real* dst = getData(); - real* src = a.getData(); - const int* starts = startsPos.getData(); - MatrixPtr outMtx = Matrix::create(nullptr, 1, width, false, false); - MatrixPtr dataMtx = Matrix::create(nullptr, 1, width, false, false); - for (size_t i = 0; i < height; ++i) { - int sequenceLength = starts[i + 1] - starts[i]; - if (0 == sequenceLength) { - // empty sequence - continue; - } - outMtx->setData(dst + starts[i] * width, sequenceLength, width); - dataMtx->setData(src + i * width); - if (mode == 0) { - // plain average - outMtx->addBias(*dataMtx, 1.0f / sequenceLength); - } else if (mode == 1) { - // sum instead of average - outMtx->addBias(*dataMtx, 1.0f); - } else if (mode == 2) { - // divide by square root of sequenceLength - outMtx->addBias(*dataMtx, 
1.0f / std::sqrt(sequenceLength)); - } else { - LOG(FATAL) << "should not reach here"; - } - } -} - -/* this = scaleAB*(a*b) + scaleT*this*/ -void CpuMatrix::mul(const Matrix& a, - const Matrix& b, - real scaleAB, - real scaleT) { - CHECK(!isTransposed()) << "Not supported"; - const auto a_ptr = dynamic_cast(&a); - const auto b_ptr = dynamic_cast(&b); - const auto a_ptr_s = dynamic_cast(&a); - const auto b_ptr_s = dynamic_cast(&b); - - if (a_ptr && b_ptr) { - mul((CpuMatrix*)a_ptr, (CpuMatrix*)b_ptr, scaleAB, scaleT); - } else if (a_ptr_s && b_ptr) { - mul((CpuSparseMatrix*)a_ptr_s, (CpuMatrix*)b_ptr, scaleAB, scaleT); - } else if (a_ptr && b_ptr_s) { - mul((CpuMatrix*)a_ptr, (CpuSparseMatrix*)b_ptr_s, scaleAB, scaleT); - } else { - LOG(FATAL) << "Not supported"; - } -} - -void CpuMatrix::mul(CpuSparseMatrix* a, - CpuMatrix* b, - real scaleAB, - real scaleT) { - if (dynamic_cast(b)) { - return mul(a, dynamic_cast(b), this, scaleAB, scaleT); - } else if (dynamic_cast(b)) { - return mul(a, dynamic_cast(b), this, scaleAB, scaleT); - } else { - return mul(a, b, this, scaleAB, scaleT); - } -} - -void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { - CHECK(!isTransposed()) << "Not supported"; - - size_t a_col, b_col, a_row, b_row; - bool a_trans, b_trans; - if (!a->isTransposed()) { - a_col = a->getWidth(); - a_row = a->getHeight(); - a_trans = false; - } else { - a_col = a->getHeight(); - a_row = a->getWidth(); - a_trans = true; - } - if (!b->isTransposed()) { - b_col = b->getWidth(); - b_row = b->getHeight(); - b_trans = false; - } else { - b_col = b->getHeight(); - b_row = b->getWidth(); - b_trans = true; - } - - CHECK_EQ(a_col, b_row); - CHECK_EQ(a_row, getHeight()); - CHECK_EQ(b_col, getWidth()); - - real* A = a->getData(); - real* B = b->getData(); - real* C = getData(); - - int M = getHeight(); - int N = getWidth(); - int K = a_col; - int lda = a->getStride(); - int ldb = b->getStride(); - int ldc = getStride(); - BlasGemm::compute( - 
a_trans, b_trans, M, N, K, scaleAB, A, lda, B, ldb, scaleT, C, ldc); -} - -void CpuMatrix::mul( - CpuMatrix* a, CpuMatrix* b, CpuSparseMatrix* c, real scaleAB, real scaleT) { - CHECK(!c->isTransposed()) << "Not supported"; - CHECK_EQ(c->getValueType(), FLOAT_VALUE); - - real* A = a->getData(); - real* B = b->getData(); - real* C = c->getValue(); - int* rows = c->getRows(); - int* cols = c->getCols(); - size_t height = c->getHeight(); - size_t width = c->getWidth(); - if (scaleT == 0) { - c->zeroMem(); - } - - if (!a->isTransposed() && !b->isTransposed()) { - size_t m = a->getWidth(); - CHECK_EQ(b->getHeight(), m); - CHECK_EQ(a->getHeight(), height); - CHECK_EQ(b->getWidth(), width); - if (c->getFormat() == SPARSE_CSC) { - for (size_t i = 0; i < width; i++) { - size_t start = c->getColStartIdx(i); - size_t end = c->getColStartIdx(i + 1); - for (size_t j = start; j < end; j++) { - real sum = 0; - size_t rowIdx = rows[j]; - for (size_t k = 0; k < m; k++) { - sum += A[rowIdx * m + k] * B[k * width + i]; - } - C[j] = scaleAB * sum + scaleT * C[j]; - } - } - } else { - for (size_t i = 0; i < height; i++) { - size_t start = c->getRowStartIdx(i); - size_t end = c->getRowStartIdx(i + 1); - for (size_t j = start; j < end; j++) { - real sum = 0; - size_t colIdx = cols[j]; - for (size_t k = 0; k < m; k++) { - sum += A[i * m + k] * B[k * width + colIdx]; - } - C[j] = scaleAB * sum + scaleT * C[j]; - } - } - } - } else if (a->isTransposed() && !b->isTransposed()) { - size_t m = a->getHeight(); - CHECK_EQ(m, b->getHeight()); - CHECK_EQ(b->getWidth(), width); - CHECK_EQ(a->getWidth(), height); - - if (c->getFormat() == SPARSE_CSC) { - for (size_t i = 0; i < width; i++) { - size_t start = c->getColStartIdx(i); - size_t end = c->getColStartIdx(i + 1); - for (size_t j = start; j < end; j++) { - real sum = 0; - size_t rowIdx = rows[j]; - for (size_t k = 0; k < m; k++) { - sum += A[k * height + rowIdx] * B[k * width + i]; - } - C[j] = scaleAB * sum + scaleT * C[j]; - } - } - } else { - 
for (size_t i = 0; i < height; i++) { - int start = c->getRowStartIdx(i); - int end = c->getRowStartIdx(i + 1); - for (int j = start; j < end; j++) { - real sum = 0; - size_t colIdx = cols[j]; - for (size_t k = 0; k < m; k++) { - sum += A[k * height + i] * B[k * width + colIdx]; - } - C[j] = scaleAB * sum + scaleT * C[j]; - } - } - } - } else if (!a->isTransposed() && b->isTransposed()) { - size_t m = a->getWidth(); - CHECK_EQ(b->getWidth(), m); - CHECK_EQ(a->getHeight(), height); - CHECK_EQ(b->getHeight(), width); - if (c->getFormat() == SPARSE_CSR) { - for (size_t i = 0; i < height; i++) { - size_t start = c->getRowStartIdx(i); - size_t end = c->getRowStartIdx(i + 1); - for (size_t j = start; j < end; j++) { - real sum = 0; - size_t colIdx = cols[j]; - for (size_t k = 0; k < m; k++) { - sum += A[i * m + k] * B[colIdx * m + k]; - } - C[j] = scaleAB * sum + scaleT * C[j]; - } - } - } else { - LOG(FATAL) << "Not supported csc format " - "when a is not trans and b is trans"; - } - } else { - LOG(FATAL) << "Not supported"; - } -} - -void CpuMatrix::mul(CpuMatrix* a, - CpuSparseMatrix* b, - real scaleAB, - real scaleT) { - CHECK(!trans_) << "Not supported"; - CHECK(!a->isTransposed()) << "Not supported"; - CHECK(scaleT == 0 || scaleT == 1); - - // TODO(yuyang18): Maybe bug implementation here - CHECK_EQ(scaleAB, static_cast(1.0)); - - real* A = a->getData(); - real* B = b->getValue(); - real* C = getData(); - int* rows = b->getRows(); - int* cols = b->getCols(); - - if (scaleT == 0) { - zeroMem(); - } - if (b->getFormat() == SPARSE_CSC) { - if (!b->isTransposed()) { - size_t m = a->getWidth(); - CHECK_EQ(b->getHeight(), m); - CHECK_EQ(a->getHeight(), height_); - CHECK_EQ(b->getWidth(), width_); - - if (b->getValueType() == NO_VALUE) { - for (size_t j = 0; j < b->getWidth(); ++j) { - int start = b->getColStartIdx(j); - int end = b->getColStartIdx(j + 1); - for (int i = start; i < end; ++i) { - colVecAddTo(C + j, A + rows[i], height_, width_, a->getWidth()); - } - } - } 
else if (b->getValueType() == FLOAT_VALUE) { - for (size_t j = 0; j < b->getWidth(); ++j) { - int start = b->getColStartIdx(j); - int end = b->getColStartIdx(j + 1); - for (int i = start; i < end; ++i) { - colVecAddTo( - C + j, A + rows[i], B[i], height_, width_, a->getWidth()); - } - } - } - } else /*if (b->isTransposed())*/ { - size_t m = a->getWidth(); - CHECK_EQ(b->getHeight(), width_); - CHECK_EQ(a->getHeight(), height_); - CHECK_EQ(b->getWidth(), m); - if (b->getValueType() == NO_VALUE) { - for (size_t i = 0; i < b->getWidth(); ++i) { - int start = b->getColStartIdx(i); - int end = b->getColStartIdx(i + 1); - for (int j = start; j < end; ++j) { - colVecAddTo(C + rows[j], A + i, height_, width_, a->getWidth()); - } - } - } else if (b->getValueType() == FLOAT_VALUE) { - for (size_t i = 0; i < b->getWidth(); ++i) { - int start = b->getColStartIdx(i); - int end = b->getColStartIdx(i + 1); - for (int j = start; j < end; ++j) { - colVecAddTo( - C + rows[j], A + i, B[j], height_, width_, a->getWidth()); - } - } - } - } - } else { - if (!b->isTransposed()) { - size_t m = a->getWidth(); - CHECK_EQ(b->getHeight(), m); - CHECK_EQ(a->getHeight(), height_); - CHECK_EQ(b->getWidth(), width_); - - if (b->getValueType() == NO_VALUE) { - for (size_t j = 0; j < b->getHeight(); ++j) { - int start = b->getRowStartIdx(j); - int end = b->getRowStartIdx(j + 1); - for (int i = start; i < end; ++i) { - colVecAddTo(C + cols[i], A + j, height_, width_, a->getWidth()); - } - } - } else if (b->getValueType() == FLOAT_VALUE) { - for (size_t j = 0; j < b->getHeight(); ++j) { - int start = b->getRowStartIdx(j); - int end = b->getRowStartIdx(j + 1); - for (int i = start; i < end; ++i) { - colVecAddTo( - C + cols[i], A + j, B[i], height_, width_, a->getWidth()); - } - } - } - } else /*if (b->isTransposed())*/ { - size_t m = a->getWidth(); - CHECK_EQ(b->getHeight(), width_); - CHECK_EQ(a->getHeight(), height_); - CHECK_EQ(b->getWidth(), m); - if (b->getValueType() == NO_VALUE) { - for (size_t 
i = 0; i < b->getHeight(); ++i) { - int start = b->getRowStartIdx(i); - int end = b->getRowStartIdx(i + 1); - for (int j = start; j < end; ++j) { - colVecAddTo(C + i, A + cols[j], height_, width_, a->getWidth()); - } - } - } else if (b->getValueType() == FLOAT_VALUE) { - for (size_t i = 0; i < b->getHeight(); ++i) { - int start = b->getRowStartIdx(i); - int end = b->getRowStartIdx(i + 1); - for (int j = start; j < end; ++j) { - colVecAddTo( - C + i, A + cols[j], B[j], height_, width_, a->getWidth()); - } - } - } - } - } -} - -void CpuMatrix::selectRows(Matrix& table, IVector& ids) { - if (dynamic_cast(&table)) { - selectRowsImp(*dynamic_cast(&table), ids); - } else if (dynamic_cast(&table)) { - selectRowsImp(*dynamic_cast(&table), ids); - } else { - CHECK(table.isContiguous()); - selectRowsImp(*dynamic_cast(&table), ids); - } -} - -void CpuMatrix::selectElements(Matrix& table, IVector& ids) { - CHECK_EQ(table.getHeight(), ids.getSize()); - CHECK_EQ(getHeight(), ids.getSize()); - CHECK_EQ(getWidth(), 1U); - real* tableData = table.getData(); - int* idsData = ids.getData(); - for (size_t i = 0; i < table.getHeight(); i++) { - data_[i] += tableData[i * table.getWidth() + idsData[i]]; - } -} - -void CpuMatrix::addElements(Matrix& table, IVector& ids) { - CHECK_EQ(table.getHeight(), ids.getSize()); - CHECK_EQ(getHeight(), ids.getSize()); - CHECK_EQ(getWidth(), 1U); - real* tableData = table.getData(); - int* idsData = ids.getData(); - for (size_t i = 0; i < table.getHeight(); i++) { - tableData[i * table.getWidth() + idsData[i]] += data_[i]; - } -} - -// this.row[i] += table.row[ids[i]] -template -void CpuMatrix::selectRowsImp(TableMatType& table, IVector& ids) { - CHECK(!table.useGpu()); - CHECK(!ids.useGpu()); - CHECK_EQ(getHeight(), ids.getSize()); - CHECK_EQ(getWidth(), table.getWidth()); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - real* a = getData(); - size_t tableSize = table.getHeight(); - int* index = ids.getData(); - - for (size_t i = 0; i 
< numSamples; ++i) { - if (index[i] == -1) continue; - CHECK_LT(index[i], (int)tableSize); - CHECK_GE(index[i], 0); - vecAddTo(a + i * stride_, table.getRow(index[i]), dim); - } -} - -void CpuMatrix::addToRows(Matrix& table, IVector& ids) { - if (dynamic_cast(&table)) { - addToRowsImp(*dynamic_cast(&table), ids); - } else if (dynamic_cast(&table)) { - addToRowsImp(*dynamic_cast(&table), ids); - } else if (dynamic_cast(&table)) { - addToRowsImp(*dynamic_cast(&table), ids); - } else { - CHECK(table.isContiguous()); - addToRowsImp(*dynamic_cast(&table), ids); - } -} - -// table.row[ids[i]] += this.row[i] -template -void CpuMatrix::addToRowsImp(TableMatType& table, IVector& ids) { - CHECK(!table.useGpu()); - CHECK(!ids.useGpu()); - CHECK_EQ(getHeight(), ids.getSize()); - CHECK_EQ(getWidth(), table.getWidth()); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - real* a = getData(); - size_t tableSize = table.getHeight(); - int* index = ids.getData(); - - for (size_t i = 0; i < numSamples; ++i) { - if (index[i] == -1) continue; - CHECK_LT(index[i], (int)tableSize); - CHECK_GE(index[i], 0); - vecAddTo(table.getRow(index[i]), a + i * stride_, dim); - } -} - -static ThreadLocal> threadLocalColArray; - -template -void CpuMatrix::mul( - CpuSparseMatrix* a, MatBType* b, MatCType* c, real scaleAB, real scaleT) { - CHECK(!c->isTransposed()) << "Not supported"; - CHECK(!b->isTransposed()) << "Not supported"; - // TODO(yuyang18): Maybe bug implementation here. 
- CHECK(scaleAB == 1) << "Not supported"; - CHECK(scaleT == 0 || scaleT == 1) << "Not supported"; - CHECK_EQ(a->getFormat(), SPARSE_CSR) << "Not supported"; - - real* B = b->getData(); - real* C = c->getData(); - size_t height = c->getHeight(); - size_t width = c->getWidth(); - int* cols = a->getCols(); - real* values = a->getValue(); - - if (scaleT == 0) { - c->zeroMem(); - } - - if (!a->isTransposed()) { - size_t m = a->getWidth(); - CHECK_EQ(b->getHeight(), m); - CHECK_EQ(a->getHeight(), height); - CHECK_EQ(b->getWidth(), width); - - if (a->getValueType() == NO_VALUE) { - if (width % 32 == 0) { // use libaddto - // @TODO(yuyang18) Make input addr can be unaligned. - // So merge this if and else - CHECK_EQ((size_t)B % 32, 0UL); - CHECK_EQ((size_t)C % 32, 0UL); - auto& colArray = *threadLocalColArray; - for (size_t i = 0; i < a->getHeight(); ++i) { - const int start = a->getRowStartIdx(i); - const int end = a->getRowStartIdx(i + 1); - size_t colNum = end - start; - colArray.resize(colNum); - for (int j = 0; j < end - start; ++j) { - colArray[j] = b->getRow(cols[j + start]); - } - simd::batchAddTo(c->getRow(i), &colArray[0], colNum, width); - } - - } else { - for (size_t i = 0; i < a->getHeight(); ++i) { - const int start = a->getRowStartIdx(i); - const int end = a->getRowStartIdx(i + 1); - for (int j = start; j < end; ++j) { - vecAddTo(c->getRow(i), b->getRow(cols[j]), width); - } - } - } - } else if (a->getValueType() == FLOAT_VALUE) { - for (size_t i = 0; i < a->getHeight(); ++i) { - const int start = a->getRowStartIdx(i); - const int end = a->getRowStartIdx(i + 1); - for (int j = start; j < end; ++j) { - vecAddTo(c->getRow(i), b->getRow(cols[j]), values[j], width); - } - } - } - } else /*if (a->isTransposed())*/ { - size_t m = a->getHeight(); - CHECK_EQ(b->getHeight(), m); - CHECK_EQ(a->getWidth(), height); - CHECK_EQ(b->getWidth(), width); - if (a->getValueType() == NO_VALUE) { - if (width % 32 == 0) { // use libaddto - // @TODO(yuyang18) Make input addr can 
be unaligned. - // So merge this if and else - CHECK_EQ((size_t)B % 32, 0UL); - CHECK_EQ((size_t)C % 32, 0UL); - for (size_t i = 0; i < a->getHeight(); ++i) { - const int start = a->getRowStartIdx(i); - const int end = a->getRowStartIdx(i + 1); - for (int j = start; j < end; ++j) { - simd::addTo(c->getRow(cols[j]), b->getRow(i), width); - } - } - - } else { - for (size_t i = 0; i < a->getHeight(); ++i) { - const int start = a->getRowStartIdx(i); - const int end = a->getRowStartIdx(i + 1); - for (int j = start; j < end; ++j) { - vecAddTo(c->getRow(cols[j]), b->getRow(i), width); - } - } - } - } else if (a->getValueType() == FLOAT_VALUE) { - for (size_t i = 0; i < a->getHeight(); ++i) { - const int start = a->getRowStartIdx(i); - const int end = a->getRowStartIdx(i + 1); - for (int j = start; j < end; ++j) { - vecAddTo(c->getRow(cols[j]), b->getRow(i), values[j], width); - } - } - } - } -} - -// instantiation mul() called in SparseRowMatrix.cpp -template void CpuMatrix::mul( - CpuSparseMatrix* a, - CpuMatrix* b, - SparseRowCpuMatrix* c, - real scaleAB, - real scaleT); -template void CpuMatrix::mul( - CpuSparseMatrix* a, - CpuMatrix* b, - SparseAutoGrowRowCpuMatrix* c, - real scaleAB, - real scaleT); -template void CpuMatrix::mul(CpuSparseMatrix* a, - CpuMatrix* b, - CacheRowCpuMatrix* c, - real scaleAB, - real scaleT); - -#ifndef PADDLE_MOBILE_INFERENCE -void SharedCpuMatrix::mul(CpuSparseMatrix* a, - CpuMatrix* b, - real scaleAB, - real scaleT) { - CHECK(!isTransposed()) << "Not supported"; - CHECK(!b->isTransposed()) << "Not supported"; - CHECK_EQ(scaleAB, 1) << "Not supported"; - CHECK_EQ(scaleT, 1) << "Not supported"; - CHECK_EQ(a->getFormat(), SPARSE_CSR) << "not supported"; - - real* B = b->getData(); - real* C = getData(); - size_t height = getHeight(); - size_t width = getWidth(); - - // get real trans - MatrixPtr aTrans; - if (a->isTransposed()) { - aTrans = a->getTmpSparseMatrix(a->getWidth(), a->getHeight()); - a->transpose(aTrans, false); - } - a = 
dynamic_cast(aTrans.get()); - - size_t m = a->getWidth(); - CHECK_EQ(b->getHeight(), m); - CHECK_EQ(a->getHeight(), height); - CHECK_EQ(b->getWidth(), width); - - size_t blockSize = (height / blockNum_) + 1; - CpuMatrixPtr localBuf = *localBuf_; - if (!localBuf) { - localBuf = std::make_shared(blockSize, width); - } else { - localBuf->resize(blockSize, width); - } - localBuf->zeroMem(); - real* localC = localBuf->getData(); - std::vector& blockSeq = *blockSeq_; - if (blockSeq.size() == 0) { - for (int k = 0; k < blockNum_; ++k) { - blockSeq.push_back(k); - } - std::shuffle( - blockSeq.begin(), blockSeq.end(), ThreadLocalRandomEngine::get()); - } - std::vector& localBufRows = *localBufRows_; - int* cols = a->getCols(); - real* value = a->getValue(); - - for (int k = 0; k < blockNum_; ++k) { - int blockId = blockSeq[k]; - size_t blockBegin = blockId * blockSize; - size_t blockEnd = (blockId + 1) * blockSize; - if (blockId == blockNum_ - 1) { - blockEnd = height; - } - if (a->getValueType() == NO_VALUE) { - for (size_t i = blockBegin; i < blockEnd; ++i) { - int start = a->getRowStartIdx(i); - int end = a->getRowStartIdx(i); - size_t colNum = a->getColNum(i); - if (colNum == 0) { - continue; - } // skip empty row - localBufRows.push_back(i); - size_t bufPos = localBufRows.size() - 1; - for (int j = start; j < end; ++j) { - vecAddTo(localC + bufPos * width, B + cols[j] * width, width); - } - } - } else if (a->getValueType() == FLOAT_VALUE) { - for (size_t i = blockBegin; i < blockEnd; ++i) { - int start = a->getRowStartIdx(i); - int end = a->getRowStartIdx(i); - size_t colNum = a->getColNum(i); - if (colNum == 0) { - continue; - } // skip empty row - localBufRows.push_back(i); - size_t bufPos = localBufRows.size() - 1; - for (int j = start; j < end; ++j) { - vecAddTo( - localC + bufPos * width, B + cols[j] * width, value[j], width); - } - } - } - - { - std::lock_guard guard(*blockLocks_[blockId]); - for (size_t i = 0; i < localBufRows.size(); ++i) { - vecAddTo(C + 
localBufRows[i] * width, localC + i * width, width); - } - } - memset(localC, 0, localBufRows.size() * width * sizeof(real)); - localBufRows.clear(); - } - - VLOG(2) << " B[0]=" << B[0] << " B[1]=" << B[1] << " C[0]=" << C[0] - << " C[1]=" << C[1]; -} - -void SharedCpuMatrix::add(Matrix& b, real p1, real p2) { - CHECK_EQ(blockNum_, 1); - std::lock_guard guard(*blockLocks_[0]); - CpuMatrix::add(b, p1, p2); -} - -void SharedCpuMatrix::add(real p1, real p2) { - CHECK_EQ(blockNum_, 1); - std::lock_guard guard(*blockLocks_[0]); - CpuMatrix::add(p1, p2); -} - -void SharedCpuMatrix::initShared(int blockNum) { - CHECK_GT(height_ * width_, 1UL * 1024 * 1024) - << "should not share small matrix"; - initBlock(blockNum); -} - -void SharedCpuMatrix::initBlock(int blockNum) { - CHECK_LE(blockNum, 200) << "should not use large block number"; - blockNum_ = blockNum; - blockLocks_.resize(blockNum); - for (auto& locker : blockLocks_) { - locker.reset(new std::mutex); - } -} - -#endif -/* Add a (column) vector b to matrix a, column by column */ -void CpuMatrix::addColumnVector(const Matrix& b) { - BaseMatrix::addColVector(const_cast(b)); -} - -/* this = a*b */ -void CpuMatrix::mul(const Matrix& a, const Matrix& b) { - return mul(a, b, 1.0, 0.0); -} - -/* this = scaleAB*(this*b) + scaleT*this */ -void CpuMatrix::rightMul(Matrix& b, real scaleAB, real scaleT) { - (void)b; - (void)scaleAB; - (void)scaleT; - LOG(FATAL) << "Not implemented"; -} - -/* this = this* b */ -void CpuMatrix::rightMul(Matrix& b) { return rightMul(b, 1.0, 0.0); } - -/* this = scaleAB*(a*this) + scaleT*this */ -void CpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) { - (void)a; - (void)scaleAB; - (void)scaleT; - LOG(FATAL) << "Not implemented"; -} - -/* this = a*this) */ -void CpuMatrix::leftMul(Matrix& a) { return leftMul(a, 1.0, 0.0); } - -void CpuMatrix::colMerge(Matrix& src) { src.rowSum(*this); } - -void CpuMatrix::rowSum(Matrix& sum) { - CHECK_EQ(sum.getHeight(), getHeight()); - 
CHECK_EQ(sum.getWidth(), (size_t)1); - - sum.sumRows(*this, /* scaleSum= */ 1, /* scaleDest= */ 0); -} - -void CpuMatrix::rowMaxId(IVector& maxIds) { - CHECK(!maxIds.useGpu()) << "Matrix type are not equal"; - - size_t numSamples = getHeight(); - CHECK_EQ(maxIds.getSize(), numSamples); - - real* a = getData(); - int* s = maxIds.getData(); - size_t dim = getWidth(); - - for (size_t i = 0; i < numSamples; i++) { - real sm = a[i * dim]; - int maxId = 0; - for (size_t j = 1; j < dim; j++) { - if (a[i * dim + j] > sm) { - maxId = j; - sm = a[i * dim + j]; - } - } - s[i] = maxId; - } -} - -void CpuMatrix::rowMax(Matrix& max) { - CHECK_EQ(max.getHeight(), getHeight()); - CHECK_EQ(max.getWidth(), (size_t)1); - max.maxRows(*this); -} - -/* Get the top k elements of each row of this matrix */ -void CpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) { - CHECK(isContiguous()); - CHECK(!maxIds.useGpu() && !maxVal.useGpu()) << "Matrix type are not equal"; - size_t numSamples = getHeight(); - size_t beam = maxVal.getWidth(); - CHECK_EQ(maxIds.getSize(), numSamples * beam); - CHECK_EQ(maxVal.getHeight(), numSamples); - CHECK_EQ(maxVal.getWidth(), beam); - - real* a = getData(); - int* s = maxIds.getData(); - real* t = maxVal.getData(); - size_t dim = getWidth(); - for (size_t i = 0; i < numSamples; i++) { - std::vector> vec; - for (size_t j = 0; j < dim; j++) { - vec.push_back(std::pair(a[i * dim + j], j)); - } - - std::partial_sort( - vec.begin(), - vec.begin() + beam, - vec.end(), - [](const std::pair& l, const std::pair& r) { - return l.first > r.first; - }); - for (size_t j = 0; j < beam; j++) { - t[i * beam + j] = vec[j].first; - s[i * beam + j] = vec[j].second; - } - } -} - -void CpuMatrix::colMax(Matrix& max) { - CHECK_EQ(max.getWidth(), getWidth()); - CHECK_EQ(max.getHeight(), (size_t)1); - max.maxCols(*this); -} - -void CpuMatrix::colMax(IVector& maxIds, Matrix& maxVal) { - CHECK(isContiguous()); - CHECK(!maxIds.useGpu() && !maxVal.useGpu()) << "Matrix type are not 
equal"; - size_t numSamples = getWidth(); - size_t beam = maxVal.getHeight(); - CHECK_EQ(maxIds.getSize(), numSamples * beam); - CHECK_EQ(maxVal.getWidth(), numSamples); - - real* a = getData(); - int* s = maxIds.getData(); - real* t = maxVal.getData(); - size_t dim = getHeight(); - for (size_t i = 0; i < numSamples; i++) { - std::vector> vec; - for (size_t j = 0; j < dim; j++) { - vec.push_back(std::pair(a[i + j * numSamples], j)); - } - - std::partial_sort( - vec.begin(), - vec.begin() + beam, - vec.end(), - [](const std::pair& l, const std::pair& r) { - return l.first > r.first; - }); - for (size_t j = 0; j < beam; j++) { - t[i + j * numSamples] = vec[j].first; - s[i + j * numSamples] = vec[j].second; - } - } -} - -void CpuMatrix::maxoutForward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - CHECK(dynamic_cast(&a)); - CHECK(dynamic_cast(&id)); - CHECK_EQ(a.getHeight(), getHeight()); - - size_t size = getWidth(); - size_t batchSize = getHeight(); - size_t featLen = size / channels; - const real* input = a.getData(); - int* idForCpu = id.getData(); - - MatrixPtr maxInMat, maxOutMat; - Matrix::resizeOrCreate(maxInMat, groups, size, false, false); - Matrix::resizeOrCreate(maxOutMat, 1, size, false, false); - - for (size_t batch_idx = 0; batch_idx < batchSize; ++batch_idx) { - size_t newIndex = batch_idx * size; - IVectorPtr tmpId = IVector::create(idForCpu + newIndex, size, false); - - for (size_t i = 0; i < channels; ++i) { - size_t newFeatLen = i * featLen; - for (size_t j = 0; j < groups; ++j) { - maxInMat->subMatrix(j, j + 1, newFeatLen, newFeatLen + featLen) - ->copyFrom(input + (newIndex + newFeatLen) * groups + j * featLen, - featLen); - } - } - maxInMat->colMax(*tmpId, *maxOutMat); - this->subRowMatrix(batch_idx, batch_idx + 1)->copyFrom(*maxOutMat); - } -} - -void CpuMatrix::maxoutBackward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - CHECK(dynamic_cast(&a)); - CHECK(dynamic_cast(&id)); - CHECK_EQ(a.getHeight(), 
getHeight()); - - size_t size = a.getWidth(); - size_t batchSize = getHeight(); - size_t featLen = size / channels; - size_t newFeatLen = groups * featLen; - real* inputG = getData(); - const real* outG = a.getData(); - int* idForCpu = id.getData(); - - for (size_t batch_idx = 0; batch_idx < batchSize; ++batch_idx) { - size_t newIndex = batch_idx * size; - int* idData = idForCpu + newIndex; - - for (size_t i = 0; i < size; ++i) { - int gradIdx = - idData[i] * featLen + (i / featLen) * newFeatLen + i % featLen; - (inputG + newIndex * groups)[gradIdx] += (outG + newIndex)[i]; - } - } -} - -void CpuMatrix::rowNormalizeL1(Matrix& out) { - CHECK(!out.useGpu()); - - size_t numSamples = getHeight(); - size_t dim = getWidth(); - CHECK_EQ(out.getHeight(), numSamples); - CHECK_EQ(out.getWidth(), dim); - real* a = getData(); - real* b = out.getData(); - for (size_t i = 0; i < numSamples; ++i) { - real s = 0; - for (size_t j = 0; j < dim; ++j) { - s += a[i * dim + j]; - } - // Right now, we just bet that sum won't be zero. If this really happens, - // we will figure out what should be done then. 
- CHECK_GT(s, 0); - s = 1 / s; - for (size_t j = 0; j < dim; ++j) { - b[i * dim + j] = s * a[i * dim + j]; - } - } -} - -/* calulate classification error */ -void CpuMatrix::classificationError(Matrix& output, - IVector& label, - size_t topkSize) { - size_t numSamples = this->getHeight(); - auto cpuOutput = dynamic_cast(&output); - auto cpuLabel = dynamic_cast(&label); - IVectorPtr cpuTopIds = std::make_shared(numSamples * topkSize); - MatrixPtr cpuTopVal = std::make_shared(numSamples, topkSize); - - CHECK(cpuOutput && cpuLabel) << "Invalid argument pointer"; - CHECK(cpuTopIds && cpuTopVal) << "Allocate cpu memory failed"; - CHECK(cpuLabel->getSize() == numSamples) << "Vector size is not equal"; - CHECK(cpuOutput->getHeight() == numSamples && this->getWidth() == 1) - << "Matrix dimensions are not equal"; - - // top k matrix classification - cpuOutput->rowMax(*cpuTopIds, *cpuTopVal); - - size_t dim = cpuOutput->getWidth(); - real* result = this->getData(); - int* ids = cpuTopIds->getData(); - int* lbl = cpuLabel->getData(); - for (size_t i = 0; i < numSamples; ++i) { - CHECK_GE(lbl[i], 0); - CHECK_LT((size_t)lbl[i], dim); - - for (size_t j = 0; j < topkSize; ++j) { - if (ids[j + i * topkSize] == lbl[i]) { - result[i] = 0; - break; - } - result[i] = 1.0f; - } - } -} - -/* copy -log(output[label]) to this->data[i] */ -void CpuMatrix::oneHotCrossEntropy(Matrix& output, IVector& label) { - CHECK(dynamic_cast(&output)); - CHECK(dynamic_cast(&label)); - - size_t numSamples = getHeight(); - size_t dim = output.getWidth(); - CHECK_EQ(label.getSize(), numSamples); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(getWidth(), (size_t)1); - - real* out = output.getData(); - real* cost = getData(); - int* lbl = label.getData(); - for (size_t i = 0; i < numSamples; ++i, out += dim) { - CHECK_GE(lbl[i], 0); - CHECK_LT((size_t)lbl[i], dim); - cost[i] = -std::log(out[lbl[i]]); - } -} - -/* calculate the error of outputV according to label */ -void 
CpuMatrix::oneHotCrossEntropyBp(Matrix& output, IVector& label) { - CHECK(dynamic_cast(&output)); - CHECK(dynamic_cast(&label)); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - CHECK_EQ(output.getWidth(), dim); - real* out = output.getData(); - real* grad = getData(); - int* lbl = label.getData(); - for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim) { - grad[lbl[i]] -= 1 / out[lbl[i]]; - } -} - -/* - We implement the matrix functionality in CostLayer.cpp, - but we define the scalar function here for sanity check - deletion of the function does not affect anything neverthelss -*/ -void CpuMatrix::oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha) { - CHECK(dynamic_cast(&output)); - CHECK(dynamic_cast(&label)); - - size_t numSamples = getHeight(); - size_t dim = output.getWidth(); - CHECK_EQ(label.getSize(), numSamples); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(getWidth(), (size_t)1); - - real* out = output.getData(); - real* cost = getData(); - int* lbl = label.getData(); - for (size_t i = 0; i < numSamples; ++i, out += dim) { - CHECK_GE(lbl[i], 0); - CHECK_LT((size_t)lbl[i], dim); - real sum = 0; - for (size_t j = 0; j < dim; ++j) { - sum += out[j]; - } - sum = _safelog(sum); - cost[i] = -_safelog(out[lbl[i]]) + sum + alpha * _square(sum); - } -} - -/* - We implement the matrix functionality in CostLayer.cpp, - but we define the scalar function here for sanity check - deletion of the function does not affect anything neverthelss -*/ -void CpuMatrix::oneHotCrossEntropyWithSelfNormBp(Matrix& output, - IVector& label, - real alpha) { - CHECK(dynamic_cast(&output)); - CHECK(dynamic_cast(&label)); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - CHECK_EQ(output.getWidth(), dim); - real* out = output.getData(); - real* grad = getData(); - int* lbl = label.getData(); - - for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim) { - grad[lbl[i]] -= 1 / out[lbl[i]]; - real sum = 0; 
- for (size_t j = 0; j < dim; ++j) { - sum += out[j]; - } - for (size_t j = 0; j < dim; ++j) { - if (j == (size_t)lbl[i]) { - grad[j] += -1 / out[j]; - } - grad[j] += 1 / sum + 2 * alpha * _safelog(sum) / sum; - } - } -} - -#define FORWARD_LOOP() \ - size_t numSamples = getHeight(); \ - size_t dim = getWidth(); \ - CHECK_EQ(output.getHeight(), numSamples); \ - CHECK_EQ(output.getWidth(), dim); \ - const real* in = getData(); \ - real* out = output.getData(); \ - for (size_t i = 0; i < numSamples; ++i, in += dim, out += dim) - -#define BACKWARD_LOOP() \ - size_t numSamples = getHeight(); \ - size_t dim = getWidth(); \ - CHECK_EQ(output.getHeight(), numSamples); \ - CHECK_EQ(output.getWidth(), dim); \ - real* grad = getData(); \ - real* out = output.getData(); \ - for (size_t i = 0; i < numSamples; ++i, grad += dim, out += dim) - -void CpuMatrix::softmax(Matrix& output) { - CHECK(!output.useGpu()); - - const float THRESHOLD = -64.0; - - FORWARD_LOOP() { - real max = -1.0e20; - for (size_t j = 0; j < dim; ++j) { - if (in[j] > max) { - max = in[j]; - } - } - for (size_t j = 0; j < dim; ++j) { - real a = in[j] - max; - if (a < THRESHOLD) { - a = THRESHOLD; - } - out[j] = a; - } - vExp(dim, out, out); - - real sum = 0; - for (size_t j = 0; j < dim; ++j) { - sum += out[j]; - } - sum = 1 / sum; - for (size_t j = 0; j < dim; ++j) { - out[j] *= sum; - } - } -} - -void CpuMatrix::sequenceSoftmax(Matrix& output, const IVector& index) { - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output.getWidth(), 1UL); - CHECK(isContiguous()); - - MatrixPtr inTmp = Matrix::create(nullptr, - /* height= */ 1, - 1, - /* trans= */ false, - false); - MatrixPtr outTmp = Matrix::create(nullptr, - /* height= */ 1, - 1, - /* trans= */ false, - false); - size_t numSequences = index.getSize() - 1; - auto starts = index.getData(); - for (size_t i = 0; i < numSequences; ++i) { - size_t offset = starts[i]; - size_t size = starts[i + 1] - starts[i]; - inTmp->setData(getData() + offset, 1UL, size); - 
outTmp->setData(output.getData() + offset, 1UL, size); - inTmp->softmax(*outTmp); - } -} - -void CpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { - CHECK(output.useGpu_ == false) << "Matrix type are not equal"; - CHECK_EQ(getHeight(), sftmaxSum.getHeight()); - - real* sums = sftmaxSum.getData(); - - BACKWARD_LOOP() { - real sum = sums[i]; - for (size_t j = 0; j < dim; ++j) { - grad[j] = out[j] * (grad[j] - sum); - } - } -} - -void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) { - CHECK(output.useGpu_ == false && label.useGpu_ == false) - << "Matrix type are not equal"; - - size_t numSamples = getHeight(); - size_t dim = output.getWidth(); - CHECK_EQ(label.getHeight(), numSamples); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(label.getWidth(), dim); - CHECK_EQ(getWidth(), (size_t)1); - real* out = output.getData(); - real* cost = getData(); - - auto labelptr = dynamic_cast(&label); - if (labelptr) { - // it is a CpuSparseMatrix - if (labelptr->getFormat() == SPARSE_CSR) { - // treat label as a SparseMatrix - for (size_t i = 0; i < numSamples; ++i) { - for (size_t j = 0; j < dim; ++j) { - cost[i] += _square(out[i * dim + j]); - } - } - if (labelptr->getValueType() == NO_VALUE) { - int* cols = labelptr->getCols(); - for (size_t i = 0; i < numSamples; ++i) { - for (size_t j = labelptr->getRowStartIdx(i); - j < labelptr->getRowStartIdx(i + 1); - ++j) { - cost[i] += 1.0 - 2.0 * out[i * dim + cols[j]]; - /* - * explanation of above line: original codes are follows: - * cost[i] -= _square(out[i * dim + feature.col]); - * cost[i] += _square(1.0 - out[i * dim + feature.col]); - */ - } - } - } else if (labelptr->getValueType() == FLOAT_VALUE) { - int* cols = labelptr->getCols(); - real* values = labelptr->getValue(); - for (size_t i = 0; i < numSamples; ++i) { - real sum1 = 0; - real sum2 = 0; - for (size_t j = labelptr->getRowStartIdx(i); - j < labelptr->getRowStartIdx(i + 1); - ++j) { - sum1 += values[j] * values[j]; - sum2 += values[j] * 
out[i * dim + cols[j]]; - /* - * explanation of above line: original codes are follows: - * cost[i] -= _square(out[i * dim + feature.col]); - * cost[i] += _square(value.col - out[i * dim + feature.col]); - */ - } - cost[i] += sum1 - 2.0 * sum2; - } - } else { - LOG(FATAL) << "unsupported sparse matrix value type in sumOfSquares"; - return; - } - return; - } else { - LOG(FATAL) << "unsupported sparse matrix format in sumOfSquares"; - return; - } - } - - BaseMatrix::sumOfSquaredDiffs(output, - label, - /* scaleSum= */ 1, - /* scaleDest= */ 1); -} - -/* calculate the error of outputV according to label */ -void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) { - CHECK(output.useGpu_ == false && label.useGpu_ == false) - << "Matrix type are not equal"; - - size_t numSamples = getHeight(); - size_t dim = getWidth(); - CHECK_EQ(output.getWidth(), dim); - CHECK_EQ(label.getWidth(), dim); - - real* out = output.getData(); - real* grad = getData(); - - auto labelptr = dynamic_cast(&label); - if (labelptr) { - // it is a CpuSparseMatrix - if (labelptr->getFormat() == SPARSE_CSR) { - // treat label as a SparseMatrix - for (size_t i = 0; i < numSamples; ++i) { - for (size_t j = 0; j < dim; ++j) { - grad[i * dim + j] += 2.0 * out[i * dim + j]; - } - } - if (labelptr->getValueType() == NO_VALUE) { - int* cols = labelptr->getCols(); - for (size_t i = 0; i < numSamples; ++i) { - for (size_t j = labelptr->getRowStartIdx(i); - j < labelptr->getRowStartIdx(i + 1); - ++j) { - grad[i * dim + cols[j]] -= 2.0; - /* - * explanation of above line: original codes are follows: - * grad[i * dim + feature.col] -= 2.0 * out[i * dim + feature.col]; - * grad[i * dim + feature.col] += 2.0 * (out[i * dim + feature.col] - * - 1); - */ - } - } - } else if (labelptr->getValueType() == FLOAT_VALUE) { - int* cols = labelptr->getCols(); - real* values = labelptr->getValue(); - for (size_t i = 0; i < numSamples; ++i) { - for (size_t j = labelptr->getRowStartIdx(i); - j < 
labelptr->getRowStartIdx(i + 1); - ++j) { - grad[i * dim + cols[j]] -= 2.0 * values[j]; - /* - * explanation of above line: original codes are follows: - * grad[i * dim + feature.col] -= 2.0 * out[i * dim + feature.col]; - * grad[i * dim + feature.col] += 2.0 * (out[i * dim + feature.col] - * - value.col); - */ - } - } - } else { - LOG(FATAL) << "unsupported sparse matrix value type in sumOfSquares"; - return; - } - return; - } else { - LOG(FATAL) << "unsupported sparse matrix format in sumOfSquares"; - return; - } - } - - real* lbl = label.getData(); - size_t ld = getStride(); - size_t outLd = output.getStride(); - size_t lblLd = label.getStride(); - CHECK(lbl); - for (size_t i = 0; i < numSamples; - ++i, out += outLd, lbl += lblLd, grad += ld) { - for (size_t j = 0; j < dim; ++j) { - grad[j] += 2.0 * (out[j] - lbl[j]); // positive gradient; - } - } -} - -void CpuMatrix::smoothL1(Matrix& output, Matrix& label, real destScale) { - CHECK(output.useGpu_ == false && label.useGpu_ == false) - << "Matrix type are not equal"; - - size_t numSamples = getHeight(); - size_t dim = output.getWidth(); - CHECK_EQ(label.getHeight(), numSamples); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(label.getWidth(), dim); - CHECK_EQ(getWidth(), (size_t)1); - - real* cost = getData(); - real* out = output.getData(); - real* lbl = label.getData(); - - for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) { - for (size_t j = 0; j < dim; ++j) { - real absVal = std::fabs(out[j] - lbl[j]); - cost[i] *= destScale; - if (absVal < 1.0) - cost[i] += 0.5 * absVal * absVal; - else - cost[i] += absVal - 0.5; - } - } -} - -void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label, real destScale) { - CHECK(output.useGpu_ == false && label.useGpu_ == false) - << "Matrix type are not equal"; - - size_t numSamples = getHeight(); - size_t dim = output.getWidth(); - CHECK_EQ(label.getHeight(), numSamples); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(label.getWidth(), dim); - 
CHECK_EQ(getWidth(), dim); - - real* out = output.getData(); - real* lbl = label.getData(); - real* grad = getData(); - - for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) { - for (size_t j = 0; j < dim; ++j) { - real val = out[j] - lbl[j]; - grad[j] *= destScale; - if (std::fabs(val) < 1) { - grad[j] += val; - } else { - grad[j] += (real(0) < val) - (val < real(0)); - } - } - } -} - -void CpuMatrix::tanh(Matrix& output) { - CHECK(isContiguous()); - CHECK(output.isContiguous()); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(output.getWidth(), dim); - vTanh(numSamples * dim, getData(), output.getData()); -} - -void CpuMatrix::tanhDerivative(Matrix& output) { - BaseMatrix::tanhDerivative(output); -} - -void CpuMatrix::softrelu(Matrix& output) { - CHECK(isContiguous()); - CHECK(output.isContiguous()); - const real THRESHOLD = 40.0; - FORWARD_LOOP() { // TODO(yuyang18): SIMD it? - for (size_t j = 0; j < dim; ++j) { - real x = in[j]; - if (x > THRESHOLD) { - x = THRESHOLD; - } else if (x < -THRESHOLD) { - x = -THRESHOLD; - } - out[j] = x; - } - } - vExp(numSamples * dim, output.getData(), output.getData()); - vLog1p(numSamples * dim, output.getData(), output.getData()); -} - -void CpuMatrix::softreluDerivative(Matrix& output) { - CHECK(isContiguous()); - CHECK(output.isContiguous()); - size_t numSamples = getHeight(); - size_t dim = getWidth(); - size_t size = numSamples * dim; - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(output.getWidth(), dim); - real* grad = getData(); - MatrixPtr tmpMat = Matrix::create(numSamples, dim); - real* tmp = tmpMat->getData(); - - vExp(size, output.getData(), tmpMat->getData()); - - for (size_t i = 0; i < size; ++i) { - grad[i] *= (1.0 - 1.0 / tmp[i]); - } -} - -void CpuMatrix::scaledTanh(Matrix& output, real p1, real p2) { - CHECK(isContiguous()); - CHECK(output.isContiguous()); - size_t numSamples = getHeight(); - size_t dim = 
getWidth(); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(output.getWidth(), dim); - - const real* in = getData(); - real* out = output.getData(); - - // out = p2*in - for (size_t i = 0; i < numSamples * dim; ++i) { - out[i] = p2 * in[i]; - } - - vTanh(numSamples * dim, out, out); - - // out = p1 * out - for (size_t i = 0; i < numSamples * dim; ++i) { - out[i] = p1 * out[i]; - } -} - -/* uniform randomization, minimize precision = 1e-5 */ -void CpuMatrix::randomizeUniform() { - CHECK(isContiguous()); - real* data = getData(); - unsigned int* randSeed = ThreadLocalRand::getSeed(); - real recipRandMax = 1.0f / (real)RAND_MAX; - for (size_t i = 0; i < elementCnt_; ++i) { - *data++ = rand_r(randSeed) * recipRandMax; - } -} - -void CpuMatrix::print(std::ostream& os) const { - CHECK(isContiguous()); - for (size_t i = 0; i < height_; ++i) { - for (size_t j = 0; j < width_; ++j) { - os << data_[i * width_ + j] << " "; - } - os << std::endl; - } -} - -void CpuMatrix::paramReluForward(Matrix& data, Matrix& W) { - real* input = data.getData(); - real* w = W.getData(); - real* output = data_; - size_t numElements = data.getWidth(); - size_t numSamples = data.getHeight(); - size_t paraSize = W.getHeight() * W.getWidth(); - CHECK(!(numElements % paraSize)); // this check from ParameterReluLayer::init - - size_t partial_sum = numElements / paraSize; - if (paraSize == numElements) { - for (size_t n = 0; n < numSamples * numElements; ++n) { - output[n] = input[n] > 0 ? input[n] : input[n] * w[n % numElements]; - } - return; - } - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) - for (size_t n = 0; n < numSamples; ++n) { - for (size_t i = 0; i < paraSize; i++) { - neon::prelu( - input + i * partial_sum, w[i], output + i * partial_sum, partial_sum); - } - input = input + numElements; - output = output + numElements; - } -#else - for (size_t n = 0, k = 0; n < numSamples; ++n) { - for (size_t i = 0; i < numElements; ++i, ++k) { - output[k] = input[k] > 0 ? 
input[k] : input[k] * w[i / partial_sum]; - } - } -#endif -} - -void CpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) { - real* ograd = oGrad.getData(); - real* input = data.getData(); - real* wgrad = data_; - size_t numElements = data.getWidth(); - size_t numSamples = data.getHeight(); - size_t paraSize = this->getHeight() * this->getWidth(); - CHECK(!(numElements % paraSize)); // this check from ParameterReluLayer::init - size_t partial_sum = numElements / paraSize; - for (size_t n = 0, k = 0; n < numSamples; ++n) { - for (size_t i = 0; i < numElements; ++i, ++k) { - wgrad[i / partial_sum] += ograd[k] * (input[k] > 0 ? 0 : input[k]); - } - } -} - -void CpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) { - real* diff = data_; - real* input = data.getData(); - real* ograd = oGrad.getData(); - real* w = W.getData(); - size_t numElements = data.getWidth(); - size_t numSamples = data.getHeight(); - size_t paraSize = W.getHeight() * W.getWidth(); - CHECK(!(numElements % paraSize)); // this check from ParameterReluLayer::init - size_t partial_sum = numElements / paraSize; - for (size_t n = 0, k = 0; n < numSamples; ++n) { - for (size_t i = 0; i < numElements; ++i, ++k) { - diff[k] += ograd[k] * (input[k] > 0 ? 1 : w[i / partial_sum]); - } - } -} - -void CpuMatrix::print(std::ostream& os, size_t height, size_t width) const { - CHECK(isContiguous()); - size_t h = height_ < height ? height_ : height; - size_t w = width_ < width ? 
width_ : width; - os.setf(std::ostream::scientific); - os << "["; - for (size_t i = 0; i < h; ++i) { - for (size_t j = 0; j < w; ++j) { - os << data_[i * width_ + j] << " "; - } - if (i == h - 1) { - os << "]"; - } - os << std::endl; - } -} - -void CpuMatrix::printOneRow(std::ostream& os, size_t idx) const { - CHECK_LT(idx, height_); - size_t offset = idx * stride_; - os << data_[offset]; - for (size_t i = 1; i < width_; ++i) { - os << " " << data_[offset + i]; - } - os << ";"; -} - -void CpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) { - CHECK(isContiguous()); - CHECK(height_ == refMat.getHeight()); - CHECK(width_ == refMat.getWidth()); - CpuMatrix cpuRef(height_, width_); - cpuRef.copyFrom(refMat); - size_t diffCnt = 0; - for (size_t i = 0; i < height_; ++i) { - for (size_t j = 0; j < width_; ++j) { - real a = getElement(i, j); - real b = cpuRef.getElement(i, j); - if (fabs(a - b) > 0.00001) { - ++diffCnt; - if (printDiff) { - os << "ref= " << a << " check= " << b << std::endl; - } - } - } - } - LOG(INFO) << "the diffCnt is " << diffCnt; -} - -real CpuMatrix::getMin() { - size_t size = getHeight() * getWidth(); - real* data = getData(); - real res = data[0]; - for (size_t i = 1; i < size; ++i) { - if (res > data[i]) { - res = data[i]; - } - } - return res; -} - -real CpuMatrix::getMax() { - size_t size = getHeight() * getWidth(); - real* data = getData(); - real res = data[0]; - for (size_t i = 1; i < size; ++i) { - if (res < data[i]) { - res = data[i]; - } - } - return res; -} - -void CpuMatrix::circularConv(Matrix& in0, Matrix& in1) { - size_t height = this->getHeight(); - size_t width0 = this->getWidth(); - size_t width1 = in1.getWidth(); - - CHECK_EQ(height, in0.getHeight()); - CHECK_EQ(width0, in0.getWidth()); - CHECK_EQ(height, in1.getHeight()); - - CHECK_EQ(width1 % 2, 1U); - - real* outV = this->getData(); - real* inV0 = in0.getData(); - real* inV1 = in1.getData(); - - int leftCtxLen = (width1 - 1) / 2; - for (size_t x = 0; x < height; 
- ++x, outV += width0, inV0 += width0, inV1 += width1) { - for (size_t i = 0; i < width0; ++i) { // each dimension of output - for (size_t j = 0; j < width1; ++j) { - // iterate over all dimentions of inV1 - int index = i + j - leftCtxLen; - index = (index + width0) % width0; - outV[i] += inV0[index] * inV1[j]; - } - } - } -} - -void CpuMatrix::circularConvDerivative( - Matrix& outG, Matrix& in0, Matrix& in1, Matrix& inG0, Matrix& inG1) { - size_t height = in0.getHeight(); - size_t width0 = in0.getWidth(); - size_t width1 = in1.getWidth(); - - CHECK_EQ(height, in1.getHeight()); - CHECK_EQ(height, inG0.getHeight()); - CHECK_EQ(width0, inG0.getWidth()); - CHECK_EQ(height, inG1.getHeight()); - CHECK_EQ(width1, inG1.getWidth()); - CHECK_EQ(height, outG.getHeight()); - CHECK_EQ(width0, outG.getWidth()); - - real* outGV = outG.getData(); - real* inV0 = in0.getData(); - real* inV1 = in1.getData(); - real* inGV0 = inG0.getData(); - real* inGV1 = inG1.getData(); - - int leftCtxLen = (width1 - 1) / 2; - for (size_t x = 0; x < height; ++x, - outGV += width0, - inV0 += width0, - inV1 += width1, - inGV0 += width0, - inGV1 += width1) { - for (size_t j = 0; j < width1; ++j) { // iterate over width1 - for (size_t i = 0; i < width0; ++i) { - // such over all dimensions of outG - int index = i + j - leftCtxLen; - index = (index + width0) % width0; - inGV0[index] += outGV[i] * inV1[j]; - inGV1[j] += outGV[i] * inV0[index]; - } - } - } -} - -void CpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { - CHECK(dynamic_cast(&output)); - auto labelPtr = dynamic_cast(&label); - CHECK(labelPtr); - - size_t numSamples = getHeight(); - size_t dim = output.getWidth(); - CHECK_EQ(numSamples, output.getHeight()); - CHECK_EQ(numSamples, labelPtr->getHeight()); - CHECK_EQ(dim, labelPtr->getWidth()); - - real* out = output.getData(); - real* cost = getData(); - for (size_t i = 0; i < numSamples; ++i, out += dim) { - for (size_t j = 0; j < dim; ++j) { - CHECK(out[j] > 0 && out[j] < 
1.0); - cost[i] -= std::log(1 - out[j]); - } - - const int* cols = labelPtr->getRowCols(i); - for (size_t j = 0; j < labelPtr->getColNum(i); ++j) { - CHECK_LT(size_t(cols[j]), dim); - cost[i] -= std::log(out[cols[j]] / (1 - out[cols[j]])); - } - } -} - -void CpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { - CHECK(dynamic_cast(&output)); - auto labelPtr = dynamic_cast(&label); - CHECK(labelPtr); - - size_t numSamples = getHeight(); - size_t dim = getWidth(); - CHECK_EQ(numSamples, output.getHeight()); - CHECK_EQ(numSamples, labelPtr->getHeight()); - CHECK_EQ(dim, output.getWidth()); - CHECK_EQ(dim, labelPtr->getWidth()); - - real* out = output.getData(); - real* grad = getData(); - for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim) { - for (size_t j = 0; j < dim; ++j) { - CHECK(out[j] > 0 && out[j] < 1.0); - grad[j] += 1.0 / (1 - out[j]); - } - - const int* cols = labelPtr->getRowCols(i); - for (size_t j = 0; j < labelPtr->getColNum(i); ++j) { - CHECK_LT(size_t(cols[j]), dim); - grad[cols[j]] -= 1.0 / (out[cols[j]] * (1 - out[cols[j]])); - } - } -} - -/* calculate the classification error for multi binary label */ -void CpuMatrix::classificationErrorMulti(Matrix& output, - Matrix& label, - real threshold) { - CHECK(dynamic_cast(&output)); - auto labelPtr = dynamic_cast(&label); - CHECK(labelPtr); - - size_t numSamples = getHeight(); - size_t dim = output.getWidth(); - CHECK_EQ(numSamples, output.getHeight()); - CHECK_EQ(numSamples, labelPtr->getHeight()); - CHECK_EQ(dim, labelPtr->getWidth()); - - real* out = output.getData(); - real* result = getData(); - for (size_t i = 0; i < numSamples; ++i, out += dim) { - real sum = 0.0; - for (size_t j = 0; j < dim; ++j) { - if (out[j] >= threshold) { - sum += 1.0; - } - } - - const int* cols = labelPtr->getRowCols(i); - for (size_t j = 0; j < labelPtr->getColNum(i); ++j) { - CHECK_LT(size_t(cols[j]), dim); - if (out[cols[j]] < threshold) { - sum += 1.0; - } else { - sum -= 1.0; - } - 
} - result[i] = sum / dim; - } -} - -void CpuMatrix::bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - CHECK(dynamic_cast(&in)); - - size_t outputW = getWidth(); - size_t batchSize = getHeight(); - size_t inputW = in.getWidth(); - size_t inputH = in.getHeight(); - size_t inPosOffset = inImgH * inImgW; - size_t outPosOffset = outImgH * outImgW; - (void)(inputH); - - real* outData = getData(); - const real* inData = in.getData(); - - if (inImgH == outImgH && inImgW == outImgW) { - this->copyFrom(in); - } else { - for (size_t k = 0; k < batchSize; ++k) { // loop for batches - for (size_t i = 0; i < outImgH; ++i) { // loop for images - size_t h = ratioH * i; - size_t hid = (h < inImgH - 1) ? 1 : 0; - real h1lambda = ratioH * i - h; - real h2lambda = 1 - h1lambda; - - for (size_t j = 0; j < outImgW; ++j) { - size_t w = ratioW * j; - size_t wid = (w < inImgW - 1) ? 
1 : 0; - real w1lambda = ratioW * j - w; - real w2lambda = 1 - w1lambda; - // calculate four position for bilinear interpolation - const real* inPos = &inData[k * inputW + h * inImgW + w]; - real* outPos = &outData[k * outputW + i * outImgW + j]; - for (size_t c = 0; c < numChannels; ++c) { // loop for channels - // bilinear interpolation - outPos[0] = - h2lambda * (w2lambda * inPos[0] + w1lambda * inPos[wid]) + - h1lambda * (w2lambda * inPos[hid * inImgW] + - w1lambda * inPos[hid * inImgW + wid]); - inPos += inPosOffset; - outPos += outPosOffset; - } - } - } - } - } -} - -void CpuMatrix::bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - CHECK(dynamic_cast(&out)); - - size_t inputW = getWidth(); - size_t inputH = getHeight(); - size_t outputW = out.getWidth(); - size_t batchSize = out.getHeight(); - size_t inPosOffset = inImgH * inImgW; - size_t outPosOffset = outImgH * outImgW; - (void)(inputH); - - real* inGrad = getData(); - const real* outGrad = out.getData(); - - if (inImgH == outImgH && inImgW == outImgW) { - this->add(const_cast(out)); - } else { - for (size_t k = 0; k < batchSize; ++k) { // loop for batches - for (size_t i = 0; i < outImgH; ++i) { // loop for images - size_t h = ratioH * i; - size_t hid = (h < inImgH - 1) ? 1 : 0; - real h1lambda = ratioH * i - h; - real h2lambda = 1 - h1lambda; - for (size_t j = 0; j < outImgW; ++j) { - size_t w = ratioW * j; - size_t wid = (w < inImgW - 1) ? 
1 : 0; - real w1lambda = ratioW * j - w; - real w2lambda = 1 - w1lambda; - - real* inPos = &inGrad[k * inputW + h * inImgW + w]; - const real* outPos = &outGrad[k * outputW + i * outImgW + j]; - for (size_t c = 0; c < numChannels; ++c) { // loop for channels - inPos[0] += h2lambda * w2lambda * outPos[0]; - inPos[wid] += h2lambda * w1lambda * outPos[0]; - inPos[hid * inImgW] += h1lambda * w2lambda * outPos[0]; - inPos[hid * inImgW + wid] += h1lambda * w1lambda * outPos[0]; - inPos += inPosOffset; - outPos += outPosOffset; - } - } - } - } - } -} - -void CpuMatrix::vol2Col(real* data, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW) { - real* outData = getData(); - int outHeight = (height + 2 * paddingH - filterH) / strideH + 1; - int outWidth = (width + 2 * paddingW - filterW) / strideW + 1; - int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1; - - int channelsCol = channels * filterD * filterH * filterW; - for (int c = 0; c < channelsCol; ++c) { - int wOffset = c % filterW; - int hOffset = (c / filterW) % filterH; - int dOffset = (c / filterW / filterH) % filterD; - int cIn = c / filterW / filterH / filterD; - for (int d = 0; d < outDepth; ++d) { - for (int h = 0; h < outHeight; ++h) { - for (int w = 0; w < outWidth; ++w) { - int dPad = d * strideD - paddingD + dOffset; - int hPad = h * strideH - paddingH + hOffset; - int wPad = w * strideW - paddingW + wOffset; - - if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width && - dPad >= 0 && dPad < depth) - outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = - data[((cIn * depth + dPad) * height + hPad) * width + wPad]; - else - outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = 0; - } - } - } - } -} - -void CpuMatrix::col2Vol(real* trg, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - 
int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta) { - real* src = getData(); - int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1; - int outHeight = (height + 2 * paddingH - filterH) / strideH + 1; - int outWidth = (width + 2 * paddingW - filterW) / strideW + 1; - int channelsCol = channels * filterD * filterH * filterW; - for (int c = 0; c < channelsCol; ++c) { - int wOffset = c % filterW; - int hOffset = (c / filterW) % filterH; - int dOffset = (c / filterW / filterH) % filterD; - int cIm = c / filterW / filterH / filterD; - for (int d = 0; d < outDepth; ++d) { - for (int h = 0; h < outHeight; ++h) { - for (int w = 0; w < outWidth; ++w) { - int dPad = d * strideD - paddingD + dOffset; - int hPad = h * strideH - paddingH + hOffset; - int wPad = w * strideW - paddingW + wOffset; - if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width && - dPad >= 0 && dPad < depth) - trg[((cIm * depth + dPad) * height + hPad) * width + wPad] = - alpha * - src[((c * outDepth + d) * outHeight + h) * outWidth + w] + - beta * - trg[((cIm * depth + dPad) * height + hPad) * width + wPad]; - } - } - } - } -} - -//////////////////////////////////////////////////////////////// -// functions executed via cpu // -//////////////////////////////////////////////////////////////// - -void GpuMatrix::selectElements(Matrix& table, IVector& ids) { - execViaCpu2(&CpuMatrix::selectElements, *this, table, ids); -} -} // namespace paddle diff --git a/paddle/legacy/math/Matrix.h b/paddle/legacy/math/Matrix.h deleted file mode 100644 index ff4f4cfc2a41add1a06308556b38aba5bbdac884..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/Matrix.h +++ /dev/null @@ -1,2189 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include - -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -#include - -#include "BaseMatrix.h" -#include "MemoryHandle.h" -#include "Vector.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -/// TODO(tianbing), move to paddle/legacy/function/TensorType.h -enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 }; - -/** - * @brief matrix sparse_format . - * - * nnz represents nonzero number in sparse matrix. - * - * SPARSE_CSR: row major matrix. length of row is height_ + 1, each element - * represents row start index in Matrix. length of col and value are nnz. - * - * SPARSE_CSC: col major matrix. length of col is width_ + 1, each element - * represents col start index in Matrix. length of col and value are nnz. 
- * - * @code - * for example: [0, 1, 0, 2, 0; - * 1, 0, 0, 0, 0; - * 0, 0, 0, 2, 5]; - * SPARSE_CSR row [0, 2, 3, 5]; - * col [1, 3, 0, 3, 4]; - * value [1, 2, 1, 2, 5] - * SPARSE_CSC col [0, 1, 2, 2, 4, 5]; - * row [1, 0, 0, 2, 2]; - * value [1, 1, 2, 2, 5] - * @endcode - */ -/// TODO(tianbing), move to paddle/legacy/function/TensorType.h -enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 }; - -class Matrix; -class GpuMatrix; -class CpuMatrix; -class CpuSparseMatrix; -class GpuSparseMatrix; -typedef std::shared_ptr MatrixPtr; -typedef std::shared_ptr GpuMatrixPtr; -typedef std::shared_ptr CpuMatrixPtr; -typedef std::shared_ptr GpuSparseMatrixPtr; -typedef std::shared_ptr CpuSparseMatrixPtr; - -/** - * Copy or assignemnt constructor will share the data as opposed to making a - * copy of the original data. To make a copy of the orinal data, use copyFrom() - * instead. - */ -class Matrix : public BaseMatrix { - protected: - Matrix(MemoryHandlePtr memHandle, - size_t height, - size_t width, - bool trans, - bool use_gpu); - - Matrix(real* data, size_t height, size_t width, bool trans, bool use_gpu); - - Matrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans, - bool use_gpu); - - static ThreadLocal tmpMat_; - - public: - size_t elementCnt_; // maximal number of elements which can be held in data_ - MemoryHandlePtr memoryHandle_; - - public: - virtual ~Matrix() {} - - static MatrixPtr create(MemoryHandlePtr memHandle, - size_t height, - size_t width, - bool trans = false); - static MatrixPtr create(size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - static MatrixPtr create(real* data, - size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - static MatrixPtr create(real* data, - size_t height, - size_t width, - size_t stride, - bool trans = false, - bool useGpu = false); - - static MatrixPtr createSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = 
FLOAT_VALUE, - bool trans = false, - bool useGpu = false); - static MatrixPtr createSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = FLOAT_VALUE, - SparseFormat foramt = SPARSE_CSR, - bool trans = false, - bool useGpu = false); - - static MatrixPtr createSparseMatrix(real* data, - int* row, - int* col, - size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType, /*value type*/ - SparseFormat format, - bool trans, - bool useGpu); - - static void resizeOrCreateSparseMatrix( - MatrixPtr& matrix, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = FLOAT_VALUE, - SparseFormat foramt = SPARSE_CSR, - bool trans = false, - bool useGpu = false); - - static void resizeOrCreate(MatrixPtr& a, - size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - - /** - * @brief set the data buffer used to hold the matrix data. - * - * caller should make sure that the size of data is at least - * sizeof(real)*height*width. - */ - void setData(real* data) { - BaseMatrix::setData(data); - memoryHandle_.reset(); - } - - /// the data should be contiguous - void setData(real* data, size_t newHeight, size_t newWidth) { - setData(data); - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newHeight * newWidth; - stride_ = width_; - } - - size_t getWidth() const { return width_; } - size_t getHeight() const { return height_; } - size_t getStride() const { return stride_; } - size_t getElementCnt() const { return elementCnt_; } - virtual real* getData() { return data_; } - virtual const real* getData() const { return data_; } - bool isTransposed() const { return trans_; } - bool isContiguous() const { return stride_ == width_ || height_ == 1; } - - // If sparse matrix, need to dynamic_cast to CpuSparseMatrix/GpuSparseMatrix - // befor call the following functions. - // Declare these functions in the base class just easy to call them. 
- // And these declarations should be moved to base class of sparse matrix - // if refactor sparse matrix - virtual int* getRows() const { - LOG(FATAL) << "Not implemented"; - return nullptr; //! suppress warning for no return value. - } - - virtual int* getCols() const { - LOG(FATAL) << "Not implemented"; - return nullptr; //! suppress warning for no return value. - } - - virtual SparseFormat getFormat() const { - LOG(FATAL) << "Not implemented"; - return SPARSE_CSR; //! suppress warning for no return value. - } - - virtual SparseValueType getValueType() const { - LOG(FATAL) << "Not implemented"; - return NO_VALUE; //! suppress warning for no return value. - } - - /** - * @brief matrix elment-wise add - * - * Named add3 just because add/add2 has been used in BaseMatrix.cu - * and they are not virtual function. - */ - virtual void add3(MatrixPtr b) { LOG(FATAL) << "Not implemented"; } - - MemoryHandlePtr getMemoryHandle() const { return memoryHandle_; } - - virtual void zeroMem() { LOG(FATAL) << "Not implemented"; } - - virtual void resetOne() { LOG(FATAL) << "Not implemented"; } - - void setDiag(real value); - - virtual void copyFrom(const Matrix& src) { LOG(FATAL) << "Not implemented"; } - - virtual void trimFrom(const CpuSparseMatrix& src) { - LOG(FATAL) << "Not implemented"; - } - - // For GpuMatrix this is an asynchronous copy interface - // For CpuMatrix this is an synchronous copy interface - virtual void copyFrom(const Matrix& src, hl_stream_t stream) { - LOG(FATAL) << "Not implemented"; - } - - MatrixPtr subMatrix(size_t startRow, - size_t endRow, - size_t startCol, - size_t endCol); - - MatrixPtr subRowMatrix(size_t startRow, size_t endRow) { - return subMatrix(startRow, endRow, 0, getWidth()); - } - - MatrixPtr subColMatrix(size_t startCol, size_t endCol) { - return subMatrix(0, getHeight(), startCol, endCol); - } - - virtual MatrixPtr subMatrix(size_t startRow, size_t numRows) { - CHECK_LE(startRow + numRows, getHeight()); - return 
Matrix::create(getData() + startRow * getWidth(), - numRows, - getWidth(), - trans_, - useGpu_); - } - virtual MatrixPtr subMatrix(size_t startRow, size_t numRows, MatrixPtr dest) { - CHECK_LE(startRow + numRows, getHeight()); - CHECK_EQ(useGpu_, dest->useGpu_); - dest->setData(this->rowBuf(startRow), numRows, getWidth()); - return dest; - } - - /** - * If this is GpuMatrix, src is assumed to be CPU memory - * - * If this is CpuMatrix, src is assumed to be CPU memory - */ - virtual void copyFrom(const real* src, size_t size) { - LOG(FATAL) << "Not implemented"; - } - - virtual void copyFrom(const real* src, const int64_t* seq) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief convert a int vector to a real matrix. - * - * (1) source and dest are both in CPU. - * - * (2) sizes are exactly match. - */ - virtual void copyFrom(const IVector& src) { - LOG(FATAL) << "copy data from int vector only available on CpuMatrix."; - } - - virtual void copyByRowIndex(Matrix& b, const IVector& rowIndex) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief Create a matrix with the same type (GpuMatrix, CpuMatrix, - * NonValueSparseMatrix, etc.) as this. - * - * If height and width is zero, the new matrix will have the same size - * as this, otherwise the new matrix will have the specified size. - * - */ - virtual MatrixPtr clone(size_t height = 0, - size_t width = 0, - bool useGpu = false) { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - virtual real* getRowBuf(size_t row) { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - virtual real getElement(size_t x, size_t y) const { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual real getSum() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual void accumulateColSum(Matrix& src) { - LOG(FATAL) << "Not implemented"; - } - - virtual real getAbsSum() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - /** - * @note Original data may not be preserved after resize(). 
- */ - virtual void resize(size_t newHeight, size_t newWidth) = 0; - - /** - * @note This should only be used for sparse matrix. - */ - virtual void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* total item used to allocate space */ - SparseValueType valueType, - SparseFormat format) = 0; - - /** - * @brief This should only be used for sparse matrix. - * - * Currently must be called for each row in order. - * The matrix is not valid until setRow is called for the last row. - */ - virtual void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) = 0; - - virtual MatrixPtr getTranspose() = 0; - - /** - * @brief hard transpose. - * - * allocate matTrans' memory outside, then set memAlloc as false; - * else set as true. - */ - virtual void transpose(MatrixPtr& matTrans, bool memAlloc) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief rotate 90 degrees in clock-wise if clockWise=true; - * otherwise rotate in anti clock-wise - * clock-wise: - * \f[ - * y(j,i) = x(M-i-1,j) - * \f] - * anti clock-wise: - * \f[ - * y(j,i) = x(i, N-1-j) - * \f] - * where \f$x\f$ is (M x N) input, and \f$y\f$ is (N x M) output. - * - * allocate matRot' memory outside, then set memAlloc as false; - * else set as true. - */ - virtual void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { - LOG(FATAL) << "Not implemented"; - } - - virtual MatrixPtr getInverse() { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - /** - * @brief inverse. - * - * if allocate matInv's memory outside, then set memAlloc as false; - * else set as true. - */ - virtual void inverse(MatrixPtr& matInv, bool memAlloc) { - LOG(FATAL) << "Not implemented"; - } - - public: - /// Only set all variables to 0 or NULL but not free them. - virtual void clear() { - height_ = 0; - width_ = 0; - data_ = NULL; - } - - void reshape(size_t height, size_t width); - - /// add b to each sample of this. 
- virtual void addBias(Matrix& b, real scale) { - LOG(FATAL) << "Not implemented"; - } - - virtual void addSharedBias(Matrix& b, real scale) { - LOG(FATAL) << "Not implemented"; - } - - void addBias(Matrix& b, real scale, bool sharedBias) { - if (!sharedBias) { - addBias(b, scale); - } else { - addSharedBias(b, scale); - } - } - - /// add each sample from a to this. - virtual void collectBias(Matrix& a, real scale) { - LOG(FATAL) << "Not implemented"; - } - - virtual void collectSharedBias(Matrix& a, real scale) { - LOG(FATAL) << "Not implemented"; - } - - void collectBias(Matrix& a, real scale, bool sharedBias) { - if (!sharedBias) { - collectBias(a, scale); - } else { - collectSharedBias(a, scale); - } - } - - virtual void sequenceAvgForward(Matrix& a, - const IVector& startsPos, - int mode) { - LOG(FATAL) << "Not implemented"; - } - - virtual void sequenceAvgBackward(Matrix& a, - const IVector& startsPos, - int mode) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = scaleAB*(a*b) + scaleT*this - * @endcode - */ - virtual void mul(const Matrix& a, - const Matrix& b, - real scaleAB, - real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /// Add a vector (column) b to matrix a, column by column. 
- virtual void addColumnVector(const Matrix& b) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * For j < codeLength: - * this(i, j) += vec(index(i, j), 0) - * where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1 - * @endcode - */ - virtual void addByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& vec) { - (void)numClasses; - (void)codes; - (void)vec; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * vec(index(i, j), 0) += this(i, j) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void addByBitCodeBackward(size_t numClasses, - const IVector& codes, - Matrix& vec) { - (void)numClasses; - (void)codes; - (void)vec; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * this(i, j) += - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& mat, - const Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * mat.row(index(i, j)) += this(i, j) * input.row(i) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCodeBackwardWeight(size_t numClasses, - const IVector& codes, - Matrix& mat, - const Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * input.row(i) += this(i, j) * mat.row(index(i, j)) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCodeBackwardError(size_t numClasses, - const IVector& codes, - const Matrix& mat, - Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength - * sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, 
j) - * where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0 - * @endcode - */ - virtual void sumByBitCode(size_t numClasses, - IVector& codes, - Matrix& sum, - real scaleSum) { - (void)numClasses; - (void)codes; - (void)sum; - (void)scaleSum; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength - * this(i, j) -= bit(i, j) - * where bit(i, j) is same as that for sumByBitCode - * @endcode - */ - virtual void subByBitCode(size_t numClasses_, IVector& codes) { - (void)numClasses_; - (void)codes; - LOG(FATAL) << "Not implemeted"; - } - - /** - * add the sum of each row of this to mat - */ - virtual void rowSum(Matrix& sum) { - (void)sum; - LOG(FATAL) << "Not implemeted"; - } - - /** - * set the max of each row of this to mat - */ - virtual void rowMax(Matrix& max) { - (void)max; - LOG(FATAL) << "Not implemeted"; - } - - /** - * set the max of each column of this to mat - */ - virtual void colMax(Matrix& max) { LOG(FATAL) << "not implemented"; } - - /** - * @brief Get the top k elements of each column of this matrix. - * - * The row ids and values of these elements are stored in - * maxIds and max respectively. where k is the size of maxIds. - * And note that the top k elements are not sorted. - */ - virtual void colMax(IVector& maxIds, Matrix& maxVal) { - LOG(FATAL) << "not implemented"; - } - - virtual void maxoutForward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - LOG(FATAL) << "not implemented"; - } - - virtual void maxoutBackward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - LOG(FATAL) << "not implemented"; - } - - virtual void rowMaxId(IVector& maxIds) { LOG(FATAL) << "Not implemented"; } - - /** - * @brief Get the top k elements of each row of this matrix. - * - * The column ids and values of these elements are stored in - * maxIds and max respectively. where k is the size of maxIds. - * And note that the top k elements are not sorted. 
- */ - virtual void rowMax(IVector& maxIds, Matrix& max) { - LOG(FATAL) << "Not implemented"; - } - - /// normalize each row so that the sum of each row is 1. - virtual void rowNormalizeL1(Matrix& out) { - (void)out; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * this = a*b - * @endcode - */ - virtual void mul(const Matrix& a, const Matrix& b) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = scaleAB*(this*b) + scaleT*this - * @endcode - */ - virtual void rightMul(Matrix& b, real scaleAB, real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = this* b - * @endcode - */ - virtual void rightMul(Matrix& b) { LOG(FATAL) << "Not implemented"; } - - /** - * @code - * this = scaleAB*(a*this) + scaleT*this - * @endcode - */ - virtual void leftMul(Matrix& a, real scaleAB, real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = a*this) - * @endcode - */ - virtual void leftMul(Matrix& a) { LOG(FATAL) << "Not implemented"; } - - /// merge the element for each col. - virtual void colMerge(Matrix& src) { LOG(FATAL) << "Not implemented"; } - - /// copy -log(output[label]) to this->data[i]. - virtual void oneHotCrossEntropy(Matrix& output, IVector& label) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the error of outputV according to label. - virtual void oneHotCrossEntropyBp(Matrix& outputV, IVector& label) { - LOG(FATAL) << "Not implemented"; - } - - /// copy -log(output[label]) to this->data[i]. - virtual void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the error of outputV according to label. 
- virtual void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha) { - LOG(FATAL) << "Not implemented"; - } - - /** - * \f[ - * a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} - * \f] - * - * b contains M elements, - * c contains N elements (N is odd), - * b's index arithmetic is computed modulo M, - * c's index arithmetic is computed modulo N. - */ - virtual void circularConv(Matrix& b, Matrix& c) { - LOG(FATAL) << "Not implemented"; - } - - virtual void circularConvDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2) { - LOG(FATAL) << "Not implemented"; - } - - /* output_ij = exp(this_{ij}) / (sum_j exp(this_ij)) */ - virtual void softmax(Matrix& output) { - (void)output; - LOG(FATAL) << "Not implemeted"; - } - virtual void sequenceSoftmax(Matrix& output, const IVector& index) { - (void)output; - LOG(FATAL) << "Not implemeted"; - } - - virtual void softmaxBackward(Matrix& outputV) { - (void)outputV; - LOG(FATAL) << "Not implemeted"; - } - - /* - sum_i = sum_j this_ij * output_ij - this_ij = output_ij* (this_ij - sum_i) - */ - virtual void softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the sum of squares diff cost. - virtual void sumOfSquares(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /// gradient of sumOfSquares. 
- virtual void sumOfSquaresBp(Matrix& outputV, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - virtual void smoothL1(Matrix& output, Matrix& label, real destScale) { - LOG(FATAL) << "Not implemented"; - } - - virtual void smoothL1Bp(Matrix& outputV, Matrix& label, real destScale) { - LOG(FATAL) << "Not implemented"; - } - - virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; } - - virtual void tanhDerivative(Matrix& output) { - LOG(FATAL) << "Not implemented"; - } - - virtual void softrelu(Matrix& output) { LOG(FATAL) << "Not implemented"; } - - virtual void softreluDerivative(Matrix& output) { - LOG(FATAL) << "Not implemented"; - } - - virtual void scaledTanh(Matrix& output, real p1, real p2) { - LOG(FATAL) << "Not implemented"; - } - - /// print out the values of elements to os - virtual void print(std::ostream& os) const { - LOG(FATAL) << "Not implemented"; - } - - /** - * print a part of the matrix - * from the (top,left) value to the (height, width) value (not included) - */ - virtual void print(std::ostream& os, size_t height, size_t width) const { - LOG(FATAL) << "Not implemented"; - } - - /// print one row to os - virtual void printOneRow(std::ostream& os, size_t idx) const { - LOG(FATAL) << "Not implemented"; - } - - virtual void check(std::ostream& os, Matrix& refMat, bool printDiff = true) {} - - virtual real getMin() { - LOG(FATAL) << "Not implemented"; - return 0; - } - virtual real getMax() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual void randomizeUniform() { LOG(FATAL) << "Not implemented"; } - - /** - * @brief calulate the error of classification - * - * output[i] = 1 if row i is an error. - * - * output[i] = 0 if row i is correct. 
- * - */ - virtual void classificationError(Matrix& output, - IVector& label, - size_t topkSize = 1) { - LOG(FATAL) << "Not implemented"; - } - - virtual void upsampleForward(Matrix& input, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void upsampleBackward(Matrix& outputGrad, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * Pooling forward operation, pick out the largest element - * in the sizeX of value, if the maskMatP is not NULL, it will - * also caculate the location indices. - */ - virtual void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - MatrixPtr maskMatP = NULL) { - LOG(FATAL) << "Not implemeted"; - } - - /// Pooling backward operation. - virtual void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - /// Pooling forward operation, caculate the average of sizeX elements. 
- virtual void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - bool excludeMode = true) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW, - bool excludeMode = true) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * Pooling 3D forward operation, pick out the largest element - * in the sizeX of value - */ - virtual void maxPool3DForward(Matrix& inputMat, - Matrix& maxPoolIdx, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void maxPool3DBackward(Matrix& outGrad, - Matrix& maxPoolIdx, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void avgPool3DForward(Matrix& input, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void avgPool3DBackward(Matrix& 
input, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * Input: one or more sequences. Each sequence contains some instances. - * - * Output: output size is the number of input sequences (NOT input - * instances). - * - * output[i] is set to max_input[i]. - */ - virtual void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * this.row[i] += table.row[ids[i]] - * if ids[i] == -1, it will be ignored - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids) { - (void)table; - (void)ids; - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * table.row[ids[i]] += this.row[i] - * if ids[i] == -1, it will be ignored - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids) { - (void)table; - (void)ids; - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * table[i, id[i]] += this[i] - * @endcode - */ - virtual void addElements(Matrix& table, IVector& ids) { - LOG(FATAL) << "Not implemented"; - } - /** - * @brief cross entropy for multi binary labels - * - * @code - * this[i] = -sum(label[i][j]*log(output[i][j]) - * + (1-label[i][j])*log(1-output[i][j])) - * @endcode - */ - virtual void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief The gradient of cross entropy 
for multi binary labels on output - * - * @code - * this[i][j] = -label[i][j]/output[i][j] - * + (1-label[i][j])/(1-output[i][j]) - * @endcode - */ - virtual void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief Calculate the classification error for multi binary labels - * - * @code - * this[i] = sum((output[i][j] >= threshold && label[i][j] == 0) - * || (output[i][j] < threshold && label[i][j] == 1)) - * / output->getWidth() - * @endcode - */ - virtual void classificationErrorMulti(Matrix& output, - Matrix& label, - real threshold) { - LOG(FATAL) << "Not implemented"; - } - - virtual void paramReluForward(Matrix& data, Matrix& W) { - LOG(FATAL) << "Not implemented"; - } - virtual void paramReluBackwardW(Matrix& oGrad, Matrix& data) { - LOG(FATAL) << "Not implemented"; - } - virtual void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) { - LOG(FATAL) << "Not implemented"; - } - - virtual void vol2Col(real* data, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void col2Vol(real* trg, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - LOG(FATAL) << "Not implemented"; - } - virtual void bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - 
const real ratioW) { - LOG(FATAL) << "Not implemented"; - } - - template - void operator=(const ExpressionType& expr) { - if (useGpu_) { - TensorGpuApply(*this, expr); - } else { - TensorCpuApply(*this, expr); - } - } - - bool isEmpty() const { return data_ == nullptr; } - - explicit operator bool() const { return !isEmpty(); } -}; - -inline std::ostream& operator<<(std::ostream& os, const Matrix& mat) { - mat.print(os); - return os; -} - -class GpuMatrix : public Matrix { - public: - GpuMatrix(); - - GpuMatrix(size_t height, size_t width, bool trans = false); - GpuMatrix(real* data, size_t height, size_t width, bool trans = false) - : Matrix(data, height, width, trans, true) {} - GpuMatrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans = false) - : Matrix(data, height, width, stride, trans, true) {} - GpuMatrix(GpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : Matrix(dataHandle, height, width, trans, true) {} - ~GpuMatrix(); - - void zeroMem(); - void resetOne(); - void setDiag(real value); - - void resize(size_t newHeight, size_t newWidth); - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - - /** - * Copy the data from cpu_memory buffer - */ - void copyFrom(const real* hostSrc, size_t size); - - void copyFrom(const real* hostSrc, const int64_t* seq); - - void copyFrom(const Matrix& src, hl_stream_t stream); - - void copyFrom(const Matrix& src); - - void copyFrom(const IVector& src); - - void copyByRowIndex(Matrix& b, const IVector& rowIndex); - - MatrixPtr clone(size_t height, size_t width, bool useGpu = false); - - real getElement(size_t x, size_t y) const; - - real* getRow(size_t row) { return 
BaseMatrix::rowBuf(row); } - virtual real* getRowBuf(size_t row) { return getRow(row); } - - real getSum(); - void accumulateColSum(Matrix& src); - real getAbsSum(); - - real getMin(); - real getMax(); - - MatrixPtr getTranspose(); - void transpose(MatrixPtr& matTrans, bool memAlloc); - void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise); - - MatrixPtr getInverse(); - void inverse(MatrixPtr& matInv, bool memAlloc); - - /// add b to each sample of this. - void addBias(Matrix& b, real scale); - void addSharedBias(Matrix& b, real scale); - - /** - * @code - * add each sample from a to this. - * @endcode - */ - void collectBias(Matrix& a, real scale); - void collectSharedBias(Matrix& a, real scale); - - void sequenceAvgForward(Matrix& a, const IVector& startsPos, int mode); - void sequenceAvgBackward(Matrix& a, const IVector& startsPos, int mode); - - /** - * @code - * this.row[i] += table.row[ids[i]] - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids); - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids); - - /** - * @code - * table.row[ids[i]] += this.row[i] - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids); - - void addColumnVector(const Matrix& b); - - /** - * @code - * this = scaleAB*(a*b) + scaleT*this - * @endcode - */ - void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); - - /** - * @code - * this = a*b - * @endcode - */ - void mul(const Matrix& a, const Matrix& b); - - void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT); - - void mul(const GpuSparseMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT); - - void mul(const GpuMatrix& a, - const GpuSparseMatrix& b, - real scaleAB, - real scaleT); - - /** - * @code - * this = scaleAB*(this*b) + scaleT*this - * @endcode - */ - void rightMul(Matrix& b, real scaleAB, real scaleT); - - /** - * @code - * this = this* b - * @endcode - */ - void 
rightMul(Matrix& b); - - /** - * @code - * this = scaleAB*(a*this) + scaleT*this - * @endcode - */ - void leftMul(Matrix& a, real scaleAB, real scaleT); - - /** - * @code - * this = a*this - * @endcode - */ - void leftMul(Matrix& a); - - void colMerge(Matrix& src); - void rowSum(Matrix& sum); - void rowMax(Matrix& max); - void rowMax(IVector& maxIds, Matrix& max); - void colMax(Matrix& max); - void colMax(IVector& maxIds, Matrix& max); - void maxoutForward(Matrix& a, IVector& id, size_t channels, size_t groups); - void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups); - - void oneHotCrossEntropy(Matrix& output, IVector& label); - void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); - void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha); - void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha); - - void softmax(Matrix& output); - void sequenceSoftmax(Matrix& output, const IVector& index); - void softmaxBackward(Matrix& outputV); - void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); - - /// calculate the sum of squares diff cost. - void sumOfSquares(Matrix& output, Matrix& label); - - /// gradient of sumOfSquares. 
- void sumOfSquaresBp(Matrix& outputV, Matrix& label); - void tanh(Matrix& output); - void tanhDerivative(Matrix& output); - void softrelu(Matrix& output); - void softreluDerivative(Matrix& output); - void scaledTanh(Matrix& output, real p1, real p2); - - virtual void print(std::ostream& os) const; - virtual void print(std::ostream& os, size_t height, size_t width) const; - - void paramReluForward(Matrix& data, Matrix& W); - void paramReluBackwardW(Matrix& oGrad, Matrix& data); - void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W); - - void check(std::ostream& os, Matrix& refMat, bool printDiff = true); - void randomizeUniform(); - - void classificationError(Matrix& output, IVector& label, size_t topkSize = 1); - - void upsampleForward(Matrix& input, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW); - - void upsampleBackward(Matrix& outputGrad, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW); - - void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - MatrixPtr maskMatP); - - void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - bool excludeMode = true); - - void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - 
size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW, - bool excludeMode = true); - - void maxPool3DForward(Matrix& inputMat, - Matrix& maxPoolIdx, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW); - - void maxPool3DBackward(Matrix& outGrad, - Matrix& maxPoolIdx, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput); - - void avgPool3DForward(Matrix& input, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW); - - void avgPool3DBackward(Matrix& input, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput); - - void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index); - - void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index); - - void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void 
bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void vol2Col(real* data, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW); - - void col2Vol(real* trg, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta); - - void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); - - void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); - - template - void operator=(const ExpressionType& expr) { - TensorGpuApply(*this, expr); - } -}; - -class CpuMatrix : public Matrix { - private: - MatrixPtr sftmaxSum_; - MatrixPtr sftmaxDot_; - - public: - CpuMatrix(size_t height, size_t width, bool trans = false); - CpuMatrix(real* data, size_t height, size_t width, bool trans = false) - : Matrix(data, height, width, trans, false) {} - CpuMatrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans = false) - : Matrix(data, height, width, stride, trans, false) {} - - CpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : Matrix(dataHandle, height, width, trans, false) {} - - ~CpuMatrix(); - - void zeroMem(); - void resetOne(); - void setDiag(real value); - - void resize(size_t newHeight, size_t newWidth); - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - - 
real getElement(size_t x, size_t y) const; - real getSum(); - void accumulateColSum(Matrix& src); - real getAbsSum(); - - MatrixPtr getTranspose(); - void transpose(MatrixPtr& matTrans, bool memAlloc); - void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise); - - MatrixPtr getInverse(); - void inverse(MatrixPtr& matInv, bool memAlloc); - - void copyFrom(const Matrix& src); - - void copyFrom(const Matrix& src, hl_stream_t stream); - - void copyFrom(const real* cpuSrc, size_t size); - - void copyFrom(const real* cpuSrc, const int64_t* seq); - - void copyFrom(const IVector& src); - - void copyFrom(CpuSparseMatrix& src); - - void copyByRowIndex(Matrix& b, const IVector& rowIndex); - - MatrixPtr clone(size_t height, size_t width, bool useGpu = false); - - void upsampleForward(Matrix& input, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW); - - void upsampleBackward(Matrix& outputGrad, - Matrix& mask, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t outputH, - size_t outputW); - - void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - MatrixPtr maskMatP); - - void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW, - bool excludeMode = true); - - void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t 
sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW, - bool excludeMode = true); - - void maxPool3DForward(Matrix& inputMat, - Matrix& maxPoolIdx, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW); - - void maxPool3DBackward(Matrix& outGrad, - Matrix& maxPoolIdx, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput); - - void avgPool3DForward(Matrix& input, - size_t channels, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW); - - void avgPool3DBackward(Matrix& input, - size_t imgSizeD, - size_t imgSizeH, - size_t imgSizeW, - size_t outputD, - size_t outputH, - size_t outputW, - size_t sizeZ, - size_t sizeY, - size_t sizeX, - size_t strideD, - size_t strideH, - size_t strideW, - size_t paddingD, - size_t paddingH, - size_t paddingW, - real scaleTargets, - real scaleOutput); - - void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index); - - void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index); - - real* getRow(size_t row) { return BaseMatrix::rowBuf(row); } - virtual real* getRowBuf(size_t row) { return getRow(row); } - - public: - /// add b to each sample of this. 
- void addBias(Matrix& b, real scale); - void addSharedBias(Matrix& b, real scale); - - /// add each sample of a to this. - void collectBias(Matrix& a, real scale); - void collectSharedBias(Matrix& a, real scale); - - void sequenceAvgForward(Matrix& a, const IVector& startsPos, int mode); - void sequenceAvgBackward(Matrix& a, const IVector& startsPos, int mode); - - /** - * @code - * this.row[i] += table.row[ids[i]] - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids); - - /** - * @code - * table.row[ids[i]] += this.row[i] - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids); - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids); - - /** - * @code - * table[i, id[i]] += this[i] - * @endcode - */ - virtual void addElements(Matrix& table, IVector& ids); - - /** - * use abstract getRow() to get row from table. - * - * Define table as template instead of virtual class for performance sake. - * internal used by above two virtual funcs. - */ - template - void selectRowsImp(TableMatType& table, IVector& ids); - template - void addToRowsImp(TableMatType& table, IVector& ids); - - void addColumnVector(const Matrix& b); - - void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); - void mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - - void mul(CpuMatrix* a, CpuSparseMatrix* b, real scaleAB, real scaleT); - - static void mul(CpuMatrix* a, - CpuMatrix* b, - CpuSparseMatrix* c, - real scaleAB, - real scaleT); - - /** - * c = a * b - * - * use abstract getRow() to get row from B,C. - * Define B,C as template instead of virtual class for performance sake. 
- */ - template - static void mul( - CpuSparseMatrix* a, MatBType* b, MatCType* c, real scaleAB, real scaleT); - - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - - void mul(const Matrix& a, const Matrix& b); - - void rightMul(Matrix& b, real scaleAB, real scaleT); - void rightMul(Matrix& b); - - void leftMul(Matrix& a, real scaleAB, real scaleT); - void leftMul(Matrix& a); - void colMerge(Matrix& src); - void rowSum(Matrix& sum); - void rowMaxId(IVector& maxIds); - void rowMax(Matrix& max); - void rowMax(IVector& maxIds, Matrix& maxVal); - void colMax(Matrix& max); - void colMax(IVector& maxIds, Matrix& maxVal); - void maxoutForward(Matrix& a, IVector& id, size_t channels, size_t groups); - void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups); - void rowNormalizeL1(Matrix& out); - - void oneHotCrossEntropy(Matrix& output, IVector& label); - void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); - void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha); - void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha); - - void circularConv(Matrix& b, Matrix& c); - void circularConvDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2); - - void softmax(Matrix& output); - void sequenceSoftmax(Matrix& output, const IVector& index); - void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); - - /// calculate the sum of squares diff cost. - void sumOfSquares(Matrix& output, Matrix& label); - - /// gradient of sumOfSquares. 
- void sumOfSquaresBp(Matrix& outputV, Matrix& label); - - void smoothL1(Matrix& output, Matrix& label, real destScale); - void smoothL1Bp(Matrix& output, Matrix& label, real destScale); - - void tanh(Matrix& output); - void tanhDerivative(Matrix& output); - - void softrelu(Matrix& output); - void softreluDerivative(Matrix& output); - void scaledTanh(Matrix& output, real p1, real p2); - - void print(std::ostream& os) const; - void print(std::ostream& os, size_t height, size_t width) const; - void printOneRow(std::ostream& os, size_t idx) const; - - void paramReluForward(Matrix& data, Matrix& W); - void paramReluBackwardW(Matrix& oGrad, Matrix& data); - void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W); - - void check(std::ostream& os, Matrix& refMat, bool printDiff = true); - - real getMin(); - real getMax(); - - void randomizeUniform(); - - void classificationError(Matrix& output, IVector& label, size_t topkSize = 1); - - void addByBitCode(size_t numClasses, const IVector& codes, const Matrix& vec); - - void addByBitCodeBackward(size_t numClasses, - const IVector& codes, - Matrix& vec); - - void mulByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& mat, - const Matrix& input); - - void mulByBitCodeBackwardWeight(size_t numClasses, - const IVector& codes, - Matrix& mat, - const Matrix& input); - - void mulByBitCodeBackwardError(size_t numClasses, - const IVector& codes, - const Matrix& mat, - Matrix& input); - - void sumByBitCode(size_t numClasses, - IVector& codes, - Matrix& sum, - real scaleSum); - - void subByBitCode(size_t numClasses_, IVector& codes); - - void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); - void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); - void classificationErrorMulti(Matrix& output, Matrix& label, real threshold); - - void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t 
numChannels, - const real ratioH, - const real ratioW); - - void bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void vol2Col(real* data, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW); - - void col2Vol(real* trg, - int channels, - int depth, - int height, - int width, - int filterD, - int filterH, - int filterW, - int strideD, - int strideH, - int strideW, - int paddingD, - int paddingH, - int paddingW, - real alpha, - real beta); - - template - void operator=(const ExpressionType& expr) { - TensorCpuApply(*this, expr); - } -}; - -class SharedCpuMatrix : public CpuMatrix { - public: -#ifndef PADDLE_MOBILE_INFERENCE - /* blockNum is number of partitions of the matrix */ - SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false) - : CpuMatrix(height, width, trans) { - initShared(blockNum); - } - SharedCpuMatrix( - int blockNum, real* data, size_t height, size_t width, bool trans = false) - : CpuMatrix(data, height, width, trans) { - initShared(blockNum); - } - - SharedCpuMatrix(int blockNum, - CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : CpuMatrix(dataHandle, height, width, trans) { - initShared(blockNum); - } - - SharedCpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : CpuMatrix(dataHandle, height, width, trans) { - initBlock(1); - } - - ~SharedCpuMatrix() {} - - public: - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - virtual void add(Matrix& b, real p1, real p2); - virtual void add(real p1, real p2); - - private: - using Matrix::mul; - void initShared(int blockNum); - void initBlock(int blockNum); - - int blockNum_; - 
std::vector> blockLocks_; - ThreadLocal localBuf_; - ThreadLocal> localBufRows_; - ThreadLocal> blockSeq_; -#endif -}; - -typedef struct { unsigned int col; } sparse_non_value_t; - -typedef struct { - unsigned int col; - float value; -} sparse_float_value_t; - -} // namespace paddle -#include "ExecViaCpu.h" diff --git a/paddle/legacy/math/MatrixBitCode.cpp b/paddle/legacy/math/MatrixBitCode.cpp deleted file mode 100644 index f35f266a30506110eb6c656f7b631d12d8f6ae90..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MatrixBitCode.cpp +++ /dev/null @@ -1,291 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Matrix.h" -#include "hl_gpu.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -namespace { - -struct SimpleCode { - SimpleCode(size_t code, size_t numClasses) : c_(code + numClasses) {} - inline size_t calcIndex(int bit) const { return (c_ >> (bit + 1)) - 1; } - inline bool calcBit(int bit) const { return c_ & (1 << bit); } - inline int getLength() const { return findLastSet(c_) - 1; } - - private: - size_t c_; -}; - -struct SimpleCodeTable { - explicit SimpleCodeTable(size_t numClasses) : numClasses_(numClasses) {} - SimpleCode operator()(size_t code) const { - return SimpleCode(code, numClasses_); - } - size_t size() const { return numClasses_; } - int getMaxCodeLength() const { return findLastSet(numClasses_ - 1); } - - private: - size_t numClasses_; - int maxCodeLength_; -}; - -} // namespace - -/** - * CodeTable class should support 3 functions: - * - * size_t size() - * return the number of codes - * - * int getMaxCodeLength() - * return the maximal code length - * - * Code operator()(size_t i) - * return the i-th code. Code class is descriebed below. 
- * - * Code class should support 3 functions: - * - * int getLength() - * return the length of the code - * - * bool calcIndex(int bit) - * bit ranges from 0 to getLength() - 1 - * return the index for the (1+bit) level parent - * - * bool calcBit(int bit) - * return true if the bit level parent is the right child of (1+bit) level - * parent - * - */ - -/* - for i: - for j < codeLength: - op(tmat(i, j), vec(0, index(i, j))) -*/ -template -static void addByBitCodeT( - Op op, CodeTable codeTable, const IVector& codes, TMat& tmat, Mat& vec) { - CHECK(!vec.useGpu()); - - size_t numClasses = codeTable.size(); - size_t maxCodeLength = codeTable.getMaxCodeLength(); - size_t numSamples = tmat.getHeight(); - size_t oWidth = tmat.getWidth(); - CHECK_EQ(tmat.getWidth(), maxCodeLength); - CHECK_EQ(codes.getSize(), numSamples); - CHECK_EQ(vec.getHeight(), (size_t)1); - CHECK_EQ(vec.getWidth(), numClasses - 1); - - auto data = tmat.getData(); - auto v = vec.getData(); - const int* c = codes.getData(); - for (size_t i = 0; i < numSamples; ++i) { - auto code = codeTable(c[i]); - int codeLength = code.getLength(); - for (int j = 0; j < codeLength; ++j) { - size_t index = code.calcIndex(j); - op(data[i * oWidth + j], v[index]); - } - } -} - -/* For j < codeLength: - this(i, j) += vec(0, index(i, j)) -*/ -void CpuMatrix::addByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& vec) { - auto op = [](real& t, real v) { t += v; }; - addByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, vec); -} - -/* For j < codeLength: - vec(0, index(i, j)) += this(i, j) -*/ -void CpuMatrix::addByBitCodeBackward(size_t numClasses, - const IVector& codes, - Matrix& vec) { - auto op = [](real t, real& v) { v += t; }; - addByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, vec); -} - -/* - for i: - for j < codeLength: - op(tmat(i, j), mat.row(index(i, j)), input.row(i)) -*/ -template -void mulByBitCodeT(Op op, - CodeTable codeTable, - IVec& codes, - TMat& tmat, - WMat& 
weight, - InMat& input) { - CHECK(!tmat.useGpu() && !weight.useGpu() && !input.useGpu()); - - size_t numClasses = codeTable.size(); - size_t maxCodeLength = codeTable.getMaxCodeLength(); - size_t numSamples = tmat.getHeight(); - size_t inputDim = input.getWidth(); - size_t oWidth = tmat.getWidth(); - CHECK_EQ(tmat.getWidth(), maxCodeLength); - CHECK_EQ(codes.getSize(), numSamples); - CHECK_EQ(input.getHeight(), numSamples); - CHECK_EQ(weight.getHeight(), numClasses - 1); - CHECK_EQ(weight.getWidth(), inputDim); - - real* data = tmat.getData(); - const int* c = codes.getData(); - for (size_t i = 0; i < numSamples; ++i) { - auto code = codeTable(c[i]); - int codeLength = code.getLength(); - for (int j = 0; j < codeLength; ++j) { - size_t index = code.calcIndex(j); - op(data[i * oWidth + j], weight.rowBuf(index), input.rowBuf(i), inputDim); - } - } -} - -/* For j < codeLength: - this(i, j) += -*/ -void CpuMatrix::mulByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& weight, - const Matrix& input) { - auto op = []( - real& t, const real* weightRow, const real* inputRow, size_t inputDim) { - real sum = 0; - for (size_t k = 0; k < inputDim; ++k) { - sum += weightRow[k] * inputRow[k]; - } - t += sum; - }; - - mulByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, weight, input); -} - -/* For index(i, j) >= 0: - weight.row(index(i, j)) += this(i, j) * input.row(i) -*/ -void CpuMatrix::mulByBitCodeBackwardWeight(size_t numClasses, - const IVector& codes, - Matrix& weight, - const Matrix& input) { - auto op = []( - const real t, real* weightRow, const real* inputRow, size_t inputDim) { - for (size_t k = 0; k < inputDim; ++k) { - weightRow[k] += t * inputRow[k]; - } - }; - - mulByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, weight, input); -} - -/* For j < codeLength: - input.row(i) += this(i, j) * weight.row(index(i, j)) -*/ -void CpuMatrix::mulByBitCodeBackwardError(size_t numClasses, - const IVector& codes, - const Matrix& weight, - 
Matrix& input) { - auto op = []( - const real t, const real* weightRow, real* inputRow, size_t inputDim) { - for (size_t k = 0; k < inputDim; ++k) { - inputRow[k] += t * weightRow[k]; - } - }; - - mulByBitCodeT(op, SimpleCodeTable(numClasses), codes, *this, weight, input); -} - -template -void sumByBitCodeT(CodeTable codeTable, - IVector& codes, - const CpuMatrix& tmat, - Matrix& sum, - real scaleSum) { - size_t maxCodeLength = codeTable.getMaxCodeLength(); - size_t numSamples = tmat.getHeight(); - size_t oWidth = tmat.getWidth(); - CHECK_EQ(tmat.getWidth(), maxCodeLength); - CHECK_EQ(codes.getSize(), numSamples); - CHECK_EQ(sum.getHeight(), numSamples); - CHECK_EQ(sum.getWidth(), (size_t)1); - - const real* data = tmat.getData(); - real* s = sum.getData(); - int* c = codes.getData(); - for (size_t i = 0; i < numSamples; ++i) { - real sm = 0; - auto code = codeTable(c[i]); - int codeLength = code.getLength(); - for (int j = 0; j < codeLength; ++j) { - if (code.calcBit(j)) { - sm += data[i * oWidth + j]; - } - } - s[i] = scaleSum * sm; - } -} - -/* For j < codeLength: - sum(i, 0) = \sum_j bit(i, j) * this(i, j) -*/ -void CpuMatrix::sumByBitCode(size_t numClasses, - IVector& codes, - Matrix& sum, - real scaleSum) { - sumByBitCodeT(SimpleCodeTable(numClasses), codes, *this, sum, scaleSum); -} - -template -void subByBitCodeT(CodeTable codeTable, IVector& codes, CpuMatrix& tmat) { - size_t maxCodeLength = codeTable.getMaxCodeLength(); - size_t numSamples = tmat.getHeight(); - size_t oWidth = tmat.getWidth(); - CHECK_EQ(tmat.getWidth(), maxCodeLength); - CHECK_EQ(codes.getSize(), numSamples); - - real* data = tmat.getData(); - int* c = codes.getData(); - for (size_t i = 0; i < numSamples; ++i) { - auto code = codeTable(c[i]); - int codeLength = code.getLength(); - for (int j = 0; j < codeLength; ++j) { - if (code.calcBit(j)) { - data[i * oWidth + j] -= 1; - } - } - } -} - -/* For j < codeLength - this(i, j) -= bit(i, j) -*/ -void CpuMatrix::subByBitCode(size_t 
numClasses, IVector& codes) { - subByBitCodeT(SimpleCodeTable(numClasses), codes, *this); -} - -} // namespace paddle diff --git a/paddle/legacy/math/MemoryHandle.cpp b/paddle/legacy/math/MemoryHandle.cpp deleted file mode 100644 index 1563314e92115e9e009b80b934d2fb83f4a7121e..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MemoryHandle.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MemoryHandle.h" -#include -#include "Storage.h" - -namespace paddle { - -/** - * Calculate the actual allocation size according to the required size. - */ -MemoryHandle::MemoryHandle(size_t size) : size_(size), buf_(nullptr) { - if (size_ <= 256) { - // Memory allocation in cuda is always aligned to at least 256 bytes. - // In many cases it is 512 bytes. - allocSize_ = 256; - } else if (size_ <= 512) { - allocSize_ = 512; - } else if (size_ <= (1 << 16)) { - // Allocate multiple of 1024 bytes. 
- allocSize_ = (size + 1023) & ~(1023); - } else { - allocSize_ = size_; - } -} - -GpuMemoryHandle::GpuMemoryHandle(size_t size) : MemoryHandle(size) { - CHECK(size != 0) << " allocate 0 bytes"; - deviceId_ = hl_get_device(); - allocator_ = StorageEngine::singleton()->getGpuAllocator(deviceId_); - buf_ = allocator_->alloc(allocSize_); -} - -GpuMemoryHandle::~GpuMemoryHandle() { allocator_->free(buf_, allocSize_); } - -CpuMemoryHandle::CpuMemoryHandle(size_t size) : MemoryHandle(size) { - CHECK(size != 0) << " allocate 0 bytes"; - allocator_ = StorageEngine::singleton()->getCpuAllocator(); - buf_ = allocator_->alloc(allocSize_); -} - -CpuMemoryHandle::~CpuMemoryHandle() { allocator_->free(buf_, allocSize_); } - -} // namespace paddle diff --git a/paddle/legacy/math/MemoryHandle.h b/paddle/legacy/math/MemoryHandle.h deleted file mode 100644 index 516e09dbed47ac6b039ccb094614c9588eeb3cd5..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/MemoryHandle.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "PoolAllocator.h" - -namespace paddle { - -class MemoryHandle { - protected: - explicit MemoryHandle(size_t size); - virtual ~MemoryHandle() {} - - public: - void* getBuf() const { return buf_; } - size_t getSize() const { return size_; } - size_t getAllocSize() const { return allocSize_; } - - protected: - PoolAllocator* allocator_; - size_t size_; // the requested size - size_t allocSize_; // the allocated size - int deviceId_; // the device id of memory if gpu memory - void* buf_; -}; - -/** - * Wrapper class for raw gpu memory handle. - * - * The raw handle will be released at destructor - */ -class GpuMemoryHandle : public MemoryHandle { - public: - explicit GpuMemoryHandle(size_t size); - virtual ~GpuMemoryHandle(); -}; - -/** - * Wrapper class for raw cpu memory handle. - * - * The raw handle will be released at destructor - */ -class CpuMemoryHandle : public MemoryHandle { - public: - explicit CpuMemoryHandle(size_t size); - virtual ~CpuMemoryHandle(); -}; - -typedef std::shared_ptr MemoryHandlePtr; -typedef std::shared_ptr CpuMemHandlePtr; -typedef std::shared_ptr GpuMemHandlePtr; -} // namespace paddle diff --git a/paddle/legacy/math/NEONFunctions.cpp b/paddle/legacy/math/NEONFunctions.cpp deleted file mode 100644 index 953d5bb8c8148ff15adcac0267419a2e48f76267..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/NEONFunctions.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) - -#include "NEONFunctions.h" -#include - -namespace paddle { -namespace neon { - -// b[i] = a[i] > 0.0f ? a[i] : 0.0f -void relu(const float* a, float* b, int len) { - int offset = len % 16; - float32x4_t ma0, ma1, ma2, ma3; - float32x4_t mb0, mb1, mb2, mb3; - - float32x4_t zero = vdupq_n_f32(0.f); - for (int k = 0; k < len / 16; k++, a += 16, b += 16) { - ma0 = vld1q_f32(a); - ma1 = vld1q_f32(a + 4); - ma2 = vld1q_f32(a + 8); - ma3 = vld1q_f32(a + 12); - - mb0 = vmaxq_f32(ma0, zero); - mb1 = vmaxq_f32(ma1, zero); - mb2 = vmaxq_f32(ma2, zero); - mb3 = vmaxq_f32(ma3, zero); - - vst1q_f32(b, mb0); - vst1q_f32(b + 4, mb1); - vst1q_f32(b + 8, mb2); - vst1q_f32(b + 12, mb3); - } - - for (int i = 0; i < offset; i++) { - b[i] = a[i] > 0.0f ? a[i] : 0.0f; - } -} - -// b[i] = a[i] > 0.0f ? a[i] : a[i] * w -void prelu(const float* a, float w, float* b, int len) { - int offset = len % 16; - float32x4_t ma0, ma1, ma2, ma3; - - float32x4_t zero = vdupq_n_f32(0.f); - float32x4_t vw = vdupq_n_f32(w); - - for (int k = 0; k < len / 16; k++, a += 16, b += 16) { - ma0 = vld1q_f32(a); - ma1 = vld1q_f32(a + 4); - ma2 = vld1q_f32(a + 8); - ma3 = vld1q_f32(a + 12); - - uint32x4_t flag0 = vcgtq_f32(ma0, zero); - uint32x4_t flag1 = vcgtq_f32(ma1, zero); - uint32x4_t flag2 = vcgtq_f32(ma2, zero); - uint32x4_t flag3 = vcgtq_f32(ma3, zero); - - float32x4_t mul0 = vmulq_f32(ma0, vw); - float32x4_t mul1 = vmulq_f32(ma1, vw); - float32x4_t mul2 = vmulq_f32(ma2, vw); - float32x4_t mul3 = vmulq_f32(ma3, vw); - - ma0 = vbslq_f32(flag0, ma0, mul0); - ma1 = vbslq_f32(flag1, ma1, mul1); - ma2 = vbslq_f32(flag2, ma2, mul2); - ma3 = vbslq_f32(flag3, ma3, mul3); - - vst1q_f32(b, ma0); - vst1q_f32(b + 4, ma1); - vst1q_f32(b + 8, ma2); - vst1q_f32(b + 12, ma3); - } - - for (int i = 0; i < offset; i++) { - b[i] = a[i] > 0.0f ? 
a[i] : a[i] * w; - } -} - -} // namespace neon -} // namespace paddle - -#endif diff --git a/paddle/legacy/math/NEONFunctions.h b/paddle/legacy/math/NEONFunctions.h deleted file mode 100644 index 33edd9d518daede1a4416d66639519bde7d03efb..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/NEONFunctions.h +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -namespace paddle { -namespace neon { - -void relu(const float* a, float* b, int len); -void prelu(const float* a, float w, float* b, int len); - -} // namespace neon -} // namespace paddle diff --git a/paddle/legacy/math/PoolAllocator.cpp b/paddle/legacy/math/PoolAllocator.cpp deleted file mode 100644 index b6ad168856acc1f1e93ea1a000e5dcc670acbdf7..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/PoolAllocator.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PoolAllocator.h" - -namespace paddle { - -PoolAllocator::PoolAllocator(Allocator* allocator, - size_t sizeLimit, - const std::string& name) - : allocator_(allocator), - sizeLimit_(sizeLimit), - poolMemorySize_(0), - name_(name) {} - -PoolAllocator::~PoolAllocator() { freeAll(); } - -void* PoolAllocator::alloc(size_t size) { - if (sizeLimit_ > 0) { - std::lock_guard guard(mutex_); - auto it = pool_.find(size); - if (it == pool_.end() || it->second.size() == 0) { - if (poolMemorySize_ >= sizeLimit_) { - freeAll(); - } - return allocator_->alloc(size); - } else { - auto buf = it->second.back(); - it->second.pop_back(); - poolMemorySize_ -= size; - return buf; - } - } else { - return allocator_->alloc(size); - } -} - -void PoolAllocator::free(void* ptr, size_t size) { - if (sizeLimit_ > 0) { - std::lock_guard guard(mutex_); - auto& it = pool_[size]; - it.push_back(ptr); - poolMemorySize_ += size; - } else { - allocator_->free(ptr); - } -} - -void PoolAllocator::freeAll() { - for (auto it : pool_) { - for (auto ptr : it.second) { - allocator_->free(ptr); - } - } - poolMemorySize_ = 0; - pool_.clear(); -} - -void PoolAllocator::printAll() { - size_t memory = 0; - LOG(INFO) << name_ << ":"; - for (auto it : pool_) { - LOG(INFO) << " size:" << it.first; - for (auto ptr : it.second) { - LOG(INFO) << " ptr:" << ptr; - memory += it.first; - } - } - LOG(INFO) << "memory size: " << memory; -} - -} // namespace paddle diff --git a/paddle/legacy/math/PoolAllocator.h b/paddle/legacy/math/PoolAllocator.h deleted file mode 100644 index 7239cf1c4494e207081e325a7e6067ba26a9c852..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/PoolAllocator.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include "Allocator.h" - -namespace paddle { - -/** - * @brief Memory pool allocator implementation. - */ -class PoolAllocator { - public: - /** - * @brief constructor. - * @param allocator a Allocator object. - * @param sizeLimit The maximum size memory can be managed, - * if sizeLimit == 0, the pool allocator is a simple wrapper of allocator. - */ - PoolAllocator(Allocator* allocator, - size_t sizeLimit = 0, - const std::string& name = "pool"); - - /** - * @brief destructor. - */ - ~PoolAllocator(); - - void* alloc(size_t size); - void free(void* ptr, size_t size); - std::string getName() { return name_; } - - private: - void freeAll(); - void printAll(); - std::unique_ptr allocator_; - std::mutex mutex_; - std::unordered_map> pool_; - size_t sizeLimit_; - size_t poolMemorySize_; - std::string name_; -}; - -} // namespace paddle diff --git a/paddle/legacy/math/RowBuffer.h b/paddle/legacy/math/RowBuffer.h deleted file mode 100644 index 9dfd5eff06a39494cea6a8ce0b1f5ead6490b148..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/RowBuffer.h +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include "MemoryHandle.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -/** - * @brief The RowBuffer class - * Represent the SparseRow Matrix Data. - * - * If not set memory handler, then the data could be auto growth. - */ -class RowBuffer { - public: - /** - * @brief RowBuffer create a auto-growth row buffer. The row length is width. - * @param width the length of each row, a.k.a matrix width. - */ - explicit RowBuffer(size_t width) : width_(width) {} - - /** - * @brief RowBuffer create a row buffer, which cannot be auto-growth. - * @param mem the pre-allocated memory. - * @param width the length of each row, a.k.a matrix width. - */ - RowBuffer(const CpuMemHandlePtr& mem, size_t width) - : preallocatedBuf_(mem), width_(width) {} - - /** - * @brief resize resize the buffer with rowCount - * @param rowCnt number of row. matrix height. - */ - inline void resize(int rowCnt) { - if (preallocatedBuf_) { - CHECK(preallocatedBuf_->getSize() >= rowCnt * width_ * sizeof(real)); - } else { - rowStore_.resize(rowCnt * width_); - } - } - - /** - * @brief get a row buffer with row index. - * @param row the index of row. - * @return row buffer. - */ - inline real* get(int row) const { - if (preallocatedBuf_) { - CHECK_LE((row)*width_ * sizeof(real), preallocatedBuf_->getSize()); - return reinterpret_cast(preallocatedBuf_->getBuf()) + row * width_; - } else { - CHECK_LE((row + 1) * width_, rowStore_.size()); - return const_cast(rowStore_.data() + row * width_); - } - } - - /** - * @brief get a row buffer with row index. 
If row index is larger than local - * buffer, the size of local buffer will grow. - * @param row the index of row. - * @return row buffer. - */ - inline real* getWithAutoGrowth(int row) { - if (preallocatedBuf_) { - return get(row); - } else { - if ((rowStore_.size() <= row * width_)) { - rowStore_.resize((row + 1) * width_); - } - return rowStore_.data() + row * width_; - } - } - - /** - * @return raw data buffer. - */ - inline real* data() { - if (preallocatedBuf_) { - return reinterpret_cast(preallocatedBuf_->getBuf()); - } else { - return rowStore_.data(); - } - } - - /** - * @brief clear local buffer. It only affect auto-growth buffer. - */ - inline void clear() { - // swap an empty vector to it to free the memory. - std::vector> empty; - rowStore_.swap(empty); - } - - /** - * @brief get current number of rows. - * @return number of rows. - */ - inline size_t getRowCount() const { - if (preallocatedBuf_) { - return preallocatedBuf_->getSize() / sizeof(real) / width_; - } else { - return rowStore_.size() / width_; - } - } - - /** - * @brief get is this buffer can automatically grow or not. - * @return ture if can automacitally grow. - */ - inline bool isAutoGrowth() const { return !preallocatedBuf_; } - - /** - * @brief return the width of matrix. a.k.a length of row. - * @return width of matrix - */ - inline size_t getWidth() const { return width_; } - - private: - //! TODO(yuyang18): Add resize method to CpuMemHandlePtr, then we can get rid - //! of std::vector here. - CpuMemHandlePtr preallocatedBuf_; - std::vector> rowStore_; - size_t width_; -}; -} // namespace paddle diff --git a/paddle/legacy/math/SIMDFunctions.cpp b/paddle/legacy/math/SIMDFunctions.cpp deleted file mode 100644 index 3cfc5d6f1e033e7cbaa0813b4dad443f7ea0ee55..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/SIMDFunctions.cpp +++ /dev/null @@ -1,397 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "SIMDFunctions.h" -#ifdef __SSE3__ -#include -#endif -#include - -#ifdef __AVX__ -static void addto_avx(float* a, const float* b, size_t len) { - int offset = len % 32; - - __m256 ma0, ma1, ma2, ma3; - __m256 mb0, mb1, mb2, mb3; - - for (unsigned int k = 0; k < len / 32; k++, a += 32, b += 32) { - ma0 = _mm256_load_ps(a); - ma1 = _mm256_load_ps(a + 8); - ma2 = _mm256_load_ps(a + 16); - ma3 = _mm256_load_ps(a + 24); - - mb0 = _mm256_load_ps(b); - mb1 = _mm256_load_ps(b + 8); - mb2 = _mm256_load_ps(b + 16); - mb3 = _mm256_load_ps(b + 24); - - ma0 = _mm256_add_ps(ma0, mb0); - ma1 = _mm256_add_ps(ma1, mb1); - ma2 = _mm256_add_ps(ma2, mb2); - ma3 = _mm256_add_ps(ma3, mb3); - - _mm256_store_ps(a, ma0); - _mm256_store_ps(a + 8, ma1); - _mm256_store_ps(a + 16, ma2); - _mm256_store_ps(a + 24, ma3); - } - - for (int i = 0; i < offset; i++) a[i] += b[i]; - - return; -} - -static void batch_addto_avx(float* a, const float* b[], int batch, size_t len) { - int offset = len % 32; - - __m256 ma0, ma1, ma2, ma3; - __m256 mb0, mb1, mb2, mb3; - - for (unsigned int k = 0; k < len / 32; k++, a += 32) { - ma0 = _mm256_load_ps(a); - ma1 = _mm256_load_ps(a + 8); - ma2 = _mm256_load_ps(a + 16); - ma3 = _mm256_load_ps(a + 24); - - for (int i = 0; i < batch; i++) { - mb0 = _mm256_load_ps(b[i]); - mb1 = _mm256_load_ps(b[i] + 8); - mb2 = _mm256_load_ps(b[i] + 16); - mb3 = _mm256_load_ps(b[i] + 24); - ma0 = _mm256_add_ps(ma0, mb0); - ma1 = 
_mm256_add_ps(ma1, mb1); - ma2 = _mm256_add_ps(ma2, mb2); - ma3 = _mm256_add_ps(ma3, mb3); - b[i] += 32; - } - - _mm256_store_ps(a, ma0); - _mm256_store_ps(a + 8, ma1); - _mm256_store_ps(a + 16, ma2); - _mm256_store_ps(a + 24, ma3); - } - - for (int i = 0; i < offset; i++) { - for (int k = 0; k < batch; k++) a[i] += b[k][i]; - } - return; -} - -static void col_max_avx(float* result, - const float* data, - int dim, - int numSamples) { - // first sample, direct copy - for (int d = 0; d < dim; ++d) { - result[d] = data[d]; - } - int offset = dim % 32; - __m256 ma0, ma1, ma2, ma3; - __m256 mb0, mb1, mb2, mb3; - // first 16n dims - for (int k = 0; k < dim / 32; k++, result += 32, data += 32) { - ma0 = _mm256_load_ps(result); - ma1 = _mm256_load_ps(result + 8); - ma2 = _mm256_load_ps(result + 16); - ma3 = _mm256_load_ps(result + 24); - for (int i = 1; i < numSamples; i++) { - mb0 = _mm256_load_ps(data + i * dim); - mb1 = _mm256_load_ps(data + i * dim + 8); - mb2 = _mm256_load_ps(data + i * dim + 16); - mb3 = _mm256_load_ps(data + i * dim + 24); - ma0 = _mm256_max_ps(ma0, mb0); - ma1 = _mm256_max_ps(ma1, mb1); - ma2 = _mm256_max_ps(ma2, mb2); - ma3 = _mm256_max_ps(ma3, mb3); - } - _mm256_store_ps(result, ma0); - _mm256_store_ps(result + 8, ma1); - _mm256_store_ps(result + 16, ma2); - _mm256_store_ps(result + 24, ma3); - } - // last dims - for (int d = 0; d < offset; ++d) { - float sm = data[d]; - for (int i = 1; i < numSamples; ++i) { - sm = std::max(sm, data[i * dim + d]); - } - result[d] = sm; - } -} - -static void decayL1_avx(float* dst, float* src, float lambda, size_t sz) { - int64_t i; - int64_t size = sz; - float src_val; - - __m256 ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8; - // __m256 ymm9, ymm10; - - ymm1 = _mm256_set1_ps(lambda); - ymm2 = _mm256_setzero_ps(); - - for (i = 0; i <= size - 16; i += 16) { - ymm3 = _mm256_load_ps(src + i); - ymm6 = _mm256_load_ps(src + i + 8); - - ymm4 = _mm256_sub_ps(ymm3, ymm1); - ymm7 = _mm256_sub_ps(ymm6, ymm1); - - ymm5 = 
_mm256_add_ps(ymm3, ymm1); - ymm8 = _mm256_add_ps(ymm6, ymm1); - - ymm4 = _mm256_max_ps(ymm4, ymm2); - ymm7 = _mm256_max_ps(ymm7, ymm2); - - ymm5 = _mm256_min_ps(ymm5, ymm2); - ymm8 = _mm256_min_ps(ymm8, ymm2); - - ymm5 = _mm256_or_ps(ymm4, ymm5); - ymm8 = _mm256_or_ps(ymm7, ymm8); - - _mm256_store_ps(dst + i, ymm5); - _mm256_store_ps(dst + i + 8, ymm8); - } - if (i <= size - 8) { - ymm3 = _mm256_load_ps(src + i); - ymm4 = _mm256_sub_ps(ymm3, ymm1); - ymm5 = _mm256_add_ps(ymm3, ymm1); - ymm4 = _mm256_max_ps(ymm4, ymm2); - ymm5 = _mm256_min_ps(ymm5, ymm2); - ymm5 = _mm256_or_ps(ymm4, ymm5); - _mm256_store_ps(dst + i, ymm5); - - i += 8; - } - for (; i < size; i++) { - src_val = src[i]; - if (src_val > 0) { - dst[i] = ((src_val > lambda) ? (src_val - lambda) : 0); - } else { - dst[i] = ((-src_val > lambda) ? (src_val + lambda) : 0); - } - } -} - -static void decayL1_avx( - float* dst, float* src, float* lr, float lambda, size_t sz) { - int64_t i; - int64_t size = sz; - float src_val; - - __m256 ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8; - __m256 ymm9, ymm10; - - ymm1 = _mm256_set1_ps(lambda); - ymm2 = _mm256_setzero_ps(); - - for (i = 0; i <= size - 16; i += 16) { - ymm9 = _mm256_load_ps(lr + i); - ymm10 = _mm256_load_ps(lr + i + 8); - - ymm3 = _mm256_load_ps(src + i); - ymm6 = _mm256_load_ps(src + i + 8); - - ymm9 = _mm256_mul_ps(ymm9, ymm1); - ymm10 = _mm256_mul_ps(ymm10, ymm1); - - ymm4 = _mm256_sub_ps(ymm3, ymm9); - ymm7 = _mm256_sub_ps(ymm6, ymm10); - - ymm5 = _mm256_add_ps(ymm3, ymm9); - ymm8 = _mm256_add_ps(ymm6, ymm10); - - ymm4 = _mm256_max_ps(ymm4, ymm2); - ymm7 = _mm256_max_ps(ymm7, ymm2); - - ymm5 = _mm256_min_ps(ymm5, ymm2); - ymm8 = _mm256_min_ps(ymm8, ymm2); - - ymm5 = _mm256_or_ps(ymm4, ymm5); - ymm8 = _mm256_or_ps(ymm7, ymm8); - - _mm256_store_ps(dst + i, ymm5); - _mm256_store_ps(dst + i + 8, ymm8); - } - if (i <= size - 8) { - ymm3 = _mm256_load_ps(src + i); - ymm9 = _mm256_load_ps(lr + i); - ymm9 = _mm256_mul_ps(ymm9, ymm1); - ymm4 = 
_mm256_sub_ps(ymm3, ymm9); - ymm5 = _mm256_add_ps(ymm3, ymm9); - ymm4 = _mm256_max_ps(ymm4, ymm2); - ymm5 = _mm256_min_ps(ymm5, ymm2); - ymm5 = _mm256_or_ps(ymm4, ymm5); - _mm256_store_ps(dst + i, ymm5); - - i += 8; - } - for (; i < size; i++) { - src_val = src[i]; - float nlambda = lr[i] * lambda; - if (src_val > 0) { - dst[i] = ((src_val > nlambda) ? (src_val - nlambda) : 0); - } else { - dst[i] = ((-src_val > nlambda) ? (src_val + nlambda) : 0); - } - } -} - -#elif defined(__SSE3__) - -static void addto_sse(float* a, const float* b, size_t len) { - int offset = len % 16; - __m128 ma0, ma1, ma2, ma3; - __m128 mb0, mb1, mb2, mb3; - - for (unsigned int k = 0; k < len / 16; k++, a += 16, b += 16) { - ma0 = _mm_load_ps(a); - ma1 = _mm_load_ps(a + 4); - ma2 = _mm_load_ps(a + 8); - ma3 = _mm_load_ps(a + 12); - - mb0 = _mm_load_ps(b); - mb1 = _mm_load_ps(b + 4); - mb2 = _mm_load_ps(b + 8); - mb3 = _mm_load_ps(b + 12); - - ma0 = _mm_add_ps(ma0, mb0); - ma1 = _mm_add_ps(ma1, mb1); - ma2 = _mm_add_ps(ma2, mb2); - ma3 = _mm_add_ps(ma3, mb3); - - _mm_store_ps(a, ma0); - _mm_store_ps(a + 4, ma1); - _mm_store_ps(a + 8, ma2); - _mm_store_ps(a + 12, ma3); - } - - for (int i = 0; i < offset; i++) a[i] += b[i]; -} - -static void batch_addto_sse(float* a, const float* b[], int batch, size_t len) { - int offset = len % 16; - - __m128 ma0, ma1, ma2, ma3; - __m128 mb0, mb1, mb2, mb3; - - for (unsigned int k = 0; k < len / 16; k++, a += 16) { - ma0 = _mm_load_ps(a); - ma1 = _mm_load_ps(a + 4); - ma2 = _mm_load_ps(a + 8); - ma3 = _mm_load_ps(a + 12); - - for (int i = 0; i < batch; i++) { - mb0 = _mm_load_ps(b[i]); - mb1 = _mm_load_ps(b[i] + 4); - mb2 = _mm_load_ps(b[i] + 8); - mb3 = _mm_load_ps(b[i] + 12); - ma0 = _mm_add_ps(ma0, mb0); - ma1 = _mm_add_ps(ma1, mb1); - ma2 = _mm_add_ps(ma2, mb2); - ma3 = _mm_add_ps(ma3, mb3); - b[i] += 16; - } - - _mm_store_ps(a, ma0); - _mm_store_ps(a + 4, ma1); - _mm_store_ps(a + 8, ma2); - _mm_store_ps(a + 12, ma3); - } - - for (int i = 0; i < offset; 
i++) { - for (int k = 0; k < batch; k++) a[i] += b[k][i]; - } - return; -} - -static void col_max_sse(float* result, - const float* data, - int dim, - int numSamples) { - // first sample, direct copy - for (int d = 0; d < dim; ++d) { - result[d] = data[d]; - } - int offset = dim % 16; - __m128 ma0, ma1, ma2, ma3; - __m128 mb0, mb1, mb2, mb3; - // first 16n dims - for (int k = 0; k < dim / 16; k++, result += 16, data += 16) { - ma0 = _mm_load_ps(result); - ma1 = _mm_load_ps(result + 4); - ma2 = _mm_load_ps(result + 8); - ma3 = _mm_load_ps(result + 12); - for (int i = 1; i < numSamples; i++) { - mb0 = _mm_load_ps(data + i * dim); - mb1 = _mm_load_ps(data + i * dim + 4); - mb2 = _mm_load_ps(data + i * dim + 8); - mb3 = _mm_load_ps(data + i * dim + 12); - ma0 = _mm_max_ps(ma0, mb0); - ma1 = _mm_max_ps(ma1, mb1); - ma2 = _mm_max_ps(ma2, mb2); - ma3 = _mm_max_ps(ma3, mb3); - } - _mm_store_ps(result, ma0); - _mm_store_ps(result + 4, ma1); - _mm_store_ps(result + 8, ma2); - _mm_store_ps(result + 12, ma3); - } - // last dims - for (int d = 0; d < offset; ++d) { - float sm = data[d]; - for (int i = 1; i < numSamples; ++i) { - sm = std::max(sm, data[i * dim + d]); - } - result[d] = sm; - } -} - -#endif - -#if defined(__AVX__) -#define SIMD_INVOKE(func, ...) func##_avx(__VA_ARGS__) -#elif defined(__SSE3__) -#define SIMD_INVOKE(func, ...) 
func##_sse(__VA_ARGS__) -#endif - -namespace paddle { -namespace simd { -namespace internal { -#ifdef __SSE3__ -void addToImpl(float* a, const float* b, size_t len) { - SIMD_INVOKE(addto, a, b, len); -} -void batchAddToImpl(float* a, const float* b[], int batch, size_t len) { - SIMD_INVOKE(batch_addto, a, b, batch, len); -} - -void colMaxImpl(float* result, const float* data, int dim, int numSamples) { - SIMD_INVOKE(col_max, result, data, dim, numSamples); -} -#endif - -#ifdef __AVX__ -void decayL1AvxImpl(float* dst, float* src, float lambda, size_t len) { - decayL1_avx(dst, src, lambda, len); -} -void decayL1AvxImpl( - float* dst, float* src, float* lr, float lambda, size_t len) { - decayL1_avx(dst, src, lr, lambda, len); -} -#endif - -} // namespace internal -} // namespace simd -} // namespace paddle diff --git a/paddle/legacy/math/SIMDFunctions.h b/paddle/legacy/math/SIMDFunctions.h deleted file mode 100644 index 5b1dfea9d3c088a3fed98a350f1c1e3d865b6ba9..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/SIMDFunctions.h +++ /dev/null @@ -1,179 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include -#include - -namespace paddle { - -namespace simd { - -namespace naive { -template -inline void addTo(Type* a, const Type* b, size_t len) { - for (size_t i = 0; i < len; ++i) { - a[i] += b[i]; - } -} - -template -inline void batchAddTo(Type* a, const Type* b[], int batch, size_t len) { - for (int i = 0; i < batch; ++i) { - for (size_t j = 0; j < len; ++j) { - a[j] += b[i][j]; - } - } -} - -/** - * @note this method is unused in paddle. - */ -template -inline void colMax(Type* result, const Type* data, int dim, int numSamples) { - for (int d = 0; d < dim; ++d) { - Type sm = data[d]; - for (int i = 1; i < numSamples; ++i) { - sm = sm > data[i * dim + d] ? sm : data[i * dim + d]; - } - result[d] = sm; - } -} - -template -inline void decayL1(Type* dst, Type* src, Type* lr, Type lambda, size_t len) { - for (size_t i = 0; i < len; ++i) { - Type& src_val = src[i]; - float nlambda = lr[i] * lambda; - if (src_val > 0) { - dst[i] = ((src_val > nlambda) ? (src_val - nlambda) : 0); - } else { - dst[i] = ((-src_val > nlambda) ? (src_val + nlambda) : 0); - } - } -} - -template -inline void decayL1(Type* dst, Type* src, Type lambda, size_t len) { - for (size_t i = 0; i < len; ++i) { - Type& src_val = src[i]; - if (src_val > 0) { - dst[i] = ((src_val > lambda) ? (src_val - lambda) : 0); - } else { - dst[i] = ((-src_val > lambda) ? 
(src_val + lambda) : 0); - } - } -} -} // namespace naive - -template -inline void addTo(Type* a, const Type* b, size_t len) { - naive::addTo(a, b, len); -} - -template -inline void batchAddTo(Type* a, const Type* b[], int batch, size_t len) { - naive::batchAddTo(a, b, batch, len); -} - -template -inline void colMax(Type* result, const Type* data, int dim, int numSamples) { - naive::colMax(result, data, dim, numSamples); -} - -template -inline void decayL1(Type* dst, Type* src, Type* lr, Type lambda, size_t len) { - naive::decayL1(dst, src, lr, lambda, len); -} - -template -inline void decayL1(Type* dst, Type* src, Type lambda, size_t len) { - naive::decayL1(dst, src, lambda, len); -} - -template -inline bool isPointerAlign(void* ptr) { - return reinterpret_cast(ptr) % AlignSize == 0; -} - -inline bool vec_check(size_t len) { -#ifdef __AVX__ - return len % 8 == 0; -#else - return len % 4 == 0; -#endif -} - -namespace internal { -#ifdef __SSE3__ -void addToImpl(float* a, const float* b, size_t len); -void batchAddToImpl(float* a, const float* b[], int batch, size_t len); -void colMaxImpl(float* result, const float* data, int dim, int numSamples); -#endif -#ifdef __AVX__ -void decayL1AvxImpl(float* dst, float* src, float lambda, size_t len); -void decayL1AvxImpl( - float* dst, float* src, float* lr, float lambda, size_t len); -#endif -} // namespace internal - -template <> -inline void addTo(float* a, const float* b, size_t len) { -#ifdef __SSE3__ - internal::addToImpl(a, b, len); -#else - naive::addTo(a, b, len); -#endif -} - -template <> -inline void batchAddTo(float* a, const float* b[], int batch, size_t len) { -#ifdef __SSE3__ - internal::batchAddToImpl(a, b, batch, len); -#else - naive::batchAddTo(a, b, batch, len); -#endif -} - -template <> -inline void colMax(float* result, const float* data, int dim, int numSamples) { -#ifdef __SSE3__ - internal::colMaxImpl(result, data, dim, numSamples); -#else - naive::colMax(result, data, dim, numSamples); -#endif -} - 
-template <> -inline void decayL1(float* dst, float* src, float lambda, size_t len) { -#ifdef __AVX__ - internal::decayL1AvxImpl(dst, src, lambda, len); -#else - naive::decayL1(dst, src, lambda, len); -#endif -} - -template <> -inline void decayL1( - float* dst, float* src, float* lr, float lambda, size_t len) { -#ifdef __AVX__ - internal::decayL1AvxImpl(dst, src, lr, lambda, len); -#else - naive::decayL1(dst, src, lr, lambda, len); -#endif -} - -} // namespace simd - -} // namespace paddle diff --git a/paddle/legacy/math/SparseMatrix.cpp b/paddle/legacy/math/SparseMatrix.cpp deleted file mode 100644 index 6f68252b0a74802946e899e6e13e1da681d76986..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/SparseMatrix.cpp +++ /dev/null @@ -1,864 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "SparseMatrix.h" -#include -#include -#include -#include "hl_gpu.h" -#include "hl_top_k.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -GpuSparseMatrix::GpuSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans) - : Matrix(NULL, height, width, trans, true) { - resize(height, width, nnz, valueType, format); -} - -GpuSparseMatrix::GpuSparseMatrix(GpuMemHandlePtr dataHandle, - hl_sparse_matrix_s_ptr sMatrix, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans, - MemoryHandlePtr sMemoryHandle) - : Matrix(dataHandle, height, width, trans, true) { - CHECK(dataHandle && sMatrix) << "Invalid argument pointer"; - - size_t size = 0; - if (format == SPARSE_CSR) { - size = (height + 1) * sizeof(int) + nnz * sizeof(int); - } else { - size = (width + 1) * sizeof(int) + nnz * sizeof(int); - } - - if (NO_VALUE != valueType) { - size += nnz * sizeof(real); - } - CHECK_LE(size, dataHandle->getSize()); - - sMatrix_ = sMatrix; - - if (sMemoryHandle == NULL) { - sMemoryHandle_ = std::make_shared(dataHandle->getSize()); - } else { - CHECK_EQ(sMemoryHandle->getSize(), dataHandle->getSize()); - sMemoryHandle_ = sMemoryHandle; - } - - elementCnt_ = nnz; - valueType_ = valueType; - format_ = format; - if (format_ == SPARSE_CSR) - sparseResizeCSR(); - else - sparseResizeCSC(); -} - -GpuSparseMatrix::GpuSparseMatrix(hl_sparse_matrix_s_ptr sMatrix, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans, - MemoryHandlePtr sMemoryHandle) - : Matrix(NULL, height, width, trans, true) { - CHECK(sMatrix) << "Invalid argument pointer"; - sMatrix_ = sMatrix; - sMemoryHandle_ = sMemoryHandle; - elementCnt_ = nnz; - format_ = format; - valueType_ = valueType; -} - -GpuSparseMatrix::GpuSparseMatrix(real* value, - int* rows, - int* cols, - size_t height, - size_t width, - size_t nnz, - 
SparseValueType valueType, - SparseFormat format, - bool trans) - : Matrix(NULL, height, width, trans, true) { - size_t size = 0; - if (format == SPARSE_CSR) { - size = (height + 1) * sizeof(int) + nnz * sizeof(int); - } else { - size = (width + 1) * sizeof(int) + nnz * sizeof(int); - } - - if (NO_VALUE != valueType) { - size += nnz * sizeof(real); - } - elementCnt_ = nnz; - valueType_ = valueType; - format_ = format; - - sMemoryHandle_ = std::make_shared(size); - if (format_ == SPARSE_CSR) { - rows_ = reinterpret_cast( - reinterpret_cast(sMemoryHandle_->getBuf())); - cols_ = reinterpret_cast( - reinterpret_cast(sMemoryHandle_->getBuf()) + - (height_ + 1) * sizeof(int)); - if (NO_VALUE != valueType_) { - value_ = reinterpret_cast( - reinterpret_cast(sMemoryHandle_->getBuf()) + - (height_ + 1) * sizeof(int) + elementCnt_ * sizeof(int)); - } else { - value_ = NULL; - } - - if (sMatrix_ == NULL) { - /* construct hl_sparse_matrix_s */ - hl_sparse_matrix_s tmp; - hl_construct_sparse_matrix( - &tmp, - value, - rows, - cols, - HL_SPARSE_CSR, - valueType_ == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE, - height_, - width_, - elementCnt_); - hl_sparse_matrix_s_ptr tmp2(tmp, hl_destruct_sparse_matrix); - sMatrix_ = tmp2; - } - - } else { - cols_ = reinterpret_cast( - reinterpret_cast(sMemoryHandle_->getBuf())); - rows_ = reinterpret_cast( - reinterpret_cast(sMemoryHandle_->getBuf()) + - (width_ + 1) * sizeof(int)); - if (NO_VALUE != valueType_) { - value_ = reinterpret_cast( - reinterpret_cast(sMemoryHandle_->getBuf()) + - (width_ + 1) * sizeof(int) + elementCnt_ * sizeof(int)); - } else { - value_ = NULL; - } - - if (sMatrix_ == NULL) { - /* construct hl_sparse_matrix_s */ - hl_sparse_matrix_s tmp; - hl_construct_sparse_matrix( - &tmp, - value, - rows, - cols, - HL_SPARSE_CSC, - valueType_ == NO_VALUE ? 
HL_NO_VALUE : HL_FLOAT_VALUE, - height_, - width_, - elementCnt_); - hl_sparse_matrix_s_ptr tmp2(tmp, hl_destruct_sparse_matrix); - sMatrix_ = tmp2; - } - } -} - -void GpuSparseMatrix::sparseResizeCSR() { - rows_ = - reinterpret_cast(reinterpret_cast(sMemoryHandle_->getBuf())); - cols_ = - reinterpret_cast(reinterpret_cast(sMemoryHandle_->getBuf()) + - (height_ + 1) * sizeof(int)); - if (NO_VALUE != valueType_) { - value_ = reinterpret_cast( - reinterpret_cast(sMemoryHandle_->getBuf()) + - (height_ + 1) * sizeof(int) + elementCnt_ * sizeof(int)); - } else { - value_ = NULL; - } - - if (sMatrix_ == NULL) { - /* construct hl_sparse_matrix_s */ - hl_sparse_matrix_s tmp; - hl_construct_sparse_matrix( - &tmp, - data_, - memoryHandle_->getSize(), - HL_SPARSE_CSR, - valueType_ == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE, - height_, - width_, - elementCnt_); - hl_sparse_matrix_s_ptr tmp2(tmp, hl_destruct_sparse_matrix); - sMatrix_ = tmp2; - } -} - -void GpuSparseMatrix::sparseResizeCSC() { - cols_ = - reinterpret_cast(reinterpret_cast(sMemoryHandle_->getBuf())); - rows_ = - reinterpret_cast(reinterpret_cast(sMemoryHandle_->getBuf()) + - (width_ + 1) * sizeof(int)); - if (NO_VALUE != valueType_) { - value_ = reinterpret_cast( - reinterpret_cast(sMemoryHandle_->getBuf()) + - (width_ + 1) * sizeof(int) + elementCnt_ * sizeof(int)); - } else { - value_ = NULL; - } - - if (sMatrix_ == NULL) { - /* construct hl_sparse_matrix_s */ - hl_sparse_matrix_s tmp; - hl_construct_sparse_matrix( - &tmp, - memoryHandle_->getBuf(), - memoryHandle_->getSize(), - HL_SPARSE_CSC, - valueType_ == NO_VALUE ? 
HL_NO_VALUE : HL_FLOAT_VALUE, - height_, - width_, - elementCnt_); - hl_sparse_matrix_s_ptr tmp2(tmp, hl_destruct_sparse_matrix); - sMatrix_ = tmp2; - } -} - -void GpuSparseMatrix::resize(size_t newHeight, - size_t newWidth, - size_t newNnz, - SparseValueType valueType, - SparseFormat format) { - if (format == SPARSE_CSR) { - resizeCSR(newHeight, newWidth, newNnz, valueType); - } else { - resizeCSC(newHeight, newWidth, newNnz, valueType); - } -} - -void GpuSparseMatrix::resizeCSR(size_t newHeight, - size_t newWidth, - size_t newNnz, - SparseValueType valueType) { - size_t newSize = (newHeight + 1) * sizeof(int) + newNnz * sizeof(int); - if (NO_VALUE != valueType) { - newSize += newNnz * sizeof(real); - } - - if (NULL == memoryHandle_.get() || newSize > memoryHandle_->getSize()) { - memoryHandle_ = std::make_shared(newSize); - data_ = reinterpret_cast(memoryHandle_->getBuf()); - sMemoryHandle_ = std::make_shared(newSize); - end_ = reinterpret_cast(sMemoryHandle_->getBuf()) + - sMemoryHandle_->getSize(); - sMatrix_ = NULL; - } else if (valueType != valueType_) { - sMatrix_ = NULL; - } else { - /* - * newNnz > elementCnt_ is necessary for the following condition: - * Firstly, height_ is 9 elementCnt_ is 56 - * Secondly, height_ is 11 elementCnt_ is 44 - * ==> height_ is bigger, sMatrix_ will resize, and total item is 44 now - * Then, height_ is 10 elementCnt_ is 52 - * ==> Without newNnz > elementCnt_ condition, sMatrix_ will fail - */ - if ((ssize_t)((newHeight + 1) * sizeof(int)) > - ((char*)cols_ - (char*)rows_) || - newNnz > static_cast(sMatrix_->nnz)) { - sMatrix_ = NULL; - } else if (NO_VALUE == valueType) { - if ((ssize_t)(newNnz * sizeof(int)) > (end_ - (char*)cols_)) { - sMatrix_ = NULL; - } - } else { - if ((ssize_t)(newNnz * sizeof(int)) > ((char*)value_ - (char*)cols_) || - (ssize_t)(newNnz * sizeof(real)) > (end_ - (char*)value_)) { - sMatrix_ = NULL; - } - } - } - - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newNnz; - valueType_ = 
valueType; - format_ = SPARSE_CSR; - - if (sMatrix_ == NULL) { - sparseResizeCSR(); - } -} - -void GpuSparseMatrix::resizeCSC(size_t newHeight, - size_t newWidth, - size_t newNnz, - SparseValueType valueType) { - size_t newSize = (newWidth + 1) * sizeof(int) + newNnz * sizeof(int); - if (NO_VALUE != valueType) { - newSize += newNnz * sizeof(real); - } - - if (NULL == memoryHandle_.get() || newSize > memoryHandle_->getSize()) { - memoryHandle_ = std::make_shared(newSize); - data_ = reinterpret_cast(memoryHandle_->getBuf()); - sMemoryHandle_ = std::make_shared(newSize); - end_ = reinterpret_cast(sMemoryHandle_->getBuf()) + - sMemoryHandle_->getSize(); - sMatrix_ = NULL; - } else if (valueType != valueType_) { - sMatrix_ = NULL; - } else { - /* - * newNnz > elementCnt_ is necessary for the following condition: - * Firstly, height_ is 9 elementCnt_ is 56 - * Secondly, height_ is 11 elementCnt_ is 44 - * ==> height_ is bigger, sMatrix_ will resize, - * and total item is 44 now - * Then, height_ is 10 elementCnt_ is 52 - * ==> Without newNnz > elementCnt_ condition, sMatrix_ will fail - */ - if ((ssize_t)((newWidth + 1) * sizeof(int)) > - ((char*)rows_ - (char*)cols_) || - newNnz > static_cast(sMatrix_->nnz)) { - sMatrix_ = NULL; - } else if (NO_VALUE == valueType) { - if ((ssize_t)(newNnz * sizeof(int)) > (end_ - (char*)rows_)) { - sMatrix_ = NULL; - } - } else { - if ((ssize_t)(newNnz * sizeof(int)) > ((char*)value_ - (char*)rows_) || - (ssize_t)(newNnz * sizeof(real)) > (end_ - (char*)value_)) { - sMatrix_ = NULL; - } - } - } - - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newNnz; - valueType_ = valueType; - format_ = SPARSE_CSC; - - if (sMatrix_ == NULL) { - sparseResizeCSC(); - } -} - -void GpuSparseMatrix::resize(size_t newHeight, size_t newWidth) { - resize(newHeight, newWidth, elementCnt_, valueType_, format_); -} - -MatrixPtr GpuSparseMatrix::getTranspose() { - CHECK(memoryHandle_.get() || sMatrix_) << "not supported"; - if (memoryHandle_.get()) { 
- MatrixPtr copy_T(new GpuSparseMatrix( - std::dynamic_pointer_cast(memoryHandle_), - sMatrix_, - height_, - width_, - elementCnt_, - valueType_, - format_, - true, - sMemoryHandle_)); - return copy_T; - } else { - MatrixPtr copy_T(new GpuSparseMatrix(sMatrix_, - height_, - width_, - elementCnt_, - valueType_, - format_, - true, - sMemoryHandle_)); - return copy_T; - } -} - -void GpuSparseMatrix::copyRow(int offsets, - size_t colNum, - const sparse_non_value_t* row) { - memcpy(cols_ + offsets, row, sizeof(int) * colNum); -} - -void GpuSparseMatrix::copyRow(int offsets, - size_t colNum, - const sparse_float_value_t* row) { - for (size_t j = 0; j < colNum; j++) { - cols_[offsets + j] = row[j].col; - value_[offsets + j] = row[j].value; - } -} - -void GpuSparseMatrix::copyFrom(const Matrix& src, hl_stream_t stream) { - if (auto mat = dynamic_cast(&src)) { - copyFrom(*(const_cast(mat)), stream); - } else if (auto mat = dynamic_cast(&src)) { - copyFrom(*(const_cast(mat)), stream); - } else { - LOG(FATAL) << "Not implemented"; - } -} - -void GpuSparseMatrix::copyFrom(const Matrix& src) { - copyFrom(src, HPPL_STREAM_1); - hl_stream_synchronize(HPPL_STREAM_1); -} - -template -void GpuSparseMatrix::copyFrom(int64_t* ids, - int64_t* indices, - T* data, - hl_stream_t stream) { - CHECK_EQ(format_, SPARSE_CSR); - size_t nnz = 0; - for (size_t i = 0; i < height_; i++) { - int64_t id = ids[i]; - nnz += indices[id + 1] - indices[id]; - } - - resize(height_, - width_, - nnz, - sizeof(T) == sizeof(sparse_non_value_t) ? NO_VALUE : FLOAT_VALUE, - format_); - - rows_[0] = 0; - for (size_t i = 0; i < height_; i++) { - int64_t id = ids[i]; - size_t colNum = indices[id + 1] - indices[id]; - rows_[i + 1] = rows_[i] + colNum; - - T* row = data + indices[id]; - copyRow(rows_[i], colNum, row); - } - - sMatrix_->format = HL_SPARSE_CSR; - sMatrix_->type = valueType_ == NO_VALUE ? 
HL_NO_VALUE : HL_FLOAT_VALUE; - sMatrix_->rows = height_; - sMatrix_->cols = width_; - sMatrix_->nnz = nnz; - hl_memcpy_csr_matrix(sMatrix_.get(), value_, rows_, cols_, stream); -} - -void GpuSparseMatrix::setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) { - CHECK_EQ(format_, SPARSE_CSR); - if (NO_VALUE == valueType_) { - CHECK_LT(row, height_); - CHECK(NULL != cols); - CHECK(NULL == values); - } else { - CHECK_LT(row, height_); - CHECK(NULL != cols); - CHECK(NULL != values); - } - if (0 == row) { - rows_[row] = 0; - } - rows_[row + 1] = rows_[row] + colNum; - - memcpy(cols_ + rows_[row], cols, sizeof(*cols) * colNum); - if (FLOAT_VALUE == valueType_) { - memcpy(value_ + rows_[row], values, sizeof(*values) * colNum); - } - - if (height_ - 1 == row) { - sMatrix_->format = HL_SPARSE_CSR; - sMatrix_->type = valueType_ == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE; - sMatrix_->rows = height_; - sMatrix_->cols = width_; - sMatrix_->nnz = elementCnt_; - hl_memcpy_csr_matrix( - sMatrix_.get(), value_, rows_, cols_, HPPL_STREAM_DEFAULT); - } -} - -SparseValueType GpuSparseMatrix::getValueType() const { return valueType_; } - -void GpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { - CHECK_EQ(format_, SPARSE_CSC); - int nnz = sMatrix_->nnz; - if (memAlloc) { - matTrans = std::make_shared( - width_, height_, nnz, valueType_, format_, false); - } else { - CHECK(matTrans != nullptr); - } - - CpuIVector rows(nnz); - CpuIVector cols(width_ + 1); - CpuIVector cols_full(nnz); - CpuVector value(nnz); - hl_stream_t stream = HPPL_STREAM_1; - hl_memcpy_from_csc_matrix(value.getData(), - nnz, - rows.getData(), - nnz, - cols.getData(), - width_ + 1, - sMatrix_.get(), - stream); - - hl_stream_synchronize(stream); - - /*for every non zero number, get its column index*/ - std::vector dataVec; - for (size_t i = 0; i < width_; i++) { - for (int j = cols.getData()[i]; j < cols.getData()[i + 1]; j++) { - cols_full.getData()[j] = i; - } - } - - 
/*sort row index and column index by the ascending order*/ - for (int i = 0; i < nnz; i++) { - dataVec.emplace_back( - rows.getData()[i], cols_full.getData()[i], value.getData()[i]); - } - std::sort(dataVec.begin(), dataVec.end(), [](Element a, Element b) { - return a.row < b.row || (a.row == b.row && a.col < b.col); - }); - - /*get sorted data, row index, and col index, put them in the right place*/ - cols.resize(height_ + 1); - rows.resize(nnz); - value.resize(nnz); - - cols.getData()[0] = 0; - rows.getData()[0] = dataVec[0].col; - value.getData()[0] = dataVec[0].val; - for (int i = 1; i < nnz; i++) { - if (dataVec[i].row != dataVec[i - 1].row) { - for (int j = dataVec[i - 1].row + 1; j <= dataVec[i].row; j++) { - cols.getData()[j] = i; - } - } - rows.getData()[i] = dataVec[i].col; - value.getData()[i] = dataVec[i].val; - } - cols.getData()[height_] = nnz; - - /*copy back from cpu*/ - GpuSparseMatrixPtr dest = - std::dynamic_pointer_cast(matTrans); - hl_memcpy_csc_matrix((dest->sMatrix_).get(), - value.getData(), - rows.getData(), - cols.getData(), - stream); - hl_stream_synchronize(stream); -} - -void GpuSparseMatrix::mul(const GpuMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT) { - CHECK(a.useGpu_ && b.useGpu_) << "type not match"; - CHECK(!trans_) << "trans not supported"; - real* A_d = (real*)a.getData(); - real* B_d = (real*)b.getData(); - hl_sparse_matrix_s C_d = sMatrix_.get(); - hl_trans_op_t a_trans = a.trans_ ? HPPL_OP_T : HPPL_OP_N; - hl_trans_op_t b_trans = b.trans_ ? 
HPPL_OP_T : HPPL_OP_N; - - if (!a.trans_ && !b.trans_) { - CHECK(height_ == a.getHeight()); - CHECK(width_ == b.getWidth()); - CHECK(a.getWidth() == b.getHeight()); - } else if (a.trans_ && !b.trans_) { - CHECK(height_ == a.getWidth()); - CHECK(width_ == b.getWidth()); - CHECK(a.getHeight() == b.getHeight()); - } else if (!a.trans_ && b.trans_) { - CHECK(height_ == a.getHeight()); - CHECK(width_ == b.getHeight()); - CHECK(a.getWidth() == b.getWidth()); - } else { - LOG(INFO) << "Not support"; - } - int dimM = height_; - int dimN = width_; - int dimK = !b.trans_ ? b.getHeight() : b.getWidth(); - hl_sparse_matrix_mul( - A_d, a_trans, B_d, b_trans, C_d, dimM, dimN, dimK, scaleAB, scaleT); -} - -void GpuSparseMatrix::mul(const Matrix& a, - const Matrix& b, - real scaleAB, - real scaleT) { - const auto a_ptr = dynamic_cast(&a); - const auto b_ptr = dynamic_cast(&b); - if (a_ptr && b_ptr) { - mul(*a_ptr, *b_ptr, scaleAB, scaleT); - } else { - LOG(FATAL) << "not supported"; - } -} - -template -void printBuf(std::ostream& os, T* a, size_t len, const char* name) { - os << "\n: " << name << " ["; - for (size_t i = 0; i < len; i++) { - os << a[i] << " "; - } - os << "]\n"; -} - -void GpuSparseMatrix::print(std::ostream& os) const { - if (format_ == SPARSE_CSC) { - int nnz = sMatrix_->nnz; - IVectorPtr rows = IVector::create(nnz, false); - IVectorPtr cols = IVector::create(width_ + 1, false); - VectorPtr value = Vector::create(nnz, false); - hl_stream_t stream = HPPL_STREAM_DEFAULT; - hl_memcpy_from_csc_matrix(value->getData(), - value->getSize(), - rows->getData(), - rows->getSize(), - cols->getData(), - cols->getSize(), - sMatrix_.get(), - stream); - hl_stream_synchronize(stream); - - printBuf(os, cols->getData(), width_ + 1, "col idx"); - printBuf(os, rows->getData(), elementCnt_, "row idx"); - printBuf(os, value->getData(), elementCnt_, "value"); - } -} - -void GpuSparseMatrix::copyFromCSR(CpuSparseMatrix& src, hl_stream_t stream) { - trans_ = src.trans_; - size_t nnz = 
src.getElementCnt(); - - resize(src.getHeight(), src.getWidth(), nnz, valueType_, src.getFormat()); - // if have different value type, only copy rows and cols - SparseValueType vType = - valueType_ != src.getValueType() ? NO_VALUE : valueType_; - - sMatrix_->format = HL_SPARSE_CSR; - sMatrix_->type = vType == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE; - sMatrix_->rows = height_; - sMatrix_->cols = width_; - sMatrix_->nnz = nnz; - - hl_memcpy_csr_matrix(sMatrix_.get(), - vType == NO_VALUE ? NULL : src.getValue(), - src.getRows(), - src.getCols(), - stream); - - // restore type of sMatrix_ - sMatrix_->type = valueType_ == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE; -} - -void GpuSparseMatrix::copyFromCSC(CpuSparseMatrix& src, hl_stream_t stream) { - trans_ = src.trans_; - size_t nnz = src.getElementCnt(); - - resize(src.getHeight(), src.getWidth(), nnz, valueType_, src.getFormat()); - - // if have different value type, only copy rows and cols - SparseValueType vType = - valueType_ != src.getValueType() ? NO_VALUE : valueType_; - - sMatrix_->format = HL_SPARSE_CSC; - sMatrix_->type = vType == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE; - sMatrix_->rows = height_; - sMatrix_->cols = width_; - sMatrix_->nnz = nnz; - - hl_memcpy_csc_matrix(sMatrix_.get(), - vType == NO_VALUE ? NULL : src.getValue(), - src.getRows(), - src.getCols(), - stream); - - // restore type of sMatrix_ - sMatrix_->type = valueType_ == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE; -} - -void GpuSparseMatrix::copyFrom(GpuSparseMatrix& src, hl_stream_t stream) { - CHECK(trans_ == src.trans_); - CHECK(format_ == src.getFormat()); - resize(src.getHeight(), - src.getWidth(), - elementCnt_, - valueType_, - src.getFormat()); - - size_t rowSize = format_ == SPARSE_CSC ? elementCnt_ : height_ + 1; - size_t colSize = format_ == SPARSE_CSC ? 
width_ + 1 : elementCnt_; - - if (valueType_ == FLOAT_VALUE && src.getValueType() == FLOAT_VALUE) { - hl_memcpy_async( - getValue(), src.getValue(), sizeof(real) * elementCnt_, stream); - } - CHECK(getRows()); - CHECK(src.getRows()); - - hl_memcpy_async(getRows(), src.getRows(), sizeof(int) * rowSize, stream); - hl_memcpy_async(getCols(), src.getCols(), sizeof(int) * colSize, stream); -} - -void GpuSparseMatrix::copyFrom(CpuSparseMatrix& src, hl_stream_t stream) { - if (format_ == SPARSE_CSR) { - copyFromCSR(src, stream); - } else { - copyFromCSC(src, stream); - } -} - -void GpuSparseMatrix::trimFromCSR(const CpuSparseMatrix& src) { - trans_ = src.trans_; - int* srcCols = src.getCols(); - size_t nnz = std::count_if(srcCols, - srcCols + src.getElementCnt(), - [this](size_t n) { return n < this->width_; }); - resize(height_, width_, nnz, valueType_, format_); - - rows_[0] = 0; - size_t index = 0; - for (size_t r = 0; r < height_; ++r) { - for (int i = src.getRows()[r]; i < src.getRows()[r + 1]; ++i) { - if (srcCols[i] < (int)width_) { - cols_[index] = srcCols[i]; - if (valueType_ == FLOAT_VALUE) { - value_[index] = src.getValue()[i]; - } - ++index; - } - } - rows_[r + 1] = index; - } - CHECK_EQ(index, nnz); - - sMatrix_->format = HL_SPARSE_CSR; - sMatrix_->type = valueType_ == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE; - sMatrix_->rows = height_; - sMatrix_->cols = width_; - sMatrix_->nnz = nnz; - - hl_memcpy_csr_matrix(sMatrix_.get(), - valueType_ == NO_VALUE ? 
NULL : value_, - rows_, - cols_, - /*default stream = */ HPPL_STREAM_DEFAULT); -} - -void GpuSparseMatrix::trimFromCSC(const CpuSparseMatrix& src) { - trans_ = src.trans_; - size_t nnz = src.getCols()[width_] - src.getCols()[0]; - resize(height_, width_, nnz, valueType_, format_); - - cols_[0] = 0; - for (size_t i = 0; i < width_; i++) { - cols_[i + 1] = cols_[i] + (int)(src.getRowNum(i)); - } - memcpy(rows_, src.getRows() + src.getCols()[0], sizeof(int) * nnz); - if (valueType_ == FLOAT_VALUE) { - memcpy(value_, src.getValue() + src.getCols()[0], sizeof(real) * nnz); - } - - sMatrix_->format = HL_SPARSE_CSC; - sMatrix_->type = valueType_ == NO_VALUE ? HL_NO_VALUE : HL_FLOAT_VALUE; - sMatrix_->rows = height_; - sMatrix_->cols = width_; - sMatrix_->nnz = nnz; - - hl_memcpy_csc_matrix(sMatrix_.get(), - valueType_ == NO_VALUE ? NULL : value_, - rows_, - cols_, - /*default stream = */ HPPL_STREAM_DEFAULT); -} - -void GpuSparseMatrix::trimFrom(const CpuSparseMatrix& src) { - if (format_ == SPARSE_CSR) { - trimFromCSR(src); - } else { - trimFromCSC(src); - } -} - -void GpuSparseMatrix::addBias(Matrix& b, real scale) { - CHECK(b.getHeight() == 1) << "the Bias should be a vector"; - hl_sparse_matrix_s A_d = sMatrix_.get(); - hl_sparse_matrix_add_bias(A_d, b.getData(), scale); -} - -void GpuSparseMatrix::add3(GpuMatrix* b) { - CHECK(getFormat() != SPARSE_CSC) << "Not supported"; - CHECK(height_ == b->getHeight()); - CHECK(width_ == b->getWidth()); - real* B_d = b->getData(); - hl_sparse_matrix_s A_d = sMatrix_.get(); - hl_sparse_matrix_add_dense(A_d, B_d, height_, width_, 1, 0); -} - -void GpuSparseMatrix::add3(MatrixPtr b) { - if (dynamic_cast(b.get())) { - add3(dynamic_cast(b.get())); - } else { - LOG(FATAL) << "not supported"; - } -} - -void GpuSparseMatrix::zeroMem() { - CHECK(valueType_ == FLOAT_VALUE); - real* value = getValue(); - if (value == NULL) { - LOG(FATAL) << "value is nullptr"; - } - hl_matrix_zero_mem(value, elementCnt_); -} - -void 
GpuSparseMatrix::rowMax(IVector& maxIds, Matrix& maxVal) { -#ifdef PADDLE_WITH_CUDA - CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal"; - size_t numSamples = getHeight(); - size_t beam = maxVal.getWidth(); - CHECK_EQ(maxIds.getSize(), numSamples * beam); - CHECK_EQ(maxVal.getHeight(), numSamples); - CHECK_EQ(format_, SPARSE_CSR) << "Only support SPARSE_CSR"; - - hl_sparse_matrix_top_k(maxVal.getData(), - maxVal.getStride(), - maxIds.getData(), - sMatrix_.get(), - beam, - numSamples); -#endif -} - -template void GpuSparseMatrix::copyFrom(int64_t* ids, - int64_t* indices, - sparse_non_value_t* data, - hl_stream_t stream); -template void GpuSparseMatrix::copyFrom(int64_t* ids, - int64_t* indices, - sparse_float_value_t* data, - hl_stream_t stream); -} // namespace paddle diff --git a/paddle/legacy/math/SparseMatrix.h b/paddle/legacy/math/SparseMatrix.h deleted file mode 100644 index 9181fa29233677d8f4fac503905cc31eb66cb6c1..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/SparseMatrix.h +++ /dev/null @@ -1,286 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#ifndef PADDLE_MOBILE_INFERENCE - -#include -#include "CpuSparseMatrix.h" -#include "Matrix.h" - -namespace paddle { - -typedef std::shared_ptr<_hl_sparse_matrix_s> hl_sparse_matrix_s_ptr; - -class GpuSparseMatrix : public Matrix { - public: - MemoryHandlePtr sMemoryHandle_; - int* rows_; - int* cols_; - real* value_; - const char* end_; /* point to the end of sMemoryHandle_ */ - - hl_sparse_matrix_s_ptr sMatrix_; - SparseValueType valueType_; - SparseFormat format_; - - public: - GpuSparseMatrix(size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType = FLOAT_VALUE, - SparseFormat format_ = SPARSE_CSR, - bool trans = false); - - GpuSparseMatrix(GpuMemHandlePtr dataHandle, - hl_sparse_matrix_s_ptr sMatrix, - size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType = FLOAT_VALUE, - SparseFormat format_ = SPARSE_CSR, - bool trans = false, - MemoryHandlePtr sMemoryHandle = NULL); - - GpuSparseMatrix(real* value, - int* rows, - int* cols, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans); - - GpuSparseMatrix(hl_sparse_matrix_s_ptr sMatrix, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans, - MemoryHandlePtr sMemoryHandle); - - protected: - struct Element { - int row; - int col; - real val; - Element(int rowIn, int colIn, real valIn) - : row(rowIn), col(colIn), val(valIn) {} - }; - - public: - ~GpuSparseMatrix() {} - - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format); - - void resize(size_t newHeight, size_t newWidth); - - void sparseResizeCSR(); - - void sparseResizeCSC(); - - void resizeCSR(size_t newHeight, - size_t newWidth, - size_t newNnz, - SparseValueType valueType); - - void resizeCSC(size_t newHeight, - size_t newWidth, - size_t 
newNnz, - SparseValueType valueType); - - void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT); - /// B = A , B.trans = !A.trans - MatrixPtr getTranspose(); - - /// B = A' - void transpose(MatrixPtr& matTrans, bool memAlloc); - - void copyFrom(const Matrix& src); - void copyFrom(const Matrix& src, hl_stream_t stream); - void copyFromCSR(CpuSparseMatrix& src, hl_stream_t stream); - void copyFromCSC(CpuSparseMatrix& src, hl_stream_t stream); - - void copyFrom(const IVector& src) { LOG(FATAL) << "not implemented"; } - void copyFrom(const IVector& src, hl_stream_t stream) { - LOG(FATAL) << "not implemented"; - } - - template - void copyFrom(int64_t* ids, int64_t* indices, T* data, hl_stream_t stream); - - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values); - SparseValueType getValueType() const; - SparseFormat getFormat() const { return format_; } - - const int* getRowCols(size_t x) const { return cols_ + rows_[x]; } - const real* getRowValues(size_t x) const { return value_ + rows_[x]; } - size_t getColNum(size_t x) const { return rows_[x + 1] - rows_[x]; } - void print(std::ostream& os) const; - - /** - * @brief only set value_ of FLOAT_VALUE sparse matrix to zero - */ - void zeroMem(); - - /** - * @brief sparseMatrix += denseMatrix - * - * Named add3 just because add/add2 has been used in BaseMatrix.cu - * and they are not virtual function. - * - * Only add value of same (row, col) index in dense matrix - * and do not use others values. 
- * - * @param[in] b dense matrix - */ - void add3(GpuMatrix* b); - void add3(MatrixPtr b); - - /** - * @brief sparseMatrix[i,j] += bias[j], (j is the col index of sparse matrix) - * - * @param[in] b bias, dense matrix and height = 1 - * @param[in] scale scale of b - */ - void addBias(Matrix& b, real scale); - - /** - * @brief return rows, which is gpu address - */ - int* getRows() const { - CHECK(sMatrix_.get()) << "sMatrix_ is NULL"; - return hl_sparse_matrix_get_rows(sMatrix_.get()); - } - - /** - * @brief return cols, which is gpu address - */ - int* getCols() const { - CHECK(sMatrix_.get()) << "sMatrix_ is NULL"; - return hl_sparse_matrix_get_cols(sMatrix_.get()); - } - - /** - * @brief return value, which is gpu address - */ - real* getValue() const { - CHECK(sMatrix_.get()) << "sMatrix_ is NULL"; - return hl_sparse_matrix_get_value(sMatrix_.get()); - } - - /** - * @brief return value_ of sparse matrix - * - * Some times CpuSparseMatrix maybe Matrix, - * if getValue, must dynamic_cast to CpuSparseMatrix, - * getData is convenient to get value - */ - real* getData() { return getValue(); } - const real* getData() const { return getValue(); } - - /** - * @brief Get top k value of each row in sparse matrix. - * - * Store the value in maxVal and theirs index in maxIds. 
- * k = maxVal.width - * - * @param[out] maxIds index of top k - * @param[out] maxVal value of top k - */ - void rowMax(IVector& maxIds, Matrix& maxVal); - - protected: - void sparseResize(); - - void copyRow(int offsets, size_t colNum, const sparse_non_value_t* row); - void copyRow(int offsets, size_t colNum, const sparse_float_value_t* row); - - public: - void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); - - void copyFrom(CpuSparseMatrix& src, hl_stream_t stream); - void copyFrom(GpuSparseMatrix& src, hl_stream_t stream); - - void trimFrom(const CpuSparseMatrix& src); - void trimFromCSR(const CpuSparseMatrix& src); - void trimFromCSC(const CpuSparseMatrix& src); - - // BaseMatrixT interface - public: - bool isSparse() const { return true; } - - private: - using Matrix::mul; - using Matrix::copyFrom; - using Matrix::rowMax; - using Matrix::print; - using Matrix::subMatrix; -}; - -} // namespace paddle - -#else - -#include "CpuSparseMatrix.h" - -namespace paddle { - -class GpuSparseMatrix : public Matrix { - public: - GpuSparseMatrix(size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType = FLOAT_VALUE, - SparseFormat format_ = SPARSE_CSR, - bool trans = false) - : Matrix(NULL, height, width, trans, false) {} - - GpuSparseMatrix(real* value, - int* rows, - int* cols, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType, - SparseFormat format, - bool trans) - : Matrix(NULL, height, width, trans, true) {} - - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format) {} - void resize(size_t newHeight, size_t newWidth) {} - MatrixPtr getTranspose() { return nullptr; } - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) {} -}; - -} // namespace paddle - -#endif diff --git a/paddle/legacy/math/SparseRowMatrix.cpp b/paddle/legacy/math/SparseRowMatrix.cpp 
deleted file mode 100644 index 39bcdf22984db766283a3b4fbf56f224f730c5f8..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/SparseRowMatrix.cpp +++ /dev/null @@ -1,282 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "SparseRowMatrix.h" -#include "CpuSparseMatrix.h" - -#include - -#include "paddle/legacy/utils/Logging.h" - -#include "SIMDFunctions.h" - -#include "paddle/legacy/utils/Thread.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -const unsigned int SparseRowCpuMatrix::kUnusedId_ = -1U; - -void SparseRowCpuMatrix::init(size_t height, size_t width) { - height_ = height; - if (!indexDictHandle_) { - indexDictHandle_.reset(new IndexDict); - indexDictHandle_->globalIndices.assign(height, kUnusedId_); - } - localIndices_ = &indexDictHandle_->localIndices; - globalIndices_ = indexDictHandle_->globalIndices.data(); -} - -void SparseRowCpuMatrix::mul(CpuSparseMatrix* a, - CpuMatrix* b, - real scaleAB, - real scaleT) { - CpuMatrix::mul(a, b, this, scaleAB, scaleT); -} - -void SparseRowCpuMatrix::copyFrom(const real* src, size_t size) { - LOG(FATAL) << "This should not be called"; -} - -void SparseRowCpuMatrix::zeroMem() { - apply([](real* buf, size_t len) { memset(buf, 0, sizeof(real) * len); }); - clearRows(); -} - -void SparseRowCpuMatrix::applyL1(real learningRate, real decayRate) { - apply([=](real* buf, size_t len) { - CpuVector value(0, nullptr); - 
value.subVecFrom(buf, 0, len); - value.applyL1(learningRate, decayRate); - }); -} - -void SparseRowCpuMatrix::sgdUpdate(BaseMatrix& value, - IVector& t0, - real learningRate, - int currentTime, - real decayRate, - bool useL1, - bool fini) { - std::vector& localIndices = indexDictHandle_->localIndices; - - // t0 and value are vectors - CHECK_EQ(t0.getSize(), this->height_); - CHECK_EQ(value.width_, this->height_ * this->width_); - - if (decayRate == 0.0f) { - if (fini) { - return; - } - - for (size_t i = 0; i < localIndices.size(); ++i) { - real* g = getLocalRow(i); - real* v = value.rowBuf(localIndices[i]); - for (size_t j = 0; j < this->width_; ++j) { - v[j] -= learningRate * g[j]; - } - } - return; - } // else - - if (useL1) { // L1 decay - if (fini) { - for (size_t i = 0; i < this->height_; ++i) { - real* v = value.rowBuf(i); - int* t = t0.getData() + i; - if (t[0] < currentTime) { - // W(t0) -> W(t+1) - int tDiff = currentTime - t[0]; - real delta = tDiff * learningRate * decayRate; - simd::decayL1(v, v, delta, this->width_); - } - } - return; - } // else - - for (size_t i = 0; i < localIndices.size(); ++i) { - real* g = getLocalRow(i); - real* v = value.rowBuf(localIndices[i]); - int* t = t0.getData() + localIndices[i]; - if (t[0] < currentTime) { - // W(t0) -> W(t) - int tDiff = currentTime - t[0]; - real delta = tDiff * learningRate * decayRate; - simd::decayL1(v, v, delta, this->width_); - } - - // W(t) -> W(t+1) - for (size_t j = 0; j < this->width_; ++j) { - v[j] -= learningRate * g[j]; - } - simd::decayL1(v, v, learningRate * decayRate, this->width_); - - // state update to t+1 - t[0] = currentTime + 1; - } - - } else { // L2 decay - if (fini) { - for (size_t i = 0; i < this->height_; ++i) { - real* v = value.rowBuf(i); - int* t = t0.getData() + i; - if (t[0] < currentTime) { - // W(t0) -> W(t+1) - int tDiff = currentTime - t[0]; - real recip = 1.0f / (1.0f + tDiff * learningRate * decayRate); - for (size_t j = 0; j < this->width_; ++j) { - v[j] *= 
recip; - } - } - } - return; - } // else - - real recipDecay = 1.0f / (1.0f + learningRate * decayRate); - - for (size_t i = 0; i < localIndices.size(); ++i) { - real* g = getLocalRow(i); - real* v = value.rowBuf(localIndices[i]); - int* t = t0.getData() + localIndices[i]; - if (t[0] < currentTime) { - // W(t0) -> W(t) - int tDiff = currentTime - t[0]; - real recip = 1.0f / (1.0f + tDiff * learningRate * decayRate); - for (size_t j = 0; j < this->width_; ++j) { - v[j] *= recip; - } - } - - // W(t) -> W(t+1) - for (size_t j = 0; j < this->width_; ++j) { - v[j] = recipDecay * (v[j] - learningRate * g[j]); - } - - // state update to t+1 - t[0] = currentTime + 1; - } - } -} - -void SparseRowCpuMatrix::addTo(BaseMatrix& dest, - std::vector& ids, - size_t tid, - size_t numThreads) { - CHECK(!dest.useGpu_); - CHECK_EQ(dest.height_ * dest.width_, this->height_ * this->width_); - - std::vector& localIndices = indexDictHandle_->localIndices; - for (size_t i = 0; i < localIndices.size(); ++i) { - uint32_t id = localIndices[i]; - if (id % numThreads == tid) { - simd::addTo(dest.rowBuf(id), getLocalRow(i), this->width_); - ids.push_back(id); - } - } -} - -void SparseRowCpuMatrix::addTo(SparseRowCpuMatrix& dest, - size_t tid, - size_t numThreads) { - CHECK(!dest.useGpu_); - CHECK_EQ(dest.height_ * dest.width_, this->height_ * this->width_); - - std::vector& localIndices = indexDictHandle_->localIndices; - for (size_t i = 0; i < localIndices.size(); ++i) { - uint32_t id = localIndices[i]; - if (id % numThreads == tid) { - dest.checkIndex(id); - simd::addTo(dest.getRow(id), getLocalRow(i), this->width_); - } - } -} - -void SparseRowCpuMatrix::zeroMemThread(size_t tid, size_t numThreads) { - std::vector& localIndices = indexDictHandle_->localIndices; - for (size_t i = 0; i < localIndices.size(); ++i) { - uint32_t id = localIndices[i]; - if (id % numThreads == tid) { - memset(this->getLocalRow(i), 0, this->width_ * sizeof(real)); - } - } -} - -void 
SparseAutoGrowRowCpuMatrix::mul(CpuSparseMatrix* a, - CpuMatrix* b, - real scaleAB, - real scaleT) { - CpuMatrix::mul( - a, b, this, scaleAB, scaleT); -} - -void CacheRowCpuMatrix::mul(CpuSparseMatrix* a, - CpuMatrix* b, - real scaleAB, - real scaleT) { - CpuMatrix::mul(a, b, this, scaleAB, scaleT); -} - -void SparsePrefetchRowCpuMatrix::addRows(const unsigned int* ids, size_t len) { - std::vector& localIndices = indexDictHandle_->localIndices; - for (size_t i = 0; i < len; i++) { - CHECK_LT(*(ids + i), this->getHeight()) - << "id:" << *(ids + i) << "Height:" << this->getHeight() - << "sparse id value exceeds the max input dimension, " - << "it could be caused invalid input data samples"; - } - localIndices.insert(localIndices.end(), ids, ids + len); -} - -void SparsePrefetchRowCpuMatrix::addRows(MatrixPtr input) { - CpuSparseMatrix* mat = dynamic_cast(input.get()); - CHECK(mat) << "only support sparse matrix"; - addRows(reinterpret_cast(mat->getCols()), - mat->getElementCnt()); -} - -void SparsePrefetchRowCpuMatrix::addRows(IVectorPtr ids) { - std::vector& localIndices = indexDictHandle_->localIndices; - size_t numSamples = ids->getSize(); - int* index = ids->getData(); - for (size_t i = 0; i < numSamples; ++i) { - if (index[i] == -1) continue; - - unsigned int id = (unsigned int)index[i]; - CHECK_LT(id, this->getHeight()) - << "id:" << id << "Height:" << this->getHeight() - << "sparse id value exceeds the max input dimension, " - << "it could be caused invalid input data samples"; - localIndices.push_back(id); - } -} - -void SparsePrefetchRowCpuMatrix::setupIndices() { - auto& localIndices = indexDictHandle_->localIndices; - uniqueIds(localIndices); - // for each sparse row - for (size_t id = 0; id < localIndices.size(); ++id) { - globalIndices_[localIndices[id]] = id; // sparse row -> local id - } - checkStoreSize(); -} - -void SparseRowCpuMatrix::checkIndices() { - std::vector& localIndices = indexDictHandle_->localIndices; - for (size_t i = 0; i < 
localIndices.size(); ++i) { - CHECK_EQ(globalIndices_[localIndices[i]], i); - } - checkStoreSize(); -} - -} // namespace paddle diff --git a/paddle/legacy/math/SparseRowMatrix.h b/paddle/legacy/math/SparseRowMatrix.h deleted file mode 100644 index e206747a41c9f3a0f058bf3b0a94472bf4b2c349..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/SparseRowMatrix.h +++ /dev/null @@ -1,341 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#ifndef PADDLE_MOBILE_INFERENCE - -#include -#include -#include -#include "Matrix.h" -#include "RowBuffer.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -/** - * Sparse Row - */ -class SparseRowCpuMatrix : public CpuMatrix { - public: - struct IndexDict { - // In the following, global id means the row id in the original matrix. - // Local id means the row id in the local storage which only contains - // the sparse rows. - std::vector localIndices; // local id -> global id - std::vector globalIndices; // global id -> local id - }; - typedef std::shared_ptr IndexDictPtr; - - /// heightStore is max number of rows of the sparse matrix. 
- SparseRowCpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - IndexDictPtr indexDictHandle = nullptr, - bool trans = false) - : CpuMatrix(nullptr, height, width, trans), - indexDictHandle_(indexDictHandle) { - init(height, width); - buf_.reset(new RowBuffer(dataHandle, width)); - } - - virtual ~SparseRowCpuMatrix() {} - - public: - /** - * Get the row buf - * - * @param row row id in the original matrix - */ - real* getRow(size_t row) { - CHECK_NE(globalIndices_[row], kUnusedId_); - return getLocalRow(globalIndices_[row]); - } - - /** - * Get the row buf - * - * @param row row id in local storage - */ - real* getLocalRow(size_t row) { return buf_->getWithAutoGrowth(row); } - - /** - * reserve the storage for rows according to current size of - * indexDictHandle. - * - * This is only used when SparseRowCpuMatrix is constructed with - * indexDictHandle. - */ - void reserveStore() { buf_->resize(localIndices_->size()); } - - // row is the row id in the original matrix - virtual real* getRowBuf(size_t row) { return getRow(row); } - - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - - /** - * Fill data according to row indexs added, setup indices inside. - * - * *src* and *size* are data and size of normal dense CpuMatrix. - */ - virtual void copyFrom(const real* src, size_t size); - virtual void zeroMem(); - - /** - * apply L1 to all sparse rows, should be apply after indices ready. - */ - virtual void applyL1(real learningRate, real decayRate); - - void clearIndices() { clearRows(); } - void zeroMemThread(size_t tid, size_t numThreads); - - /** - * value -= grad * learningRate, this is gradient. - * - * If L1 decay set use L1, else if L2 set use L2, otherwise no decay atall. - * - * t0 is a int vector used by L1/L2 decay, size = height of parameter - * matrix, - * store the time that each weight row last updated. - * - * Time is batchId, currentTime is current batchId. 
- * - * While pass finished, caller should call this func one more time - * with (fini=true) to let weight decay catch up current time. - */ - void sgdUpdate(BaseMatrix& value, - IVector& t0, - real learningRate, - int currentTime, - real decayRate, - bool useL1, - bool fini = false); - - /** - * merge rows in *this* to *dest* for designated thread - * - * values add to *dest* matrix - * - * ids occured in *this* append to *ids* - * filtered by (id % numThreads == tid) - */ - void addTo(BaseMatrix& dest, - std::vector& ids, - size_t tid, - size_t numThreads); - - /** - * the second version addTo(), *dest* is a SparseRowCpuMatrix. - * - * The dest's indices should be setup already, addTo() will - * check src ids is exist in dest's indices. - */ - void addTo(SparseRowCpuMatrix& dest, size_t tid, size_t numThreads); - - const IndexDictPtr& getIndexDictHandle() const { return indexDictHandle_; } - - /** - * check all local and global indices consistency - */ - void checkIndices(); - /** - * check whether row *i* exist in indices - */ - void checkIndex(size_t i) { - size_t localId = globalIndices_[i]; - CHECK_LT(localId, localIndices_->size()); - CHECK_EQ((*localIndices_)[localId], i); - } - - std::vector& getLocalIndices() const { - return indexDictHandle_->localIndices; - } - - protected: - template - void apply(Func f) { - f(buf_->data(), localIndices_->size() * width_); - } - - void init(size_t height, size_t width); - - /// clear row indices. - void clearRows() { - for (auto id : *localIndices_) { - globalIndices_[id] = kUnusedId_; - } - localIndices_->clear(); - buf_->clear(); - } - - inline void checkStoreSize() { - if (buf_->isAutoGrowth()) { - if (buf_->getRowCount() > 0.5 * height_) { - LOG(WARNING) << "There are more than 0.5*height (" - << localIndices_->size() << ") rows are used for sparse " - << "update, which is not efficient. 
Considering not use " - << "sparse_update."; - } - } else { - CHECK_LE(localIndices_->size(), buf_->getRowCount()); - } - } - - std::unique_ptr buf_; - IndexDictPtr indexDictHandle_; - std::vector* localIndices_; // =&indexDictHandle_->localIndices - unsigned int* globalIndices_; // =indexDictHandle_->globalIndices.data(); - static const unsigned int kUnusedId_; -}; - -class SyncThreadPool; - -/// For prefetching parameters from remote Parameter server -class SparsePrefetchRowCpuMatrix : public SparseRowCpuMatrix { - public: - SparsePrefetchRowCpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - IndexDictPtr indexDictHandle = nullptr, - SyncThreadPool* pool = nullptr, - bool trans = false) - : SparseRowCpuMatrix(dataHandle, height, width, indexDictHandle, trans), - pool_(pool) {} - - /** - * Extract feature ids from *input*, to fill row indexs. - * - * *input* must be sparse matrix. - * - * Can call many times before setup. - */ - void addRows(MatrixPtr input); - void addRows(IVectorPtr ids); - - /** - * setup global indices of SparseRowMatrix after finish add rows. 
- */ - void setupIndices(); - - protected: - void addRows(const unsigned int* ids, size_t len); - SyncThreadPool* pool_; -}; - -class SparseAutoGrowRowCpuMatrix : public SparseRowCpuMatrix { - public: - SparseAutoGrowRowCpuMatrix(size_t height, - size_t width, - IndexDictPtr indexDictHandle = nullptr, - bool trans = false) - : SparseRowCpuMatrix(nullptr, height, width, indexDictHandle, trans) {} - - real* getRow(size_t row) { - auto id = globalIndices_[row]; - if (id == kUnusedId_) { - id = globalIndices_[row] = localIndices_->size(); - localIndices_->push_back(row); - checkStoreSize(); - } - return getLocalRow(id); - } - - virtual real* getRowBuf(size_t row) { return getRow(row); } - - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); -}; - -class CacheRowCpuMatrix : public SparseAutoGrowRowCpuMatrix { - public: - CacheRowCpuMatrix(size_t height, - size_t width, - IndexDictPtr indexDictHandle = nullptr, - bool trans = false) - : SparseAutoGrowRowCpuMatrix(height, width, indexDictHandle, trans), - sourceData_(nullptr) {} - - void setSourceData(CpuVectorPtr sourceVec) { - sourceDataVec_ = sourceVec; - sourceData_ = sourceVec->getData(); - } - - real* getRow(size_t row) { - auto id = globalIndices_[row]; - if (id == kUnusedId_) { - id = globalIndices_[row] = localIndices_->size(); - localIndices_->push_back(row); - checkStoreSize(); - memcpy( - getLocalRow(id), sourceData_ + width_ * row, sizeof(float) * width_); - } - return getLocalRow(id); - } - - virtual real* getRowBuf(size_t row) { return getRow(row); } - - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - - public: - CpuVectorPtr sourceDataVec_; - real* sourceData_; -}; - -/** - * Sparse Row Ids Matrix. - * - * mostly same as CpuMatrix, but maintain sparse row ids occured, - * ids are hashed by worker thread id. 
- */ -class SparseRowIdsCpuMatrix : public CpuMatrix { - public: - SparseRowIdsCpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : CpuMatrix(dataHandle, height, width, trans) {} - - void setNumOfThreads(size_t numOfThreads) { idsArray_.resize(numOfThreads); } - - std::vector& getIds(size_t threadId) { return idsArray_[threadId]; } - - private: - std::vector> idsArray_; -}; - -} // namespace paddle - -#else -namespace paddle { - -class SparseRowCpuMatrix : public CpuMatrix { - public: - void reserveStore() {} - void clearIndices() {} -}; - -class SparsePrefetchRowCpuMatrix : public SparseRowCpuMatrix { - public: - void setupIndices() {} - void addRows(MatrixPtr input) {} - void addRows(IVectorPtr ids) {} -}; - -class SparseAutoGrowRowCpuMatrix : public SparseRowCpuMatrix {}; -class CacheRowCpuMatrix : public SparseAutoGrowRowCpuMatrix {}; -class SparseRowIdsCpuMatrix : public CpuMatrix {}; - -} // namespace paddle - -#endif diff --git a/paddle/legacy/math/Storage.cpp b/paddle/legacy/math/Storage.cpp deleted file mode 100644 index 65d53aeaa926690c7fe9e6fcac7affdfb68fede9..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/Storage.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Storage.h" -#include "Allocator.h" -#include "paddle/legacy/utils/StringUtil.h" -#include "paddle/legacy/utils/Util.h" - -#ifndef PADDLE_MOBILE_INFERENCE -DEFINE_int32(pool_limit_size, - 536870912, - "maximum memory size managed by a memory pool, default is 512M"); -#else -DEFINE_int32(pool_limit_size, 0, "default is 0"); -#endif - -namespace paddle { - -// Initialization StorageEngine singleton. -// Other modules may rely on storage management, -// so StorageEngine need to be initialized before other modules. -static InitFunction __init_storage_engine([]() { StorageEngine::singleton(); }, - std::numeric_limits::max()); - -StorageEngine::StorageEngine() : cpuAllocator_(nullptr) {} - -StorageEngine::~StorageEngine() { - delete cpuAllocator_; - for (auto it : gpuAllocator_) { - delete it; - } -} - -StorageEngine* StorageEngine::singleton() { - static StorageEngine storage; - return &storage; -} - -PoolAllocator* StorageEngine::getGpuAllocator(int deviceId) { - { - // if gpuAllocator_ has been constructed - ReadLockGuard guard(lock_); - if (deviceId < static_cast(gpuAllocator_.size()) && - (gpuAllocator_[deviceId] != nullptr)) { - return gpuAllocator_[deviceId]; - } - } - - { - // Construct gpuAllocator_ - std::lock_guard guard(lock_); - if (deviceId >= static_cast(gpuAllocator_.size())) { - gpuAllocator_.resize(deviceId + 1); - } - if (gpuAllocator_[deviceId] == nullptr) { - std::string name = - "gpu" + str::to_string(deviceId) + std::string("_pool"); - gpuAllocator_[deviceId] = - new PoolAllocator(new GpuAllocator(), FLAGS_pool_limit_size, name); - } - return gpuAllocator_[deviceId]; - } -} - -PoolAllocator* StorageEngine::getCpuAllocator() { - { - // if cpuAllocator_ has been constructed - ReadLockGuard guard(lock_); - if (cpuAllocator_ != nullptr) { - return cpuAllocator_; - } - } - - { - // Construct cpuAllocator_ - std::lock_guard guard(lock_); - if (cpuAllocator_ == nullptr) { - if (FLAGS_use_gpu) { - cpuAllocator_ = new PoolAllocator( - new 
CudaHostAllocator(), FLAGS_pool_limit_size, "cuda_host_pool"); - } else { - cpuAllocator_ = new PoolAllocator( - new CpuAllocator(), FLAGS_pool_limit_size, "cpu_pool"); - } - } - return cpuAllocator_; - } -} - -} // namespace paddle diff --git a/paddle/legacy/math/Storage.h b/paddle/legacy/math/Storage.h deleted file mode 100644 index bd22dde2c85be5ba432cb3a259211c1900a17b6c..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/Storage.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "PoolAllocator.h" -#include "paddle/legacy/utils/Locks.h" - -namespace paddle { - -/** - * @brief Storage manager for multiple devices. 
- */ -class StorageEngine { - public: - /** - * @return Storage singleton - */ - static StorageEngine* singleton(); - - /** - * @return return one gpu allocator by deviceId - */ - PoolAllocator* getGpuAllocator(int deviceId); - - /** - * @return return cpu allocator - */ - PoolAllocator* getCpuAllocator(); - - protected: - StorageEngine(); - ~StorageEngine(); - RWLock lock_; - std::vector gpuAllocator_; - PoolAllocator* cpuAllocator_; -}; - -} // namespace paddle diff --git a/paddle/legacy/math/TensorApply.h b/paddle/legacy/math/TensorApply.h deleted file mode 100644 index 8b642047bffa33b47dfb8ffc8e3fd2a9b7dbae3a..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/TensorApply.h +++ /dev/null @@ -1,211 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -namespace paddle { - -/** - * \brief The tensor evaluator classes. 
- */ -template -class TensorApply { - public: - explicit INLINE TensorApply(const Derived& p) - : data_(p.data_), - stride_(p.stride_), - height_(p.height_), - width_(p.width_), - useGpu_(p.useGpu_) {} - - INLINE T apply(int i, int j) const { return data_[i * stride_ + j]; } - INLINE T apply(int index) const { return data_[index]; } - INLINE T& applyRef(int i, int j) { return data_[i * stride_ + j]; } - INLINE T& applyRef(int index) { return data_[index]; } - - INLINE size_t getWidth() const { return width_; } - INLINE size_t getHeight() const { return height_; } - INLINE bool isContiguous() const { return stride_ == width_ || height_ == 1; } - INLINE bool useGpu() const { return useGpu_; } - - T* data_; - size_t stride_; - size_t height_; - size_t width_; - bool useGpu_; -}; - -/** - * \brief The tensor evaluator classes. - * evaluator for rvalues - */ -template -class TensorApply { - public: - explicit INLINE TensorApply(const Derived& p) - : data_(p.data_), - stride_(p.stride_), - height_(p.height_), - width_(p.width_), - useGpu_(p.useGpu_) {} - - INLINE T apply(int i, int j) const { return data_[i * stride_ + j]; } - INLINE T apply(int index) const { return data_[index]; } - - INLINE size_t getWidth() const { return width_; } - INLINE size_t getHeight() const { return height_; } - INLINE bool isContiguous() const { return stride_ == width_ || height_ == 1; } - INLINE bool useGpu() const { return useGpu_; } - - const T* data_; - size_t stride_; - size_t height_; - size_t width_; - bool useGpu_; -}; - -template -class TensorApply, T> { - public: - explicit TensorApply(const TensorExpression& expr) - : expr_(expr.derived()) {} - - INLINE T apply(int i, int j) const { return expr_.apply(i, j); } - INLINE T apply(int index) const { return expr_.apply(index); } - - INLINE size_t getWidth() const { return expr_.getWidth(); } - INLINE size_t getHeight() const { return expr_.getHeight(); } - INLINE bool isContiguous() const { return expr_.isContiguous(); } - INLINE bool 
useGpu() const { return expr_.useGpu(); } - - TensorApply expr_; -}; - -/** - * \brief The unary expression evaluator classes. - */ -template -class TensorApply, T> { - public: - explicit INLINE TensorApply(const TensorUnaryOp& expr) - : op_(expr.op_), expr_(expr.expr_) {} - - INLINE T apply(int i, int j) const { return op_(expr_.apply(i, j)); } - INLINE T apply(int index) const { return op_(expr_.apply(index)); } - - INLINE size_t getWidth() const { return expr_.getWidth(); } - INLINE size_t getHeight() const { return expr_.getHeight(); } - INLINE bool isContiguous() const { return expr_.isContiguous(); } - INLINE bool useGpu() const { return expr_.useGpu(); } - - const OP op_; - TensorApply expr_; -}; - -/** - * \brief The binary expression evaluator classes. - */ -template -class TensorApply, T> { - public: - explicit INLINE TensorApply( - const TensorBinaryOp& expr) - : op_(expr.op_), lhs_(expr.lhs_), rhs_(expr.rhs_) { -#ifndef __CUDA_ARCH__ - CHECK_EQ(lhs_.getWidth(), rhs_.getWidth()); - CHECK_EQ(lhs_.getHeight(), rhs_.getHeight()); - CHECK_EQ(lhs_.useGpu(), rhs_.useGpu()); -#endif - } - - INLINE T apply(int i, int j) const { - return op_(lhs_.apply(i, j), rhs_.apply(i, j)); - } - INLINE T apply(int index) const { - return op_(lhs_.apply(index), rhs_.apply(index)); - } - - INLINE size_t getWidth() const { return lhs_.getWidth(); } - INLINE size_t getHeight() const { return rhs_.getHeight(); } - INLINE bool isContiguous() const { - return lhs_.isContiguous() && rhs_.isContiguous(); - } - INLINE bool useGpu() const { return lhs_.useGpu(); } - - const OP op_; - TensorApply lhs_; - TensorApply rhs_; -}; - -/** - * \brief The ternary expression evaluator classes. 
- */ -template -class TensorApply, T> { - public: - explicit INLINE TensorApply( - const TensorTernaryOp& expr) - : expr1_(expr.expr1_), expr2_(expr.expr2_), expr3_(expr.expr3_) { -#ifndef __CUDA_ARCH__ - CHECK_EQ(expr1_.getWidth(), expr2_.getWidth()); - CHECK_EQ(expr1_.getWidth(), expr3_.getWidth()); - CHECK_EQ(expr1_.getHeight(), expr2_.getHeight()); - CHECK_EQ(expr1_.getHeight(), expr3_.getHeight()); - CHECK_EQ(expr1_.useGpu(), expr2_.useGpu()); - CHECK_EQ(expr1_.useGpu(), expr3_.useGpu()); -#endif - } - - INLINE T apply(int i, int j) const { - return expr1_.apply(i, j) ? expr2_.apply(i, j) : expr3_.apply(i, j); - } - INLINE T apply(int index) const { - return expr1_.apply(index) ? expr2_.apply(index) : expr3_.apply(index); - } - - INLINE size_t getWidth() const { return expr1_.getWidth(); } - INLINE size_t getHeight() const { return expr1_.getHeight(); } - INLINE bool isContiguous() const { - return expr1_.isContiguous() && expr2_.isContiguous() && - expr3_.isContiguous(); - } - INLINE bool useGpu() const { return expr1_.useGpu(); } - - TensorApply expr1_; - TensorApply expr2_; - TensorApply expr3_; -}; - -/** - * \brief The const expression evaluator classes. 
- */ -template -class TensorApply, T> { - public: - explicit INLINE TensorApply(const TensorConstant& expr) - : op_(expr.op_), expr_(expr.expr_) {} - - INLINE T apply(int i, int j) const { return op_(i, j); } - INLINE T apply(int index) const { return op_(index); } - - INLINE size_t getWidth() const { return expr_.getWidth(); } - INLINE size_t getHeight() const { return expr_.getHeight(); } - INLINE bool isContiguous() const { return true; } - INLINE bool useGpu() const { return expr_.useGpu(); } - - const OP op_; - TensorApply expr_; -}; - -} // namespace paddle diff --git a/paddle/legacy/math/TensorAssign.h b/paddle/legacy/math/TensorAssign.h deleted file mode 100644 index efbfce6c4f88197f18285e3679698b8bbb1ed3b8..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/TensorAssign.h +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -/** - * \brief Tensor Assign Expression(return by lazyAssign, - * and evaluated by AssignEvaluate) - */ -template -class TensorAssignOp { - public: - explicit TensorAssignOp(const LhsType& lhs, const RhsType& rhs) - : lhs_(lhs), rhs_(rhs) { -#ifndef __CUDA_ARCH__ - CHECK_EQ(lhs_.getWidth(), rhs_.getWidth()); - CHECK_EQ(lhs_.getHeight(), rhs_.getHeight()); - CHECK_EQ(lhs_.useGpu(), rhs_.useGpu()); -#endif - } - - INLINE void apply(const int i, const int j) { - lhs_.applyRef(i, j) = rhs_.apply(i, j); - } - INLINE void apply(const int index) { - lhs_.applyRef(index) = rhs_.apply(index); - } - - INLINE size_t getWidth() const { return lhs_.getWidth(); } - INLINE size_t getHeight() const { return rhs_.getHeight(); } - INLINE bool isContiguous() const { - return lhs_.isContiguous() && rhs_.isContiguous(); - } - INLINE bool useGpu() const { return lhs_.useGpu(); } - - private: - TensorApply lhs_; - TensorApply rhs_; -}; - -template -void AssignCpuEvaluate(int height, - int width, - bool isContiguous, - Assign&& assign, - AssignOp&&... args) { - if (isContiguous) { - int size = height * width; - for (int index = 0; index < size; index++) { - assign.apply(index); - __attribute__((unused)) int dummy[] = {(((args)).apply(index), 0)...}; - } - } else { - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - assign.apply(i, j); - __attribute__((unused)) int dummy[] = {(((args)).apply(i, j), 0)...}; - } - } - } -} - -#ifdef __NVCC__ -template -__global__ void AssignGpuEvaluate1(const int border, - Assign assign, - AssignOp... args) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < border) { - assign.apply(idx); - __attribute__((unused)) int dummy[] = {(((args)).apply(idx), 0)...}; - } -} - -template -__global__ void AssignGpuEvaluate2(const int height, - const int width, - Assign assign, - AssignOp... 
args) { - const int colIdx = blockIdx.x * blockDim.x + threadIdx.x; - const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y; - for (int i = rowIdx; i < height; i += gridDim.y * blockDim.y) { - for (int j = colIdx; j < width; j += gridDim.x * blockDim.x) { - assign.apply(i, j); - __attribute__((unused)) int dummy[] = {(((args)).apply(i, j), 0)...}; - } - } -} -#endif - -/** - * \brief Evaluate one or more TensorAssignOp objects. - * - * \note At least one assignment expression is required - */ -template -void AssignEvaluate(Assign&& assign, AssignOp&&... args) { - const bool useGpu_ = assign.useGpu(); - bool isContiguous_ = assign.isContiguous(); - const size_t height = assign.getHeight(); - const size_t width = assign.getWidth(); - - const int packSize = sizeof...(args); - const bool packUseGpu[] = {((args)).useGpu()...}; - const bool packIsContiguous[] = {((args)).isContiguous()...}; - const size_t packHeight[] = {((args)).getHeight()...}; - const size_t packWidth[] = {((args)).getWidth()...}; - - for (int i = 0; i < packSize; i++) { - CHECK_EQ(useGpu_, packUseGpu[i]); - CHECK_EQ(height, packHeight[i]); - CHECK_EQ(width, packWidth[i]); - isContiguous_ = isContiguous_ && packIsContiguous[i]; - } - - if (useGpu_) { -#ifdef __NVCC__ - if (isContiguous_) { - int size = height * width; - int blockSize = size <= 1024 ? 
size : 1024; - int gridSize = (size + 1024 - 1) / 1024; - AssignGpuEvaluate1<<>>( - size, assign, args...); - } else { - int blockSizeY = std::min(32, (int)height); - int blockSizeX = (32 / blockSizeY) * 32; - int gridSizeX = std::min(32, (int)(width + blockSizeX - 1) / blockSizeX); - int gridSizeY = std::min(32, (int)(height + blockSizeY - 1) / blockSizeY); - dim3 threads(blockSizeX, blockSizeY); - dim3 grid(gridSizeX, gridSizeY); - AssignGpuEvaluate2<<>>( - height, width, assign, args...); - } - - CHECK_SYNC("AssignEvaluate failed"); -#endif - } else { - AssignCpuEvaluate(height, width, isContiguous_, assign, args...); - } -} - -} // namespace paddle diff --git a/paddle/legacy/math/TensorEvaluate.h b/paddle/legacy/math/TensorEvaluate.h deleted file mode 100644 index 3029dd35fb05c893f99cde0689f816f4257f21c4..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/TensorEvaluate.h +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "hl_base.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -/** - * \brief The tensor cpu evaluate api. 
- */ -template -inline void TensorCpuApply(LeftType& lhs, const RightType& rhs) { - TensorApply lhs_(lhs); - TensorApply rhs_(rhs); - CHECK_EQ(lhs_.getWidth(), rhs_.getWidth()); - CHECK_EQ(lhs_.getHeight(), rhs_.getHeight()); - CHECK_EQ(lhs_.useGpu(), rhs_.useGpu()); - - int height = lhs_.getHeight(); - int width = lhs_.getWidth(); - if (lhs_.isContiguous() && rhs_.isContiguous()) { - int size = height * width; - for (int index = 0; index < size; index++) { - lhs_.applyRef(index) = rhs_.apply(index); - } - } else { - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - lhs_.applyRef(i, j) = rhs_.apply(i, j); - } - } - } -} - -#ifdef __NVCC__ -template -__global__ void TensorElementWiseOp(LeftType lhs, - RightType rhs, - const int border) { - const int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < border) { - lhs.applyRef(idx) = rhs.apply(idx); - } -} - -template -__global__ void TensorElementWiseOp(LeftType lhs, RightType rhs) { - const int colIdx = blockIdx.x * blockDim.x + threadIdx.x; - const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y; - for (int i = rowIdx; i < lhs.getHeight(); i += gridDim.y * blockDim.y) { - for (int j = colIdx; j < lhs.getWidth(); j += gridDim.x * blockDim.x) { - lhs.applyRef(i, j) = rhs.apply(i, j); - } - } -} - -/** - * \brief The tensor gpu evaluate api. - */ -template -inline void TensorGpuApply(LeftType& lhs, const RightType& rhs) { - TensorApply lhs_(lhs); - TensorApply rhs_(rhs); - CHECK_EQ(lhs_.getWidth(), rhs_.getWidth()); - CHECK_EQ(lhs_.getHeight(), rhs_.getHeight()); - CHECK_EQ(lhs_.useGpu(), rhs_.useGpu()); - - int dimM = lhs_.getHeight(); - int dimN = lhs_.getWidth(); - - if (lhs_.isContiguous() && rhs_.isContiguous()) { - int size = dimM * dimN; - int blockSize = size <= 1024 ? 
size : 1024; - int gridSize = (size + 1024 - 1) / 1024; - TensorElementWiseOp<<>>( - lhs_, rhs_, size); - } else { - int blockSizeY = std::min(32, dimM); - int blockSizeX = (32 / blockSizeY) * 32; - int gridSizeX = std::min(32, (dimN + blockSizeX - 1) / blockSizeX); - int gridSizeY = std::min(32, (dimM + blockSizeY - 1) / blockSizeY); - dim3 threads(blockSizeX, blockSizeY); - dim3 grid(gridSizeX, gridSizeY); - TensorElementWiseOp<<>>(lhs_, rhs_); - } - - CHECK_SYNC("TensorGpuApply failed"); -} -#else -template -inline void TensorGpuApply(LeftType& lhs, RightType& rhs) { - LOG(FATAL) << "Since it is gcc compiled, " - "this calculation does not support GPU implementation."; -} -#endif - -} // namespace paddle diff --git a/paddle/legacy/math/TensorExpression.h b/paddle/legacy/math/TensorExpression.h deleted file mode 100644 index 1c6cf07831487165445a3f59931c4ca9196375b9..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/TensorExpression.h +++ /dev/null @@ -1,446 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include "hl_tensor_ops.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -template -class TensorConstant; -template -class TensorUnaryOp; -template -class TensorBinaryOp; -template -class TensorTernaryOp; - -template -class TensorAssignOp; - -/** - * \brief Tensor base class. 
- * - * This is the base class of all Tensor and Expression class. - */ -template -class TensorExpression { - public: - /** - * Element wise unary expression. - */ - template - const TensorUnaryOp unaryExpression( - const UnaryOp& op) const { - return TensorUnaryOp(op, derived()); - } - - const TensorUnaryOp, const Derived, T> operator+( - T p) const { - return unaryExpression(hppl::unary::add_scale(p)); - } - - const TensorUnaryOp, const Derived, T> operator-( - T p) const { - return unaryExpression(hppl::unary::sub_scale(p)); - } - - const TensorUnaryOp, const Derived, T> operator*( - T p) const { - return unaryExpression(hppl::unary::mul_scale(p)); - } - - const TensorUnaryOp, const Derived, T> operator/( - T p) const { - return unaryExpression(hppl::unary::div_scale(p)); - } - - const TensorUnaryOp, const Derived, T> operator-() const { - return unaryExpression(hppl::unary::neg()); - } - - const TensorUnaryOp, const Derived, T> exp() const { - return unaryExpression(hppl::unary::exp_op()); - } - - const TensorUnaryOp, const Derived, T> log() const { - return unaryExpression(hppl::unary::log_op()); - } - - const TensorUnaryOp, const Derived, T> sqrt() const { - return unaryExpression(hppl::unary::sqrt_op()); - } - - const TensorUnaryOp, const Derived, T> square() const { - return unaryExpression(hppl::unary::square()); - } - - const TensorUnaryOp, const Derived, T> reciprocal() - const { - return unaryExpression(hppl::unary::reciprocal()); - } - - const TensorUnaryOp, const Derived, T> abs() const { - return unaryExpression(hppl::unary::abs()); - } - - const TensorUnaryOp, const Derived, T> sign() const { - return unaryExpression(hppl::unary::sign()); - } - - const TensorUnaryOp, const Derived, T> pow(T p) const { - return unaryExpression(hppl::unary::pow_op(p)); - } - - const TensorUnaryOp, const Derived, T> min(T p) const { - return unaryExpression(hppl::unary::min(p)); - } - - const TensorUnaryOp, const Derived, T> max(T p) const { - return 
unaryExpression(hppl::unary::max(p)); - } - - const TensorUnaryOp, const Derived, T> operator==( - T p) const { - return unaryExpression(hppl::unary::cmp_eq(p)); - } - - const TensorUnaryOp, const Derived, T> operator!=( - T p) const { - return unaryExpression(hppl::unary::cmp_ne(p)); - } - - const TensorUnaryOp, const Derived, T> operator<=( - T p) const { - return unaryExpression(hppl::unary::cmp_le(p)); - } - - const TensorUnaryOp, const Derived, T> operator<( - T p) const { - return unaryExpression(hppl::unary::cmp_lt(p)); - } - - const TensorUnaryOp, const Derived, T> operator>=( - T p) const { - return unaryExpression(hppl::unary::cmp_ge(p)); - } - - const TensorUnaryOp, const Derived, T> operator>( - T p) const { - return unaryExpression(hppl::unary::cmp_gt(p)); - } - - const TensorUnaryOp, const Derived, T> operator&&( - T p) const { - return unaryExpression(hppl::unary::and_op(p)); - } - - const TensorUnaryOp, const Derived, T> operator||( - T p) const { - return unaryExpression(hppl::unary::or_op(p)); - } - - /** - * Element wise binary expression. 
- */ - template - const TensorBinaryOp - binaryExpression(const BinaryOp& op, const ExpressionType& expr) const { - return TensorBinaryOp( - op, derived(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator==(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::cmp_eq(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator!=(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::cmp_ne(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator<=(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::cmp_le(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator<(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::cmp_lt(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator>=(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::cmp_ge(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator>(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::cmp_gt(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator&&(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::and_op(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator||(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::or_op(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator+(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::add(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator-(const 
ExpressionType& expr) const { - return binaryExpression(hppl::binary::sub(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator*(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::mul(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - operator/(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::div(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - min(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::min(), expr); - } - - template - const TensorBinaryOp, - const Derived, - const ExpressionType, - T> - max(const ExpressionType& expr) const { - return binaryExpression(hppl::binary::max(), expr); - } - - /** - * Element wise ternary expression. - * - * ternary conditional operator(?: operator). - * The conditional expression returns one of two values depending on - * the result of derived expression. - * If derived expression evaluates to true, then expression1 is evaluated. - * If derived expression evaluates to false, then expression2 is evaluated. 
- */ - template - const TensorTernaryOp - condition(const ExprType1& expr1, const ExprType2& expr2) const { - return TensorTernaryOp( - derived(), expr1, expr2); - } - - template - const TensorTernaryOp< - const Derived, - const TensorConstant, const Derived, T>, - const ExprType, - T> - condition(T p, const ExprType& expr) const { - return condition(constant(p), expr); - } - - template - const TensorTernaryOp< - const Derived, - const ExprType, - const TensorConstant, const Derived, T>, - T> - condition(const ExprType& expr, T p) const { - return condition(expr, constant(p)); - } - - const TensorTernaryOp< - const Derived, - const TensorConstant, const Derived, T>, - const TensorConstant, const Derived, T>, - T> - condition(T p1, T p2) const { - return condition(constant(p1), constant(p2)); - } - - /** - * return a TensorConstant. A TensorConstant object hold a constant value. - */ - const TensorConstant, const Derived, T> constant( - T p) const { - return TensorConstant, const Derived, T>( - hppl::unary::constant(p), derived()); - } - - /** - * return a TensorAssignOp, and use AssignEvaluate to evaluate one or more - * TensorAssignOp objects. 
- */ - template - TensorAssignOp lazyAssign( - const ExpressionType& expr) const { - return TensorAssignOp(derived(), expr); - } - - protected: - const Derived& derived() const { return *static_cast(this); } -}; - -/** - * \brief Unary Operator Expression - */ -template -class TensorUnaryOp - : public TensorExpression, T> { - public: - explicit TensorUnaryOp(const OP op, const ExprType& expr) - : op_(op), expr_(expr) {} - - const OP op_; - const ExprType expr_; -}; - -/** - * \brief Binary Operator Expression - */ -template -class TensorBinaryOp - : public TensorExpression, T> { - public: - explicit TensorBinaryOp(const OP op, const LhsType& lhs, const RhsType& rhs) - : op_(op), lhs_(lhs), rhs_(rhs) {} - - const OP op_; - const LhsType lhs_; - const RhsType rhs_; -}; - -/** - * \brief Ternary Operator Expression - */ -template -class TensorTernaryOp : public TensorExpression< - TensorTernaryOp, - T> { - public: - explicit TensorTernaryOp(const ExprType1& expr1, - const ExprType2& expr2, - const ExprType3& expr3) - : expr1_(expr1), expr2_(expr2), expr3_(expr3) {} - - const ExprType1 expr1_; - const ExprType2 expr2_; - const ExprType3 expr3_; -}; - -/** - * \brief Constant Expression - */ -template -class TensorConstant - : public TensorExpression, T> { - public: - explicit TensorConstant(const OP op, const ExprType& expr) - : op_(op), expr_(expr) {} - - const OP op_; - const ExprType expr_; -}; - -/** - * \brief operator+ overload - * \return a unary operator expression - */ -template -const TensorUnaryOp, const Derived, T> operator+( - T p, const TensorExpression& expr) { - return expr + p; -} - -/** - * \brief operator* overload - * \return a unary operator expression - */ -template -const TensorUnaryOp, const Derived, T> operator*( - T p, const TensorExpression& expr) { - return expr * p; -} - -} // namespace paddle - -#include "TensorApply.h" -#include "TensorEvaluate.h" diff --git a/paddle/legacy/math/TrainingAlgorithmOp.cu 
b/paddle/legacy/math/TrainingAlgorithmOp.cu deleted file mode 100644 index 9e1eaa0f45ae94d12cf7763bbaff632fc473bcc8..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/TrainingAlgorithmOp.cu +++ /dev/null @@ -1,356 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "BaseMatrix.h" -#include "TrainingAlgorithmOp.h" -#include "paddle/legacy/utils/Logging.h" - -#if __cplusplus > 199711L - -#include "TensorAssign.h" - -namespace paddle { - -void sparseMomentumApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& momU, - BaseMatrix& momV, - real alpha, - real beta, - real gamma, - real tau, - real learningRate) { - auto expr1 = momU.lazyAssign(momU - (alpha * gamma * learningRate) * grad); - auto expr2 = - momV.lazyAssign(momV + (tau * alpha * gamma * learningRate) * grad); - auto expr3 = value.lazyAssign((tau / beta + (real)1 / alpha) * momU + - ((real)1 / beta) * momV); - - AssignEvaluate(expr1, expr2, expr3); -} - -void adadeltaApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& accum, - BaseMatrix& accum_update, - BaseMatrix& lr, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate) { - auto expr1 = accum.lazyAssign(rou * accum + ((real)1 - rou) * grad.square()); - auto expr2 = - lr.lazyAssign(((accum_update + epsilon) / (accum + epsilon)).sqrt()); - auto expr3 = accum_update.lazyAssign(rou * accum_update + - ((real)1 - rou) * 
(grad * lr).square()); - auto expr4 = mom.lazyAssign(mom * momentum - - learningRate * lr * (grad + value * decayRate)); - auto expr5 = value.lazyAssign(value + mom); - - AssignEvaluate(expr1, expr2, expr3, expr4, expr5); -} - -void adagradApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& accum_buffer, - BaseMatrix& accum, - BaseMatrix& lr, - real epsilon, - real learningRate, - real momentum, - real decayRate) { - auto expr1 = accum.lazyAssign(accum + grad.square()); - auto expr2 = - lr.lazyAssign((accum_buffer + accum + epsilon).sqrt().reciprocal()); - auto expr3 = mom.lazyAssign(mom * momentum - - learningRate * lr * (grad + value * decayRate)); - auto expr4 = value.lazyAssign(value + mom); - - AssignEvaluate(expr1, expr2, expr3, expr4); -} - -void rmspropApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& g, - BaseMatrix& f, - BaseMatrix& lr, - real accumulatedRou, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate, - bool firstTime) { - auto expr2 = f.lazyAssign(accumulatedRou * f + ((real)1 - rou) * grad); - auto expr3 = lr.lazyAssign((g - f.square() + epsilon).sqrt().reciprocal()); - auto expr4 = mom.lazyAssign(mom * momentum - - learningRate * lr * (grad + value * decayRate)); - auto expr5 = value.lazyAssign(value + mom); - - if (firstTime) { - auto expr1 = g.lazyAssign(accumulatedRou * g + grad.square()); - - AssignEvaluate(expr1, expr2, expr3, expr4, expr5); - } else { - auto expr1 = - g.lazyAssign(accumulatedRou * g + ((real)1 - rou) * grad.square()); - - AssignEvaluate(expr1, expr2, expr3, expr4, expr5); - } -} - -void decayedAdagradApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& accum, - BaseMatrix& lr, - real accumulatedRou, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate, - bool firstTime) { - auto expr2 = lr.lazyAssign((accum + epsilon).sqrt().reciprocal()); - auto expr3 = mom.lazyAssign(mom * 
momentum - - learningRate * lr * (grad + value * decayRate)); - auto expr4 = value.lazyAssign(value + mom); - - if (firstTime) { - auto expr1 = accum.lazyAssign(accumulatedRou * accum + grad.square()); - - AssignEvaluate(expr1, expr2, expr3, expr4); - } else { - auto expr1 = accum.lazyAssign(accumulatedRou * accum + - ((real)1 - rou) * grad.square()); - - AssignEvaluate(expr1, expr2, expr3, expr4); - } -} - -void adamApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, // firse moment - BaseMatrix& v, // second moment - real beta1, - real beta2, - real beta1_power, - real beta2_power, - real epsilon, - real learningRate) { - real alpha = - learningRate * std::sqrt((real)1 - beta2_power) / ((real)1 - beta1_power); - - auto expr1 = mom.lazyAssign(beta1 * mom + ((real)1 - beta1) * grad); - auto expr2 = v.lazyAssign(beta2 * v + ((real)1 - beta2) * grad.square()); - auto expr3 = value.lazyAssign(value - (mom * alpha) / (v.sqrt() + epsilon)); - - AssignEvaluate(expr1, expr2, expr3); -} - -void adamaxApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, // firse moment - BaseMatrix& u, // weighted infinity norm - real beta1, - real beta2, - int64_t step, - real alpha) { - auto expr1 = mom.lazyAssign(beta1 * mom + ((real)1 - beta1) * grad); - auto expr2 = - u.lazyAssign((beta2 * u > grad.abs()).condition(beta2 * u, grad.abs())); - auto expr3 = value.lazyAssign( - value - (alpha / ((real)1 - (real)std::pow(beta1, step))) * (mom / u)); - - AssignEvaluate(expr1, expr2, expr3); -} - -} // namespace paddle - -#else - -namespace paddle { - -void sparseMomentumApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& momU, - BaseMatrix& momV, - real alpha, - real beta, - real gamma, - real tau, - real learningRate) { - /** - * \alpha_t = \alpha_{t-1} / k - * \beta_t = \beta_{t-1} / (1 + \lambda\gamma_t) - * u_t = u_{t-1} - \alpha_t \gamma_t g_t - * v_t = v_{t-1} + \tau_{t-1} \alpha_t \gamma_t g_t - * \tau_t = \tau_{t-1} + \beta_t / \alpha_t - */ - momU -= 
(alpha * gamma * learningRate) * grad; - momV += (tau * alpha * gamma * learningRate) * grad; - value = (tau / beta + (real)1 / alpha) * momU + ((real)1 / beta) * momV; -} - -void adadeltaApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& accum, - BaseMatrix& accum_update, - BaseMatrix& lr, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate) { - // E(g_t^2) = \rou * E(g_{t-1}^2) + (1-\rou) * g^2 - accum = rou * accum + ((real)1 - rou) * grad.square(); - - // learn_rate: sqrt(( E(dx_{t-1}^2) + epsilon ) / ( E(g_t^2) + epsilon )) - lr = ((accum_update + epsilon) / (accum + epsilon)).sqrt(); - - // E(dx_t^2) = \rou * E(dx_{t-1}^2) + (1-\rou) * (-g*learn_rate)^2 - accum_update = rou * accum_update + ((real)1 - rou) * (grad * lr).square(); - - mom = mom * momentum - learningRate * lr * (grad + value * decayRate); - value += mom; -} - -void adagradApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& accum_buffer, - BaseMatrix& accum, - BaseMatrix& lr, - real epsilon, - real learningRate, - real momentum, - real decayRate) { - accum += grad.square(); - lr = (accum_buffer + accum + epsilon).sqrt().reciprocal(); - mom = mom * momentum - learningRate * lr * (grad + value * decayRate); - value += mom; -} - -void rmspropApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& g, - BaseMatrix& f, - BaseMatrix& lr, - real accumulatedRou, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate, - bool firstTime) { - // E(g_t^2) = \rou * E(g_{t-1}^2) + (1-\rou) * g^2 - // For the first time update, make the sum be the current square - // so that the initial estimation of E(g_t^2) will not be too small. 
- if (firstTime) { - g = accumulatedRou * g + grad.square(); - } else { - g = accumulatedRou * g + ((real)1 - rou) * grad.square(); - } - - // E(f_t) = \rou * E(f_{t-1}) + (1-\rou) * g - f = accumulatedRou * f + ((real)1 - rou) * grad; - - // learn_rate = 1/sqrt( ( E(g_t^2) - (E(f_t))^2 + epsilon ) - // Basiclly if the sign of the gradient changes more often, - // the learning rate will be decreased. - lr = (g - f.square() + epsilon).sqrt().reciprocal(); - - mom = mom * momentum - learningRate * lr * (grad + value * decayRate); - value += mom; -} - -void decayedAdagradApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& accum, - BaseMatrix& lr, - real accumulatedRou, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate, - bool firstTime) { - // E(g_t^2) = \rou * E(g_{t-1}^2) + (1-\rou) * g^2 - // For the first time update, make the sum be the current square - // so that the initial estimation of E(g_t^2) will not be too small. - if (firstTime) { - accum = accumulatedRou * accum + grad.square(); - } else { - accum = accumulatedRou * accum + ((real)1 - rou) * grad.square(); - } - - // learn_rate = 1/sqrt( ( E(g_t^2) + epsilon ) - // Basiclly if the bigger the magnitude gradient is, - // the smaller the learning rate will be. 
- lr = (accum + epsilon).sqrt().reciprocal(); - - mom = mom * momentum - learningRate * lr * (grad + value * decayRate); - value += mom; -} - -void adamApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, // firse moment - BaseMatrix& v, // second moment - real beta1, - real beta2, - real beta1_power, - real beta2_power, - real epsilon, - real learningRate) { - real alpha = - learningRate * std::sqrt((real)1 - beta2_power) / ((real)1 - beta1_power); - - // m_t = \beta_1 * m_{t-1} + (1-\beta_1)* g_t; - mom = beta1 * mom + ((real)1 - beta1) * grad; - - // v_t = \beta_2 * v_{t-1} + (1-\beta_2)* g_{t-1}^2 - v = beta2 * v + ((real)1 - beta2) * grad.square(); - - value -= (mom * alpha) / (v.sqrt() + epsilon); -} - -void adamaxApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, // firse moment - BaseMatrix& u, // weighted infinity norm - real beta1, - real beta2, - int64_t step, - real alpha) { - // m_t = \beta_1 * m_{t-1} + (1-\beta_1)* g_t; - mom = beta1 * mom + ((real)1 - beta1) * grad; - - // u_t = max(\beta_2*u_{t-1}, abs(g_t)) - u = (beta2 * u > grad.abs()).condition(beta2 * u, grad.abs()); - - // \theta_t = \theta_{t-1} - (\alpha/(1-\beta_1^t))*m_t/u_t - value -= (alpha / ((real)1 - (real)std::pow(beta1, step))) * (mom / u); -} - -} // namespace paddle - -#endif diff --git a/paddle/legacy/math/TrainingAlgorithmOp.h b/paddle/legacy/math/TrainingAlgorithmOp.h deleted file mode 100644 index 921c2742cfe2576785768da40ab11c94234be966..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/TrainingAlgorithmOp.h +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "BaseMatrix.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -/** - * \brief Sparse Momentum optimizer. - */ -extern void sparseMomentumApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& momU, - BaseMatrix& momV, - real alpha, - real beta, - real gamma, - real tau, - real learningRate); - -/** - * \brief AdaDelta optimizer. - */ -extern void adadeltaApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& sum, - BaseMatrix& sum1, - BaseMatrix& mom, - BaseMatrix& lr, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate); - -/** - * \brief AdaGrad optimizer. - */ -extern void adagradApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& sum, - BaseMatrix& sum1, - BaseMatrix& mom, - BaseMatrix& lr, - real epsilon, - real learningRate, - real momentum, - real decayRate); - -/** - * \brief RMSProp optimizer. - */ -extern void rmspropApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& g, - BaseMatrix& f, - BaseMatrix& mom, - BaseMatrix& lr, - real accumulatedRou, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate, - bool firstTime); - -/** - * \brief Decayed AdaGrad optimizer. - */ -extern void decayedAdagradApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& accum, - BaseMatrix& lr, - real accumulatedRou, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate, - bool firstTime); - -/** - * \brief Adam optimizer. 
- */ -extern void adamApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, - BaseMatrix& v, - real beta1, - real beta2, - real beta1_power, - real beta2_power, - real epsilon, - real learningRate); - -/** - * \brief AdaMax optimizer. - */ -extern void adamaxApply(BaseMatrix& value, - BaseMatrix& grad, - BaseMatrix& mom, // firse moment - BaseMatrix& u, // weighted infinity norm - real beta1, - real beta2, - int64_t step, - real alpha); -} // namespace paddle diff --git a/paddle/legacy/math/Vector.cpp b/paddle/legacy/math/Vector.cpp deleted file mode 100644 index 87f48bb1622f28f8cb53e5afc924f5cadb14c528..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/Vector.cpp +++ /dev/null @@ -1,1091 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Vector.h" -#include "paddle/legacy/utils/Util.h" - -#include -#include "Matrix.h" -#include "hl_gpu.h" -#include "hl_matrix.h" -#include "hl_table_apply.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Thread.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -namespace paddle { - -template -std::shared_ptr> VectorT::create(size_t size, bool useGpu) { - if (useGpu) { - return std::make_shared>(size); - } else { - return std::make_shared>(size); - } -} - -template -std::shared_ptr> VectorT::createParallelVector( - size_t size, bool useGpu, SyncThreadPool* pool) { - if (!useGpu && FLAGS_trainer_count > 1 && FLAGS_enable_parallel_vector && - size >= (size_t)FLAGS_enable_parallel_vector) { - return std::make_shared>( - size, pool ? pool : getGlobalSyncThreadPool()); - } else { - return create(size, useGpu); - } -} - -template -std::shared_ptr> VectorT::create(T* data, - size_t size, - bool useGpu) { - if (useGpu) { - return std::make_shared>(size, data); - } else { - return std::make_shared>(size, data); - } -} - -template -std::shared_ptr> VectorT::create(size_t size, - MemoryHandlePtr memoryHandle, - size_t offset) { - if (auto cpuMemHandle = - std::dynamic_pointer_cast(memoryHandle)) { - return std::make_shared>(size, cpuMemHandle, offset); - } else if (auto gpuMemHandle = - std::dynamic_pointer_cast(memoryHandle)) { - return std::make_shared>(size, gpuMemHandle, offset); - } else { - LOG(FATAL) << "Wrong"; - return NULL; - } -} - -template <> -MatrixPtr VectorT::toOneHotSparseMatrix(size_t idRange, bool useGpu) { - LOG(FATAL) << "Wrong for real vector"; - return nullptr; -} - -template <> -MatrixPtr VectorT::toOneHotSparseMatrix(size_t idRange, bool useGpu) { - size_t height = getSize(); - size_t width = idRange; - MatrixPtr mat = Matrix::createSparseMatrix( - height, idRange, height, NO_VALUE, SPARSE_CSR, false, useGpu); - - CpuIVector cpuIds(height); - cpuIds.copyFrom(*this); - 
int* idData = cpuIds.getData(); - - for (decltype(height) i = 0; i < height; i++) { - const unsigned int id = idData[i]; - CHECK_LT(id, width); - mat->setRow(i, 1, &id, nullptr); - } - return mat; -} - -template <> -std::shared_ptr> VectorT::castToInt() { - std::shared_ptr> ret = IVector::create(this->getSize(), useGpu_); - if (useGpu_) { - hl_vector_cast2int(ret->getData(), this->getData(), this->getSize()); - } else { - for (size_t i = 0; i < getSize(); ++i) { - ret->getData()[i] = int(this->getData()[i]); - } - } - return ret; -} - -template -GpuVectorT::GpuVectorT(size_t size) - : VectorT(size, - std::make_shared(sizeof(T) * size), - 0, /* offset = 0 */ - true /* useGpu = true */) {} - -template -T GpuVectorT::getElement(size_t i) const { - T elem = 0; - hl_memcpy_device2host(&elem, const_cast(&this->getData()[i]), sizeof(T)); - return elem; -} -template -void GpuVectorT::setElement(size_t i, const T& value) { - hl_memcpy_host2device(&this->getData()[i], const_cast(&value), sizeof(T)); -} - -template -T* GpuVectorT::getPoint(const uint64_t beginPos) { - LOG(FATAL) << "Not implemented" << beginPos; - return NULL; -} - -template <> -int GpuVectorT::getAbsSum() { - LOG(FATAL) << "Not implemented"; - return 0; -} - -template <> -int GpuVectorT::getSum() { - LOG(FATAL) << "Not implemented"; - return 0; -} - -template <> -real GpuVectorT::getAbsSum() { - real* A = this->getData(); - real sum = 0; - hl_vector_abs_sum(A, &sum, this->getSize()); - return sum; -} - -template <> -real GpuVectorT::getSum() { - real* A = this->getData(); - real sum = 0; - hl_vector_sum(A, &sum, this->getSize()); - return sum; -} - -template <> -int GpuVectorT::getMax() { - CpuIVector cpuIVec = CpuIVector(this->getSize()); - copyTo(&cpuIVec); - return cpuIVec.getMax(); -} - -template <> -int GpuVectorT::getAbsMax() { - CpuIVector cpuIVec = CpuIVector(this->getSize()); - copyTo(&cpuIVec); - return cpuIVec.getAbsMax(); -} - -template -void GpuVectorT::isEqualTo(const VectorT& b, const T& 
value) { - BaseMatrixT::isEqualTo((BaseMatrixT&)b, value); -} - -template -void GpuVectorT::selectFrom(const VectorT& src, const VectorT& ids) { -#ifdef PADDLE_WITH_CUDA - hl_vector_select_from(this->getData(), - this->getSize(), - src.getData(), - src.getSize(), - ids.getData(), - ids.getSize()); -#endif -} - -template -real gpuRowFunc(Func f, GpuVector& v) { - static ThreadLocal>> local; - if (!*local) { - (*local).reset(new CpuVector(1)); - } - real* A = v.getData(); - f(A, (*local)->getData(), 1, v.getSize()); - return (*local)->getData()[0]; -} - -template <> -real GpuVectorT::getMax() { - return gpuRowFunc(hl_matrix_row_max, *this); -} - -template <> -real GpuVectorT::getAbsMax() { - return std::max(gpuRowFunc(hl_matrix_row_max, *this), - -gpuRowFunc(hl_matrix_row_min, *this)); -} - -template <> -int GpuVectorT::getMin() { - LOG(FATAL) << "Not implemented"; - return 0; -} - -template <> -real GpuVectorT::getMin() { - return gpuRowFunc(hl_matrix_row_min, *this); -} - -template -T GpuVectorT::get(size_t pos) { - T val = (T)0; - hl_memcpy_device2host((void*)&val, (void*)(this->getData() + pos), sizeof(T)); - return val; -} - -template -void GpuVectorT::histogram(std::ostream& os, int type) { - LOG(FATAL) << "Not implemented"; -} - -template -void GpuVectorT::zeroMem() { - BaseMatrixT::zero(); -} - -template -void GpuVectorT::reset(const T& value) { - BaseMatrixT::assign(value); -} - -template -void GpuVectorT::fillSequence() { - LOG(FATAL) << "not implemented"; -} - -template -void GpuVectorT::copyFrom(const VectorT& src) { - src.copyTo(this); -} - -template -void GpuVectorT::copyFrom(const VectorT& src, hl_stream_t stream) { - CHECK_EQ(src.getSize(), this->getSize()); - hl_memcpy_async((void*)this->getData(), - (void*)src.getData(), - sizeof(T) * this->getSize(), - stream); -} - -template -void GpuVectorT::copyFrom(const T* gpuSrc, size_t size) { - CHECK(gpuSrc != NULL); - CHECK_LE(size, this->size_); - - hl_memcpy((void*)this->getData(), (void*)gpuSrc, 
sizeof(T) * size); -} - -template -void GpuVectorT::copyFrom(const T* gpuSrc, size_t size, hl_stream_t stream) { - CHECK(gpuSrc != NULL); - CHECK_LE(size, this->size_); - - hl_memcpy_async( - (void*)this->getData(), (void*)gpuSrc, sizeof(T) * size, stream); -} - -template -void GpuVectorT::copyTo(CpuVectorT* dest) const { - CHECK_EQ(this->getSize(), dest->getSize()); - - hl_memcpy_device2host((void*)dest->getData(), - (void*)this->getData(), - sizeof(T) * this->getSize()); -} - -template -void GpuVectorT::copyTo(GpuVectorT* dest) const { - CHECK_EQ(this->getSize(), dest->getSize()); - - hl_memcpy_device2device((void*)dest->getData(), - (void*)this->getData(), - sizeof(T) * this->getSize()); -} - -template <> -void GpuVectorT::rand() { - LOG(FATAL) << "Not implemented"; -} - -template <> -void GpuVectorT::print(std::ostream& os, size_t num) const { - IVectorPtr dest = IVector::create(this->size_, false); - hl_memcpy_device2host((void*)dest->getData(), - (void*)this->getData(), - sizeof(int) * this->getSize()); - dest->print(os, num); -} - -template <> -void GpuVectorT::print(std::ostream& os, size_t num) const { - VectorPtr dest = Vector::create(this->size_, false); - hl_memcpy_device2host((void*)dest->getData(), - (void*)this->getData(), - sizeof(int) * this->getSize()); - dest->print(os, num); -} - -template <> -void GpuVectorT::printOneElement(std::ostream& os, size_t idx) const { - LOG(FATAL) << "Not implemented"; -} - -template <> -void GpuVectorT::printOneElement(std::ostream& os, size_t idx) const { - LOG(FATAL) << "Not implemented"; -} - -template <> -void CpuVectorT::rand() { - LOG(FATAL) << "Not implemented"; -} -template <> -void GpuVectorT::rand(size_t classNum) { - LOG(FATAL) << "Not implemented"; -} - -template <> -void CpuVectorT::rand(size_t classNum) { - LOG(FATAL) << "Not implemented"; -} - -template <> -void GpuVectorT::rand() { - VectorPtr cPtr = Vector::create(this->size_, false); - cPtr->rand(); - - hl_memcpy_host2device(data_, cPtr->getData(), 
this->size_ * sizeof(real)); -} - -template <> -void GpuVectorT::rand(size_t classNum) { - IVectorPtr cPtr = IVector::create(this->size_, false); - cPtr->rand(classNum); - - hl_memcpy_host2device(data_, cPtr->getData(), this->size_ * sizeof(int)); -} - -template <> -void CpuVectorT::rand(size_t classNum) { - size_t size = this->getSize(); - int* data = this->getData(); - for (size_t i = 0; i < size; i++) { - data[i] = - std::min(classNum - 1, - size_t(::rand() * (1. / ((double)RAND_MAX + 1)) * classNum)); - } -} - -template <> -void CpuVectorT::rand() { - size_t size = this->getSize(); - real* data = this->getData(); - for (size_t i = 0; i < size; i++) { - data[i] = ::rand() * (1. / (double)RAND_MAX); - // data[ii] = ((temp > RAND_MAX/2)? 1 : -1) * - // sqrt( abs((temp-RAND_MAX/2))/(double(RAND_MAX))/2048 ); - } -} - -template -void CpuVectorT::randnorm(real, real) { - LOG(FATAL) << "Not implemented"; -} - -template -void CpuVectorT::uniform(real, real) { - LOG(FATAL) << "Not implemented"; -} - -template -void GpuVectorT::randnorm(real, real) { - LOG(FATAL) << "Not implemented"; -} - -template -void GpuVectorT::uniform(real, real) { - LOG(FATAL) << "Not implemented"; -} - -template <> -void CpuVectorT::randnorm(real mean, real std) { - size_t size = this->getSize(); - real* data = this->getData(); - unsigned int* seed = ThreadLocalRand::getSeed(); - auto rand1 = [&]() { return (1. + ::rand_r(seed)) * (1. / (1. 
+ RAND_MAX)); }; - for (size_t i = 0; i < size - 1; i += 2) { - real r1 = rand1(); - r1 = std::sqrt(-2 * std::log(r1)); - real r2 = rand1(); - data[i] = mean + std * r1 * cos(2 * M_PI * r2); - data[i + 1] = mean + std * r1 * sin(2 * M_PI * r2); - } - real r1 = rand1(); - r1 = std::sqrt(-2 * std::log(r1)); - real r2 = rand1(); - data[size - 1] = mean + std * r1 * cos(2 * M_PI * r2); -} - -template <> -void CpuVectorT::uniform(real left, real right) { - size_t size = this->getSize(); - real* data = this->getData(); - real range = right - left; - unsigned int* seed = ThreadLocalRand::getSeed(); - auto rand1 = [&]() { return ::rand_r(seed) * (1. / (1. + RAND_MAX)); }; - for (size_t i = 0; i < size; ++i) { - data[i] = rand1() * range + left; - } -} - -template <> -void GpuVectorT::randnorm(real mean, real std) { - CpuVector cpuVec = CpuVector(this->getSize()); - cpuVec.randnorm(mean, std); - - hl_memcpy_host2device( - data_, cpuVec.getData(), this->getSize() * sizeof(real)); -} - -template <> -void GpuVectorT::uniform(real left, real right) { - CpuVector cpuVec = CpuVector(this->getSize()); - cpuVec.uniform(left, right); - - hl_memcpy_host2device( - data_, cpuVec.getData(), this->getSize() * sizeof(real)); -} - -template -CpuVectorT::CpuVectorT(size_t size) - : VectorT(size, - std::make_shared(sizeof(T) * size), - 0, /* offset = 0 */ - false /* useGpu = false */) {} - -template -CpuVectorT::CpuVectorT(const VectorT& src) - : VectorT(src.getSize(), - src.getMemoryHandle(), - 0, /* offset = 0 */ - false /* useGpu = false */) { - if (typeid(*this->memoryHandle_.get()) != typeid(CpuMemoryHandle)) { - this->memoryHandle_ = - std::make_shared(sizeof(T) * this->getSize()); - this->data_ = reinterpret_cast(this->memoryHandle_->getBuf()); - } - src.copyTo(this); -} - -template -T CpuVectorT::getAbsSum() { - const T* A = this->getData(); - size_t size = this->getSize(); - T sum = 0; - for (size_t i = 0; i < size; i++) { - sum += (A[i] > 0) ? 
A[i] : -A[i]; - } - return sum; -} - -// cannot use above version, due to precision issue of float -template <> -real CpuVectorT::getAbsSum() { - const real* A = this->getData(); - size_t size = this->getSize(); - double sum = 0; - for (size_t i = 0; i < size; i++) { - sum += (A[i] > 0) ? A[i] : -A[i]; - } - return sum; -} - -template -T CpuVectorT::getSum() { - const T* A = this->getData(); - size_t size = this->getSize(); - T sum = 0; - for (size_t i = 0; i < size; i++) { - sum += A[i]; - } - return sum; -} - -template <> -real CpuVectorT::getSum() { - const real* A = this->getData(); - size_t size = this->getSize(); - double sum = 0; - for (size_t i = 0; i < size; i++) { - sum += A[i]; - } - return sum; -} - -template -T CpuVectorT::get(size_t pos) { - return this->getData()[pos]; -} - -template -T CpuVectorT::getMax() { - const T* A = this->getData(); - size_t size = this->getSize(); - T res = A[0]; - for (size_t i = 1; i < size; i++) { - if (res < A[i]) res = A[i]; - } - return res; -} - -template -T CpuVectorT::getAbsMax() { - const T* A = this->getData(); - size_t size = this->getSize(); - T res = std::abs(A[0]); - for (size_t i = 1; i < size; i++) { - if (res < std::abs(A[i])) res = std::abs(A[i]); - } - return res; -} - -template -T CpuVectorT::getMin() { - const T* A = this->getData(); - size_t size = this->getSize(); - T res = A[0]; - for (size_t i = 1; i < size; i++) { - if (res > A[i]) res = A[i]; - } - return res; -} - -template -void CpuVectorT::isEqualTo(const VectorT& b, const T& value) { - size_t size = this->getSize(); - CHECK_EQ(b.getSize(), size); - - const T* B = b.getData(); - T* A = this->getData(); - for (size_t i = 0; i < size; i++) { - A[i] = (B[i] == value); - } -} - -template -void CpuVectorT::selectFrom(const VectorT& src, const VectorT& ids) { - size_t size = this->getSize(); - CHECK_EQ(ids.getSize(), size); - - const int* indices = ids.getData(); - const T* B = src.getData(); - T* A = this->getData(); - for (size_t i = 0; i < size; 
i++) { - int index = indices[i]; - CHECK_LT(index, (int)src.getSize()); - A[i] = B[index]; - } -} - -static int getSignAndExponentOfFloat(float a) { - uint32_t* pa = reinterpret_cast(&a); - return *pa >> 23; -} - -template -void CpuVectorT::histogram(std::ostream& os, int type) { - LOG(FATAL) << "Not implemented"; -} - -template <> -void CpuVectorT::histogram(std::ostream& os, int type) { - int counters[512]; - memset(counters, 0, sizeof(counters)); - int counterZero = 0; - - const real* A = this->getData(); - size_t size = this->getSize(); - for (size_t i = 0; i < size; i++) { - if (A[i] == 0.0f) { - ++counterZero; - } else { - ++counters[getSignAndExponentOfFloat(A[i])]; - } - } - - int64_t sum = 0; - float sizeNonZero = size - counterZero; - os << "zero:" << counterZero; - for (int i = 0; i < 256; i++) { - int counter = counters[i]; - if (counter) { - os << " 2^" << i - 127 << ":" << counter / sizeNonZero * 100 << "%"; - sum += counter * (i - 127); - } - } - for (int i = 0; i < 256; i++) { - int counter = counters[i + 256]; - if (counter) { - os << " -2^" << i - 127 << ":" << counter / sizeNonZero * 100 << "%"; - sum += counter * (i - 127); - } - } - os << ", nonzero_exponent_avg=" << sum / sizeNonZero; -} - -template -void CpuVectorT::zeroMem() { - memset(this->getData(), 0, sizeof(T) * this->getSize()); -} - -template -void CpuVectorT::reset(const T& value) { - T* A = this->getData(); - size_t size = this->getSize(); - for (size_t i = 0; i < size; i++) { - A[i] = value; - } -} - -template -void CpuVectorT::fillSequence() { - T* A = this->getData(); - size_t size = this->getSize(); - for (size_t i = 0; i < size; i++) { - A[i] = i; - } -} - -template -void CpuVectorT::copyFrom(const VectorT& src) { - src.copyTo(this); -} - -template -void CpuVectorT::copyFrom(const VectorT& src, hl_stream_t stream) { - if (typeid(src) == typeid(GpuVectorT)) { - hl_memcpy_async((void*)this->getData(), - (void*)src.getData(), - sizeof(T) * this->getSize(), - stream); - // There is 
a need to add synchronization to ensure that the data is copied. - hl_stream_synchronize(stream); - } else { - src.copyTo(this); - } -} - -template -void CpuVectorT::copyFrom(const T* hostSrc, size_t size) { - CHECK(hostSrc != NULL); - CHECK_LE(size, this->size_); - memcpy(this->data_, hostSrc, sizeof(T) * size); -} - -template -void CpuVectorT::copyFrom(const T* hostSrc, - size_t size, - hl_stream_t stream) { - (void)stream; - - CHECK(hostSrc != NULL); - CHECK_LE(size, this->size_); - memcpy(this->data_, hostSrc, sizeof(T) * size); -} - -template -void CpuVectorT::copyTo(CpuVectorT* dest) const { - CHECK_EQ(this->getSize(), dest->getSize()); - memcpy(dest->getData(), this->getData(), sizeof(T) * this->getSize()); -} - -template -void CpuVectorT::copyTo(GpuVectorT* dest) const { - CHECK_EQ(this->getSize(), dest->getSize()); - hl_memcpy_host2device((void*)dest->getData(), - (void*)this->getData(), - sizeof(T) * this->getSize()); -} - -template <> -void CpuVectorT::print(std::ostream& os, size_t num) const { - size_t w = size_ < num ? size_ : num; - os << "["; - for (size_t i = 0; i < w; ++i) { - os << data_[i] << " "; - } - os << "]" << std::endl; -} - -template <> -void CpuVectorT::print(std::ostream& os, size_t num) const { - size_t w = size_ < num ? 
size_ : num; - os << "["; - for (size_t i = 0; i < w; ++i) { - os << (int)data_[i] << " "; - } - os << "]" << std::endl; -} - -template <> -void CpuVectorT::printOneElement(std::ostream& os, size_t idx) const { - CHECK_LT(idx, size_); - os << data_[idx] << ";"; -} - -template <> -void CpuVectorT::printOneElement(std::ostream& os, size_t idx) const { - CHECK_LT(idx, size_); - os << (int)data_[idx] << ";"; -} - -template -void ParallelCpuVectorT::parallelExec(ExecFunc func) { - LOG(FATAL) << "Not implemented"; -} - -template <> -void ParallelCpuVectorT::parallelExec(ExecFunc func) { - pool_->exec([this, func](int tid, size_t numThreads) { - auto interval = calcSplitArrayInterval( - this->getSize(), (size_t)tid, numThreads, 8LU /*for avx*/); - // setup sub bufs - CpuVector subVec(0, nullptr); - subVec.subVecFrom(*this, interval); - func(subVec); - }); -} - -template -void ParallelCpuVectorT::exec(SyncThreadPool::JobFunc func) { - LOG(FATAL) << "Not implemented"; -} - -template <> -void ParallelCpuVectorT::exec(SyncThreadPool::JobFunc func) { - pool_->exec(func); -} - -template -CpuGpuVectorT::CpuGpuVectorT(size_t size, bool useGpu) : sync_(nullptr) { - if (!useGpu) { - cpuVectorT_ = std::make_shared>(size); - } else { - gpuVectorT_ = std::make_shared>(size); - } - setSync(useGpu); -} - -template -CpuGpuVectorT::CpuGpuVectorT(const std::shared_ptr>& src) - : sync_(nullptr) { - bool useGpu = src->useGpu(); - if (useGpu) { - gpuVectorT_ = src; - } else { - cpuVectorT_ = src; - } - setSync(useGpu); -} - -template -CpuGpuVectorT::CpuGpuVectorT(size_t size, T* data, bool useGpu) - : sync_(nullptr) { - if (!useGpu) { - cpuVectorT_ = std::make_shared>(size, data); - setSync(DATA_AT_CPU); - } else { - gpuVectorT_ = std::make_shared>(size, data); - setSync(DATA_AT_GPU); - } -} - -template -std::shared_ptr> CpuGpuVectorT::create(size_t size, - bool useGpu) { - return std::make_shared>(size, useGpu); -} - -template -void CpuGpuVectorT::resize(size_t size, bool useGpu) { - if 
(useGpu) { - CHECK(gpuVectorT_) << "gpuVectorT_ is null"; - // If memoryHandle_ is nullptr, - // the data may be owned by the caller when it was constructed. - // It should not resize for this case. - if (gpuVectorT_->getMemoryHandle()) { - gpuVectorT_->resize(size); - } else { - CHECK_EQ(gpuVectorT_->getSize(), size); - } - } else { - CHECK(cpuVectorT_) << "cpuVectorT_ is null"; - // If memoryHandle_ is nullptr, - // the data may be owned by the caller when it was constructed. - // It should not resize for this case. - if (cpuVectorT_->getMemoryHandle()) { - cpuVectorT_->resize(size); - } else { - CHECK_EQ(cpuVectorT_->getSize(), size); - } - } - setSync(useGpu); -} - -template -void CpuGpuVectorT::resizeOrCreate(std::shared_ptr>& vec, - size_t size, - bool useGpu) { - if (vec) { - vec->resize(size, useGpu); - } else { - vec = create(size, useGpu); - } -} - -template -void CpuGpuVectorT::resizeOrCreate(size_t size, bool useGpu) { - if (useGpu && (!gpuVectorT_)) { - gpuVectorT_ = VectorT::create(size, true); - } else if ((!useGpu) && (!cpuVectorT_)) { - cpuVectorT_ = VectorT::create(size, false); - } else { - CHECK((useGpu && gpuVectorT_) || (!useGpu && cpuVectorT_)); - this->resize(size, useGpu); - } -} - -template -CpuGpuVectorT::CpuGpuVectorT(CpuGpuVectorT& src, - size_t offset, - size_t size) - : sync_(nullptr) { - CHECK_LE(offset + size, static_cast(src.getSize())); -#ifdef PADDLE_WITH_CUDA - SyncedFlag* flag = src.getSync(); - if (*flag == DATA_AT_CPU) { - src.copyToGpu(); // will set synchronous data between CPU and GPU - } else if (*flag == DATA_AT_GPU) { - src.copyToCpu(); // will set synchronous data between CPU and GPU - } -#endif - auto cMemHandle = (src.getVector(false))->getMemoryHandle(); - cpuVectorT_ = std::make_shared>( - size, std::dynamic_pointer_cast(cMemHandle), offset); -#ifdef PADDLE_WITH_CUDA - auto gMemHandle = (src.getVector(true))->getMemoryHandle(); - gpuVectorT_ = std::make_shared>( - size, std::dynamic_pointer_cast(gMemHandle), 
offset); - src.setSync(SYNCED); -#endif - setSync(src.getSync()); -} - -template -std::shared_ptr> CpuGpuVectorT::getVector( - bool useGpu) const { - auto* self = const_cast*>(this); - if (useGpu) { - self->copyToGpu(); - return std::const_pointer_cast>(gpuVectorT_); - } else { - self->copyToCpu(); - return std::const_pointer_cast>(cpuVectorT_); - } -} - -template -std::shared_ptr>& CpuGpuVectorT::getMutableVector(bool useGpu) { - setSync(useGpu); - if (useGpu) { - copyToGpu(); - return gpuVectorT_; - } else { - copyToCpu(); - return cpuVectorT_; - } -} - -template -const T* CpuGpuVectorT::getData(bool useGpu) const { - auto self = const_cast*>(this); - if (useGpu) { - self->copyToGpu(); - return gpuVectorT_->getData(); - } else { - self->copyToCpu(); - return cpuVectorT_->getData(); - } -} - -// Operation will change data and need to reset sync_ & syncFlag_. -#define MUTABLE_VECTOR_OP(OP, useGpu, args...) \ - do { \ - if (useGpu) { \ - copyToGpu(); \ - setSync(useGpu); \ - return gpuVectorT_->OP(args); \ - } else { \ - copyToCpu(); \ - setSync(useGpu); \ - return cpuVectorT_->OP(args); \ - } \ - } while (0) - -template -T* CpuGpuVectorT::getMutableData(bool useGpu) { - MUTABLE_VECTOR_OP(getData, useGpu); -} - -template -void CpuGpuVectorT::zeroMem(bool useGpu) { - MUTABLE_VECTOR_OP(zeroMem, useGpu); -} - -template -void CpuGpuVectorT::fillSequence(bool useGpu) { - MUTABLE_VECTOR_OP(fillSequence, useGpu); -} - -template -void CpuGpuVectorT::setElement(size_t i, const T& value, bool useGpu) { - MUTABLE_VECTOR_OP(setElement, useGpu, i, value); -} - -template -T CpuGpuVectorT::getElement(size_t i) const { - switch (*this->getSync()) { - case SYNCED: - case DATA_AT_CPU: - return cpuVectorT_->getElement(i); - break; - case DATA_AT_GPU: - return gpuVectorT_->getElement(i); - break; - default: - LOG(FATAL) << "Not support"; - break; - } -} - -template -void CpuGpuVectorT::copyFrom(const VectorT& src, hl_stream_t stream) { - auto cVec = dynamic_cast*>(&src); - auto gVec = 
dynamic_cast*>(&src); - if (cVec) { - copyToCpu(cVec->getData(), cVec->getSize(), stream); - } else if (gVec) { - copyToGpu(gVec->getData(), gVec->getSize(), stream); - } else { - LOG(FATAL) << "Invalid type of src"; - } -} - -template -void CpuGpuVectorT::copyFrom(const T* data, size_t size, bool useGpu) { - if (useGpu) { - copyToGpu(data, size); - } else { - copyToCpu(data, size); - } -} - -template -void CpuGpuVectorT::copyFrom(const T* data, - size_t size, - hl_stream_t stream, - bool useGpu) { - if (useGpu) { - copyToGpu(data, size, stream); - } else { - copyToCpu(data, size, stream); - } -} - -template -void CpuGpuVectorT::copyFrom(CpuGpuVectorT& src, - size_t offset, - size_t size, - bool useGpu, - hl_stream_t stream) { - if (useGpu) { - VectorT::resizeOrCreate(gpuVectorT_, size, true); - gpuVectorT_->copyFrom(src.getData(true) + offset, size, stream); - } else { - VectorT::resizeOrCreate(cpuVectorT_, size, false); - cpuVectorT_->copyFrom(src.getData(false) + offset, size, stream); - } - setSync(useGpu); -} - -template -void CpuGpuVectorT::copyFrom(CpuGpuVectorT& src, hl_stream_t stream) { - switch (*src.getSync()) { - case DATA_AT_CPU: - copyFrom(*(src.getVector(false)), stream); - break; - case DATA_AT_GPU: - copyFrom(*(src.getVector(true)), stream); - break; - case SYNCED: - copyFrom(*(src.getVector(false)), stream); - copyFrom(*(src.getVector(true)), stream); - setSync(SYNCED); - break; - default: - LOG(FATAL) << "Not support"; - break; - } -} - -template -void CpuGpuVectorT::copyToCpu() { - switch (*this->getSync()) { - case DATA_AT_GPU: - CHECK(gpuVectorT_); - this->resizeOrCreate(gpuVectorT_->getSize(), false); - cpuVectorT_->copyFrom(*gpuVectorT_); - setSync(SYNCED); - break; - case DATA_AT_CPU: - case SYNCED: - CHECK(cpuVectorT_); - break; - default: - LOG(FATAL) << "Not support"; - break; - } -} - -template -void CpuGpuVectorT::copyToGpu() { - switch (*this->getSync()) { - case DATA_AT_CPU: - CHECK(cpuVectorT_); - 
this->resizeOrCreate(cpuVectorT_->getSize(), true); - gpuVectorT_->copyFrom(*cpuVectorT_); - setSync(SYNCED); - break; - case DATA_AT_GPU: - case SYNCED: - CHECK(gpuVectorT_); - break; - default: - LOG(FATAL) << "Not support"; - break; - } -} - -template class VectorT; -template class VectorT; -template class CpuVectorT; -template class CpuVectorT; -template class GpuVectorT; -template class GpuVectorT; -template class CpuGpuVectorT; -template class CpuGpuVectorT; - -} // namespace paddle diff --git a/paddle/legacy/math/Vector.h b/paddle/legacy/math/Vector.h deleted file mode 100644 index 63cb4651c52219807e11e778db9c42667759a055..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/Vector.h +++ /dev/null @@ -1,726 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include - -#include - -#include "BaseMatrix.h" -#include "MemoryHandle.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/Thread.h" - -namespace paddle { - -template -class GpuVectorT; -template -class CpuVectorT; - -template -class BaseVector; - -class SyncThreadPool; - -class Matrix; - -template -class BaseVector : public BaseMatrixT { - public: - BaseVector(size_t size, T* data, bool useGpu) - : BaseMatrixT(1, size, data, false, useGpu), size_(this->width_) {} - - ~BaseVector() {} - - protected: - size_t& size_; -}; - -/** - * Copy or assignemnt constructor will share the data as opposed to making a - * copy of the original data. To make a copy of the orinal data, use copyFrom() - * instead. - */ -template -class VectorT : public BaseVector { - protected: - VectorT(size_t size, MemoryHandlePtr memoryHandle, size_t offset, bool useGpu) - : BaseVector(size, - reinterpret_cast(memoryHandle->getBuf()) + offset, - useGpu) { - memoryHandle_ = memoryHandle; - } - - // data is still owned by the caller. - // data should be valid during the life of this vector. - // Caller is responsible for release the memory. - VectorT(size_t size, T* data, bool useGpu) - : BaseVector(size, data, useGpu) {} - - public: - virtual ~VectorT() {} - - static std::shared_ptr> create(size_t size, bool useGpu); - - static std::shared_ptr> create(T* data, size_t size, bool useGpu); - - static std::shared_ptr> create(size_t size, - MemoryHandlePtr memoryHandle, - size_t offset = 0); - - // owner can set SyncThreadPool, - // if not set, will use globalSyncThreadPool, - // which can be used in main thread only. 
- static std::shared_ptr> createParallelVector( - size_t size, bool useGpu, SyncThreadPool* pool = nullptr); - - size_t getSize() const { return this->size_; } - const T* getData() const { return this->data_; } - T* getData() { return this->data_; } - - virtual void zeroMem() = 0; - // set all elements to value - virtual void reset(const T& value) = 0; - // fill data by 0, 1, 2, ... - virtual void fillSequence() = 0; - - MemoryHandlePtr getMemoryHandle() const { return memoryHandle_; } - - /** - * resizing to a big vector will not preserve old values. - */ - void resize(size_t newSize) { - if (!memoryHandle_ || newSize * sizeof(T) > memoryHandle_->getAllocSize()) { - memoryHandle_ = newMemory(newSize * sizeof(T)); - this->data_ = reinterpret_cast(memoryHandle_->getBuf()); - } - this->size_ = newSize; - } - - static void resizeOrCreate(std::shared_ptr>& vec, - size_t size, - bool useGpu) { - if (vec) { - vec->resize(size); - } else { - vec = create(size, useGpu); - } - } - - virtual MemoryHandlePtr newMemory(size_t size) = 0; - - /** - * form sub vector from *src*, shallow copy - */ - void subVecFrom(const VectorT& src, size_t start, size_t size) { - CHECK_EQ(BaseVector::useGpu_, src.useGpu_); - CHECK_LT(start, src.size_); - CHECK_LE(start + size, src.size_); - - BaseVector::size_ = size; - BaseVector::data_ = const_cast(src.data_) + start; - } - - std::shared_ptr> subVec(size_t start, size_t size) { - CHECK_LE(start + size, static_cast(getSize())); - return VectorT::create(getData() + start, size, BaseVector::useGpu_); - } - - /** - * form sub vector from *src*, shallow copy - */ - void subVecFrom(const T* src, size_t start, size_t size) { - BaseVector::size_ = size; - BaseVector::data_ = const_cast(src) + start; - } - - /** - * form sub vector from *src*, shallow copy - * in *interval* [interval.first, interval.second) - */ - void subVecFrom(const VectorT& src, std::pair interval) { - subVecFrom(src, interval.first, interval.second - interval.first); - } - - /** - 
* convert the vector to a sparse one_hot matrix of width idRange - * only applies to IVector - */ - std::shared_ptr toOneHotSparseMatrix(size_t idRange, bool useGpu); - - /** - * @brief cast vector of "real" elements to "int" elements. - * - * @note: float -> int must be casted, or you'll get wrong data. - */ - std::shared_ptr> castToInt(); - - /** - * This function will crash if the size of src and dest is different. - */ - virtual void copyFrom(const VectorT& src) = 0; - - /** - * If GpuVector, this function is an asynchronous interface, - * will push the copy-task to the specifed-stream and return immediately. - * - * If CpuVector, this function is an synchronous interface, - * same as the copyFrom(const VectorT& src). - */ - virtual void copyFrom(const VectorT& src, hl_stream_t stream) = 0; - - /** - * copy size elements from src - * - * If this is GpuVector, src can be cpu or gpu memory - * - * If this is CpuVector, src is assumed to be cpu memory - */ - virtual void copyFrom(const T* src, size_t size) = 0; - - /** - * copy size elements from src - * - * If this is GpuVector, src can be cpu or gpu memory - * - * If this is CpuVector, src is assumed to be cpu memory, - */ - virtual void copyFrom(const T* src, size_t size, hl_stream_t stream) = 0; - - /** - * exec a func in single/multi thread - */ - virtual void exec(SyncThreadPool::JobFunc func) { func(0, 1); } - - /// Get the buffer point with beginPos - virtual T* getPoint(const uint64_t beginPos) = 0; - - /// Get the value for the i'th element - virtual T getElement(size_t i) const = 0; - virtual void setElement(size_t i, const T& value) = 0; - - //---------- math operations ---------------- - - // sum of the absolute value of each elements - virtual T getAbsSum() = 0; - - virtual T getSum() = 0; - virtual T getMax() = 0; - virtual T getAbsMax() = 0; - virtual T getMin() = 0; - - /// element-wise calc: this = (b == value) - virtual void isEqualTo(const VectorT& b, const T& value) = 0; - - /// select 
elements indexed by *ids* from vector *src* - virtual void selectFrom(const VectorT& src, const VectorT& ids) = 0; - - enum HistogramType { - HISTOGRAM_EXPONENT = 0, - }; - - /** - * @brief print histogram of vector values - * - * @note only exponent histogram supported currently - */ - virtual void histogram(std::ostream& os, int type = HISTOGRAM_EXPONENT) = 0; - - /// generate uniform random value for each element - virtual void rand() = 0; - /** - * generate uniform random value for each element, - * data range is from 0 to (classes - 1). - */ - virtual void rand(size_t classes) = 0; - - /** - * Debug use only. Very inefficient for GPU vector. - * get the value at pos. - */ - virtual T get(size_t pos) = 0; - - /** - * generate univariate Gaussian distributed random numbers - * with given mean and standardDeviation. - */ - virtual void randnorm(real mean, real standardDeviation) = 0; - - /** - * generate uniform distributed random numbers - * with given range. - */ - virtual void uniform(real left, real right) = 0; - - /// print the first "num" elements of the Vector - virtual void print(std::ostream& os, size_t num) const = 0; - - /// print the "idx" element of the Vector - virtual void printOneElement(std::ostream& os, size_t idx) const = 0; - - template - void operator=(const ExpressionType& expr) { - if (BaseVector::useGpu_) { - TensorGpuApply(*this, expr); - } else { - TensorCpuApply(*this, expr); - } - } - - protected: - friend class GpuVectorT; - friend class CpuVectorT; - virtual void copyTo(CpuVectorT* dest) const = 0; - virtual void copyTo(GpuVectorT* dest) const = 0; - MemoryHandlePtr memoryHandle_; -}; - -template -std::ostream& operator<<(std::ostream& os, const VectorT& vec) { - vec.print(os, vec.getSize()); - return os; -} - -template -class GpuVectorT : public VectorT { - public: - explicit GpuVectorT(size_t size); - GpuVectorT(size_t size, GpuMemHandlePtr memHandle, size_t offset) - : VectorT(size, memHandle, offset, true) {} - - // data is still 
owned by the caller. - // data should be valid during the life of this vector. - // Caller is responsible for release the memory. - GpuVectorT(size_t size, T* data) : VectorT(size, data, true) {} - - virtual MemoryHandlePtr newMemory(size_t size) { - return std::make_shared(size); - } - virtual void zeroMem(); - virtual void reset(const T& value); - virtual void fillSequence(); - - virtual void copyFrom(const T* src, size_t size); - virtual void copyFrom(const T* src, size_t size, hl_stream_t stream); - virtual void copyFrom(const VectorT& src); - virtual void copyFrom(const VectorT& src, hl_stream_t stream); - virtual T getElement(size_t i) const; - virtual void setElement(size_t i, const T& value); - virtual T* getPoint(const uint64_t beginPos); - - virtual T getAbsSum(); - virtual T getSum(); - virtual T getMax(); - virtual T getAbsMax(); - virtual T getMin(); - virtual void isEqualTo(const VectorT& b, const T& value); - virtual void selectFrom(const VectorT& src, const VectorT& ids); - virtual void histogram(std::ostream& os, int type); - virtual void rand(); - virtual void rand(size_t classes); - virtual void randnorm(real mean, real standardDeviation); - virtual void uniform(real left, real right); - virtual T get(size_t pos); - virtual void print(std::ostream& os, size_t num) const; - virtual void printOneElement(std::ostream& os, size_t idx) const; - - template - void operator=(const ExpressionType& expr) { - TensorGpuApply(*this, expr); - } - - protected: - virtual void copyTo(CpuVectorT* dest) const; - virtual void copyTo(GpuVectorT* dest) const; -}; - -template -class CpuVectorT : public VectorT { - public: - explicit CpuVectorT(size_t size); - CpuVectorT(size_t size, MemoryHandlePtr memoryHandle, size_t offset) - : VectorT(size, memoryHandle, offset, false) {} - - // data is still owned by the caller. - // data should be valid during the life of this vector. - // Caller is responsible for release the memory. 
- CpuVectorT(size_t size, T* data) : VectorT(size, data, false) {} - - /** - * If src is a CpuVector, the new CpuVector will share the data with src - * - * If src is a GpuVector, the new CpuVector will copy data from src - */ - explicit CpuVectorT(const VectorT& src); - - virtual MemoryHandlePtr newMemory(size_t size) { - return std::make_shared(size); - } - - virtual void zeroMem(); - virtual void reset(const T& value); - virtual void fillSequence(); - virtual void copyFrom(const T* src, size_t size); - virtual void copyFrom(const T* src, size_t size, hl_stream_t stream); - virtual void copyFrom(const VectorT& src); - virtual void copyFrom(const VectorT& src, hl_stream_t stream); - virtual void copyTo(CpuVectorT* dest) const; - virtual void copyTo(GpuVectorT* dest) const; - - /// Get the buffer point with beginPos - virtual T* getPoint(const uint64_t beginPos) { - return this->getData() + beginPos; - } - - virtual T getElement(size_t i) const { return this->getData()[i]; } - virtual void setElement(size_t i, const T& value) { - this->getData()[i] = value; - } - - virtual T getAbsSum(); - virtual T getSum(); - virtual T getMax(); - virtual T getAbsMax(); - virtual T getMin(); - virtual void isEqualTo(const VectorT& b, const T& value); - virtual void selectFrom(const VectorT& src, const VectorT& ids); - virtual void histogram(std::ostream& os, int type); - virtual void rand(); - virtual void rand(size_t classes); - virtual void randnorm(real mean, real standardDeviation); - virtual void uniform(real left, real right); - virtual T get(size_t pos); - virtual void print(std::ostream& os, size_t num) const; - virtual void printOneElement(std::ostream& os, size_t idx) const; - - template - void operator=(const ExpressionType& expr) { - TensorCpuApply(*this, expr); - } -}; - -template -class ParallelCpuVectorT : public CpuVectorT { - public: - ParallelCpuVectorT(size_t size, SyncThreadPool* pool) - : CpuVectorT(size), pool_(pool) {} - - virtual void zeroMem() { - 
parallelExec([](CpuVectorT& vec) { vec.CpuVectorT::zeroMem(); }); - } - virtual void randnorm(real mean, real standardDeviation) { - parallelExec([=](CpuVectorT& vec) { - vec.CpuVectorT::randnorm(mean, standardDeviation); - }); - } - virtual void uniform(real left, real right) { - parallelExec( - [=](CpuVectorT& vec) { vec.CpuVectorT::uniform(left, right); }); - } - - virtual void exec(SyncThreadPool::JobFunc jobFunc); - - private: - typedef std::function& vec)> ExecFunc; - void parallelExec(ExecFunc func); - SyncThreadPool* pool_; -}; - -/** - * A class to do conversion between CpuVector and GpuVector automatically. - */ -template -class CpuGpuVectorT { - public: - /** - * @brief An enum type of SyncedFlag using to - * mark data memory is in CPU or GPU. - * - * DATA_AT_CPU: data is located in CPU. - * - * DATA_AT_GPU: data is located in GPU. - * - * SYNCED: data is located in CPU and GPU simultaneously. - */ - enum SyncedFlag { DATA_AT_CPU = 0, DATA_AT_GPU = 1, SYNCED = 2 }; - - /** - * @brief A constructor, create cpuVectorT_ or gpuVectorT_. - * - * @param[in] size data size. - * @param[in] useGpu use gpu or not. - */ - explicit CpuGpuVectorT(size_t size, bool useGpu); - - /** - * @brief A constructor, create CpuGpuVectorT by VectorT. - * - * If src is CpuVector, cpuVectorT_ is shared data with src. - * - * If src is GpuVector, gpuVectorT_ is shared data with src. - */ - explicit CpuGpuVectorT(const std::shared_ptr>& src); - - /** - * @brief A constructor. - * - * If useGpu is true, data should be located in device and - * create gpuVectorT_ with data. - * - * If useGpu is false, data should be located in host and - * create cpuVectorT_ with data. - * - * @note Data is owned by the caller and should be valid during - * the life of this vector. - * Caller is responsible for release the memory. 
- */ - CpuGpuVectorT(size_t size, T* data, bool useGpu); - - CpuGpuVectorT(CpuGpuVectorT& src, size_t offset, size_t size); - - virtual ~CpuGpuVectorT() {} - - static std::shared_ptr> create(size_t size, bool useGpu); - - /** - * @brief resize vector. - * - * If useGpu is true, resize gpuVectorT_ and set syncFlag_ to DATA_AT_GPU, - * - * otherwise resize cpuVectorT_ and set syncFlag_ to DATA_AT_CPU. - */ - void resize(size_t size, bool useGpu); - - /** - * @brief resize or create CpuGpuVectorT. - */ - static void resizeOrCreate(std::shared_ptr>& vec, - size_t size, - bool useGpu); - - /** - * @brief return a const cpuVectorT_ or gpuVectorT_. - * - * If useGpu is true, return gpuVectorT_. - * - * If useGpu is false, return cpuVectorT_. - * - * @note Caller should not change the data. - * If caller changes const attribute, - * should set syncFlag_. - */ - std::shared_ptr> getVector(bool useGpu) const; - - /** - * @brief return a const cpuVectorT_ or gpuVectorT_. - * - * @note: This interface will change syncFlag_, so if you will - * not change the data, you should call getVector. - */ - std::shared_ptr>& getMutableVector(bool useGpu); - - /** - * @brief return const T* data. - * - * If useGpu is true, return device data. - * - * If useGpu is false, return host data. - */ - const T* getData(bool useGpu) const; - - // TODO(yuyang18): Make getData more c++ style. - // inline T* getData(bool useGpu) { - // return getMutableData(useGpu); - // } - - T* getMutableData(bool useGpu); - - /** - * If useGpu is true, gpuVectorT_->Op(). - * - * If useGpu is false, cpuVectorT_->Op(). - * - * Op is zeroMem, fillSequence, ... - */ - void zeroMem(bool useGpu); - void fillSequence(bool useGpu); - void setElement(size_t i, const T& value, bool useGpu); - - /** - * @brief return i-th element. - */ - T getElement(size_t i) const; - - /** - * @brief return vector size. 
- */ - size_t getSize() const { - size_t size = 0; - switch (*sync_) { - case SYNCED: - case DATA_AT_CPU: - size = cpuVectorT_->getSize(); - break; - case DATA_AT_GPU: - size = gpuVectorT_->getSize(); - break; - default: - LOG(FATAL) << "Not support"; - break; - } - return size; - } - - /// copy data to cpuVectorT_. - inline void copyToCpu(const T* data, size_t size) { - this->resizeOrCreate(size, false); - cpuVectorT_->copyFrom(data, size); - setSync(DATA_AT_CPU); - } - /// copy data to cpuVectorT_ using specifed-stream. - inline void copyToCpu(const T* data, size_t size, hl_stream_t stream) { - this->resizeOrCreate(size, false); - cpuVectorT_->copyFrom(data, size, stream); - setSync(DATA_AT_CPU); - } - - /// copy data to gpuVectorT_. - inline void copyToGpu(const T* data, size_t size) { - this->resizeOrCreate(size, true); - gpuVectorT_->copyFrom(data, size); - setSync(DATA_AT_GPU); - } - /// copy data to gpuVectorT_ using specifed-stream. - inline void copyToGpu(const T* data, size_t size, hl_stream_t stream) { - this->resizeOrCreate(size, true); - gpuVectorT_->copyFrom(data, size, stream); - setSync(DATA_AT_GPU); - } - - /** - * @brief copy from src using specifed-stream. - * - * If src is CpuVectorT, copy to cpuVectorT_. - * - * If src is GpuVectorT, copy to gpuVectorT_. - */ - void copyFrom(const VectorT& src, hl_stream_t stream); - - /** - * @brief copy data. - * - * If useGpu is false, copy host data to cpuVectorT_. - * - * If useGpu is true, copy device data to gpuVectorT_. - * - * @note data address should consistent with useGpu. - */ - void copyFrom(const T* data, size_t size, bool useGpu); - void copyFrom(const T* data, size_t size, hl_stream_t stream, bool useGpu); - - /** - * @brief copy from (src + offset) using specifed-stream. - */ - void copyFrom(CpuGpuVectorT& src, - size_t offset, - size_t size, - bool useGpu, - hl_stream_t stream); - - /** - * @brief copy from src using specifed-stream. 
- */ - void copyFrom(CpuGpuVectorT& src, hl_stream_t stream); - - /** - * @brief return sync_. - */ - inline SyncedFlag* getSync() const { return sync_; } - - /** - * @brief set sync_. - */ - inline void setSync(SyncedFlag* sync) { sync_ = sync; } - - inline void setSync(SyncedFlag syncFlag) { - if (sync_) { - *sync_ = syncFlag; - } else { - syncFlag_ = syncFlag; - sync_ = &syncFlag_; - } - } - - inline void setSync(bool useGpu) { - SyncedFlag flag = useGpu ? DATA_AT_GPU : DATA_AT_CPU; - setSync(flag); - } - - protected: - void resizeOrCreate(size_t size, bool useGpu); - - /** - * @brief copy between cpuVectorT_ and gpuVectorT_. - * - * If syncFlag_ is DATA_AT_CPU and SYNCED, do nothing. - * - * If syncFlag_ is DATA_AT_GPU, copy gpuVectorT_ to cpuVectorT_ - * and set syncFlag_ to SYNCED. - */ - void copyToCpu(); - - /** - * @brief copy between cpuVectorT_ and gpuVectorT_. - * - * If syncFlag_ is DATA_AT_GPU and SYNCED, do nothing. - * - * If syncFlag_ is DATA_AT_CPU, copy cpuVectorT_ to gpuVectorT_ - * and set syncFlag_ to SYNCED. - */ - void copyToGpu(); - - /// host pointer. - std::shared_ptr> cpuVectorT_; - /// device pointer. - std::shared_ptr> gpuVectorT_; - /// specify current data address. 
- SyncedFlag syncFlag_; - SyncedFlag* sync_; -}; - -typedef VectorT Vector; -typedef CpuVectorT CpuVector; -typedef GpuVectorT GpuVector; - -typedef VectorT IVector; -typedef CpuVectorT CpuIVector; -typedef GpuVectorT GpuIVector; - -typedef std::shared_ptr VectorPtr; -typedef std::shared_ptr CpuVectorPtr; -typedef std::shared_ptr GpuVectorPtr; - -typedef std::shared_ptr IVectorPtr; -typedef std::shared_ptr CpuIVectorPtr; -typedef std::shared_ptr GpuIVectorPtr; - -typedef CpuGpuVectorT CpuGpuVector; -typedef CpuGpuVectorT ICpuGpuVector; -typedef std::shared_ptr CpuGpuVectorPtr; -typedef std::shared_ptr ICpuGpuVectorPtr; - -} // namespace paddle diff --git a/paddle/legacy/math/tests/CMakeLists.txt b/paddle/legacy/math/tests/CMakeLists.txt deleted file mode 100644 index d8b7f9e3fc74040189ade83049e4a1c3348e08de..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -# unittest for common package - -add_simple_unittest(test_ExecViaCpu) -add_simple_unittest(test_SIMDFunctions) -add_simple_unittest(test_TrainingAlgorithm) -add_simple_unittest(test_RowBuffer) -if(NOT MOBILE_INFERENCE) - add_simple_unittest(test_SparseMatrix) -endif() - -# TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference. 
-add_unittest(test_matrixCompare - test_matrixCompare.cpp) - -add_simple_unittest(test_sparseMatrixCompare) -add_simple_unittest(test_perturbation) -add_simple_unittest(test_CpuGpuVector) -add_simple_unittest(test_Allocator) - -if(WITH_GPU) - CUDA_ADD_EXECUTABLE(test_Tensor test_Tensor.cu) - link_paddle_test(test_Tensor) - CUDA_ADD_EXECUTABLE(test_lazyAssign test_lazyAssign.cu) - link_paddle_test(test_lazyAssign) -else() - compile_cu_as_cpp(test_Tensor.cu) - add_unittest(test_Tensor test_Tensor.cu) - compile_cu_as_cpp(test_lazyAssign.cu) - add_unittest(test_lazyAssign test_lazyAssign.cu) -endif(WITH_GPU) - -add_simple_unittest(test_FPException) -add_simple_unittest(test_GpuProfiler) -add_simple_unittest(test_BaseMatrix) -add_simple_unittest(test_Matrix) diff --git a/paddle/legacy/math/tests/OriginalOptimizerApi.h b/paddle/legacy/math/tests/OriginalOptimizerApi.h deleted file mode 100644 index f386e19958a21214151776e6d0ae7bb2a4530b6c..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/OriginalOptimizerApi.h +++ /dev/null @@ -1,201 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/GlobalConstants.h" - -using namespace paddle; // NOLINT - -void SparseMomentumParameterOptimizer(const VectorPtr vecs[], - real alpha, - real beta, - real gamma, - real tau, - real learningRate) { - vecs[PARAMETER_MOMENTUM_UT]->add(*vecs[PARAMETER_GRADIENT], - -alpha * gamma * learningRate); - vecs[PARAMETER_MOMENTUM_VT]->add(*vecs[PARAMETER_GRADIENT], - tau * alpha * gamma * learningRate); - vecs[PARAMETER_VALUE]->add(*vecs[PARAMETER_MOMENTUM_UT], - tau / beta + 1.0 / alpha, - *vecs[PARAMETER_MOMENTUM_VT], - 1.0 / beta); -} - -void AdagradParameterOptimizer(const VectorPtr vecs[], - real epsilon, - real learningRate, - real momentum, - real decayRate) { - vecs[PARAMETER_GRADIENT_SQURESUM1]->addSquare(*vecs[PARAMETER_GRADIENT], - 1.0f); - vecs[PARAMETER_LEARNING_RATE]->add(*vecs[PARAMETER_GRADIENT_SQURESUM], - *vecs[PARAMETER_GRADIENT_SQURESUM1]); - vecs[PARAMETER_LEARNING_RATE]->add(epsilon); - vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]); - - vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT], - *vecs[PARAMETER_MOMENTUM], - *vecs[PARAMETER_LEARNING_RATE], - learningRate, - momentum, - decayRate); -} - -void AdaDeltaParameterOptimizer(const VectorPtr vecs[], - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate) { - // E(g_t^2) = \rou * E(g_{t-1}^2) + (1-\rou) * g^2 - vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare( - *vecs[PARAMETER_GRADIENT], rou, 1.0f - rou); - - // learn_rate = sqrt( ( E(dx_{t-1}^2) + epsilon ) / ( E(g_t^2) + epsilon ) ) - vecs[PARAMETER_LEARNING_RATE]->dotDiv(*vecs[PARAMETER_GRADIENT_SQURESUM1], - *vecs[PARAMETER_GRADIENT_SQURESUM], - epsilon, - epsilon); - vecs[PARAMETER_LEARNING_RATE]->sqrt2(); - - // E(dx_t^2) = \rou * E(dx_{t-1}^2) + (1-\rou) * (-g*learn_rate)^2 - vecs[PARAMETER_GRADIENT_SQURESUM1]->decayAddSquareMul( - *vecs[PARAMETER_GRADIENT], - *vecs[PARAMETER_LEARNING_RATE], - 
rou, - 1.0f - rou); - - vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT], - *vecs[PARAMETER_MOMENTUM], - *vecs[PARAMETER_LEARNING_RATE], - learningRate, - momentum, - decayRate); -} - -void RMSPropParameterOptimizer(const VectorPtr vecs[], - real accumulatedRou, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate, - bool firstTime) { - // E(g_t^2) = \rou * E(g_{t-1}^2) + (1-\rou) * g^2 - // For the first time update, make the sum be the current square - // so that the initial estimation of E(g_t^2) will not be too small. - vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare( - *vecs[PARAMETER_GRADIENT], accumulatedRou, firstTime ? 1.0f : 1.0f - rou); - - // E(g_t) = \rou * E(g_{t-1}) + (1-\rou) * g - vecs[PARAMETER_GRADIENT_SQURESUM1]->add( - *vecs[PARAMETER_GRADIENT], accumulatedRou, 1.0f - rou); - - // learn_rate = 1/sqrt( ( E(g_t^2) - (E(g_t))^2 + epsilon ) - // Basiclly if the sign of the gradient changes more often, - // the learning rate will be decreased. - vecs[PARAMETER_LEARNING_RATE]->assign(*vecs[PARAMETER_GRADIENT_SQURESUM]); - vecs[PARAMETER_LEARNING_RATE]->addSquare(*vecs[PARAMETER_GRADIENT_SQURESUM1], - -1.0f); - vecs[PARAMETER_LEARNING_RATE]->add(epsilon); - vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]); - - vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT], - *vecs[PARAMETER_MOMENTUM], - *vecs[PARAMETER_LEARNING_RATE], - learningRate, - momentum, - decayRate); -} - -void DecayedAdagradParameterOptimizer(const VectorPtr vecs[], - real accumulatedRou, - real rou, - real epsilon, - real learningRate, - real momentum, - real decayRate, - bool firstTime) { - // E(g_t^2) = \rou * E(g_{t-1}^2) + (1-\rou) * g^2 - // For the first time update, make the sum be the current square - // so that the initial estimation of E(g_t^2) will not be too small. - vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare( - *vecs[PARAMETER_GRADIENT], accumulatedRou, firstTime ? 
1.0f : 1.0f - rou); - - // learn_rate = 1/sqrt( ( E(g_t^2) + epsilon ) - // Basiclly if the bigger the magnitude gradient is, - // the smaller the learning rate will be. - vecs[PARAMETER_LEARNING_RATE]->assign(epsilon); - vecs[PARAMETER_LEARNING_RATE]->add(*vecs[PARAMETER_GRADIENT_SQURESUM]); - vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]); - - vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT], - *vecs[PARAMETER_MOMENTUM], - *vecs[PARAMETER_LEARNING_RATE], - learningRate, - momentum, - decayRate); -} - -void AdamParameterOptimizer(const VectorPtr vecs[], - real beta1, - real beta2, - real beta1_power, - real beta2_power, - real epsilon, - real learningRate) { - Vector* m = vecs[PARAMETER_MOMENTUM].get(); - Vector* g = vecs[PARAMETER_GRADIENT].get(); - Vector* v = vecs[PARAMETER_SECOND_MOMENTUM].get(); - Vector* theta = vecs[PARAMETER_VALUE].get(); - - // m_t = \beta_1 * m_{t-1} + (1-\beta_1)* g_t; - m->add(*g, beta1, 1 - beta1); - - // v_t = \beta_2 * v_{t-1} + (1-\beta_2)* g_{t-1}^2 - g->square2(); - v->add(*g, beta2, 1 - beta2); - - // tmp = m_t / ( \sqrt{v_t} + \epsilon ) - // \theta_t = \theta_{t-1} - \alpha * \sqrt(1-\beta_2^t) / (1-\beta_1^t) * tmp - g->sqrt2(*v); - g->dotDiv(*m, *g, 0., epsilon); - real alpha = - learningRate * std::sqrt((real)1 - beta2_power) / ((real)1 - beta1_power); - theta->add(*theta, 1.0, *g, -alpha); -} - -void AdamaxParameterOptimizer( - const VectorPtr vecs[], real beta1, real beta2, int64_t step, real alpha) { - Vector* m = vecs[PARAMETER_MOMENTUM].get(); - Vector* g = vecs[PARAMETER_GRADIENT].get(); - Vector* u = vecs[PARAMETER_WEIGHTED_INFINITY_NORM].get(); - Vector* theta = vecs[PARAMETER_VALUE].get(); - - // m_t = \beta_1 * m_{t-1} + (1-\beta_1)* g_t; - m->add(*g, beta1, 1 - beta1); - - // u_t = max(\beta_2*u_{t-1}, abs(g_t)) - u->mulScalar(beta2); - g->abs2(); - u->max2(*u, *g); - - // \theta_t = \theta_{t-1} - (\alpha/(1-\beta_1^t))*m_t/u_t - g->dotDiv(*m, *u); - real learningRate = alpha / 
(1 - std::pow(beta1, step)); - theta->add(*theta, 1.0, *g, -learningRate); -} diff --git a/paddle/legacy/math/tests/PerfUtils.h b/paddle/legacy/math/tests/PerfUtils.h deleted file mode 100644 index eaf4869e4c994e5ec739fe650d0228687d24853f..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/PerfUtils.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -// Performance Check -#ifdef PADDLE_DISABLE_TIMER - -#define EXPRESSION_PERFORMANCE(expression) expression; - -#else - -#include "paddle/legacy/utils/Stat.h" -using namespace paddle; // NOLINT - -#define EXPRESSION_PERFORMANCE(expression) \ - do { \ - char expr[30]; \ - strncpy(expr, #expression, 30); \ - if (expr[29] != '\0') { \ - expr[27] = '.'; \ - expr[28] = '.'; \ - expr[29] = '\0'; \ - } \ - expression; \ - for (int i = 0; i < 20; i++) { \ - REGISTER_TIMER(expr); \ - expression; \ - } \ - LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ') \ - << *globalStat.getStat(expr); \ - globalStat.reset(); \ - } while (0) - -#endif diff --git a/paddle/legacy/math/tests/TensorCheck.h b/paddle/legacy/math/tests/TensorCheck.h deleted file mode 100644 index 41c8ece282e05f55d063e6ad0d8805629c847d34..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/TensorCheck.h +++ /dev/null @@ -1,216 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -/** - * This file provides a TensorCheck template function, which can be used to - * compare CpuMatrix and GpuMatrix, CpuVector and GpuVector, and so on. - */ - -#include -#include "paddle/legacy/math/Matrix.h" - -namespace autotest { - -using paddle::Matrix; -using paddle::CpuMatrix; -using paddle::GpuMatrix; -using paddle::VectorT; -using paddle::CpuVectorT; -using paddle::GpuVectorT; - -class AssertEqual { - public: - AssertEqual(real err = 0) : err_(err) {} - - inline bool operator()(real a, real b) { - if (err_ == 0) { - if (a != b) { - return false; - } - } else { - if (std::fabs(a - b) > err_) { - if ((std::fabs(a - b) / std::fabs(a)) > (err_ / 10.0f)) { - return false; - } - } - } - - return true; - } - - private: - real err_; -}; - -template -class CopyToCpu; - -template <> -class CopyToCpu { - public: - explicit CopyToCpu(const CpuMatrix& arg) : arg_(arg) {} - const CpuMatrix& copiedArg() const { return arg_; } - - private: - const CpuMatrix& arg_; -}; - -template <> -class CopyToCpu { - public: - explicit CopyToCpu(const GpuMatrix& arg) - : arg_(arg.getHeight(), arg.getWidth()) { - arg_.copyFrom(arg); - } - CpuMatrix& copiedArg() { return arg_; } - - private: - CpuMatrix arg_; -}; - -template <> -class CopyToCpu { - public: - explicit CopyToCpu(const Matrix& arg) - : arg_(arg.getHeight(), arg.getWidth()) { - arg_.copyFrom(arg); - } - CpuMatrix& copiedArg() { return arg_; } - - private: - CpuMatrix arg_; -}; - 
-template -class CopyToCpu> { - public: - explicit CopyToCpu(const CpuVectorT& arg) : arg_(arg) {} - const CpuVectorT& copiedArg() const { return arg_; } - - private: - const CpuVectorT& arg_; -}; - -template -class CopyToCpu> { - public: - explicit CopyToCpu(const GpuVectorT& arg) : arg_(arg.getSize()) { - arg_.copyFrom(arg); - } - CpuVectorT& copiedArg() { return arg_; } - - private: - CpuVectorT arg_; -}; - -template -class CopyToCpu> { - public: - explicit CopyToCpu(const VectorT& arg) : arg_(arg.getSize()) { - arg_.copyFrom(arg); - } - CpuVectorT& copiedArg() { return arg_; } - - private: - CpuVectorT arg_; -}; - -template -void TensorCheck(AssertEq compare, - const CpuMatrix& matrix1, - const CpuMatrix& matrix2) { - CHECK(matrix1.getHeight() == matrix2.getHeight()); - CHECK(matrix1.getWidth() == matrix2.getWidth()); - - int height = matrix1.getHeight(); - int width = matrix1.getWidth(); - const real* data1 = matrix1.getData(); - const real* data2 = matrix2.getData(); - int count = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - real a = data1[i * width + j]; - real b = data2[i * width + j]; - if (!compare(a, b)) { - count++; - } - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -template -void TensorCheck(AssertEq compare, - const CpuVectorT& vector1, - const CpuVectorT& vector2) { - CHECK(vector1.getSize() == vector2.getSize()); - - const T* data1 = vector1.getData(); - const T* data2 = vector2.getData(); - size_t size = vector1.getSize(); - int count = 0; - for (size_t i = 0; i < size; i++) { - real a = data1[i]; - real b = data2[i]; - if (!compare(a, b)) { - count++; - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different elements."; -} - -template -void TensorCheck(AssertEq compare, - const Tensor1& tensor1, - const Tensor2& tensor2) { - TensorCheck(compare, - CopyToCpu(tensor1).copiedArg(), - CopyToCpu(tensor2).copiedArg()); -} - -template -void TensorCheck(AssertEq compare, 
real args1, real args2) { - EXPECT_EQ(compare(args1, args2), true) << "[Test error] args1 = " << args1 - << ", args2 = " << args2; -} - -template -void TensorCheck(AssertEq compare, size_t args1, size_t args2) { - EXPECT_EQ(args1, args2) << "[Test error] args1 = " << args1 - << ", args2 = " << args2; -} - -template -void TensorCheckEqual(const Tensor1& tensor1, const Tensor2& tensor2) { - AssertEqual compare(0); - TensorCheck(compare, - CopyToCpu(tensor1).copiedArg(), - CopyToCpu(tensor2).copiedArg()); -} - -template -void TensorCheckErr(const Tensor1& tensor1, const Tensor2& tensor2) { -#ifndef PADDLE_TYPE_DOUBLE - AssertEqual compare(1e-3); -#else - AssertEqual compare(1e-10); -#endif - TensorCheck(compare, - CopyToCpu(tensor1).copiedArg(), - CopyToCpu(tensor2).copiedArg()); -} - -} // namespace autotest diff --git a/paddle/legacy/math/tests/TestUtils.h b/paddle/legacy/math/tests/TestUtils.h deleted file mode 100644 index 60e76359da61ac32346b093d9a9ff69104bfc494..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/TestUtils.h +++ /dev/null @@ -1,294 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -/** - * This file provides a AutoCompare calss to simplify the comparison - * of CPU and GPU member functions. - * - * This takes two steps - * 1. Construct an AutoCompare object. 
- * When constructing an AutoCompare object, you can set the err argument - * to specify the maximum error for CPU and GPU functions. - * - * 2. Use the template functions cmpWithArg or cmpWithoutArg. - * A. [cmpWithArg] Requires the caller construct the cpu arguments. - * - * AutoCompare test; - * Init Argument arg1,arg2... - * test.cmpWithArg(function, arg1, arg2....) - * - * B. [cmpWithoutArg] The caller do not need construct arguments. - * If matrix used in these functions arguments is the same size. - * Such as the element wise function and the aggregate function - * defined in the BaseMatrix.cpp. - * - * AutoCompare test; - * test.cmpWithoutArg(function, height, width) - */ - -#include -#include "TensorCheck.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseMatrix.h" - -namespace autotest { - -using paddle::BaseMatrix; -using paddle::CpuMatrix; -using paddle::GpuMatrix; -using paddle::CpuIVector; -using paddle::GpuIVector; -using paddle::CpuSparseMatrix; -using paddle::GpuSparseMatrix; - -template -class ReplaceType { - public: - typedef T1 type; -}; - -template <> -class ReplaceType { - public: - typedef CpuMatrix type; -}; - -template <> -class ReplaceType { - public: - typedef GpuMatrix type; -}; - -template <> -class ReplaceType { - public: - typedef CpuMatrix type; -}; - -template <> -class ReplaceType { - public: - typedef GpuMatrix type; -}; - -// construct a argument -template -T construct(int height, int width); - -template <> -float construct(int height, int width) { - return 0.5; -} - -template <> -double construct(int height, int width) { - return 0.5; -} - -template <> -size_t construct(int height, int width) { - size_t offset = std::rand() % (height < width ? 
height : width); - return offset; -} - -template <> -CpuMatrix construct(int height, int width) { - CpuMatrix a(height, width); - return a; -} - -template <> -GpuMatrix construct(int height, int width) { - GpuMatrix a(height, width); - return a; -} - -// init a argument -template -void init(T& v) { - return; -} - -template <> -void init(CpuMatrix& v) { - v.randomizeUniform(); -} - -template <> -void init(GpuMatrix& v) { - v.randomizeUniform(); -} - -// init a tuple which contains a set of arguments. -template -inline typename std::enable_if::type initTuple( - std::tuple& t) {} - -template - inline typename std::enable_if < - I::type initTuple(std::tuple& t) { - init(std::get(t)); - initTuple(t); -} - -// copy a argument, copy src to dest -template -void copy(T1& dest, T2& src) { - dest = src; -} - -template <> -void copy(GpuMatrix& dest, CpuMatrix& src) { - dest.copyFrom(src); -} - -// copy a tuple, copy src to dest -template -inline typename std::enable_if::type copyTuple( - std::tuple& dest, std::tuple& src) {} - -template - inline typename std::enable_if < - I::type copyTuple(std::tuple& dest, - std::tuple& src) { - copy(std::get(dest), std::get(src)); - copyTuple(dest, src); -} - -// call member function -template -R call(C& obj, R (FC::*f)(FArgs...), Args&&... 
args) { - return (obj.*f)(args...); -} - -template -class ReturnType { - public: - typedef T type; -}; - -template <> -class ReturnType { - public: - typedef GpuMatrix type; -}; - -template <> -class ReturnType { - public: - typedef GpuIVector type; -}; - -template <> -class ReturnType { - public: - typedef GpuSparseMatrix type; -}; - -template -typename ReturnType::type autoArgs(T& v) { - return v; -} - -template <> -GpuMatrix autoArgs(CpuMatrix& v) { - GpuMatrix a(v.getHeight(), v.getWidth()); - a.copyFrom(v); - return a; -} - -template <> -GpuIVector autoArgs(CpuIVector& v) { - GpuIVector a(v.getSize()); - a.copyFrom(v); - return a; -} - -template <> -GpuSparseMatrix autoArgs(CpuSparseMatrix& v) { - GpuSparseMatrix a(v.getHeight(), - v.getWidth(), - v.getElementCnt(), - v.getValueType(), - v.getFormat()); - a.copyFrom(v, HPPL_STREAM_DEFAULT); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - return a; -} - -class AutoCompare { - public: - /** - * err is the allowed calculation error. - * The smaller the value of err, - * the stricter the comparison is between CPU and GPU calculations. - */ - AutoCompare(size_t height, size_t width, real err = 1e-3) - : cpu(height, width), gpu(height, width), compare(err) { - init(cpu); - copy(gpu, cpu); - } - - template - void cmpWithArg(R (C::*f)(FArgs...), Args&&... 
args) { - static_assert(sizeof...(FArgs) == sizeof...(Args), - "size of parameter packs are not equal"); - call(cpu, f, args...); - call(gpu, f, autoArgs(args)...); - - TensorCheck(compare, cpu, gpu); - } - - template - void cmpWithoutArg(R (C::*f)(Args...), size_t height, size_t width) { - static_assert(sizeof...(I) == sizeof...(Args), - "size of parameter packs are not equal"); - (void)height; - (void)width; - auto tuple1 = std::make_tuple( - construct>::type>::type, - CpuMatrix>::type>(height, width)...); - - auto tuple2 = std::make_tuple( - construct>::type>::type, - GpuMatrix>::type>(height, width)...); - - initTuple(tuple1); - copyTuple(tuple2, tuple1); - - call(cpu, f, std::get(tuple1)...); - call(gpu, f, std::get(tuple2)...); - - TensorCheck(compare, cpu, gpu); - } - - protected: - CpuMatrix cpu; - GpuMatrix gpu; - AssertEqual compare; -}; - -} // namespace autotest diff --git a/paddle/legacy/math/tests/test_Allocator.cpp b/paddle/legacy/math/tests/test_Allocator.cpp deleted file mode 100644 index 122be9082a8db33caf55661091caad115f575099..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_Allocator.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Util.h" -#define private public -#include "paddle/legacy/math/Allocator.h" -#include "paddle/legacy/math/MemoryHandle.h" -#include "paddle/legacy/math/PoolAllocator.h" - -using namespace paddle; // NOLINT - -template -void testPoolAllocator() { - PoolAllocator* pool = - new PoolAllocator(new Allocator(), /* sizeLimit */ 1024); - - /* alloc from system memory */ - void* ptr1 = pool->alloc(10); - void* ptr2 = pool->alloc(200); - void* ptr3 = pool->alloc(200); - pool->free(ptr1, 10); - pool->free(ptr2, 200); - pool->free(ptr3, 200); - pool->printAll(); - EXPECT_EQ((size_t)2, pool->pool_.size()); - EXPECT_EQ((size_t)1, pool->pool_[10].size()); - EXPECT_EQ((size_t)2, pool->pool_[200].size()); - EXPECT_EQ(ptr1, pool->pool_[10][0]); - EXPECT_EQ(ptr2, pool->pool_[200][0]); - EXPECT_EQ(ptr3, pool->pool_[200][1]); - - /* alloc from pool */ - void* ptr4 = pool->alloc(10); - void* ptr5 = pool->alloc(200); - pool->printAll(); - EXPECT_EQ((size_t)0, pool->pool_[10].size()); - EXPECT_EQ((size_t)1, pool->pool_[200].size()); - EXPECT_EQ(ptr1, ptr4); - EXPECT_EQ(ptr3, ptr5); - pool->free(ptr4, 10); - pool->free(ptr5, 200); - - /* alloc size > sizeLimit */ - void* ptr6 = pool->alloc(1024); - pool->free(ptr6, 1024); - EXPECT_LE((size_t)1024, pool->poolMemorySize_); - - void* ptr7 = pool->alloc(1); - EXPECT_EQ((size_t)0, pool->poolMemorySize_); - EXPECT_EQ((size_t)0, pool->pool_.size()); - pool->free(ptr7, 1); - - delete pool; -} - -TEST(Allocator, Pool) { - testPoolAllocator(); -#ifdef PADDLE_WITH_CUDA - testPoolAllocator(); -#endif -} - -TEST(MemoryHandle, Cpu) { - for (auto size : {10, 30, 50, 100, 200, 512, 1000, 1023, 1024, 1025, 8193}) { - CpuMemoryHandle handle(size); - EXPECT_LE(handle.getSize(), handle.getAllocSize()); - } - - void* ptr1; - void* ptr2; - { - CpuMemoryHandle handle(256); - ptr1 = handle.getBuf(); - } - { - CpuMemoryHandle handle(256); - ptr2 = handle.getBuf(); - } - 
EXPECT_EQ(ptr1, ptr2); -} - -#ifdef PADDLE_WITH_CUDA -TEST(MemoryHandle, Gpu) { - int numGpu = hl_get_device_count(); - - /* alloc from system memory */ - void* ptr3[numGpu]; - void* ptr4[numGpu]; - for (int i = 0; i < numGpu; i++) { - SetDevice device(i); - GpuMemoryHandle handle1(30); - GpuMemoryHandle handle2(30); - GpuMemoryHandle handle3(4000); - GpuMemoryHandle handle4(500); - ptr3[i] = handle3.getBuf(); - ptr4[i] = handle4.getBuf(); - } - - /* alloc from pool */ - for (int i = 0; i < numGpu; i++) { - SetDevice device(i); - GpuMemoryHandle handle1(30); - GpuMemoryHandle handle3(4000); - GpuMemoryHandle handle4(500); - EXPECT_EQ(ptr3[i], handle3.getBuf()); - EXPECT_EQ(ptr4[i], handle4.getBuf()); - } -} -#endif diff --git a/paddle/legacy/math/tests/test_BaseMatrix.cpp b/paddle/legacy/math/tests/test_BaseMatrix.cpp deleted file mode 100644 index 488765c6ac203ad064146faaab7b8c423d53cf0b..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_BaseMatrix.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_CUDA -/** - * This test file use autotest::AutoCompare and cmpWithoutArg to compares the - * implementation of CPU and GPU member function in - * BaseMatrix.cpp and Matrix.cpp. 
- */ - -#include -#include "TestUtils.h" -#include "paddle/legacy/math/BaseMatrix.h" - -using paddle::BaseMatrix; -using paddle::Matrix; -using autotest::AutoCompare; - -// Test all void (BaseMatrix::*)() function -TEST(BaseMatrix, void) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - auto compare = [height, width](void (BaseMatrix::*f)()) { - AutoCompare test(height, width, 1e-5); - test.cmpWithoutArg(f, height, width); - }; - - compare(&BaseMatrix::neg); - compare(&BaseMatrix::exp2); - compare(&BaseMatrix::log2); - compare(&BaseMatrix::sqrt2); - compare(&BaseMatrix::square2); - compare(&BaseMatrix::reciprocal2); - compare(&BaseMatrix::abs2); - compare(&BaseMatrix::sign2); - compare(&BaseMatrix::zero); - compare(&BaseMatrix::one); - } - } -} - -// Test all void (BaseMatrix::*)(real) function -TEST(BaseMatrix, real) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - auto compare = [height, width](void (BaseMatrix::*f)(real)) { - AutoCompare test(height, width, 1e-5); - test.cmpWithoutArg<0>(f, height, width); - }; - - compare(&BaseMatrix::pow2); - compare(&BaseMatrix::subScalar); - compare(&BaseMatrix::mulScalar); - compare(&BaseMatrix::divScalar); - compare(&BaseMatrix::assign); - compare(&BaseMatrix::add); - compare(&BaseMatrix::biggerThanScalar); - compare(&BaseMatrix::downClip); - } - } -} - -// Test all void (BaseMatrix::*)(BaseMatrix&) function -TEST(BaseMatrix, BaseMatrix) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - auto compare = [height, width](void (BaseMatrix::*f)(BaseMatrix&)) { - AutoCompare test(height, width, 1e-5); - test.cmpWithoutArg<0>(f, height, width); - }; - - compare(&BaseMatrix::assign); - compare(&BaseMatrix::add); - compare(&BaseMatrix::relu); - compare(&BaseMatrix::reluDerivative); - compare(&BaseMatrix::softrelu); - 
compare(&BaseMatrix::softreluDerivative); - compare(&BaseMatrix::brelu); - compare(&BaseMatrix::breluDerivative); - compare(&BaseMatrix::square2); - compare(&BaseMatrix::squareDerivative); - compare(&BaseMatrix::tanh); - compare(&BaseMatrix::tanhDerivative); - compare(&BaseMatrix::reciprocal2); - compare(&BaseMatrix::reciprocalDerivative); - compare(&BaseMatrix::abs2); - compare(&BaseMatrix::absDerivative); - compare(&BaseMatrix::sigmoid); - compare(&BaseMatrix::sigmoidDerivative); - compare(&BaseMatrix::expDerivative); - compare(&BaseMatrix::sign2); - compare(&BaseMatrix::exp2); - compare(&BaseMatrix::log2); - compare(&BaseMatrix::sqrt2); - compare(&BaseMatrix::dotMul); - compare(&BaseMatrix::dotMulSquare); - compare(&BaseMatrix::dotSquareMul); - compare(&BaseMatrix::addColVector); - compare(&BaseMatrix::addRowVector); - compare(&BaseMatrix::mulRowVector); - compare(&BaseMatrix::divRowVector); - compare(&BaseMatrix::mulColVector); - compare(&BaseMatrix::divColVector); - compare(&BaseMatrix::addP2P); - compare(&BaseMatrix::invSqrt); - } - } -} - -// Test all void (BaseMatrix::*)(real, real) function -TEST(BaseMatrix, real_real) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - auto compare = [height, width](void (BaseMatrix::*f)(real, real)) { - AutoCompare test(height, width, 1e-5); - test.cmpWithoutArg<0, 1>(f, height, width); - }; - - compare(&BaseMatrix::add); - compare(&BaseMatrix::clip); - } - } -} - -// Test all void (BaseMatrix::*)(BaseMatrix&, real) function -TEST(BaseMatrix, BaseMatrix_real) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - auto compare = [height, width](void (BaseMatrix::*f)(BaseMatrix&, real)) { - AutoCompare test(height, width, 1e-5); - test.cmpWithoutArg<0, 1>(f, height, width); - }; - - compare(&BaseMatrix::addBias); - compare(&BaseMatrix::add); - compare(&BaseMatrix::sub); - compare(&BaseMatrix::pow2); - 
compare(&BaseMatrix::addScalar); - compare(&BaseMatrix::subScalar); - compare(&BaseMatrix::mulScalar); - compare(&BaseMatrix::divScalar); - compare(&BaseMatrix::scalarDiv); - compare(&BaseMatrix::addSquare); - compare(&BaseMatrix::isEqualTo); - } - } -} - -// Test all void (BaseMatrix::*)(BaseMatrix&, BaseMatrix&) function -TEST(BaseMatrix, BaseMatrix_BaseMatrix) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - auto compare = [height, - width](void (BaseMatrix::*f)(BaseMatrix&, BaseMatrix&)) { - AutoCompare test(height, width, 1e-5); - test.cmpWithoutArg<0, 1>(f, height, width); - }; - - compare(&BaseMatrix::softCrossEntropy); - compare(&BaseMatrix::softCrossEntropyBp); - compare(&BaseMatrix::binaryLabelCrossEntropy); - compare(&BaseMatrix::binaryLabelCrossEntropyBp); - compare(&BaseMatrix::sub); - compare(&BaseMatrix::add2); - compare(&BaseMatrix::dotMul); - compare(&BaseMatrix::dotDiv); - compare(&BaseMatrix::logisticRegressionLoss); - compare(&BaseMatrix::logisticRegressionLossBp); - compare(&BaseMatrix::biggerThan); - compare(&BaseMatrix::max2); - compare(&BaseMatrix::dotMulSquare); - compare(&BaseMatrix::dotSquareSquare); - } - } -} - -void TestEelementWise(size_t height, size_t width) { - AutoCompare rowScale(height, width); - rowScale.cmpWithoutArg<0, 1, 2>(&BaseMatrix::rowScale, height, width); - - AutoCompare rowDotMul(height, width); - rowDotMul.cmpWithoutArg<0, 1, 2>(&BaseMatrix::rowDotMul, height, width); - - AutoCompare binaryClassificationError(height, width); - binaryClassificationError.cmpWithoutArg<0, 1, 2, 3>( - &BaseMatrix::binaryClassificationError, height, width); - - AutoCompare sumOfSquaresBp(height, width); - sumOfSquaresBp.cmpWithoutArg<0, 1>(&Matrix::sumOfSquaresBp, height, width); -} - -void TestAggregateToRow(size_t height, size_t width) { - AutoCompare maxCols(1, width); - maxCols.cmpWithoutArg<0>(&BaseMatrix::maxCols, height, width); - - AutoCompare minCols(1, width); - 
minCols.cmpWithoutArg<0>(&BaseMatrix::minCols, height, width); - - AutoCompare addDotMulVMM(1, width); - addDotMulVMM.cmpWithoutArg<0, 1>(&BaseMatrix::addDotMulVMM, height, width); - - AutoCompare sumCols(1, width); - sumCols.cmpWithoutArg<0, 1, 2>(&BaseMatrix::sumCols, height, width); - - AutoCompare collectBias(1, width); - collectBias.cmpWithoutArg<0, 1>( - static_cast(&Matrix::collectBias), - height, - width); -} - -void TestAggregateToCol(size_t height, size_t width) { - AutoCompare maxRows(height, 1); - maxRows.cmpWithoutArg<0>(&BaseMatrix::maxRows, height, width); - - AutoCompare minRows(height, 1); - minRows.cmpWithoutArg<0>(&BaseMatrix::minRows, height, width); - - AutoCompare sumRows(height, 1); - sumRows.cmpWithoutArg<0, 1, 2>(&BaseMatrix::sumRows, height, width); - - AutoCompare sumOfSquares(height, 1); - sumOfSquares.cmpWithoutArg<0, 1>(&Matrix::sumOfSquares, height, width); -} - -TEST(BaseMatrix, Other) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - TestEelementWise(height, width); - TestAggregateToRow(height, width); - TestAggregateToCol(height, width); - } - } -} - -#endif diff --git a/paddle/legacy/math/tests/test_CpuGpuVector.cpp b/paddle/legacy/math/tests/test_CpuGpuVector.cpp deleted file mode 100644 index 010fef534d1e19d2d7d134298eb97aa1b56e2270..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_CpuGpuVector.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_CUDA - -#include -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/Util.h" -#include "test_matrixUtil.h" - -using namespace paddle; // NOLINT - -TEST(CpuGpuVector, getData) { - size_t size = 500; - hl_stream_t stream(HPPL_STREAM_DEFAULT); - CpuVectorPtr cpuVec = std::make_shared(size); - GpuVectorPtr gpuVec = std::make_shared(size); - cpuVec->uniform(0.0, 10.0); - gpuVec->copyFrom(*cpuVec, stream); - hl_stream_synchronize(stream); - - CpuGpuVectorPtr vec = std::make_shared(gpuVec); - auto a = vec->getData(false); - auto b = cpuVec->getData(); - hl_stream_synchronize(stream); - checkDataEqual(a, b, size); -} - -TEST(CpuGpuVector, subCreate) { - size_t size1 = 1024; - size_t offset = 100; - size_t size2 = 500; - hl_stream_t stream(HPPL_STREAM_DEFAULT); - CpuGpuVectorPtr v1 = std::make_shared(size1, /*useGpu*/ false); - auto vec = v1->getMutableVector(false); - vec->uniform(0.0, 10.0); - auto v2 = std::make_shared(*v1, offset, size2); - CHECK_EQ(*v1->getSync(), *v2->getSync()); - - // check subVec equal - checkDataEqual(v1->getData(false) + offset, v2->getData(false), size2); - - CpuVectorPtr v1Check = std::make_shared(size1); - CpuVectorPtr v2Check = std::make_shared(size2); - v1Check->copyFrom(*(v1->getVector(true)), stream); - v2Check->copyFrom(*(v2->getVector(true)), stream); - hl_stream_synchronize(stream); - - checkDataEqual(v2->getData(false), v2Check->getData(), size2); - checkDataEqual(v1Check->getData() + offset, v2Check->getData(), size2); - - CpuVectorPtr noise = std::make_shared(size2); - noise->uniform(0.0, 1.0); - auto v = v2->getMutableVector(false); // will change header - // add noise to subVec - v->add(*noise); - - // check v1_cpu_data == v2_cpu_data - checkDataEqual(v1->getData(false) + offset, v2->getData(false), size2); - - v1Check->copyFrom(*(v1->getVector(true)), stream); - 
v2Check->copyFrom(*(v2->getVector(true)), stream); - hl_stream_synchronize(stream); - - // check v1_gpu_data == v2_gpu_data - checkDataEqual(v1Check->getData() + offset, v2Check->getData(), size2); -} - -#endif diff --git a/paddle/legacy/math/tests/test_ExecViaCpu.cpp b/paddle/legacy/math/tests/test_ExecViaCpu.cpp deleted file mode 100644 index b2ce0bc7ede133028fff8a855ff336ff83f55d82..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_ExecViaCpu.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include -#include -#include "paddle/legacy/math/SparseMatrix.h" - -using namespace paddle; // NOLINT - -const int height = 10; -const int width = 16; - -real f(Matrix& mat1, - const Matrix& mat2, - IVector& vec1, - const IVector& vec2, - real scalar) { - CHECK(!mat1.useGpu()); - CHECK(!mat2.useGpu()); - CHECK(!vec1.useGpu()); - CHECK(!vec2.useGpu()); - mat1.copyFrom(mat2); - vec1.copyFrom(vec2); - - return scalar; -} - -class Functor { - public: - real operator()(Matrix& mat1, - const Matrix& mat2, - IVector& vec1, - const IVector& vec2, - real scalar) { - a_ = f(mat1, mat2, vec1, vec2, scalar); - return a_; - } - - private: - real a_; -}; - -template -void testWrapper(F&& f) { - MatrixPtr cpumat1 = Matrix::create(height, width, false, /*useGpu=*/false); - MatrixPtr cpumat2 = Matrix::create(height, width, false, /*useGpu=*/false); - - IVectorPtr cpuvec1 = IVector::create(height, /*useGpu=*/false); - IVectorPtr cpuvec2 = IVector::create(height, /*useGpu=*/false); - - const real scalar = 1.23456; - - MatrixPtr gpumat1 = Matrix::create(height, width, false, /*useGpu=*/true); - MatrixPtr gpumat2 = Matrix::create(height, width, false, /*useGpu=*/true); - IVectorPtr gpuvec1 = IVector::create(height, /*useGpu=*/true); - IVectorPtr gpuvec2 = IVector::create(height, /*useGpu=*/true); - - cpumat2->randomizeUniform(); - cpuvec2->rand(width); - gpumat2->copyFrom(*cpumat2); - gpuvec2->copyFrom(*cpuvec2); - - real ret = execViaCpu(f, *gpumat1, *gpumat2, *gpuvec1, *gpuvec2, 1.23456); - EXPECT_EQ(ret, scalar); - cpumat1->copyFrom(*gpumat1); - cpuvec1->copyFrom(*gpuvec1); - - for (int i = 0; i < height; ++i) { - EXPECT_EQ(cpuvec1->getElement(i), cpuvec2->getElement(i)); - for (int j = 0; j < width; ++j) { - EXPECT_EQ(cpumat1->getElement(i, j), cpumat2->getElement(i, j)); - } - } - gpumat1->resize(height, 1); - execViaCpu2(&CpuMatrix::selectElements, *gpumat1, *gpumat2, *gpuvec1); - - cpumat1->resize(height, 1); - cpumat1->selectElements(*cpumat2, 
*cpuvec1); - for (int i = 0; i < height; ++i) { - EXPECT_EQ(cpumat1->getElement(i, 0), gpumat1->getElement(i, 0)); - } -} - -#ifdef PADDLE_WITH_CUDA -TEST(ExecViaCpu, test1) { - testWrapper(f); - testWrapper(&f); - - auto lambda = [](Matrix& mat1, - const Matrix& mat2, - IVector& vec1, - const IVector& vec2, - real scalar) -> real { - return f(mat1, mat2, vec1, vec2, scalar); - }; - LOG(INFO) << "lambda is_class=" << std::is_class::value - << " is_function=" << std::is_function::value; - testWrapper(lambda); - - Functor functor; - testWrapper(functor); -} -#endif diff --git a/paddle/legacy/math/tests/test_FPException.cpp b/paddle/legacy/math/tests/test_FPException.cpp deleted file mode 100644 index aa6aea71c8d959834ff11c04969e13bb36b630ff..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_FPException.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/** - * This test is about floating point calculation exception. - * Paddle catches FE_INVALID, FE DIVBYZERO and FE_OVERFLOW exceptions. - * - * Some exceptions occur in the middle of a set of formulas, - * that can be circumvented by some tricks. - * For example, - * calculate tanh - * b = 2.0 / (1.0 + exp(-2 * a)) - 1.0 - * - * If the result of (-2 * a) is too large, - * a FE_OVERFLOW exception occurs when calculating exp. 
- * But the result of tanh is no overflow problem, - * so we can add some tricks to prevent exp calculate an excessive value. - * - */ - -#include -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Common.h" - -using namespace paddle; // NOLINT - -void SetTensorValue(Matrix& matrix, real value) { - int height = matrix.getHeight(); - int width = matrix.getWidth(); - int stride = matrix.getStride(); - real* data = matrix.getData(); - for (int i = 0; i < height; i++) { - int j = rand() % width; // NOLINT - if (typeid(matrix) == typeid(CpuMatrix)) { - data[i * stride + j] = value; - } else if (typeid(matrix) == typeid(GpuMatrix)) { - hl_memcpy(&data[i * stride + j], &value, sizeof(real)); - } else { - LOG(FATAL) << "should not reach here"; - } - } -} - -template -void testTanh(real illegal) { - MatrixPtr A = std::make_shared(10, 10); - MatrixPtr B = std::make_shared(10, 10); - A->randomizeUniform(); - B->randomizeUniform(); - - SetTensorValue(*A, illegal); - - A->tanh(*B); -} - -template -void testSigmoid(real illegal) { - MatrixPtr A = std::make_shared(10, 10); - MatrixPtr B = std::make_shared(10, 10); - A->randomizeUniform(); - B->randomizeUniform(); - - SetTensorValue(*A, illegal); - - A->sigmoid(*B); -} - -TEST(fp, overflow) { - for (auto illegal : {-90.0, 90.0}) { - LOG(INFO) << " illegal=" << illegal; - testTanh(illegal); - testSigmoid(illegal); - } -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - - feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/math/tests/test_GpuProfiler.cpp b/paddle/legacy/math/tests/test_GpuProfiler.cpp deleted file mode 100644 index ee27109f218ca56df8f42ca6395b22621f5fbc11..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_GpuProfiler.cpp +++ /dev/null @@ -1,165 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_CUDA - -#include -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -void MatrixCheckErr(const Matrix& matrix1, const Matrix& matrix2) { - CHECK(matrix1.getHeight() == matrix2.getHeight()); - CHECK(matrix1.getWidth() == matrix2.getWidth()); -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - - int height = matrix1.getHeight(); - int width = matrix1.getWidth(); - const real* data1 = matrix1.getData(); - const real* data2 = matrix2.getData(); - int count = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - real a = data1[i * width + j]; - real b = data2[i * width + j]; - if (fabs(a - b) > err) { - if ((fabsf(a - b) / fabsf(a)) > (err / 10.0f)) { - count++; - } - } - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -void testBilinearFwdBwd(int numSamples, - int imgSizeH, - int imgSizeW, - int channels) { - int inWidth = imgSizeH * imgSizeW * channels; - int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels; - real ratioH = 0.5; - real ratioW = 0.5; - - // forward - MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); 
- - MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); - MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); - - input->randomizeUniform(); - inputGpu->copyFrom(*input); - - { - // nvprof: GPU Proflier - REGISTER_GPU_PROFILER("testBilinearFwdBwd"); - target->bilinearForward(*input, - imgSizeH, - imgSizeW, - 2 * imgSizeH, - 2 * imgSizeW, - channels, - ratioH, - ratioW); - targetGpu->bilinearForward(*inputGpu, - imgSizeH, - imgSizeW, - 2 * imgSizeH, - 2 * imgSizeW, - channels, - ratioH, - ratioW); - } - - // check - targetCheck->copyFrom(*targetGpu); - MatrixCheckErr(*target, *targetCheck); - - // backward - MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); - - MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpuGrad = - GpuMatrix::create(numSamples, outWidth, false, true); - MatrixPtr targetCheckGrad = - CpuMatrix::create(numSamples, inWidth, false, false); - - inputGrad->randomizeUniform(); - targetGrad->randomizeUniform(); - inputGpuGrad->copyFrom(*inputGrad); - targetGpuGrad->copyFrom(*targetGrad); - - inputGrad->bilinearBackward(*targetGrad, - 2 * imgSizeH, - 2 * imgSizeW, - imgSizeH, - imgSizeW, - channels, - ratioH, - ratioW); - inputGpuGrad->bilinearBackward(*targetGpuGrad, - 2 * imgSizeH, - 2 * imgSizeW, - imgSizeH, - imgSizeW, - channels, - ratioH, - ratioW); - - // check - targetCheckGrad->copyFrom(*inputGpuGrad); - MatrixCheckErr(*inputGrad, *targetCheckGrad); -} - -TEST(Profiler, testBilinearFwdBwd) { - auto numSamples = 10; - auto channels = 16; - auto imgSize = 64; - { - // nvprof: GPU Proflier - REGISTER_GPU_PROFILER("testBilinearFwdBwd"); - // Paddle built-in timer - REGISTER_TIMER_INFO( - "testBilinearFwdBwd", - "numSamples = 10, channels = 16, imgSizeX = 64, imgSizeY = 
64"); - testBilinearFwdBwd(numSamples, imgSize, imgSize, channels); - } - globalStat.printAllStatus(); -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - - // nvprof: GPU Proflier - REGISTER_GPU_PROFILER( - "RecursiveProfilingTest", - "numSamples = 10, channels = 16, imgSizeX = 64, imgSizeY = 64"); - - return RUN_ALL_TESTS(); -} - -#endif diff --git a/paddle/legacy/math/tests/test_Matrix.cpp b/paddle/legacy/math/tests/test_Matrix.cpp deleted file mode 100644 index a9407a31f334a1bea0293ad772229dafc6705936..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_Matrix.cpp +++ /dev/null @@ -1,273 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_CUDA -/** - * This test file use autotest::AutoCompare and cmpWithArg to compares the - * implementation of CPU and GPU member function in Matrix.cpp. 
- */ - -#include -#include "TestUtils.h" - -using paddle::BaseMatrix; -using paddle::Matrix; -using paddle::CpuMatrix; -using paddle::CpuIVector; -using paddle::CpuSparseMatrix; -using autotest::AutoCompare; - -void testBilinearFwdBwd(int numSamples, - int imgSizeH, - int imgSizeW, - int channels) { - int inWidth = imgSizeH * imgSizeW * channels; - int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels; - real ratioH = 0.5; - real ratioW = 0.5; - - AutoCompare forward(numSamples, outWidth); - CpuMatrix arg1(numSamples, inWidth); - arg1.randomizeUniform(); - forward.cmpWithArg(&Matrix::bilinearForward, - arg1, - imgSizeH, - imgSizeW, - 2 * imgSizeH, - 2 * imgSizeW, - channels, - ratioH, - ratioW); - - AutoCompare backward(numSamples, inWidth); - CpuMatrix arg2(numSamples, outWidth); - arg2.randomizeUniform(); - backward.cmpWithArg(&Matrix::bilinearBackward, - arg2, - 2 * imgSizeH, - 2 * imgSizeW, - imgSizeH, - imgSizeW, - channels, - ratioH, - ratioW); -} - -TEST(Matrix, BilinearFwdBwd) { - for (auto numSamples : {5, 10}) { - for (auto channels : {8, 16}) { - for (auto imgSizeH : {14, 28}) { - for (auto imgSizeW : {16, 30}) { - VLOG(3) << " numSamples=" << numSamples << " channels=" << channels - << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; - testBilinearFwdBwd(numSamples, imgSizeH, imgSizeW, channels); - } - } - } - } -} - -void testMatrixAddBias(int height, int width, real scale) { - AutoCompare test(height, width); - CpuMatrix arg1(1, width); - arg1.randomizeUniform(); - test.cmpWithArg( - static_cast(&Matrix::addBias), - arg1, - scale); -} - -void testMatrixAddDotMulMMV(int height, int width) { - AutoCompare test(height, width); - CpuMatrix arg1(height, width); - CpuMatrix arg2(1, width); - arg1.randomizeUniform(); - arg2.randomizeUniform(); - test.cmpWithArg(&BaseMatrix::addDotMulMMV, arg1, arg2); -} - -TEST(Matrix, unary) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - VLOG(3) << " 
height=" << height << " width=" << width; - testMatrixAddBias(height, width, 1.0); - testMatrixAddBias(height, width, 3.5); - testMatrixAddDotMulMMV(height, width); - } - } -} - -void testMatrixAddAtOffset(int height, int width1, int width2, int offset) { - AutoCompare test(height, width2); - CpuMatrix arg1(height, width1); - arg1.randomizeUniform(); - test.cmpWithArg(&Matrix::addAtOffset, arg1, offset); -} - -void testMatrixAssignAtOffset(int height, int width1, int width2, int offset) { - AutoCompare test(height, width2); - CpuMatrix arg1(height, width1); - arg1.randomizeUniform(); - test.cmpWithArg(&Matrix::assignAtOffset, arg1, offset); -} - -TEST(Matrix, AtOffset) { - for (auto height : {1, 11, 73, 128, 200}) { - for (auto width1 : {1, 32, 100, 512, 1000}) { - for (auto width2 : {1, 32, 100, 512, 1000}) { - int columnOffset = 0; - int offset = std::abs(width1 - width2); - if (offset) { - columnOffset = std::rand() % offset; - } - VLOG(3) << " height=" << height << " width1=" << width1 - << " width2=" << width2 << " columnOffset = " << columnOffset; - testMatrixAddAtOffset(height, width1, width2, columnOffset); - testMatrixAssignAtOffset(height, width1, width2, columnOffset); - } - } - } -} - -void testMatrixSelectRows(int numSamples, int tableSize, int inputDim) { - AutoCompare test(numSamples, inputDim); - CpuMatrix arg1(tableSize, inputDim); - CpuIVector arg2(numSamples); - arg1.randomizeUniform(); - arg2.rand(tableSize); - test.cmpWithArg(&Matrix::selectRows, arg1, arg2); -} - -TEST(Matrix, tableProjection) { - for (auto numSamples : {10, 100, 1000, 10000, 80000}) { - for (auto tableSize : {10, 100}) { - for (auto inputDim : {20, 50}) { - VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize - << " inputDim=" << inputDim; - testMatrixSelectRows(numSamples, tableSize, inputDim); - } - } - } -} - -void testMatrixCopyByRowIndex(int outHeight, int inHeight, int width) { - AutoCompare test(outHeight, width); - CpuMatrix arg1(inHeight, width); - 
CpuIVector arg2(outHeight); - arg1.randomizeUniform(); - arg2.rand(inHeight); - test.cmpWithArg(&Matrix::copyByRowIndex, arg1, arg2); -} - -TEST(Matrix, copyByRowIndex) { - for (auto outHeight : {31, 500, 1000}) { - for (auto inHeight : {17, 257, 500, 1200}) { - for (auto width : {512, 1024}) { - VLOG(3) << outHeight << " " << inHeight << " " << width; - testMatrixCopyByRowIndex(outHeight, inHeight, width); - } - } - } -} - -void testParamReluForward(int height, int width, int w_height, int w_width) { - AutoCompare test(height, width); - CpuMatrix arg1(height, width); - CpuMatrix arg2(w_height, w_width); - arg1.randomizeUniform(); - arg2.randomizeUniform(); - arg1.add(-0.5); - test.cmpWithArg(&Matrix::paramReluForward, arg1, arg2); -} - -void testParamReluBackwardW(int height, int width, int w_height, int w_width) { - AutoCompare test(w_height, w_width); - CpuMatrix arg1(height, width); - CpuMatrix arg2(height, width); - arg1.randomizeUniform(); - arg2.randomizeUniform(); - arg2.add(-0.5); - test.cmpWithArg(&Matrix::paramReluBackwardW, arg1, arg2); -} - -TEST(Matrix, paramRelu) { - for (auto height : {10, 40, 100}) { - for (auto width : {10, 40, 100}) { - for (auto w_height : {1, 2}) { - for (auto w_width : {1, 2}) { - if (width % (w_height * w_width)) continue; - testParamReluForward(height, width, w_height, w_width); - testParamReluBackwardW(height, width, w_height, w_width); - } - } - } - } -} - -void testAddSharedBias(int numSamples, int dim, int channel) { - AutoCompare test(numSamples, dim); - CpuMatrix arg1(1, channel); - arg1.randomizeUniform(); - test.cmpWithArg(&Matrix::addSharedBias, arg1, 1.0); -} - -void testCollectSharedBias(int numSamples, int dim, int channel) { - AutoCompare test(1, channel); - CpuMatrix arg1(numSamples, dim); - arg1.randomizeUniform(); - test.cmpWithArg(&Matrix::collectSharedBias, arg1, 1.0); -} - -TEST(Matrix, sharedBias) { - for (auto numSamples : {1, 100, 520}) { - for (auto dim : {100 * 16, 100 * 32}) { - for (auto channel : 
{8, 16}) { - VLOG(3) << " numSamples=" << numSamples << " dim=" << dim - << " channel=" << channel; - testAddSharedBias(numSamples, dim, channel); - testCollectSharedBias(numSamples, dim, channel); - } - } - } -} - -void testMultiBinaryLabelCrossEntropy(int numSamples, int dim) { - AutoCompare forward(numSamples, 1); - CpuMatrix arg1(numSamples, dim); - CpuSparseMatrix arg2( - numSamples, dim, numSamples, paddle::NO_VALUE, paddle::SPARSE_CSR); - - CpuMatrix output1(numSamples, dim); - output1.randomizeUniform(); - output1.softmax(arg1); - for (int i = 0; i < numSamples; i++) { - const unsigned int id = std::rand() % dim; - arg2.setRow(i, 1, &id, nullptr); - } - forward.cmpWithArg(&Matrix::multiBinaryLabelCrossEntropy, arg1, arg2); - - AutoCompare backward(numSamples, dim); - backward.cmpWithArg(&Matrix::multiBinaryLabelCrossEntropyBp, arg1, arg2); -} - -TEST(Matrix, multiBinaryCrossEntropy) { - for (auto numSamples : {100, 1000, 10000}) { - for (auto dim : {100, 1000, 10000}) { - VLOG(3) << " numSamples=" << numSamples << " dim=" << dim; - testMultiBinaryLabelCrossEntropy(numSamples, dim); - } - } -} - -#endif diff --git a/paddle/legacy/math/tests/test_RowBuffer.cpp b/paddle/legacy/math/tests/test_RowBuffer.cpp deleted file mode 100644 index 2ef8cd303d65f50cd18adb7f80fa18a665b67340..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_RowBuffer.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "paddle/legacy/math/RowBuffer.h" - -TEST(RowBuffer, testAutoGrow) { - paddle::RowBuffer buf(128); - ASSERT_EQ(128UL, buf.getWidth()); - ASSERT_TRUE(buf.isAutoGrowth()); - buf.resize(2); - ASSERT_EQ(2UL, buf.getRowCount()); - for (size_t i = 0; i < buf.getWidth() * 2; ++i) { - buf.data()[i] = i; - } - for (size_t i = 0; i < buf.getRowCount(); ++i) { - for (size_t j = 0; j < buf.getWidth(); ++j) { - ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5); - } - } - - auto data = buf.getWithAutoGrowth(2); - for (size_t i = 0; i < buf.getWidth(); ++i) { - data[i] = i; - } - - ASSERT_EQ(3UL, buf.getRowCount()); - for (size_t i = 0; i < buf.getRowCount() - 1; ++i) { - for (size_t j = 0; j < buf.getWidth(); ++j) { - ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5); - } - } - for (size_t i = 0; i < buf.getWidth(); ++i) { - ASSERT_NEAR(i, buf.get(2)[i], 1e-5); - } -} - -TEST(RowBuffer, testWithMemBuf) { - paddle::CpuMemHandlePtr mem = - std::make_shared(128 * 2 * sizeof(real)); - paddle::RowBuffer buf(mem, 128); - ASSERT_TRUE(!buf.isAutoGrowth()); - ASSERT_EQ(2UL, buf.getRowCount()); - for (size_t i = 0; i < buf.getWidth() * 2; ++i) { - buf.data()[i] = i; - } - for (size_t i = 0; i < buf.getRowCount(); ++i) { - for (size_t j = 0; j < buf.getWidth(); ++j) { - ASSERT_NEAR(i * buf.getWidth() + j, buf.getWithAutoGrowth(i)[j], 1e-5); - } - } - - ASSERT_DEATH_IF_SUPPORTED(buf.getWithAutoGrowth(3), ".*"); -} diff --git a/paddle/legacy/math/tests/test_SIMDFunctions.cpp b/paddle/legacy/math/tests/test_SIMDFunctions.cpp deleted file mode 100644 index c6490f70e336dadcf6710c83ced2afddc13b7812..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_SIMDFunctions.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/math/SIMDFunctions.h" -#include "paddle/legacy/utils/Util.h" - -#include - -#include -#include -#include -#include - -#include -#include - -static constexpr size_t VECTOR_LEN = 3072; -static constexpr size_t BATCH_SIZE = 64; -static constexpr size_t ALIGN = 32; -static_assert(VECTOR_LEN % ALIGN == 0, "VECTOR_LEN % ALIGN == 0"); -static_assert(BATCH_SIZE % ALIGN == 0, "BATCH_SIZE % ALIGN == 0"); -static constexpr float EPSILON = 1e-5; -static std::mt19937 RandomEngine(time(0)); - -inline static std::unique_ptr NewVector(size_t len = VECTOR_LEN, - size_t align = ALIGN) { - float* ptr; - CHECK_EQ(posix_memalign((void**)&ptr, align, len * sizeof(float)), 0); - return std::unique_ptr(ptr); -} - -inline static std::unique_ptr NewRandomVector(size_t len = VECTOR_LEN, - size_t align = ALIGN) { - std::uniform_real_distribution dist(-100.0f, 100.0f); - auto generator = std::bind(dist, RandomEngine); - auto retv = NewVector(len, align); - std::generate_n(retv.get(), len, generator); - return retv; -} - -TEST(SIMDFunction, addTo) { - typedef std::function AddToMethodType; - - AddToMethodType naive = paddle::simd::naive::addTo; - AddToMethodType simd = paddle::simd::addTo; - - auto A = NewRandomVector(); - auto B = NewRandomVector(); - - auto ACopy = NewVector(); - memcpy(ACopy.get(), A.get(), VECTOR_LEN * sizeof(float)); - - naive(A.get(), B.get(), VECTOR_LEN); - simd(ACopy.get(), B.get(), VECTOR_LEN); - - for (size_t i = 
0; i < VECTOR_LEN; ++i) { - ASSERT_NEAR(A[i], ACopy[i], EPSILON); - } -} - -TEST(SIMDFunction, batchAddTo) { - auto A = NewRandomVector(); - auto ACopy = NewVector(); - memcpy(ACopy.get(), A.get(), sizeof(float) * VECTOR_LEN); - - std::vector> B; - for (size_t i = 0; i < BATCH_SIZE; ++i) { - B.emplace_back(NewRandomVector()); - } - std::unique_ptr BRaw(new float*[BATCH_SIZE]); - for (size_t i = 0; i < BATCH_SIZE; ++i) { - BRaw[i] = B[i].get(); - } - - typedef std::function - BatchAddToMethodType; - - BatchAddToMethodType naive = paddle::simd::naive::batchAddTo; - BatchAddToMethodType simd = paddle::simd::batchAddTo; - - naive(A.get(), (const float**)BRaw.get(), BATCH_SIZE, VECTOR_LEN); - simd(ACopy.get(), (const float**)BRaw.get(), BATCH_SIZE, VECTOR_LEN); - - for (size_t i = 0; i < VECTOR_LEN; ++i) { - ASSERT_NEAR(A[i], ACopy[i], EPSILON); - } -} - -TEST(SIMDFunction, colMax) { - auto A = NewRandomVector(VECTOR_LEN * BATCH_SIZE); - auto naiveResult = NewVector(BATCH_SIZE); - auto simdResult = NewVector(BATCH_SIZE); - - typedef std::function ColMaxMethodType; - ColMaxMethodType naive = paddle::simd::naive::colMax; - ColMaxMethodType simd = paddle::simd::colMax; - - naive(naiveResult.get(), A.get(), BATCH_SIZE, VECTOR_LEN); - simd(simdResult.get(), A.get(), BATCH_SIZE, VECTOR_LEN); - - for (size_t i = 0; i < BATCH_SIZE; ++i) { - ASSERT_NEAR(naiveResult[i], simdResult[i], EPSILON); - } -} - -TEST(SIMDFunction, decayL1_WithLR) { - auto dest = NewRandomVector(); - auto src = NewRandomVector(); - auto lr = NewRandomVector(); - auto lambda = 0.23f; - - auto simd_dest = NewVector(); - memcpy(simd_dest.get(), dest.get(), sizeof(float) * VECTOR_LEN); - - typedef std::function - DecayL1MethodType; - - DecayL1MethodType naive = []( - float* d, float* s, float* lr, float l, size_t len) { - paddle::simd::naive::decayL1(d, s, lr, l, len); - }; - - DecayL1MethodType simd = []( - float* d, float* s, float* lr, float l, size_t len) { - paddle::simd::decayL1(d, s, lr, l, len); - }; 
- - naive(dest.get(), src.get(), lr.get(), lambda, VECTOR_LEN); - simd(simd_dest.get(), src.get(), lr.get(), lambda, VECTOR_LEN); - - for (size_t i = 0; i < VECTOR_LEN; ++i) { - ASSERT_NEAR(dest[i], simd_dest[i], EPSILON); - } -} - -TEST(SIMDFunction, decayL1_WithoutLR) { - auto dest = NewRandomVector(); - auto src = NewRandomVector(); - auto lambda = 0.23; - - auto simd_dest = NewVector(); - memcpy(simd_dest.get(), dest.get(), sizeof(float) * VECTOR_LEN); - - typedef std::function DecayL1MethodType; - - DecayL1MethodType naive = [](float* d, float* s, float l, size_t len) { - paddle::simd::naive::decayL1(d, s, l, len); - }; - - DecayL1MethodType simd = [](float* d, float* s, float l, size_t len) { - paddle::simd::decayL1(d, s, l, len); - }; - - naive(dest.get(), src.get(), lambda, VECTOR_LEN); - simd(simd_dest.get(), src.get(), lambda, VECTOR_LEN); - - for (size_t i = 0; i < VECTOR_LEN; ++i) { - ASSERT_NEAR(dest[i], simd_dest[i], EPSILON); - } -} diff --git a/paddle/legacy/math/tests/test_SparseMatrix.cpp b/paddle/legacy/math/tests/test_SparseMatrix.cpp deleted file mode 100644 index 30896a945ec6d111c35eea94d8008a62593d2893..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_SparseMatrix.cpp +++ /dev/null @@ -1,565 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include "test_matrixUtil.h" - -using namespace paddle; // NOLINT - -TEST(Matrix, CopyCpuMatrixToSparseMatrix) { - const size_t HEIGHT = 20; - const size_t WIDTH = 10; - const size_t WIDTH_TEST = 15; - MatrixPtr testMatrix( - new CpuSparseMatrix(HEIGHT, WIDTH, HEIGHT * 5, FLOAT_VALUE, SPARSE_CSR)); - MatrixPtr testCpuMatrix(new CpuMatrix(HEIGHT, WIDTH)); - testCpuMatrix->randomizeUniform(); - testMatrix->copyFrom(*testCpuMatrix, HPPL_STREAM_DEFAULT); - MatrixPtr mulCpuMatrix(new CpuMatrix(WIDTH, WIDTH_TEST)); - mulCpuMatrix->randomizeUniform(); - MatrixPtr ret1(new CpuMatrix(HEIGHT, WIDTH_TEST)), - ret2(new CpuMatrix(HEIGHT, WIDTH_TEST)); - ret1->zeroMem(); - ret2->zeroMem(); - ret1->mul(*testMatrix, *mulCpuMatrix, 1.0, 1.0); - ret2->mul(*testCpuMatrix, *mulCpuMatrix, 1.0, 1.0); - checkMatrixEqual(ret1, ret2); -} - -struct MatrixPara { - size_t height; - size_t width; - bool trans; - bool sparse; - size_t nnz; - SparseFormat format; -}; - -#ifdef PADDLE_WITH_CUDA -void test_sparse_matrix_mul(MatrixPara paraA, - MatrixPara paraB, - MatrixPara paraC) { - // for cpu sparse matrix mul - MatrixPtr cpuMatrixA, cpuMatrixB, cpuMatrixC, gpuMatrixC_d2h; - // for gpu sparse matrix mul - MatrixPtr gpuMatrixA, gpuMatrixB, gpuMatrixC; - // for cpu dense matrix mul - MatrixPtr cpuDenseA, cpuDenseB, cpuDenseC; - - if (paraA.sparse) { - cpuMatrixA = Matrix::createSparseMatrix(paraA.height, - paraA.width, - paraA.nnz, - FLOAT_VALUE, - paraA.format, - paraA.trans, - false); - gpuMatrixA = Matrix::createSparseMatrix(paraA.height, - paraA.width, - paraA.nnz, - FLOAT_VALUE, - paraA.format, - paraA.trans, - true); - } else { - cpuMatrixA = Matrix::create(paraA.height, paraA.width, paraA.trans, false); - gpuMatrixA = Matrix::create(paraA.height, paraA.width, paraA.trans, true); - } - cpuDenseA = Matrix::create(paraA.height, paraA.width, paraA.trans, false); - - if (paraB.sparse) { - cpuMatrixB = Matrix::createSparseMatrix(paraB.height, - paraB.width, - paraB.nnz, 
- FLOAT_VALUE, - paraB.format, - paraB.trans, - false); - gpuMatrixB = Matrix::createSparseMatrix(paraB.height, - paraB.width, - paraB.nnz, - FLOAT_VALUE, - paraB.format, - paraB.trans, - true); - } else { - cpuMatrixB = Matrix::create(paraB.height, paraB.width, paraB.trans, false); - gpuMatrixB = Matrix::create(paraB.height, paraB.width, paraB.trans, true); - } - cpuDenseB = Matrix::create(paraB.height, paraB.width, paraB.trans, false); - - if (paraC.sparse) { - cpuMatrixC = Matrix::createSparseMatrix(paraC.height, - paraC.width, - paraC.nnz, - FLOAT_VALUE, - paraC.format, - paraC.trans, - false); - gpuMatrixC = Matrix::createSparseMatrix(paraC.height, - paraC.width, - paraC.nnz, - FLOAT_VALUE, - paraC.format, - paraC.trans, - true); - gpuMatrixC_d2h = Matrix::createSparseMatrix(paraC.height, - paraC.width, - paraC.nnz, - FLOAT_VALUE, - paraC.format, - paraC.trans, - false); - } else { - cpuMatrixC = Matrix::create(paraC.height, paraC.width, paraC.trans, false); - gpuMatrixC = Matrix::create(paraC.height, paraC.width, paraC.trans, true); - gpuMatrixC_d2h = - Matrix::create(paraC.height, paraC.width, paraC.trans, false); - } - cpuDenseC = Matrix::create(paraC.height, paraC.width, paraC.trans, false); - - /*matrix init*/ - hl_stream_t stream(HPPL_STREAM_1); - cpuMatrixA->randomizeUniform(); - cpuMatrixB->randomizeUniform(); - cpuMatrixC->randomizeUniform(); - - gpuMatrixA->copyFrom(*cpuMatrixA, stream); - gpuMatrixB->copyFrom(*cpuMatrixB, stream); - gpuMatrixC->copyFrom(*cpuMatrixC, stream); - - cpuDenseA->copyFrom(*cpuMatrixA); - cpuDenseB->copyFrom(*cpuMatrixB); - cpuDenseC->copyFrom(*cpuMatrixC); - - hl_stream_synchronize(stream); - - /*matrix mul*/ - cpuMatrixC->mul(*cpuMatrixA, *cpuMatrixB, 1.0, 1.0); - gpuMatrixC->mul(*gpuMatrixA, *gpuMatrixB, 1.0, 1.0); - cpuDenseC->mul(*cpuDenseA, *cpuDenseB, 1.0, 1.0); - - gpuMatrixC_d2h->copyFrom(*gpuMatrixC, stream); - hl_stream_synchronize(stream); - - /*check result*/ - if (paraC.sparse) { - checkSMatrixEqual( - 
std::dynamic_pointer_cast(cpuMatrixC), - std::dynamic_pointer_cast(gpuMatrixC_d2h)); - checkSMatrixEqual2Dense( - std::dynamic_pointer_cast(cpuMatrixC), - std::dynamic_pointer_cast(cpuDenseC)); - } else { - checkMatrixEqual(cpuMatrixC, gpuMatrixC_d2h); - checkMatrixEqual(cpuMatrixC, cpuDenseC); - } -} - -TEST(Matrix, SparseMatrixMul) { - const size_t DIM_M = 4; - const size_t DIM_N = 4; - const size_t DIM_K = 8; - const size_t NNZ = 5; - for (auto format : {SPARSE_CSC, SPARSE_CSR}) { - std::string str_format = format == SPARSE_CSC ? "CSC" : "CSR"; - LOG(INFO) << "test dense mul " << str_format; - test_sparse_matrix_mul( - {DIM_M, DIM_K, /*trans*/ false, /*sparse*/ false, NNZ, format}, - {DIM_K, DIM_N, /*trans*/ false, /*sparse*/ true, NNZ, format}, - {DIM_M, DIM_N, /*trans*/ false, /*sparse*/ false, NNZ, format}); - - LOG(INFO) << "test dense mul " << str_format << " trans"; - test_sparse_matrix_mul( - {DIM_M, DIM_K, /*trans*/ false, /*sparse*/ false, NNZ, format}, - {DIM_N, DIM_K, /*trans*/ true, /*sparse*/ true, NNZ, format}, - {DIM_M, DIM_N, /*trans*/ false, /*sparse*/ false, NNZ, format}); - - LOG(INFO) << "test dense mul dense 2 " << str_format; - test_sparse_matrix_mul( - {DIM_M, DIM_K, /*trans*/ false, /*sparse*/ false, NNZ, format}, - {DIM_K, DIM_N, /*trans*/ false, /*sparse*/ false, NNZ, format}, - {DIM_M, DIM_N, /*trans*/ false, /*sparse*/ true, NNZ, format}); - - LOG(INFO) << "test denseT mul dense 2 " << str_format; - test_sparse_matrix_mul( - {DIM_K, DIM_M, /*trans*/ true, /*sparse*/ false, NNZ, format}, - {DIM_K, DIM_N, /*trans*/ false, /*sparse*/ false, NNZ, format}, - {DIM_M, DIM_N, /*trans*/ false, /*sparse*/ true, NNZ, format}); - } -} - -TEST(Matrix, CopySparseMatrixToGpuSparseMatrix) { - const size_t HEIGHT = 20; - const size_t WIDTH = 10; - const size_t WIDTH_TEST = 15; - MatrixPtr testMatrix( - new CpuSparseMatrix(HEIGHT, WIDTH, HEIGHT * 2, FLOAT_VALUE, SPARSE_CSR)); - MatrixPtr testCpuMatrix(new CpuMatrix(HEIGHT, WIDTH)); - 
testCpuMatrix->randomizeUniform(); - testMatrix->copyFrom(*testCpuMatrix, HPPL_STREAM_DEFAULT); - - MatrixPtr testGpuMatrix = testMatrix->clone(HEIGHT, WIDTH, true); - hl_stream_t gpuStream(HPPL_STREAM_3); - testGpuMatrix->copyFrom(*testMatrix, gpuStream); - hl_stream_synchronize(gpuStream); - - MatrixPtr mulCpuMatrix(new CpuMatrix(WIDTH, WIDTH_TEST)); - mulCpuMatrix->randomizeUniform(); - MatrixPtr mulGpuMatrix(new GpuMatrix(WIDTH, WIDTH_TEST)); - mulGpuMatrix->copyFrom(*mulCpuMatrix); - MatrixPtr ret1(new CpuMatrix(HEIGHT, WIDTH_TEST)); - MatrixPtr ret2(new GpuMatrix(HEIGHT, WIDTH_TEST)); - ret1->zeroMem(); - ret2->zeroMem(); - ret1->mul(*testMatrix, *mulCpuMatrix, 1.0, 1.0); - ret2->mul(*testGpuMatrix, *mulGpuMatrix, 1.0, 1.0); - checkMatrixEqual(ret1, ret2); -} - -#endif - -TEST(Matrix, SparseMatrixTranspose) { - for (auto height : {10, 50, 100}) { - for (auto width : {10, 50, 100}) { - auto nnz = height * width; - for (auto valueType : {FLOAT_VALUE, NO_VALUE}) { - for (auto format : {SPARSE_CSR, SPARSE_CSC}) { - for (auto sparseRate : {0.1, 0.2, 0.5}) { - MatrixPtr matA = Matrix::createSparseMatrix( - height, width, size_t(nnz * sparseRate), valueType, format); - MatrixPtr matB(new CpuSparseMatrix( - width, height, size_t(nnz * sparseRate), valueType, format)); - matA->randomizeUniform(); - matA->transpose(matB, false); - - /*dense matrix transpose*/ - CpuMatrixPtr matC(new CpuMatrix(height, width)); - matC->copyFrom(*matA); - MatrixPtr matD(new CpuMatrix(width, height)); - matC->transpose(matD, false); - - /*check result*/ - checkSMatrixEqual2Dense( - std::dynamic_pointer_cast(matB), - std::dynamic_pointer_cast(matD)); - } - } - } - } - } -} - -TEST(Matrix, CpuSparseMatrixSubMatrix) { - const size_t HEIGHT = 10; - const size_t WIDTH = 10; - const size_t NNZ = HEIGHT * WIDTH; - for (auto valueType : {FLOAT_VALUE, NO_VALUE}) { - size_t startRow = 3; - size_t rowNum = 2; - real sparseRate = 0.1; - /*sparse matrix init and get subMatrix*/ - CpuSparseMatrixPtr 
matA = std::make_shared( - HEIGHT, WIDTH, size_t(NNZ * sparseRate), valueType, SPARSE_CSR); - matA->randomizeUniform(); - CpuSparseMatrixPtr matB = std::dynamic_pointer_cast( - matA->subMatrix(startRow, rowNum)); - - int start = matA->getRows()[startRow]; - int end = matA->getRows()[startRow + rowNum]; - - /*compare two matrix*/ - ASSERT_EQ(matB->getElementCnt(), size_t(end - start)); - if (valueType == FLOAT_VALUE) { - for (size_t i = 0; i < matB->getElementCnt(); i++) { - ASSERT_FLOAT_EQ(matB->getValue()[start + i], - matA->getValue()[start + i]); - } - } - - for (size_t i = 0; i < matB->getElementCnt(); i++) { - ASSERT_EQ(matB->getCols()[start + i], matA->getCols()[start + i]); - } - for (size_t i = 0; i < rowNum; i++) { - ASSERT_EQ(matB->getRows()[i], matA->getRows()[startRow + i]); - } - } -} - -void sparseValid( - int* major, int* minor, size_t nnz, size_t majorLen, size_t minorLen) { - CHECK_EQ(nnz, size_t(major[majorLen - 1])); - CHECK_EQ(nnz, minorLen); - for (size_t i = 0; i < majorLen - 1; i++) { - EXPECT_LE(major[i], major[i + 1]); - for (int j = major[i]; j < major[i + 1] - 1; j++) { - EXPECT_LE(minor[j], minor[j + 1]); - } - } -} - -TEST(Matrix, CpuSparseMatrixRandUniform) { - const size_t HEIGHT = 5; - const size_t WIDTH = 10; - const size_t NNZ = HEIGHT * WIDTH; - int* major = nullptr; - int* minor = nullptr; - size_t majorLen = 0; - size_t minorLen = 0; - size_t nnz = 0; - for (auto valueType : {NO_VALUE, FLOAT_VALUE}) { - for (auto format : {SPARSE_CSR, SPARSE_CSC}) { - CpuSparseMatrixPtr matA = std::make_shared( - HEIGHT, WIDTH, size_t(NNZ * 0.1), valueType, format); - matA->randomizeUniform(); - nnz = matA->getElementCnt(); - if (format == SPARSE_CSR) { - majorLen = matA->getHeight() + 1; - minorLen = matA->getElementCnt(); - major = matA->getRows(); - minor = matA->getCols(); - } else { - majorLen = matA->getWidth() + 1; - minorLen = matA->getElementCnt(); - major = matA->getCols(); - minor = matA->getRows(); - } - sparseValid(major, minor, 
nnz, majorLen, minorLen); - } - } -} - -TEST(Matrix, CpuSparseMatrixCopyFrom) { - size_t height = 10; - size_t width = 8; - int64_t indices[11] = {0, 1, 5, 5, 9, 13, 15, 17, 19, 30, 32}; - sparse_non_value_t data[32]; - for (size_t i = 0; i < 32; i++) { - data[i].col = ::rand() % width; - } - CpuSparseMatrixPtr mat = std::make_shared( - height, width, 32, NO_VALUE, SPARSE_CSR, false); - mat->copyFrom(indices, data); - - /*compare indices*/ - size_t sum = 0; - CHECK_EQ(sum, size_t(mat->getRows()[0])); - for (size_t i = 1; i < height + 1; i++) { - sum += indices[i] - indices[i - 1]; - CHECK_EQ(sum, size_t(mat->getRows()[i])); - } - CHECK_EQ(mat->getElementCnt(), size_t(indices[height] - indices[0])); - for (size_t i = 0; i < mat->getElementCnt(); i++) { - CHECK_EQ(size_t(mat->getCols()[i]), size_t(data[i].col)); - } -} - -TEST(Matrix, SparseMatrixCSRFormatTrimFrom) { - size_t height = 10; - size_t width = 8; - int64_t indices[11] = {0, 1, 5, 5, 9, 13, 15, 17, 19, 27, 32}; - sparse_float_value_t data[32]; - int value[32] = { - 1, // row_0 : 1 - 5, 3, 1, 6, // row_1 : 4 - 0, 1, 2, 3, // row_3 : 4 - 4, 5, 6, 7, // row_4 : 4 - 2, 3, // row_5 : 2 - 3, 5, // row_6 : 2 - 0, 1, // row_7 : 2 - 0, 1, 2, 3, 4, 5, 6, 7, // row_8 : 8 - 2, 4, 7, 3, 1 // row_9 : 5 - }; - for (size_t i = 0; i < 32; i++) { - data[i].col = value[i]; - data[i].value = float(value[i]); - } - CpuSparseMatrixPtr mat = std::make_shared( - height, width, 32, FLOAT_VALUE, SPARSE_CSR, false); - mat->copyFrom(indices, data); - - /*compare indices*/ - size_t sum = 0; - CHECK_EQ(sum, size_t(mat->getRows()[0])); - for (size_t i = 1; i < height + 1; i++) { - sum += indices[i] - indices[i - 1]; - CHECK_EQ(sum, size_t(mat->getRows()[i])); - } - CHECK_EQ(mat->getElementCnt(), size_t(indices[height] - indices[0])); - for (size_t i = 0; i < mat->getElementCnt(); i++) { - CHECK_EQ(size_t(mat->getCols()[i]), size_t(data[i].col)); - } - - size_t trimedWidth = 4; - int64_t trimedIndices[11] = {0, 1, 3, 3, 7, 7, 9, 10, 12, 
16, 19}; - sparse_float_value_t trimedData[19]; - int trimedValue[19] = { - 1, // row_0 : 1 - 3, - 1, // row_1 : 2 - 0, - 1, - 2, - 3, // row_3 : 4 - 2, - 3, // row_5 : 2 - 3, // row_6 : 1 - 0, - 1, // row_7 : 2 - 0, - 1, - 2, - 3, // row_8 : 4 - 2, - 3, - 1 // row_9 : 3 - }; - for (size_t i = 0; i < 19; i++) { - trimedData[i].col = trimedValue[i]; - trimedData[i].value = float(trimedValue[i]); - } - CpuSparseMatrixPtr matA = std::make_shared( - height, trimedWidth, 19, FLOAT_VALUE, SPARSE_CSR, false); - matA->copyFrom(trimedIndices, trimedData); - - /*compare indices*/ - sum = 0; - CHECK_EQ(sum, size_t(matA->getRows()[0])); - for (size_t i = 1; i < height + 1; i++) { - sum += trimedIndices[i] - trimedIndices[i - 1]; - CHECK_EQ(sum, size_t(matA->getRows()[i])); - } - CHECK_EQ(matA->getElementCnt(), - size_t(trimedIndices[height] - trimedIndices[0])); - for (size_t i = 0; i < matA->getElementCnt(); i++) { - CHECK_EQ(size_t(matA->getCols()[i]), size_t(trimedData[i].col)); - } - - CpuSparseMatrixPtr matB = std::make_shared( - height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSR, false); - matB->trimFrom(*mat); - checkSMatrixEqual2(matA, matB); - -#ifdef PADDLE_WITH_CUDA - GpuSparseMatrixPtr matC = std::make_shared( - height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSR, true); - matC->trimFrom(*mat); - - CpuSparseMatrixPtr matD = - std::make_shared(height, - trimedWidth, - matC->getElementCnt(), - FLOAT_VALUE, - SPARSE_CSR, - false); - matD->copyFrom(*matC, HPPL_STREAM_DEFAULT); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - checkSMatrixEqual2(matA, matD); -#endif -} - -TEST(Matrix, SparseMatrixCSCFormatTrimFrom) { - size_t height = 8; - size_t width = 10; - int indices[11] = {0, 1, 5, 5, 9, 13, 15, 17, 19, 27, 32}; - int value[32] = { - 1, // col_0 : 1 - 5, 3, 1, 6, // col_1 : 4 - 0, 1, 2, 3, // col_3 : 4 - 4, 5, 6, 7, // col_4 : 4 - 2, 3, // col_5 : 2 - 3, 5, // col_6 : 2 - 0, 1, // col_7 : 2 - 0, 1, 2, 3, 4, 5, 6, 7, // col_8 : 8 - 2, 4, 7, 3, 1 // col_9 : 5 - }; 
- std::vector rows(value, value + 32); - std::vector cols(indices, indices + 11); - std::vector values(value, value + 32); - CpuSparseMatrixPtr mat = std::make_shared( - height, width, 32, FLOAT_VALUE, SPARSE_CSC, false); - mat->copyFrom(rows, cols, values); - - /*compare indices*/ - size_t sum = 0; - CHECK_EQ(sum, size_t(mat->getCols()[0])); - for (size_t i = 1; i < width + 1; i++) { - sum += indices[i] - indices[i - 1]; - CHECK_EQ(sum, size_t(mat->getCols()[i])); - } - CHECK_EQ(mat->getElementCnt(), size_t(indices[width] - indices[0])); - for (size_t i = 0; i < mat->getElementCnt(); i++) { - CHECK_EQ(size_t(mat->getRows()[i]), size_t(value[i])); - } - - size_t trimedWidth = 5; - int trimedIndices[6] = {0, 1, 5, 5, 9, 13}; - int trimedValue[13] = { - 1, // col_0 : 1 - 5, - 3, - 1, - 6, // col_1 : 4 - 0, - 1, - 2, - 3, // col_3 : 4 - 4, - 5, - 6, - 7 // col_4 : 4 - }; - std::vector rowsA(trimedValue, trimedValue + 13); - std::vector colsA(trimedIndices, trimedIndices + 6); - std::vector valuesA(trimedValue, trimedValue + 13); - CpuSparseMatrixPtr matA = std::make_shared( - height, trimedWidth, 13, FLOAT_VALUE, SPARSE_CSC, false); - matA->copyFrom(rowsA, colsA, valuesA); - - /*compare indices*/ - sum = 0; - CHECK_EQ(sum, size_t(matA->getCols()[0])); - for (size_t i = 1; i < trimedWidth + 1; i++) { - sum += trimedIndices[i] - trimedIndices[i - 1]; - CHECK_EQ(sum, size_t(matA->getCols()[i])); - } - CHECK_EQ(matA->getElementCnt(), - size_t(trimedIndices[trimedWidth] - trimedIndices[0])); - for (size_t i = 0; i < matA->getElementCnt(); i++) { - CHECK_EQ(size_t(matA->getRows()[i]), size_t(rowsA[i])); - } - - CpuSparseMatrixPtr matB = std::make_shared( - height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSC, false); - matB->trimFrom(*mat); - checkSMatrixEqual2(matA, matB); - -#ifdef PADDLE_WITH_CUDA - GpuSparseMatrixPtr matC = std::make_shared( - height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSC, true); - matC->trimFrom(*mat); - - CpuSparseMatrixPtr matD = - 
std::make_shared(height, - trimedWidth, - matC->getElementCnt(), - FLOAT_VALUE, - SPARSE_CSC, - false); - matD->copyFrom(*matC, HPPL_STREAM_DEFAULT); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - checkSMatrixEqual2(matA, matD); -#endif -} diff --git a/paddle/legacy/math/tests/test_Tensor.cu b/paddle/legacy/math/tests/test_Tensor.cu deleted file mode 100644 index 3ce056d66140059be8145f7f49bb80cbff4686eb..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_Tensor.cu +++ /dev/null @@ -1,1162 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "TensorCheck.h" -#include "paddle/legacy/math/Matrix.h" - -using paddle::Matrix; -using paddle::CpuMatrix; -using paddle::GpuMatrix; -using paddle::CpuVector; -using paddle::GpuVector; -using paddle::CpuIVector; -using paddle::GpuIVector; -using autotest::TensorCheckEqual; -using autotest::TensorCheckErr; - -#define INIT_UNARY(A1, A2) \ - Tensor A1(height, width); \ - Tensor A2(height, width); \ - A1.randomizeUniform(); \ - A2.copyFrom(A1) -#define INIT_BINARY(A1, A2, B) \ - INIT_UNARY(A1, A2); \ - Tensor B(height, width); \ - B.randomizeUniform() -#define INIT_TERNARY(A1, A2, B, C) \ - INIT_BINARY(A1, A2, B); \ - Tensor C(height, width); \ - C.randomizeUniform() -#define INIT_QUATERNARY(A1, A2, B, C, D) \ - INIT_TERNARY(A1, A2, B, C); \ - Tensor D(height, width); \ - D.randomizeUniform() - -template -struct TestUnaryMatrix { - typedef std::function UnaryFunc; - - explicit TestUnaryMatrix(UnaryFunc testUnaryFunc) { - for (auto height : {1, 11, 73, 128, 200, 330}) { - for (auto width : {1, 32, 100, 512, 1000, 3210}) { - LOG(INFO) << " height=" << height << " width=" << width; - INIT_UNARY(A1, A2); - testUnaryFunc(A1, A2); - } - } - } -}; - -template -struct TestBinaryMatrix { - typedef std::function BinaryFunc; - - explicit TestBinaryMatrix(BinaryFunc testBinaryFunc) { - for (auto height : {1, 11, 73, 128, 200, 330}) { - for (auto width : {1, 32, 100, 512, 1000, 3210}) { - LOG(INFO) << " height=" << height << " width=" << width; - INIT_BINARY(A1, A2, B); - testBinaryFunc(A1, A2, B); - } - } - } -}; - -template -struct TestTernaryMatrix { - typedef std::function - TernaryFunc; - - explicit TestTernaryMatrix(TernaryFunc testTernaryFunc) { - for (auto height : {1, 11, 73, 128, 200, 330}) { - for (auto width : {1, 32, 100, 512, 1000, 3210}) { - LOG(INFO) << " height=" << height << " width=" << width; - INIT_TERNARY(A1, A2, B, C); - testTernaryFunc(A1, A2, B, C); - } - } - } -}; - -template -struct TestQuaternaryMatrix { - typedef std::function - 
QuaternaryFunc; - - explicit TestQuaternaryMatrix(QuaternaryFunc testQuaternaryFunc) { - for (auto height : {1, 11, 73, 128, 200, 330}) { - for (auto width : {1, 32, 100, 512, 1000, 3210}) { - LOG(INFO) << " height=" << height << " width=" << width; - INIT_QUATERNARY(A1, A2, B, C, D); - testQuaternaryFunc(A1, A2, B, C, D); - } - } - } -}; - -template -struct TestUnaryVectorT { - typedef std::function UnaryFunc; - - explicit TestUnaryVectorT(UnaryFunc testUnaryFunc) { - for (auto size : {1, 11, 73, 128, 200, 330, 512, 1000, 4210}) { - LOG(INFO) << " size=" << size; - Tensor A1(size); - Tensor A2(size); - if (typeid(T) == typeid(real)) { - A1.rand(); - } else { - A1.rand(1000); - } - A2.copyFrom(A1); - testUnaryFunc(A1, A2); - } - } -}; - -void SetTensorValue(Matrix& matrix, real value) { - int height = matrix.getHeight(); - int width = matrix.getWidth(); - int stride = matrix.getStride(); - real* data = matrix.getData(); - for (int i = 0; i < height; i++) { - int j = rand() % width; // NOLINT - if (typeid(matrix) == typeid(CpuMatrix)) { - data[i * stride + j] = value; - } else if (typeid(matrix) == typeid(GpuMatrix)) { - hl_memcpy(&data[i * stride + j], &value, sizeof(real)); - } else { - } - } -} - -template -void testTensorAddScalar(Tensor& A1, Tensor& A2) { - real p1 = 2.5; - real p2 = 3.0; - A1.add(p1); // a += p - A2 += p1; - TensorCheckEqual(A1, A2); - - A1.add(p1, p2); // a = a * p1 + p2 - A2 = A2 * p1 + p2; - TensorCheckEqual(A1, A2); -} - -template -void testTensorSubScalar(Tensor& A1, Tensor& A2) { - real p = 2.5; - A1.subScalar(p); // a -= p - A2 -= p; - TensorCheckEqual(A1, A2); -} - -template -void testTensorMulScalar(Tensor& A1, Tensor& A2) { - real p = 2.5; - A1.mulScalar(p); // a *= p - A2 *= p; - TensorCheckEqual(A1, A2); - - real learningRate = 0.7f; - real decayRate = 1.2f; - A1.applyL2(learningRate, decayRate); - A2 = A2 * (1.0f / (1.0f + learningRate * decayRate)); - TensorCheckEqual(A1, A2); -} - -template -void testTensorDivScalar(Tensor& A1, 
Tensor& A2) { - real p = 2.5; - A1.divScalar(p); // a /= p - A2 /= p; - TensorCheckEqual(A1, A2); -} - -template -void testTensorNeg(Tensor& A1, Tensor& A2) { - A1.neg(); // a = -a - A2 = -A2; - TensorCheckEqual(A1, A2); -} - -template -void testTensorAbs(Tensor& A1, Tensor& A2) { - A1.abs2(); // a = a > 0 ? a : -a - A2 = A2.abs(); - TensorCheckEqual(A1, A2); -} - -template -void testTensorSquare(Tensor& A1, Tensor& A2) { - A1.square2(); // a = a * a - A2 = A2.square(); - TensorCheckEqual(A1, A2); -} - -template -void testTensorReciprocal(Tensor& A1, Tensor& A2) { - A1.reciprocal2(); // a = 1.0f / a - A2 = A2.reciprocal(); - TensorCheckEqual(A1, A2); -} - -template -void testTensorSign(Tensor& A1, Tensor& A2) { - A1.sign2(); // a = (a > 0) - (a < 0) - A2 = A2.sign(); - TensorCheckEqual(A1, A2); -} - -template -void testTensorAssign(Tensor& A1, Tensor& A2) { - A1.assign(1.5); // a = p - A2 = A2.constant(1.5); - TensorCheckEqual(A1, A2); - - A1.one(); // a = 1 - A2 = A2.constant(1.0); - TensorCheckEqual(A1, A2); - - A1.zero(); // a = 0 - A2 = A2.constant(0.0); - TensorCheckEqual(A1, A2); -} - -template -void testUnaryBaseOp(Tensor& A1, Tensor& A2) { - testTensorAddScalar(A1, A2); - testTensorSubScalar(A1, A2); - testTensorMulScalar(A1, A2); - testTensorDivScalar(A1, A2); - testTensorNeg(A1, A2); - testTensorAbs(A1, A2); - testTensorSquare(A1, A2); - testTensorReciprocal(A1, A2); - testTensorSign(A1, A2); - testTensorAssign(A1, A2); -} - -template -void testUnaryBaseOpInt(Tensor& A1, Tensor& A2) { - A1.add(2); // a += p - A2 += 2; - TensorCheckEqual(A1, A2); - - A1.add(3, 2); // a = a * p1 + p2 - A2 = A2 * 3 + 2; - TensorCheckEqual(A1, A2); - - testTensorNeg(A1, A2); - testTensorAbs(A1, A2); -} - -TEST(Unary, BaseOp) { - TestUnaryMatrix testCpuMatrix(testUnaryBaseOp); - TestUnaryVectorT testCpuVector(testUnaryBaseOp); - TestUnaryVectorT testCpuIVector( - testUnaryBaseOpInt); - -#ifdef PADDLE_WITH_GPU - TestUnaryMatrix testGpuMatrix(testUnaryBaseOp); - TestUnaryVectorT 
testGpuVector(testUnaryBaseOp); - TestUnaryVectorT testGpuIVector( - testUnaryBaseOpInt); -#endif -} - -template -void testTensorExp(Tensor& A1, Tensor& A2) { - A1.exp2(); // a = exp(a) - A2 = A2.exp(); - TensorCheckErr(A1, A2); -} - -template -void testTensorLog(Tensor& A1, Tensor& A2) { - A1.log2(); // a = log(a) - A2 = A2.log(); - TensorCheckErr(A1, A2); -} - -template -void testTensorSqrt(Tensor& A1, Tensor& A2) { - A1.sqrt2(); // a = sqrt(a) - A2 = A2.sqrt(); - TensorCheckErr(A1, A2); -} - -template -void testTensorPow(Tensor& A1, Tensor& A2) { - A1.pow2(3.2); // a = pow(a, p) - A2 = A2.pow(3.2); - TensorCheckErr(A1, A2); -} - -template -void testUnayrMathOp(Tensor& A1, Tensor& A2) { - testTensorExp(A1, A2); - testTensorLog(A1, A2); - testTensorSqrt(A1, A2); - testTensorPow(A1, A2); -} - -TEST(Unary, MathOp) { - TestUnaryMatrix testCpu(testUnayrMathOp); - -#ifdef PADDLE_WITH_GPU - TestUnaryMatrix testGpu(testUnayrMathOp); -#endif -} - -template -void testTensorClip(Tensor& A1, Tensor& A2) { - real p1 = 0.003f; - real p2 = 0.877f; - A1.clip(p1, p2); // a = a < p1 ? p1 : (a > p2 ? p2 : a) - // A2 = A2.min(0.877f).max(0.003f); - A2 = (A2 < p1).condition(p1, (A2 > p2).condition(p2, A2)); - TensorCheckEqual(A1, A2); -} - -template -void testTensorBiggerThanScalar(Tensor& A1, Tensor& A2) { - real p = 0.5f; - A1.biggerThanScalar(p); // a = a > p ? 1.0f : 0.0f - A2 = (A2 > p).condition((real)1.0, (real)0.0); - TensorCheckEqual(A1, A2); -} - -template -void testTensorapplyL1(Tensor& A1, Tensor& A2) { - /** - * T lambda = p; - * a = (a > lambda) ? (a - lambda) - * : (a < -lambda) ? 
(a + lambda) : 0 - * - * p = learningRate * decayRate; - */ - real learningRate = 0.7f; - real decayRate = 0.6f; - A1.applyL1(learningRate, decayRate); - A2 = (A2 > (learningRate * decayRate)) - .condition( - (A2 - (learningRate * decayRate)), - (A2 < -(learningRate * decayRate)) - .condition((A2 + (learningRate * decayRate)), (real)0.0)); - TensorCheckEqual(A1, A2); -} - -template -void testUnayrCompareOp(Tensor& A1, Tensor& A2) { - testTensorClip(A1, A2); - testTensorBiggerThanScalar(A1, A2); - - A1.randomizeUniform(); - A1.subScalar(0.5f); - A2.copyFrom(A1); - testTensorapplyL1(A1, A2); -} - -TEST(Unary, CompareOp) { - TestUnaryMatrix testCpu(testUnayrCompareOp); - -#ifdef PADDLE_WITH_GPU - TestUnaryMatrix testGpu(testUnayrCompareOp); -#endif -} - -template -void testTensorAdd(Tensor& A1, Tensor& A2, Tensor& B) { - real p1 = 2.5; - real p2 = 3.2; - A1.add(B); // a += b - A2 += B; - TensorCheckEqual(A1, A2); - - A1.add(B, p1); // a += b * p - A2 += B * p1; - TensorCheckEqual(A1, A2); - - A1.add(B, p1, p2); // a = p1 * a + p2 * b - A2 = A2 * p1 + B * p2; - TensorCheckEqual(A1, A2); - - A1.addScalar(B, p1); // a = b + p - A2 = B + p1; - TensorCheckEqual(A1, A2); - - A1.addSquare(B, p1); // a += p * b * b - A2 += B.constant(p1) * B * B; - TensorCheckEqual(A1, A2); - - A1.decayAddSquare(B, p1, p2); // a = p1 * a + p2 * b * b - A2 = A2 * p1 + B.constant(p2) * B * B; - TensorCheckEqual(A1, A2); -} - -template -void testTensorSub(Tensor& A1, Tensor& A2, Tensor& B) { - real p = 2.5; - A1.sub(B); // a -= b - A2 -= B; - TensorCheckEqual(A1, A2); - - A1.sub(B, p); // a -= b * p - A2 -= B * p; - TensorCheckEqual(A1, A2); - - A1.subScalar(B, p); // a = b - p - A2 = B - p; - TensorCheckEqual(A1, A2); -} - -template -void testTensorMul(Tensor& A1, Tensor& A2, Tensor& B) { - real p = 2.5; - A1.mulScalar(B, p); // a = b * p - A2 = B * p; - TensorCheckEqual(A1, A2); - - A1.dotMulSquare(B); // a *= b * b - A2 *= B * B; - TensorCheckEqual(A1, A2); - - A1.dotSquareMul(B); // a = a * 
a * b - A2 = A2 * A2 * B; - TensorCheckEqual(A1, A2); - - A1.dotMul(B); // a *= b - A2 *= B; - TensorCheckEqual(A1, A2); -} - -template -void testTensorDiv(Tensor& A1, Tensor& A2, Tensor& B) { - real p = 2.5; - A1.divScalar(B, p); // a = b / p - A2 = B / p; - TensorCheckEqual(A1, A2); - - A1.scalarDiv(B, p); // a = p / b - A2 = B.constant(p) / B; - TensorCheckEqual(A1, A2); -} - -template -void testTensorAssign(Tensor& A1, Tensor& A2, Tensor& B) { - A1.assign(B); // a = b - A2 = B; - TensorCheckEqual(A1, A2); -} - -template -void testTensorSquare(Tensor& A1, Tensor& A2, Tensor& B) { - B.square2(A1); // b = a * a - A2 = B.square(); - TensorCheckEqual(A1, A2); -} - -template -void testTensorSquareDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - A1.squareDerivative(B); // a *= 2.0 * b - A2 = A2 * (real)2.0 * B; - TensorCheckEqual(A1, A2); -} - -template -void testTensorReciprocal(Tensor& A1, Tensor& A2, Tensor& B) { - B.reciprocal2(A1); // b = 1.0f / a - A2 = B.reciprocal(); - TensorCheckEqual(A1, A2); - - real p1 = 0.58; - real p2 = 0.32; - A1.reciprocal2(B, p1, p2); // a = 1 / (p1 * b + p2) - A2 = (B * p1 + p2).reciprocal(); - TensorCheckEqual(A1, A2); - - real learningRate = 0.7f; - real decayRate = 1.2f; - A1.applyL2(B, learningRate, decayRate); // a *= (1.0f / (1.0f + p * b)) - A2 *= (B.constant(1.0f) + B.constant(learningRate * decayRate) * B) - .reciprocal(); - TensorCheckEqual(A1, A2); -} - -template -void testTensorReciprocalDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - A1.reciprocalDerivative(B); // a *= -b * b - A2 *= (-B) * B; - TensorCheckEqual(A1, A2); -} - -template -void testTensorSign(Tensor& A1, Tensor& A2, Tensor& B) { - B.sign2(A1); // b = a > 0.0f ? 1.0f : -1.0f - A2 = B.sign(); - TensorCheckEqual(A1, A2); -} - -template -void testTensorAbs(Tensor& A1, Tensor& A2, Tensor& B) { - B.abs2(A1); // b = a > 0.0f ? 
a : -a - A2 = B.abs(); - TensorCheckEqual(A1, A2); -} - -template -void testBinaryBaseOp(Tensor& A1, Tensor& A2, Tensor& B) { - testTensorAdd(A1, A2, B); - testTensorSub(A1, A2, B); - testTensorMul(A1, A2, B); - testTensorDiv(A1, A2, B); - testTensorSquare(A1, A2, B); - testTensorSquareDerivative(A1, A2, B); - testTensorReciprocal(A1, A2, B); - testTensorReciprocalDerivative(A1, A2, B); - testTensorAbs(A1, A2, B); - testTensorSign(A1, A2, B); - testTensorAssign(A1, A2, B); -} - -TEST(Binary, BaseOp) { - TestBinaryMatrix testCpu(testBinaryBaseOp); - -#ifdef PADDLE_WITH_GPU - TestBinaryMatrix testGpu(testBinaryBaseOp); -#endif -} - -template -void testTensorExp(Tensor& A1, Tensor& A2, Tensor& B) { - // a = exp(b) - A1.exp2(B); - A2 = B.exp(); - TensorCheckErr(A1, A2); -} - -template -void testTensorExpDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - A1.expDerivative(B); // a *= b - A2 *= B; - TensorCheckEqual(A1, A2); -} - -template -void testTensorLog(Tensor& A1, Tensor& A2, Tensor& B) { - // a = log(b) - A1.log2(B); - A2 = B.log(); - TensorCheckErr(A1, A2); -} - -template -void testTensorSqrt(Tensor& A1, Tensor& A2, Tensor& B) { - // a = sqrt(b) - A1.sqrt2(B); - A2 = B.sqrt(); - TensorCheckErr(A1, A2); -} - -template -void testTensorInvSqrt(Tensor& A1, Tensor& A2, Tensor& B) { - // a = 1.0f / sqrt(b) - A1.invSqrt(B); - A2 = B.sqrt().reciprocal(); - TensorCheckErr(A1, A2); -} - -template -void testTensorPow(Tensor& A1, Tensor& A2, Tensor& B) { - A1.pow2(B, 2.5f); // a = pow(b, p) - A2 = B.pow(2.5f); - TensorCheckErr(A1, A2); -} - -template -void testTensorSoftrelu(Tensor& A1, Tensor& A2, Tensor& B) { - /* - * const T THRESHOLD = 40.0; - * b = log(1.0 + - * exp((a > THRESHOLD) ? THRESHOLD - * : ((a < -THRESHOLD) ? 
(-THRESHOLD) : a))) - */ - B.softrelu(A1); - - real THRESHOLD = 40.0; - A2 = (B.constant(1.0f) + - (B > THRESHOLD) - .condition(THRESHOLD, (B < -THRESHOLD).condition(-THRESHOLD, B)) - .exp()) - .log(); - TensorCheckErr(A1, A2); -} - -template -void testTensorSoftreluDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - /* - * const T THRESHOLD = 40.0; - * a *= (1.0 - exp(-1.0 * ((b > THRESHOLD) - * ? THRESHOLD - * : ((b < -THRESHOLD) ? (-THRESHOLD) : b))))); - */ - A1.softreluDerivative(B); - real THRESHOLD = 40.0; - A2 = A2 * - (B.constant(1.0f) - - (B.constant(-1.0f) * - (B > THRESHOLD) - .condition(THRESHOLD, (B < -THRESHOLD).condition(-THRESHOLD, B))) - .exp()); - TensorCheckErr(A1, A2); -} - -template -void testTensorSigmoid(Tensor& A1, Tensor& A2, Tensor& B) { - /* - const T THRESHOLD_MIN = -40.0; - const T THRESHOLD_MAX = 13.0; - T tmp = (a < THRESHOLD_MIN) ? THRESHOLD_MIN - : ((a > THRESHOLD_MAX) ? THRESHOLD_MAX : a); - b = 1.0f / (1.0f + exp(-tmp))) - */ - B.sigmoid(A1); - - const real THRESHOLD_MIN = -40.0; - const real THRESHOLD_MAX = 13.0; - auto tmp = (B < THRESHOLD_MIN) - .condition(THRESHOLD_MIN, - (B > THRESHOLD_MAX).condition(THRESHOLD_MAX, B)); - A2 = (B.constant(1.0f) + (-tmp).exp()).reciprocal(); - TensorCheckErr(A1, A2); -} - -template -void testTensorSigmoidDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - A1.sigmoidDerivative(B); // a *= b * (1 - b) - A2 *= B * (B.constant(1.0f) - B); - TensorCheckEqual(A1, A2); -} - -template -void testTensorTanh(Tensor& A1, Tensor& A2, Tensor& B) { - B.tanh(A1); // b = 2.0 / (1.0 + exp(-2 * a)) - 1.0 - A2 = B.constant(2.0f) / ((B * ((real)-2.0f)).exp() + (real)1.0f) - (real)1.0f; - TensorCheckErr(A1, A2); -} - -template -void testTensorTanhDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - A1.tanhDerivative(B); // a *= 1 - b * b - A2 *= B.constant(1.0f) - B * B; - TensorCheckEqual(A1, A2); -} - -template -void testTensorScaledTanh(Tensor& A1, Tensor& A2, Tensor& B) { - real p1 = 2.5; - real p2 = 3.1; - // b = p1 
* (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0) - B.scaledTanh(A1, p1, p2); - A2 = B.constant(p1) * - (B.constant(2.0f) / ((B.constant(-2.0f) * p2 * B).exp() + (real)1.0) - - (real)1.0); - TensorCheckErr(A1, A2); -} - -template -void testTensorScaledTanhDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - real p1 = 2.5; - real p2 = 3.1; - // a *= (p2 / p1) * (p1 * p1 - b * b)); - A1.scaledTanhDerivative(B, p1, p2); - A2 = A2 * (B.constant(p2 / p1) * (B.constant(p1 * p1) - B * B)); - TensorCheckEqual(A1, A2); -} - -template -void testBinaryMathOp(Tensor& A1, Tensor& A2, Tensor& B) { - testTensorTanhDerivative(A1, A2, B); - testTensorScaledTanhDerivative(A1, A2, B); - testTensorSigmoidDerivative(A1, A2, B); - testTensorExpDerivative(A1, A2, B); - testTensorScaledTanh(A1, A2, B); - testTensorTanh(A1, A2, B); - testTensorExp(A1, A2, B); - testTensorLog(A1, A2, B); - testTensorSqrt(A1, A2, B); - testTensorInvSqrt(A1, A2, B); - testTensorPow(A1, A2, B); - - testTensorSoftrelu(A1, A2, B); - testTensorSoftreluDerivative(A1, A2, B); - testTensorSigmoid(A1, A2, B); -} - -TEST(Binary, MathOp) { - TestBinaryMatrix testCpu(testBinaryMathOp); - -#ifdef PADDLE_WITH_GPU - TestBinaryMatrix testGpu(testBinaryMathOp); -#endif -} - -template -void testTensorRelu(Tensor& A1, Tensor& A2, Tensor& B) { - B.relu(A1); // b = a > 0.0f ? a : 0.0f - A2 = (B > (real)0.0f).condition(B, (real)0.0f); - TensorCheckEqual(A1, A2); -} - -template -void testTensorReluDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - A1.reluDerivative(B); // a *= (b > 0.0f ? 1.0f : 0.0f) - A2 *= (B > (real)0.0).condition((real)1.0, (real)0.0); - TensorCheckEqual(A1, A2); -} - -template -void testTensorBrelu(Tensor& A1, Tensor& A2, Tensor& B) { - /* - * b = a > p1 ? a : p1 - * b = b < p2 ? 
b : p2 - * int p1 = 0, p2 = 24; - */ - SetTensorValue(B, 32.0f); - B.brelu(A1); - auto tmp = (B > (real)0.0f).condition(B, (real)0.0f); - A2 = (tmp < (real)24.0f).condition(tmp, (real)24.0f); - TensorCheckEqual(A1, A2); -} - -template -void testTensorBreluDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - SetTensorValue(B, 32.0f); - /* - * a *= (b > p1 && b < p2) ? 1.0 : 0.0 - * int p1 = 0, p2 = 24; - */ - A1.breluDerivative(B); - A2 *= (B > (real)0.0f && B < (real)24.0f).condition((real)1.0f, (real)0.0f); - TensorCheckEqual(A1, A2); -} - -template -void testTensorAbsDerivative(Tensor& A1, Tensor& A2, Tensor& B) { - A1.absDerivative(B); // a = (b > 0) ? a : (b < 0) ? -a : 0 - A2 = (B > (real)0.0f) - .condition(A2, (B < (real)0.0f).condition(-A2, (real)0.0f)); - TensorCheckEqual(A1, A2); -} - -template -void testTensorIsEqualTo(Tensor& A1, Tensor& A2, Tensor& B) { - real p = 0.613; - SetTensorValue(B, p); - A1.isEqualTo(B, p); // a = (b == p) - A2 = (B == p); - TensorCheckEqual(A1, A2); -} - -template -void testTensorapplyL1(Tensor& A1, Tensor& A2, Tensor& B) { - /** - * T lambda = p * b; - * a = (a > lambda) ? (a - lambda) - * : (a < -lambda) ? 
(a + lambda) : 0 - * - * p = learningRate * decayRate; - */ - real learningRate = 0.7f; - real decayRate = 0.6f; - A1.applyL1(B, learningRate, decayRate); - auto lambda = B.constant(learningRate * decayRate) * B; - A2 = (A2 > lambda) - .condition((A2 - lambda), - (A2 < -lambda).condition((A2 + lambda), (real)0.0f)); - TensorCheckEqual(A1, A2); -} - -template -void testBinaryCompareOp(Tensor& A1, Tensor& A2, Tensor& B) { - B.subScalar(0.5f); - SetTensorValue(B, 0.0f); - testTensorReluDerivative(A1, A2, B); - - A1.randomizeUniform(); - A2.copyFrom(A1); - testTensorBreluDerivative(A1, A2, B); - - testTensorAbsDerivative(A1, A2, B); - testTensorRelu(A1, A2, B); - testTensorBrelu(A1, A2, B); - testTensorIsEqualTo(A1, A2, B); -} - -TEST(Binary, CompareOp) { - TestBinaryMatrix testCpu(testBinaryCompareOp); - -#ifdef PADDLE_WITH_GPU - TestBinaryMatrix testGpu(testBinaryCompareOp); -#endif -} - -template -void testTensorAdd(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - A1.add(B, C); // a = b + c - A2 = B + C; - TensorCheckEqual(A1, A2); - - real p1 = 1.5; - real p2 = 2.5; - real p3 = 3.8; - A1.add(B, p1, C, p2); // a = p1 * b + p2 * c - A2 = B * p1 + C * p2; - TensorCheckEqual(A1, A2); - - A1.add2(B, C); // a = a + b + c - A2 = A2 + B + C; - TensorCheckEqual(A1, A2); - - A1.add2(B, C, p1, p2, p3); // a = p1 * a + p2 * b + p3 * c - A2 = A2 * p1 + B * p2 + C * p3; - TensorCheckEqual(A1, A2); - - A1.decayAddSquareMul(B, C, p1, p2); // a = p1 * a + p2 * b * b * c * c - A2 = A2 * p1 + B.constant(p2) * B * B * C * C; - TensorCheckEqual(A1, A2); -} - -template -void testTensorSub(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - A1.sub(B, C); // a = b - c - A2 = B - C; - TensorCheckEqual(A1, A2); - - real p1 = 1.5; - real p2 = 2.5; - A1.sub(B, p1, C, p2); // a = p1 * b - p2 * c - A2 = B * p1 - C * p2; - TensorCheckEqual(A1, A2); -} - -template -void testTensorMul(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - A1.dotMul(B, C); // a = b * c - A2 = B * C; - 
TensorCheckEqual(A1, A2); - - A1.dotMulSquare(B, C); // a = b * c * c - A2 = B * C * C; - TensorCheckEqual(A1, A2); - - A1.dotSquareSquare(B, C); // a = b * b * c * c - A2 = B * B * C * C; - TensorCheckEqual(A1, A2); - - real p1 = 1.5; - real p2 = 2.5; - - /* - * T tmp = p1 * b + p2 * c; - * a *= tmp * tmp - */ - A1.dotMulSquareSum(B, C, p1, p2); - auto tmp = B * p1 + C * p2; - A2 *= tmp * tmp; - TensorCheckEqual(A1, A2); - - /* - * T tmp = p1 * b + p2 * c; - * a = tmp * tmp - */ - A1.dotSquareSum(B, C, p1, p2); - auto tmp2 = B * p1 + C * p2; - A2 = tmp2 * tmp2; - TensorCheckEqual(A1, A2); - - // a *= p1 * b + p2 * c - A1.dotMulSum(B, C, p1, p2); - A2 *= B * p1 + C * p2; - TensorCheckEqual(A1, A2); - - // a = p1 * a + p2 * b * c - A1.addDotMul(B, C, p1, p2); - A2 = A2 * p1 + B.constant(p2) * B * C; - TensorCheckEqual(A1, A2); -} - -template -void testTensorDiv(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - A1.dotDiv(B, C); // a = (b == 0.0) ? 0.0 : b / c - A2 = (B == (real)0.0).condition((real)0.0, B / C); - TensorCheckEqual(A1, A2); - - real p1 = 1.5; - real p2 = 2.5; - A1.dotDiv(B, C, p1, p2); // a = (b + p1) / (c + p2) - A2 = (B + p1) / (C + p2); - TensorCheckEqual(A1, A2); -} - -template -void testTensorReciprocal(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - real p1 = 1.5; - real p2 = 2.5; - real p3 = 3.5; - A1.reciprocalSum(B, C, p1, p2, p3); // a = 1 / (p1 * b + p2 * c + p3) - A2 = (B * p1 + C * p2 + p3).reciprocal(); - TensorCheckEqual(A1, A2); -} - -template -void testTensorSoftCrossEntropy(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - A1.softCrossEntropy(B, C); // a = -c * log(b) - (1 - c) * log(1 - b) - A2 = -C * B.log() - (C.constant(1.0f) - C) * (B.constant(1.0f) - B).log(); - TensorCheckErr(A1, A2); -} - -template -void testTensorSoftCrossEntropyBp(Tensor& A1, - Tensor& A2, - Tensor& B, - Tensor& C) { - A1.softCrossEntropyBp(B, C); // a += (b - c) / (b * (1 - b)) - A2 += (B - C) / (B * (B.constant(1.0f) - B)); - TensorCheckEqual(A1, A2); -} 
- -template -void testTernaryBaseOp(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - testTensorAdd(A1, A2, B, C); - testTensorSub(A1, A2, B, C); - testTensorMul(A1, A2, B, C); - testTensorDiv(A1, A2, B, C); - testTensorReciprocal(A1, A2, B, C); - testTensorSoftCrossEntropyBp(A1, A2, B, C); - - testTensorSoftCrossEntropy(A1, A2, B, C); -} - -TEST(Ternary, BaseOp) { - TestTernaryMatrix testCpu(testTernaryBaseOp); - -#ifdef PADDLE_WITH_GPU - TestTernaryMatrix testGpu(testTernaryBaseOp); -#endif -} - -template -void testTensorBinaryLabelCrossEntropy(Tensor& A1, - Tensor& A2, - Tensor& B, - Tensor& C) { - A1.binaryLabelCrossEntropy(B, C); // a = c > 0.5 ? -log(b) : -log(1.0 - b) - A2 = (C > (real)0.5).condition(-(B.log()), -((B.constant(1.0f) - B).log())); - TensorCheckErr(A1, A2); -} - -template -void testTensorBinaryLabelCrossEntropyBp(Tensor& A1, - Tensor& A2, - Tensor& B, - Tensor& C) { - // a += c > 0.5 ? -1.0 / b : 1.0 / (1.0 - b) - A1.binaryLabelCrossEntropyBp(B, C); - A2 += (C > (real)0.5) - .condition((B.constant(-1.0f) / B), - (B.constant(1.0f) - B).reciprocal()); - TensorCheckErr(A1, A2); -} - -template -void testTensorLogisticRegressionLoss(Tensor& A1, - Tensor& A2, - Tensor& B, - Tensor& C) { - SetTensorValue(B, 50.0f); - SetTensorValue(B, -50.0f); - /** - * const T THRESHOLD = 40.0; - * T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD) - * ? -THRESHOLD - * : b; - * a = log(1 + exp(x)) - c * x - */ - A1.logisticRegressionLoss(B, C); - real THRESHOLD = 40.0; - auto tmp = - (B > THRESHOLD) - .condition(THRESHOLD, (B < -THRESHOLD).condition(-THRESHOLD, B)); - A2 = (C.constant(1.0f) + tmp.exp()).log() - C * tmp; - TensorCheckErr(A1, A2); -} - -template -void testTensorLogisticRegressionLossBp(Tensor& A1, - Tensor& A2, - Tensor& B, - Tensor& C) { - SetTensorValue(B, 50.0f); - SetTensorValue(B, -50.0f); - /** - * const T THRESHOLD = 40.0; - * T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD) - * ? 
-THRESHOLD - * : b; - * x = exp(x); a = x / (1 + x) - c - */ - A1.logisticRegressionLossBp(B, C); - real THRESHOLD = 40.0; - auto tmp = - (B > THRESHOLD) - .condition(THRESHOLD, (B < -THRESHOLD).condition(-THRESHOLD, B)); - auto tmp2 = tmp.exp(); - A2 = tmp2 / (C.constant(1.0) + tmp2) - C; - TensorCheckErr(A1, A2); -} - -template -void testTensorBiggerThan(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - A1.biggerThan(B, C); // a = (b > c) ? 1.0f : 0.0f - A2 = (B > C).condition((real)1.0f, (real)0.0f); - TensorCheckEqual(A1, A2); -} - -template -void testTensorMax(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - A1.max2(B, C); // a = (b > c) ? b : c - A2 = (B > C).condition(B, C); - TensorCheckEqual(A1, A2); -} - -template -void testTernaryCompareOp(Tensor& A1, Tensor& A2, Tensor& B, Tensor& C) { - testTensorBinaryLabelCrossEntropyBp(A1, A2, B, C); - testTensorBinaryLabelCrossEntropy(A1, A2, B, C); - testTensorBiggerThan(A1, A2, B, C); - testTensorMax(A1, A2, B, C); - - testTensorLogisticRegressionLoss(A1, A2, B, C); - testTensorLogisticRegressionLossBp(A1, A2, B, C); -} - -TEST(Ternary, CompareOp) { - TestTernaryMatrix testCpu(testTernaryCompareOp); - -#ifdef PADDLE_WITH_GPU - TestTernaryMatrix testGpu(testTernaryCompareOp); -#endif -} - -template -void testQuaternaryAdd( - Tensor& A1, Tensor& A2, Tensor& B, Tensor& C, Tensor& D) { - // A1.add3(B, C, D, 1.5f, 2.5f, 3.5f); // a = p1 * b + p2 * c + p3 * d - // A2 = B * 1.5f + C * 2.5f + D * 3.5f; - // TensorCheckEqual(A1, A2); - - /* - * T tmp = p1 * b + p2 * c + p3 * d; - * a += tmp * tmp - */ - real p1 = 1.5f; - real p2 = 2.5f; - real p3 = 3.5f; - A1.addSquareSum(B, C, D, p1, p2, p3); - auto tmp = B * p1 + C * p2 + D * p3; - A2 += tmp * tmp; - TensorCheckEqual(A1, A2); -} - -TEST(Quaternary, BaseOp) { - TestQuaternaryMatrix testCpu(testQuaternaryAdd); - -#ifdef PADDLE_WITH_GPU - TestQuaternaryMatrix testGpu(testQuaternaryAdd); -#endif -} - -template -void testTensorBiggerThan( - Tensor& A1, Tensor& A2, Tensor& 
B, Tensor& C, Tensor& D) { - // a = ((b > c && d > 0.5f) || (b < c && d < 0.5f)) ? 1.0f : 0.0f); - A1.biggerThan(B, C, D); - A2 = ((B > C && D > (real)0.5) || (B < C && D < (real)0.5)) - .condition((real)1.0, (real)0.0); - TensorCheckEqual(A1, A2); -} - -template -void testTensorRankLoss( - Tensor& A1, Tensor& A2, Tensor& B, Tensor& C, Tensor& D) { - /** - * const T THRESHOLD = 40.0; a = b - c; - * a = (a > THRESHOLD) - * ? THRESHOLD - * : ((a < -THRESHOLD) ? (-THRESHOLD) : a); - * a = log(1 + exp(a)) - a * d - */ - A1.rankLoss(B, C, D); - - real THRESHOLD = 40.0; - auto tmp = B - C; - auto tmp2 = - (tmp > THRESHOLD) - .condition(THRESHOLD, (tmp < -THRESHOLD).condition(-THRESHOLD, tmp)); - A2 = (D.constant(1.0f) + tmp2.exp()).log() - tmp2 * D; - - TensorCheckErr(A1, A2); -} - -template -void testTensorRankLossBp( - Tensor& A1, Tensor& A2, Tensor& B, Tensor& C, Tensor& D) { - /** - * const T THRESHOLD = 40.0; a = b - c; - * a = (a > THRESHOLD) - * ? THRESHOLD - * : ((a < -THRESHOLD) ? (-THRESHOLD) : a); - * a = exp(a); a = (a / (1 + a) - d) - */ - A1.rankLossBp(B, C, D); - real THRESHOLD = 40.0; - auto tmp = B - C; - auto tmp2 = - (tmp > THRESHOLD) - .condition(THRESHOLD, (tmp < -THRESHOLD).condition(-THRESHOLD, tmp)); - auto tmp3 = tmp2.exp(); - A2 = tmp3 / (D.constant(1.0f) + tmp3) - D; - - TensorCheckErr(A1, A2); -} - -template -void testQuaternaryCompareOp( - Tensor& A1, Tensor& A2, Tensor& B, Tensor& C, Tensor& D) { - testTensorBiggerThan(A1, A2, B, C, D); - testTensorRankLoss(A1, A2, B, C, D); - testTensorRankLossBp(A1, A2, B, C, D); -} - -TEST(Quaternary, CompareOp) { - TestQuaternaryMatrix testCpu(testQuaternaryCompareOp); - -#ifdef PADDLE_WITH_GPU - TestQuaternaryMatrix testGpu(testQuaternaryCompareOp); -#endif -} diff --git a/paddle/legacy/math/tests/test_TrainingAlgorithm.cpp b/paddle/legacy/math/tests/test_TrainingAlgorithm.cpp deleted file mode 100644 index 214ae8971ae953ce0266f03dc3bba8c6160f1cf6..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/math/tests/test_TrainingAlgorithm.cpp +++ /dev/null @@ -1,461 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "OriginalOptimizerApi.h" -#include "PerfUtils.h" -#include "TensorCheck.h" -#include "paddle/legacy/math/TrainingAlgorithmOp.h" -#include "paddle/legacy/utils/Util.h" - -using namespace paddle; // NOLINT - -#ifndef PADDLE_TYPE_DOUBLE -DEFINE_double(max_diff, 1e-5, "max diff allowed"); -#else -DEFINE_double(max_diff, 1e-13, "max diff allowed"); -#endif - -class SetMaxDiff { - public: - explicit SetMaxDiff(double max_diff) { - max_diff_ = FLAGS_max_diff; - FLAGS_max_diff = max_diff; - } - ~SetMaxDiff() { FLAGS_max_diff = max_diff_; } - - private: - double max_diff_; -}; - -#define COPY_VECTOR_TO_CPU(cpuVec, vector) \ - do { \ - if (vector->useGpu()) { \ - cpuVec = Vector::create(vector->getSize(), false); \ - cpuVec->copyFrom(*vector); \ - } else { \ - cpuVec = vector; \ - } \ - } while (0) - -int VectorCheckErr(const Vector& vector1, const Vector& vector2) { - CHECK(vector1.getSize() == vector2.getSize()); - - const real* data1 = vector1.getData(); - const real* data2 = vector2.getData(); - size_t size = vector1.getSize(); - int count = 0; - for (size_t i = 0; i < size; i++) { - real a = data1[i]; - real b = data2[i]; - if (fabs(a - b) > FLAGS_max_diff) { - if ((fabsf(a - b) / fabsf(a)) > (FLAGS_max_diff / 10.0f)) { - count++; - } - } - } - - return count; -} - -int 
VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) { - VectorPtr tmp1; - VectorPtr tmp2; - COPY_VECTOR_TO_CPU(tmp1, vector1); - COPY_VECTOR_TO_CPU(tmp2, vector2); - return VectorCheckErr(*tmp1, *tmp2); -} - -#ifdef PADDLE_DISABLE_TIMER - -#define CHECK_VECTORPTR(vector1, vector2) \ - EXPECT_EQ(VectorCheckErr(vector1, vector2), 0) - -#else - -#define CHECK_VECTORPTR(vector1, vector2) - -#endif - -typedef std::function testMatrixFunc; - -void testCase(testMatrixFunc matrixFunc) { -#ifdef PADDLE_WITH_CUDA - for (auto useGpu : {false, true}) { -#else - for (auto useGpu : {false}) { -#endif - for (auto size : {1, - 32, - 64, - 128, - 512, - 1024, - 4096, - 32768, - 65536, - 131072, - 262144, - 524288, - 1048576, - 2097152}) { - LOG(INFO) << " size=" << size << " useGpu=" << useGpu; - matrixFunc(size, useGpu); - } - } -} - -#define INIT_VECTOR(vec1, vec2, type, size, useGpu) \ - vec1[type] = Vector::create(size, useGpu); \ - vec2[type] = Vector::create(size, useGpu); \ - vec1[type]->rand(); \ - vec2[type]->copyFrom(*vec1[type]); - -void testAdagrad(size_t size, bool useGpu) { - VectorPtr bufs1[NUM_PARAMETER_TYPES]; - VectorPtr bufs2[NUM_PARAMETER_TYPES]; - INIT_VECTOR(bufs1, bufs2, PARAMETER_VALUE, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM1, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_LEARNING_RATE, size, useGpu); - - real epsilon = (real)rand() / (real)RAND_MAX; // NOLINT - real learningRate = (real)rand() / (real)RAND_MAX; // NOLINT - real momentum = (real)rand() / (real)RAND_MAX; // NOLINT - real decayRate = (real)rand() / (real)RAND_MAX; // NOLINT - - EXPRESSION_PERFORMANCE(AdagradParameterOptimizer( - bufs1, epsilon, learningRate, momentum, decayRate)); - - BaseMatrix& value = *bufs2[PARAMETER_VALUE]; - BaseMatrix& 
grad = *bufs2[PARAMETER_GRADIENT]; - BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM]; - BaseMatrix& accum_buffer = *bufs2[PARAMETER_GRADIENT_SQURESUM]; - BaseMatrix& accum = *bufs2[PARAMETER_GRADIENT_SQURESUM1]; - BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE]; - - EXPRESSION_PERFORMANCE(adagradApply(value, - grad, - mom, - accum_buffer, - accum, - lr, - epsilon, - learningRate, - momentum, - decayRate)); - - CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]); - CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_GRADIENT_SQURESUM1], - bufs2[PARAMETER_GRADIENT_SQURESUM1]); - CHECK_VECTORPTR(bufs1[PARAMETER_LEARNING_RATE], - bufs2[PARAMETER_LEARNING_RATE]); -} - -TEST(Training, Adagrad) { testCase(testAdagrad); } - -void testAdaDelta(size_t size, bool useGpu) { - VectorPtr bufs1[NUM_PARAMETER_TYPES]; - VectorPtr bufs2[NUM_PARAMETER_TYPES]; - INIT_VECTOR(bufs1, bufs2, PARAMETER_VALUE, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM1, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_LEARNING_RATE, size, useGpu); - - real rou = (real)rand() / (real)RAND_MAX; // NOLINT - real epsilon = (real)rand() / (real)RAND_MAX; // NOLINT - real learningRate = (real)rand() / (real)RAND_MAX; // NOLINT - real momentum = (real)rand() / (real)RAND_MAX; // NOLINT - real decayRate = (real)rand() / (real)RAND_MAX; // NOLINT - - EXPRESSION_PERFORMANCE(AdaDeltaParameterOptimizer( - bufs1, rou, epsilon, learningRate, momentum, decayRate)); - - BaseMatrix& value = *bufs2[PARAMETER_VALUE]; - BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT]; - BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM]; - BaseMatrix& accum = *bufs2[PARAMETER_GRADIENT_SQURESUM]; - BaseMatrix& accum_update = *bufs2[PARAMETER_GRADIENT_SQURESUM1]; - 
BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE]; - - EXPRESSION_PERFORMANCE(adadeltaApply(value, - grad, - mom, - accum, - accum_update, - lr, - rou, - epsilon, - learningRate, - momentum, - decayRate)); - - CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]); - CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_GRADIENT_SQURESUM], - bufs2[PARAMETER_GRADIENT_SQURESUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_GRADIENT_SQURESUM1], - bufs2[PARAMETER_GRADIENT_SQURESUM1]); - CHECK_VECTORPTR(bufs1[PARAMETER_LEARNING_RATE], - bufs2[PARAMETER_LEARNING_RATE]); -} - -TEST(Training, AdaDelta) { testCase(testAdaDelta); } - -template -void testRMSProp(size_t size, bool useGpu) { - VectorPtr bufs1[NUM_PARAMETER_TYPES]; - VectorPtr bufs2[NUM_PARAMETER_TYPES]; - INIT_VECTOR(bufs1, bufs2, PARAMETER_VALUE, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM1, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_LEARNING_RATE, size, useGpu); - - /* make sure 'g - f.square()' greater than 0 */ - bufs1[PARAMETER_GRADIENT_SQURESUM]->add(1.0); - bufs2[PARAMETER_GRADIENT_SQURESUM]->copyFrom( - *bufs1[PARAMETER_GRADIENT_SQURESUM]); - - real rou = (real)rand() / (real)RAND_MAX; // NOLINT - real epsilon = (real)rand() / (real)RAND_MAX; // NOLINT - real learningRate = (real)rand() / (real)RAND_MAX; // NOLINT - real momentum = (real)rand() / (real)RAND_MAX; // NOLINT - real decayRate = (real)rand() / (real)RAND_MAX; // NOLINT - real accumulatedRou = rou; - - EXPRESSION_PERFORMANCE(RMSPropParameterOptimizer(bufs1, - accumulatedRou, - rou, - epsilon, - learningRate, - momentum, - decayRate, - isFirstTime)); - - BaseMatrix& value = *bufs2[PARAMETER_VALUE]; - BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT]; - BaseMatrix& mom 
= *bufs2[PARAMETER_MOMENTUM]; - BaseMatrix& sum = *bufs2[PARAMETER_GRADIENT_SQURESUM]; - BaseMatrix& sum1 = *bufs2[PARAMETER_GRADIENT_SQURESUM1]; - BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE]; - - EXPRESSION_PERFORMANCE(rmspropApply(value, - grad, - mom, - sum, - sum1, - lr, - accumulatedRou, - rou, - epsilon, - learningRate, - momentum, - decayRate, - isFirstTime)); - - CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]); - CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_GRADIENT_SQURESUM], - bufs2[PARAMETER_GRADIENT_SQURESUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_GRADIENT_SQURESUM1], - bufs2[PARAMETER_GRADIENT_SQURESUM1]); - CHECK_VECTORPTR(bufs1[PARAMETER_LEARNING_RATE], - bufs2[PARAMETER_LEARNING_RATE]); -} - -TEST(Training, RMSProp) { - testCase(testRMSProp); - testCase(testRMSProp); -} - -template -void testDecayedAdagrad(size_t size, bool useGpu) { - VectorPtr bufs1[NUM_PARAMETER_TYPES]; - VectorPtr bufs2[NUM_PARAMETER_TYPES]; - INIT_VECTOR(bufs1, bufs2, PARAMETER_VALUE, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT_SQURESUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_LEARNING_RATE, size, useGpu); - - real rou = (real)rand() / (real)RAND_MAX; // NOLINT - real epsilon = (real)rand() / (real)RAND_MAX; // NOLINT - real learningRate = (real)rand() / (real)RAND_MAX; // NOLINT - real momentum = (real)rand() / (real)RAND_MAX; // NOLINT - real decayRate = (real)rand() / (real)RAND_MAX; // NOLINT - real accumulatedRou = rou; - - if (isFirstTime) { - bufs1[PARAMETER_GRADIENT_SQURESUM]->zeroMem(); - bufs2[PARAMETER_GRADIENT_SQURESUM]->zeroMem(); - } - - EXPRESSION_PERFORMANCE(DecayedAdagradParameterOptimizer(bufs1, - accumulatedRou, - rou, - epsilon, - learningRate, - momentum, - decayRate, - isFirstTime)); - - BaseMatrix& value = 
*bufs2[PARAMETER_VALUE]; - BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT]; - BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM]; - BaseMatrix& sum = *bufs2[PARAMETER_GRADIENT_SQURESUM]; - BaseMatrix& lr = *bufs2[PARAMETER_LEARNING_RATE]; - - EXPRESSION_PERFORMANCE(decayedAdagradApply(value, - grad, - mom, - sum, - lr, - accumulatedRou, - rou, - epsilon, - learningRate, - momentum, - decayRate, - isFirstTime)); - - CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]); - CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_GRADIENT_SQURESUM], - bufs2[PARAMETER_GRADIENT_SQURESUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_LEARNING_RATE], - bufs2[PARAMETER_LEARNING_RATE]); -} - -TEST(Training, DecayedAdagrad) { - testCase(testDecayedAdagrad); - testCase(testDecayedAdagrad); -} - -void testAdam(size_t size, bool useGpu) { - VectorPtr bufs1[NUM_PARAMETER_TYPES]; - VectorPtr bufs2[NUM_PARAMETER_TYPES]; - INIT_VECTOR(bufs1, bufs2, PARAMETER_VALUE, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_SECOND_MOMENTUM, size, useGpu); - - real beta1 = (real)rand() / (real)RAND_MAX; // NOLINT - real beta2 = (real)rand() / (real)RAND_MAX; // NOLINT - real beta1_power = (real)rand() / (real)RAND_MAX; // NOLINT - real beta2_power = (real)rand() / (real)RAND_MAX; // NOLINT - real epsilon = (real)rand() / (real)RAND_MAX; // NOLINT - real learningRate = (real)rand() / (real)RAND_MAX; // NOLINT - - EXPRESSION_PERFORMANCE(AdamParameterOptimizer( - bufs1, beta1, beta2, beta1_power, beta2_power, epsilon, learningRate)); - - BaseMatrix& value = *bufs2[PARAMETER_VALUE]; - BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT]; - BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM]; - BaseMatrix& v = *bufs2[PARAMETER_SECOND_MOMENTUM]; - - EXPRESSION_PERFORMANCE(adamApply(value, - grad, - mom, - v, - beta1, - beta2, - beta1_power, - 
beta2_power, - epsilon, - learningRate)); - - CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]); - CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_SECOND_MOMENTUM], - bufs2[PARAMETER_SECOND_MOMENTUM]); -} - -TEST(Training, Adam) { testCase(testAdam); } - -void testAdamax(size_t size, bool useGpu) { - VectorPtr bufs1[NUM_PARAMETER_TYPES]; - VectorPtr bufs2[NUM_PARAMETER_TYPES]; - INIT_VECTOR(bufs1, bufs2, PARAMETER_VALUE, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_WEIGHTED_INFINITY_NORM, size, useGpu); - - real beta1 = (real)rand() / (real)RAND_MAX; // NOLINT - real beta2 = (real)rand() / (real)RAND_MAX; // NOLINT - real alpha = (real)rand() / (real)RAND_MAX; // NOLINT - int64_t step = 2; - - EXPRESSION_PERFORMANCE( - AdamaxParameterOptimizer(bufs1, beta1, beta2, step, alpha)); - - BaseMatrix& value = *bufs2[PARAMETER_VALUE]; - BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT]; - BaseMatrix& mom = *bufs2[PARAMETER_MOMENTUM]; - BaseMatrix& u = *bufs2[PARAMETER_WEIGHTED_INFINITY_NORM]; - - EXPRESSION_PERFORMANCE( - adamaxApply(value, grad, mom, u, beta1, beta2, step, alpha)); - - CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]); - CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM], bufs2[PARAMETER_MOMENTUM]); - CHECK_VECTORPTR(bufs1[PARAMETER_WEIGHTED_INFINITY_NORM], - bufs2[PARAMETER_WEIGHTED_INFINITY_NORM]); -} - -TEST(Training, Adamax) { -#ifndef PADDLE_TYPE_DOUBLE - SetMaxDiff diff(1e-4); -#endif - testCase(testAdamax); -} - -void testSparseMomentum(size_t size, bool useGpu) { - VectorPtr bufs1[NUM_PARAMETER_TYPES]; - VectorPtr bufs2[NUM_PARAMETER_TYPES]; - INIT_VECTOR(bufs1, bufs2, PARAMETER_VALUE, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_GRADIENT, size, useGpu); - INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM_UT, size, useGpu); - 
INIT_VECTOR(bufs1, bufs2, PARAMETER_MOMENTUM_VT, size, useGpu); - - real alpha = (real)rand() / (real)RAND_MAX; // NOLINT - real beta = (real)rand() / (real)RAND_MAX; // NOLINT - real gamma = (real)rand() / (real)RAND_MAX; // NOLINT - real tau = (real)rand() / (real)RAND_MAX; // NOLINT - real learningRate = (real)rand() / (real)RAND_MAX; // NOLINT - - EXPRESSION_PERFORMANCE(SparseMomentumParameterOptimizer( - bufs1, alpha, beta, gamma, tau, learningRate)); - - BaseMatrix& value = *bufs2[PARAMETER_VALUE]; - BaseMatrix& grad = *bufs2[PARAMETER_GRADIENT]; - BaseMatrix& momU = *bufs2[PARAMETER_MOMENTUM_UT]; - BaseMatrix& momV = *bufs2[PARAMETER_MOMENTUM_VT]; - - EXPRESSION_PERFORMANCE(sparseMomentumApply( - value, grad, momU, momV, alpha, beta, gamma, tau, learningRate)); - - CHECK_VECTORPTR(bufs1[PARAMETER_VALUE], bufs2[PARAMETER_VALUE]); - CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM_UT], bufs2[PARAMETER_MOMENTUM_UT]); - CHECK_VECTORPTR(bufs1[PARAMETER_MOMENTUM_VT], bufs2[PARAMETER_MOMENTUM_VT]); -} - -TEST(Training, SparseMomentum) { testCase(testSparseMomentum); } diff --git a/paddle/legacy/math/tests/test_batchTranspose.cpp b/paddle/legacy/math/tests/test_batchTranspose.cpp deleted file mode 100644 index ccfd6d5aae212fdc574456682b50bc19ae81714e..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_batchTranspose.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "hl_batch_transpose.h" -#include "test_matrixUtil.h" - -using namespace paddle; // NOLINT - -#ifdef PADDLE_WITH_CUDA -TEST(MatrixBatchTransTest, test_batch_matrix_transpose) { - const int nx = 100; - const int ny = 50; - const int numSamples = 50; - - MatrixPtr cMat = Matrix::create(numSamples, nx * ny, false, false); - MatrixPtr gMat = Matrix::create(numSamples, nx * ny, false, true); - - MatrixPtr cBatchTransMat = Matrix::create(numSamples, nx * ny, false, false); - MatrixPtr gBatchTransMat = Matrix::create(numSamples, nx * ny, false, true); - MatrixPtr cMat_d2h = Matrix::create(numSamples, nx * ny, false, false); - - real* cData = cMat->getData(); - real* gold = cBatchTransMat->getData(); - - // host - for (int sample_id = 0; sample_id < numSamples; ++sample_id) - for (int j = 0; j < ny; j++) - for (int i = 0; i < nx; i++) - cData[sample_id * nx * ny + j * nx + i] = j * nx + i; - - // correct result for error checking - for (int sample_id = 0; sample_id < numSamples; ++sample_id) - for (int j = 0; j < ny; j++) - for (int i = 0; i < nx; i++) - gold[sample_id * nx * ny + i * ny + j] = - cData[sample_id * nx * ny + j * nx + i]; - // device - gMat->copyFrom(*cMat, HPPL_STREAM_DEFAULT); - batchTranspose( - gMat->getData(), gBatchTransMat->getData(), nx, ny, numSamples); - cMat_d2h->copyFrom(*gBatchTransMat, HPPL_STREAM_DEFAULT); - checkMatrixEqual(cBatchTransMat, cMat_d2h); -} -#endif diff --git a/paddle/legacy/math/tests/test_lazyAssign.cu b/paddle/legacy/math/tests/test_lazyAssign.cu deleted file mode 100644 index cf8c3d77199571dff314446a1e1b14e9b746e947..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_lazyAssign.cu +++ /dev/null @@ -1,147 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "PerfUtils.h" -#include "TensorCheck.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/TensorAssign.h" - -using paddle::BaseMatrix; -using paddle::CpuMatrix; -using paddle::GpuMatrix; -using autotest::TensorCheckEqual; -using autotest::TensorCheckErr; - -typedef std::function testMatrixFunc; -void testMatrixCase(testMatrixFunc matrixFunc) { - for (auto height : {1}) { - for (auto width : {1, - 32, - 64, - 128, - 512, - 1024, - 4096, - 32768, - 65536, - 131072, - 262144, - 524288, - 1048576, - 2097152, - 4194304, - 8388608}) { - matrixFunc(height, width); - } - } -} - -template -void testLazyAssign(int height, int width) { - Tensor A1(height, width); - Tensor A2(height, width); - Tensor B(height, width); - Tensor C(height, width); - Tensor D(height, width); - A1.randomizeUniform(); - B.randomizeUniform(); - C.randomizeUniform(); - D.randomizeUniform(); - A2.copyFrom(A1); - - EXPRESSION_PERFORMANCE(A1 = B + C; A1 = A1 * D;); - - EXPRESSION_PERFORMANCE(auto expr1 = A2.lazyAssign(B + C); - auto expr2 = A2.lazyAssign(A2 * D); - AssignEvaluate(expr1, expr2);); - - TensorCheckErr(A1, A2); -} - -TEST(lazyAssign, CPU) { testMatrixCase(testLazyAssign); } - -#ifdef PADDLE_WITH_GPU -TEST(lazyAssign, GPU) { testMatrixCase(testLazyAssign); } -#endif - -template -void sgdUpdateTensor( - Tensor& A, Tensor& B, Tensor& C, Tensor& D, real p1, real p2, real p3) { - C = C * p2 - D * (B + A * p3) * p1; - A += C; -} - -void sgdUpdateLazyAssign(BaseMatrix& A, - BaseMatrix& B, - BaseMatrix& C, - BaseMatrix& D, - real p1, - real p2, - real p3) { - 
auto expr1 = C.lazyAssign(C * p2 - D * (B + A * p3) * p1); - auto expr2 = A.lazyAssign(A + C); - AssignEvaluate(expr1, expr2); -} - -template -void testSgdUpdate(int height, int width) { - Tensor A1(height, width); - Tensor A2(height, width); - Tensor A3(height, width); - A1.randomizeUniform(); - A2.copyFrom(A1); - A3.copyFrom(A1); - - Tensor B(height, width); - B.randomizeUniform(); - - Tensor C1(height, width); - Tensor C2(height, width); - Tensor C3(height, width); - C1.randomizeUniform(); - C2.copyFrom(C1); - C3.copyFrom(C1); - - Tensor D(height, width); - D.randomizeUniform(); - - real p1 = 0.2; - real p2 = 0.3; - real p3 = 0.5; - - /** - * c = p2 * c - p1 * (b + p3 * a); - * a = a + c; - */ - // BaseMatrix API - EXPRESSION_PERFORMANCE(A1.sgdUpdate(B, C1, D, p1, p2, p3);); - - // Tensor expression - EXPRESSION_PERFORMANCE(sgdUpdateTensor(A2, B, C2, D, p1, p2, p3)); - - // lazyAssign - EXPRESSION_PERFORMANCE(sgdUpdateLazyAssign(A3, B, C3, D, p1, p2, p3)); - - TensorCheckErr(A1, A2); - TensorCheckErr(A1, A3); - TensorCheckErr(C1, C2); - TensorCheckErr(C1, C3); -} - -TEST(sgdUpdate, CPU) { testMatrixCase(testSgdUpdate); } - -#ifdef PADDLE_WITH_GPU -TEST(sgdUpdate, GPU) { testMatrixCase(testSgdUpdate); } -#endif diff --git a/paddle/legacy/math/tests/test_matrixCompare.cpp b/paddle/legacy/math/tests/test_matrixCompare.cpp deleted file mode 100644 index a43adde46fc6526cc3ff5affec2ce1c7c3a44214..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_matrixCompare.cpp +++ /dev/null @@ -1,1698 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_CUDA -/// This unittest checks GpuMatrix/CpuMatrix get same result, so disable when -/// only cpu version. - -#include -#include "TensorCheck.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseMatrix.h" -#include "paddle/legacy/utils/DynamicLoader.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" -#include "paddle/testing/TestUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT -using autotest::TensorCheckEqual; -using autotest::TensorCheckErr; - -void testMatrixMaxSequence(int batchSize, int inputDim) { - // forward - MatrixPtr cpuInput = std::make_shared(batchSize, inputDim); - MatrixPtr gpuInput = std::make_shared(batchSize, inputDim); - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - - IVectorPtr cpuSequence; - generateSequenceStartPositions(batchSize, cpuSequence); - IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true); - gpuSequence->copyFrom(*cpuSequence); - - int newBatchSize = cpuSequence->getSize() - 1; - MatrixPtr cpuOutput = std::make_shared(newBatchSize, inputDim); - MatrixPtr gpuOutput = std::make_shared(newBatchSize, inputDim); - cpuOutput->zero(); - gpuOutput->zero(); - - IVectorPtr cpuIndex = nullptr; - IVectorPtr gpuIndex = nullptr; - IVector::resizeOrCreate(cpuIndex, newBatchSize * inputDim, false); - IVector::resizeOrCreate(gpuIndex, newBatchSize * inputDim, true); - cpuIndex->zeroMem(); - gpuIndex->zeroMem(); - - cpuOutput->maxSequenceForward(*cpuInput, *cpuSequence, 
*cpuIndex); - gpuOutput->maxSequenceForward(*gpuInput, *gpuSequence, *gpuIndex); - - TensorCheckEqual(*cpuOutput, *gpuOutput); - TensorCheckEqual(*cpuIndex, *gpuIndex); - - // backward - MatrixPtr cpuOutputGrad = std::make_shared(newBatchSize, inputDim); - MatrixPtr gpuOutputGrad = std::make_shared(newBatchSize, inputDim); - cpuOutputGrad->randomizeUniform(); - gpuOutputGrad->copyFrom(*cpuOutputGrad); - - MatrixPtr cpuInputGrad = std::make_shared(batchSize, inputDim); - MatrixPtr gpuInputGrad = std::make_shared(batchSize, inputDim); - cpuInputGrad->randomizeUniform(); - gpuInputGrad->copyFrom(*cpuInputGrad); - - cpuInputGrad->maxSequenceBackward(*cpuOutputGrad, *cpuSequence, *cpuIndex); - gpuInputGrad->maxSequenceBackward(*gpuOutputGrad, *gpuSequence, *gpuIndex); - - TensorCheckEqual(*cpuInputGrad, *gpuInputGrad); -} - -TEST(Matrix, maxSequence) { - for (auto batchSize : {1, 3, 997}) { // prime numbers close to 1, 4, 1024 - for (auto inputDim : {1, 7, 131}) { // prime numbers close to 1, 8, 128 - VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim; - testMatrixMaxSequence(batchSize, inputDim); - } - } -} - -void testMatrixGetSum(int height, int width) { - MatrixPtr cpuInput = std::make_shared(height, width); - MatrixPtr gpuInput = std::make_shared(height, width); - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - -#ifndef PADDLE_TYPE_DOUBLE - int x = log10(height * width); - real err = 1e-6 * pow(10, x); -#else - real err = 1e-8; -#endif - - real cpuSum = cpuInput->getSum(); - real gpuSum = gpuInput->getSum(); - - EXPECT_LE(fabs(cpuSum - gpuSum), err); -} - -void testMatrixGetMinMax(int height, int width) { - MatrixPtr cpuInput = std::make_shared(height, width); - MatrixPtr gpuInput = std::make_shared(height, width); - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - - real cpuMin = cpuInput->getMin(); - real gpuMin = gpuInput->getMin(); - real cpuMax = cpuInput->getMax(); - real gpuMax = gpuInput->getMax(); - - 
EXPECT_EQ(cpuMin, gpuMin); - EXPECT_EQ(cpuMax, gpuMax); -} - -void testMatrixZeroAtOffset(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr gpuA = std::make_shared(height, width); - MatrixPtr cpuTest = std::make_shared(height, width); - - cpuA->randomizeUniform(); - gpuA->copyFrom(*cpuA); - cpuTest->copyFrom(*cpuA); - - int columnOffset = rand() % width; // NOLINT we just use rand() for test. - int numColumns = rand() % (width - columnOffset); // NOLINT - - if (numColumns == 0) return; - - cpuA->zeroAtOffset(columnOffset, numColumns); - gpuA->zeroAtOffset(columnOffset, numColumns); - - /* cpuTest */ - real* a = cpuTest->getData() + columnOffset; - for (int64_t i = 0; i < height; ++i) { - for (int64_t j = 0; j < numColumns; ++j) { - a[i * width + j] = 0; - } - } - - TensorCheckEqual(*cpuA, *gpuA); - TensorCheckEqual(*cpuA, *cpuTest); -} - -void testMatrixDeepSwap(int height, int width) { - MatrixPtr cpuA = std::make_shared(height, width); - MatrixPtr cpuB = std::make_shared(height, width); - MatrixPtr cpuCopyA = std::make_shared(height, width); - MatrixPtr cpuCopyB = std::make_shared(height, width); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuCopyA->copyFrom(*cpuA); - cpuCopyB->copyFrom(*cpuB); - - // swap matrix cpuA and cpuB - cpuA->deepSwap(*cpuB); - - TensorCheckEqual(*cpuA, *cpuCopyB); - TensorCheckEqual(*cpuB, *cpuCopyA); -} - -void testMatrixTranspose(int height, int width) { - MatrixPtr cpu = std::make_shared(height, width); - MatrixPtr gpu = std::make_shared(height, width); - MatrixPtr cpuT = std::make_shared(width, height); - MatrixPtr gpuT = std::make_shared(width, height); - - cpu->randomizeUniform(); - gpu->copyFrom(*cpu); - cpu->transpose(cpuT, false); - gpu->transpose(gpuT, true); - - TensorCheckEqual(*cpuT, *gpuT); -} - -void testMatrixRotate(int height, int width) { - MatrixPtr cpu = std::make_shared(height, width); - MatrixPtr gpu = std::make_shared(height, width); - MatrixPtr cpuR = 
std::make_shared(width, height); - MatrixPtr gpuR = std::make_shared(width, height); - - cpu->randomizeUniform(); - gpu->copyFrom(*cpu); - - cpu->rotate(cpuR, false, true); - gpu->rotate(gpuR, true, true); - TensorCheckEqual(*cpuR, *gpuR); - - cpu->rotate(cpuR, true, false); - gpu->rotate(gpuR, false, false); - TensorCheckEqual(*cpuR, *gpuR); -} - -void testMatrixInverse(int height) { - MatrixPtr cpu = std::make_shared(height, height); - MatrixPtr gpu = std::make_shared(height, height); - MatrixPtr cpuI = std::make_shared(height, height); - MatrixPtr gpuI = std::make_shared(height, height); - - /* Make matrix well conditioned: cpu * cpuT + Identity */ - cpu->randomizeUniform(); - MatrixPtr cpuT = cpu->getTranspose(); - MatrixPtr outputCheck = std::make_shared(height, height); - outputCheck->mul(*cpu, *cpuT); - cpu->setDiag(1.0); - cpu->add(*outputCheck); - - gpu->copyFrom(*cpu); - cpu->inverse(cpuI, true); - gpu->inverse(gpuI, false); - - TensorCheckErr(*cpuI, *gpuI); - - outputCheck->mul(*cpu, *cpuI); - cpu->setDiag(1.0); - TensorCheckErr(*cpu, *outputCheck); -} - -TEST(Matrix, unary) { - for (auto height : {1, 3, 11, 73, 128, 200, 330}) { - for (auto width : {1, 3, 32, 100, 512, 1000, 3210}) { - VLOG(3) << " height=" << height << " width=" << width; - - testMatrixDeepSwap(height, width); - testMatrixZeroAtOffset(height, width); - testMatrixGetSum(height, width); - testMatrixTranspose(height, width); - testMatrixRotate(height, width); - } -#ifdef LAPACK_FOUND - // inverse matrix - testMatrixInverse(height); -#else - LOG(WARNING) << "This version of PaddlePaddle was not built with LAPACK" - << "support so we cannot test matrix inverse. 
To test " - << "matrix inverse, please install LAPACKE " - << "and MKL/Openblas, and re-build PaddlePaddle."; -#endif - } -} - -void testMatrixSoftmax(int height, int width) { - MatrixPtr cpuInput = std::make_shared(height, width); - MatrixPtr cpuOutput = std::make_shared(height, width); - MatrixPtr gpuInput = std::make_shared(height, width); - MatrixPtr gpuOutput = std::make_shared(height, width); - - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - cpuOutput->zero(); - gpuOutput->zero(); - cpuInput->softmax(*cpuOutput); - gpuInput->softmax(*gpuOutput); - - TensorCheckErr(*cpuOutput, *gpuOutput); -} - -void testSequenceSoftmax(int batchSize) { - // forward - int inputDim = 1; - MatrixPtr cpuInput = std::make_shared(batchSize, inputDim); - MatrixPtr gpuInput = std::make_shared(batchSize, inputDim); - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - - IVectorPtr cpuSequence; - generateSequenceStartPositions(batchSize, cpuSequence); - IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true); - gpuSequence->copyFrom(*cpuSequence); - - cpuInput->sequenceSoftmax(*cpuInput, *cpuSequence); - gpuInput->sequenceSoftmax(*gpuInput, *gpuSequence); - - TensorCheckErr(*cpuInput, *gpuInput); -} - -void testMatrixSoftmaxThreshold(int height, int width) { - MatrixPtr cpuInput = std::make_shared(height, width); - MatrixPtr cpuOutput = std::make_shared(height, width); - MatrixPtr gpuInput = std::make_shared(height, width); - MatrixPtr gpuOutput = std::make_shared(height, width); - - cpuInput->randomizeUniform(); - cpuInput->getData()[0] = 100.0; - gpuInput->copyFrom(*cpuInput); - cpuOutput->zero(); - gpuOutput->zero(); - cpuInput->softmax(*cpuOutput); - gpuInput->softmax(*gpuOutput); - - MatrixPtr outputCheck = std::make_shared(height, width); - outputCheck->copyFrom(*gpuOutput); - // check output zero - int cpuCount = 0; - int gpuCount = 0; - auto zeroNum = [](MatrixPtr out, int& count) { - for (size_t i = 0; i < out->getHeight(); i++) { 
- for (size_t j = 0; j < out->getWidth(); j++) { - if (out->getElement(i, j) == 0) count++; - } - } - }; - zeroNum(cpuOutput, cpuCount); - zeroNum(outputCheck, gpuCount); - EXPECT_EQ(cpuCount, 0) << "Cpu softmax output value 0"; - EXPECT_EQ(gpuCount, 0) << "Gpu softmax output value 0"; -} - -void testMatrixSoftmaxBp(int height, int width) { - MatrixPtr cpuInput = std::make_shared(height, width); - MatrixPtr cpuOutput = std::make_shared(height, width); - MatrixPtr gpuInput = std::make_shared(height, width); - MatrixPtr gpuOutput = std::make_shared(height, width); - - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - gpuOutput->softmaxBackward(*gpuInput); - - MatrixPtr sftMaxSum = std::make_shared(height, 1); - MatrixPtr sftMaxDot = std::make_shared(height, width); - sftMaxDot->dotMul(*cpuOutput, *cpuInput); - sftMaxSum->colMerge(*sftMaxDot); - cpuOutput->softmaxDerivative(*cpuInput, *sftMaxSum); - - TensorCheckErr(*cpuOutput, *gpuOutput); -} - -TEST(Matrix, softmax) { - for (auto height : {1, 3, 131}) { // prime numbers close to 1, 4, 127 - for (auto width : {1, 17, 251}) { // prime numbers close to 1, 16, 256 - VLOG(3) << " height=" << height << " width=" << width; - - testMatrixSoftmax(height, width); - testMatrixSoftmaxBp(height, width); - testMatrixSoftmaxThreshold(height, width); - } - testSequenceSoftmax(height); - } -} - -void testMatrixAddToRows(int numSamples, int tableSize, int inputDim) { - MatrixPtr cpuTable = std::make_shared(tableSize, inputDim); - MatrixPtr gpuTable = std::make_shared(tableSize, inputDim); - cpuTable->randomizeUniform(); - gpuTable->copyFrom(*cpuTable); - - IVectorPtr cpuIds; - IVectorPtr gpuIds; - cpuIds = VectorT::create(numSamples, false); - gpuIds = VectorT::create(numSamples, true); - cpuIds->rand(tableSize); - gpuIds->copyFrom(*cpuIds); - - MatrixPtr cpuOutput = std::make_shared(numSamples, inputDim); - MatrixPtr gpuOutput = 
std::make_shared(numSamples, inputDim); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - - cpuOutput->addToRows(*cpuTable, *cpuIds); - gpuOutput->addToRows(*gpuTable, *gpuIds); - - TensorCheckErr(*cpuTable, *gpuTable); -} - -TEST(Matrix, tableProjection) { - for (auto numSamples : {10, 100, 1000, 10000, 80000}) { - for (auto tableSize : {10, 100}) { - for (auto inputDim : {20, 50}) { - VLOG(3) << " numSamples=" << numSamples << " tableSize=" << tableSize - << " inputDim=" << inputDim; - testMatrixAddToRows(numSamples, tableSize, inputDim); - } - } - } -} - -void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) { - int heightA = transa == false ? dimM : dimK; - int widthA = transa == false ? dimK : dimM; - int heightB = transb == false ? dimK : dimN; - int widthB = transb == false ? dimN : dimK; - int heightC = dimM; - int widthC = dimN; - - MatrixPtr cpuA = std::make_shared(heightA, widthA, transa); - MatrixPtr cpuB = std::make_shared(heightB, widthB, transb); - MatrixPtr cpuC = std::make_shared(heightC, widthC); - MatrixPtr gpuA = std::make_shared(heightA, widthA, transa); - MatrixPtr gpuB = std::make_shared(heightB, widthB, transb); - MatrixPtr gpuC = std::make_shared(heightC, widthC); - - real alpha = 1.5; - real beta = 2.0; - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - - cpuC->mul(*cpuA, *cpuB, alpha, beta); - gpuC->mul(*gpuA, *gpuB, alpha, beta); - - TensorCheckErr(*cpuC, *gpuC); -} - -void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) { - int heightA = transa == false ? dimM : dimK; - int widthA = transa == false ? dimK : dimM; - int heightB = transb == false ? dimK : dimN; - int widthB = transb == false ? 
dimN : dimK; - int heightC = dimM; - int widthC = dimN; - - MatrixPtr cpuA = std::make_shared(heightA, widthA, transa); - MatrixPtr cpuB = std::make_shared(heightB, widthB, transb); - MatrixPtr cpuC = std::make_shared(heightC, widthC); - MatrixPtr gpuA = std::make_shared(heightA, widthA, transa); - MatrixPtr gpuB = std::make_shared(heightB, widthB, transb); - MatrixPtr gpuC = std::make_shared(heightC, widthC); - - real alpha = 1.5; - real beta = 2.0; - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - gpuA->copyFrom(*cpuA); - gpuB->copyFrom(*cpuB); - gpuC->copyFrom(*cpuC); - - auto subSize = [](int& start, int& end, int dim) { - if (dim == 1) { - start = 0; - end = dim; - } else { - int subDim = rand() % (dim - 1) + 1; // NOLINT - start = rand() % (dim - subDim); // NOLINT - end = start + subDim; - } - }; - - auto subMatrix = [](MatrixPtr& sub, - MatrixPtr matrix, - size_t startRow, - size_t endRow, - size_t startCol, - size_t endCol) { - if (!matrix->isTransposed()) { - sub = matrix->subMatrix(startRow, endRow, startCol, endCol); - } else { - sub = matrix->subMatrix(startCol, endCol, startRow, endRow); - } - }; - - int startM, endM; - int startN, endN; - int startK, endK; - subSize(startM, endM, dimM); - subSize(startN, endN, dimN); - subSize(startK, endK, dimK); - - MatrixPtr subCpuA; - MatrixPtr subCpuB; - MatrixPtr subGpuA; - MatrixPtr subGpuB; - subMatrix(subCpuA, cpuA, startM, endM, startK, endK); - subMatrix(subGpuA, gpuA, startM, endM, startK, endK); - subMatrix(subCpuB, cpuB, startK, endK, startN, endN); - subMatrix(subGpuB, gpuB, startK, endK, startN, endN); - MatrixPtr subCpuC = cpuC->subMatrix(startM, endM, startN, endN); - MatrixPtr subGpuC = gpuC->subMatrix(startM, endM, startN, endN); - - subCpuC->mul(*subCpuA, *subCpuB, alpha, beta); - subGpuC->mul(*subGpuA, *subGpuB, alpha, beta); - - TensorCheckErr(*cpuC, *gpuC); -} - -TEST(Matrix, mul) { - for (auto transa : {false, true}) { - for (auto transb : {false, true}) { 
- for (auto dimM : {1, 9, 53, 127, 345, 1023, 2135}) { - for (auto dimN : {1, 5, 37, 256, 1024}) { - for (auto dimK : {8, 45, 346, 784, 1025}) { - if (true == transa && true == transb) { - continue; - } - VLOG(3) << setiosflags(ios::left) << setfill(' ') - << " transa=" << transa << " transb=" << transb - << " dimM=" << setw(5) << dimM << " dimN=" << setw(5) - << dimN << " dimK=" << setw(5) << dimK; - - testMatrixMul(transa, transb, dimM, dimN, dimK); - testSubMatrixMul(transa, transb, dimM, dimN, dimK); - } - } - } - } - } -} - -void testVectorRowFunc(int size) { - CpuVectorPtr cpu = std::make_shared>(size); - GpuVectorPtr gpu = std::make_shared>(size); - - cpu->rand(); - gpu->copyFrom(*cpu); - - EXPECT_EQ(cpu->getMax(), gpu->getMax()); - EXPECT_EQ(cpu->getMin(), gpu->getMin()); - EXPECT_EQ(cpu->getAbsMax(), gpu->getAbsMax()); -} - -TEST(Vector, rowFunc) { - for (auto size : {1, 3, 997}) { // prime numbers close to 1, 4, 1024 - VLOG(3) << " size=" << size; - testVectorRowFunc(size); - } -} - -template -void testVectorReset(int size) { - std::shared_ptr> cpu = std::make_shared>(size); - std::shared_ptr> gpu = std::make_shared>(size); - - T value = (T)((int)rand() % 100 + 1.0f / ((int)rand() % 100)); - cpu->reset(value); - gpu->reset(value); - - TensorCheckEqual(*cpu, *gpu); -} - -template -void testVecortSelectFrom(int size) { - std::shared_ptr> cpuDst = std::make_shared>(size); - std::shared_ptr> gpuDst = std::make_shared>(size); - std::shared_ptr> cpuSrc = - std::make_shared>(size * 2); - std::shared_ptr> gpuSrc = - std::make_shared>(size * 2); - CpuIVectorPtr cpuIds = std::make_shared>(size); - GpuIVectorPtr gpuIds = std::make_shared>(size); - - if (std::is_same::value) { - cpuSrc->rand(); - } else { - cpuSrc->rand(100000); - } - gpuSrc->copyFrom(*cpuSrc); - cpuIds->rand(size); - gpuIds->copyFrom(*cpuIds); - - cpuDst->selectFrom(*cpuSrc, *cpuIds); - gpuDst->selectFrom(*gpuSrc, *gpuIds); - - TensorCheckEqual(*cpuDst, *gpuDst); -} - -template -void 
testVecotrZeroMem(int size) { - std::shared_ptr> cpu = std::make_shared>(size); - std::shared_ptr> gpu = std::make_shared>(size); - - cpu->zeroMem(); - gpu->zeroMem(); - - TensorCheckEqual(*cpu, *gpu); -} - -template -void testVectorIsEqual(int size) { - std::shared_ptr> cpuA = std::make_shared>(size); - std::shared_ptr> cpuB = std::make_shared>(size); - std::shared_ptr> gpuA = std::make_shared>(size); - std::shared_ptr> gpuB = std::make_shared>(size); - - if (std::is_same::value) { - cpuB->rand(); - } else { - cpuB->rand(100000); - } - gpuB->copyFrom(*cpuB); - - T value = (T)((int)rand() % 100 + 1.0f / ((int)rand() % 100)); - cpuA->isEqualTo(*cpuB, value); - gpuA->isEqualTo(*gpuB, value); - - TensorCheckEqual(*cpuA, *gpuA); -} - -TEST(Vector, Equal) { - for (auto size : {1, 3, 997}) { // prime numbers close to 1, 4, 1024 - VLOG(3) << " size=" << size; - testVectorReset(size); - testVectorReset(size); - testVecortSelectFrom(size); - testVecortSelectFrom(size); - testVecotrZeroMem(size); - testVecotrZeroMem(size); - testVectorIsEqual(size); - testVectorIsEqual(size); - } -} - -void testMatrixTopK(int samples, int dim, int beamSize) { - MatrixPtr cpuSrc = std::make_shared(samples, dim); - MatrixPtr gpuSrc = std::make_shared(samples, dim); - MatrixPtr cpuVal = std::make_shared(samples, beamSize); - MatrixPtr gpuVal = std::make_shared(samples, beamSize); - IVectorPtr cpuIds = std::make_shared(samples * beamSize); - IVectorPtr gpuIds = std::make_shared(samples * beamSize); - - cpuSrc->randomizeUniform(); - gpuSrc->copyFrom(*cpuSrc); - - cpuSrc->rowMax(*cpuIds, *cpuVal); - gpuSrc->rowMax(*gpuIds, *gpuVal); - - TensorCheckEqual(*cpuVal, *gpuVal); -} - -TEST(Matrix, topK) { - for (auto samples : {1, 17, 131}) { // prime numbers close to 1, 16, 127 - for (auto dim : {1, 3, 997}) { // prime numbers close to 1, 4, 1024 - for (auto beamSize : {1, 5, 10, 20, 40, (int)rand() % dim + 1}) { - if (beamSize > dim) continue; - VLOG(3) << " samples=" << samples << " beamSize=" << 
beamSize - << " dim=" << dim; - testMatrixTopK(samples, dim, beamSize); - } - } - } -} - -void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) { - int nnz = samples * dim * ratio; - if (nnz < 1) nnz = 1; // Because sparseRand in MathUtil.cpp requires this. - MatrixPtr cpuSrc = std::make_shared(samples, dim, nnz); - MatrixPtr gpuSrc = std::make_shared(samples, dim, nnz); - MatrixPtr cpuVal = std::make_shared(samples, beamSize); - MatrixPtr gpuVal = std::make_shared(samples, beamSize); - IVectorPtr cpuIds = std::make_shared(samples * beamSize); - IVectorPtr gpuIds = std::make_shared(samples * beamSize); - - cpuSrc->randomizeUniform(); - gpuSrc->copyFrom(*cpuSrc); - cpuVal->zero(); - cpuIds->zero(); - gpuVal->zero(); - gpuIds->zero(); - - cpuSrc->rowMax(*cpuIds, *cpuVal); - gpuSrc->rowMax(*gpuIds, *gpuVal); - - TensorCheckEqual(*cpuVal, *gpuVal); - - IVectorPtr outCheckIds = std::make_shared(samples * beamSize); - outCheckIds->copyFrom(*gpuIds); - - const int* data1 = cpuIds->getData(); - const int* data2 = outCheckIds->getData(); - size_t size = cpuIds->getSize(); - for (size_t i = 0; i < size; i++) { - if (data1[i] == -1 && data1[i] != data2[i]) { - EXPECT_EQ(data1[i], data2[i]); - } - } -} - -TEST(SMatrix, topK) { - for (auto samples : {1, 3, 61}) { - for (auto dim : {1, 3, 61}) { - for (auto beamSize : {1, 3, 61}) { - for (auto ratio : {0.01, 0.001}) { - if (beamSize > dim) continue; - VLOG(3) << " samples=" << samples << " beamSize=" << beamSize - << " dim=" << dim << " ratio=" << ratio; - testSMatrixTopK(samples, dim, beamSize, ratio); - } - } - } - } -} - -void testMatrixSequenceAvg(int batchSize, int inputDim, int mode) { - MatrixPtr cpuInput = std::make_shared(batchSize, inputDim); - MatrixPtr gpuInput = std::make_shared(batchSize, inputDim); - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - - IVectorPtr cpuSequence; - generateSequenceStartPositions(batchSize, cpuSequence); - IVectorPtr gpuSequence = 
IVector::create(cpuSequence->getSize(), true); - gpuSequence->copyFrom(*cpuSequence); - - int newBatchSize = cpuSequence->getSize() - 1; - MatrixPtr cpuOutput = std::make_shared(newBatchSize, inputDim); - MatrixPtr gpuOutput = std::make_shared(newBatchSize, inputDim); - cpuOutput->zero(); - gpuOutput->zero(); - - cpuOutput->sequenceAvgForward(*cpuInput, *cpuSequence, mode); - gpuOutput->sequenceAvgForward(*gpuInput, *gpuSequence, mode); - - TensorCheckErr(*cpuOutput, *gpuOutput); - - MatrixPtr cpuInGrad = std::make_shared(batchSize, inputDim); - MatrixPtr gpuInGrad = std::make_shared(batchSize, inputDim); - cpuInGrad->randomizeUniform(); - gpuInGrad->copyFrom(*cpuInGrad); - - cpuInGrad->sequenceAvgBackward(*cpuOutput, *cpuSequence, mode); - gpuInGrad->sequenceAvgBackward(*gpuOutput, *gpuSequence, mode); - - TensorCheckErr(*cpuInGrad, *gpuInGrad); -} - -TEST(Matrix, sequenceAvg) { - for (auto batchSize : {10, 128, 6000}) { - for (auto inputDim : {32, 100, 512}) { - for (auto mode : {0, 1, 2}) { - VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim - << " mode=" << mode; - testMatrixSequenceAvg(batchSize, inputDim, mode); - } - } - } -} - -void testParamReluBackwardDiff(int height, - int width, - int w_height, - int w_width) { - MatrixPtr oGrad = CpuMatrix::create(height, width, false, false); - MatrixPtr input = CpuMatrix::create(height, width, false, false); - MatrixPtr diff = CpuMatrix::create(height, width, false, false); - MatrixPtr w = CpuMatrix::create(w_height, w_width, false, false); - - oGrad->randomizeUniform(); - input->randomizeUniform(); - w->randomizeUniform(); - diff->randomizeUniform(); - input->add(-0.5); - - MatrixPtr oGradGpu = GpuMatrix::create(height, width, false, true); - MatrixPtr inputGpu = GpuMatrix::create(height, width, false, true); - MatrixPtr diffGpu = CpuMatrix::create(height, width, false, true); - MatrixPtr wGpu = GpuMatrix::create(w_height, w_width, false, true); - - oGradGpu->copyFrom(*oGrad); - 
inputGpu->copyFrom(*input); - wGpu->copyFrom(*w); - diffGpu->copyFrom(*diff); - - diff->paramReluBackwardDiff(*oGrad, *input, *w); - diffGpu->paramReluBackwardDiff(*oGradGpu, *inputGpu, *wGpu); - - TensorCheckErr(*diff, *diffGpu); -} - -TEST(Matrix, paramReluBackwardDiff) { - for (auto height : {10, 40, 100}) { - for (auto width : {10, 40, 100}) { - for (auto w_height : {1, 2}) { - for (auto w_width : {1, 2}) { - if (width % (w_height * w_width)) continue; - testParamReluBackwardDiff(height, width, w_height, w_width); - } - } - } - } -} - -void testClassificationError(int numSamples, int dim, int topkSize) { - MatrixPtr cpuError = std::make_shared(numSamples, 1); - MatrixPtr gpuError = std::make_shared(numSamples, 1); - MatrixPtr cpuOutput = std::make_shared(numSamples, dim); - MatrixPtr gpuOutput = std::make_shared(numSamples, dim); - IVectorPtr cpuLabel = std::make_shared(numSamples); - IVectorPtr gpuLabel = std::make_shared(numSamples); - - cpuOutput->randomizeUniform(); - cpuLabel->rand(dim); - gpuOutput->copyFrom(*cpuOutput); - gpuLabel->copyFrom(*cpuLabel); - - cpuError->classificationError(*cpuOutput, *cpuLabel, topkSize); - gpuError->classificationError(*gpuOutput, *gpuLabel, topkSize); - - TensorCheckEqual(*cpuError, *gpuError); -} - -TEST(Matrix, classificationError) { - for (auto numSamples : {1, 3, 31}) { - for (auto dim : {1, 3, 31}) { - for (auto topkSize : {1, 3, (int)rand() % dim + 1}) { - if (topkSize > dim) continue; - VLOG(3) << " sample= " << numSamples << " topkSize= " << topkSize - << " dim= " << dim; - testClassificationError(numSamples, dim, topkSize); - } - } - } -} - -void testMaxPoolFwdBwd(int numSamples, - int channels, - int imgSizeH, - int imgSizeW, - int ksizeH, - int ksizeW, - int strideH, - int strideW, - int padH, - int padW) { - int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); - int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); - - int inWidth = imgSizeH * imgSizeW * channels; - MatrixPtr input = 
CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - - int outWidth = channels * outH * outW; - MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); - - input->randomizeUniform(); - target->randomizeUniform(); - inputGpu->copyFrom(*input); - targetGpu->copyFrom(*target); - - target->maxPoolForward(*input, - imgSizeH, - imgSizeW, - channels, - ksizeW, - ksizeH, - strideH, - strideW, - outH, - outW, - padH, - padW); - targetGpu->maxPoolForward(*inputGpu, - imgSizeH, - imgSizeW, - channels, - ksizeW, - ksizeH, - strideH, - strideW, - outH, - outW, - padH, - padW); - MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); - targetCheck->copyFrom(*targetGpu); - checkMatrixEqual(target, targetCheck); - - MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); - MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpuGrad = - GpuMatrix::create(numSamples, outWidth, false, true); - - inputGrad->randomizeUniform(); - targetGrad->randomizeUniform(); - inputGpuGrad->copyFrom(*inputGrad); - targetGpuGrad->copyFrom(*targetGrad); - - inputGrad->maxPoolBackward(*input, - imgSizeH, - imgSizeW, - *targetGrad, - *target, - ksizeW, - ksizeH, - strideH, - strideW, - outH, - outW, - 1.0, - 1.0, - padH, - padW); - inputGpuGrad->maxPoolBackward(*inputGpu, - imgSizeH, - imgSizeW, - *targetGpuGrad, - *targetGpu, - ksizeW, - ksizeH, - strideH, - strideW, - outH, - outW, - 1.0, - 1.0, - padH, - padW); - MatrixPtr targetBwdCheck = - CpuMatrix::create(numSamples, inWidth, false, false); - targetBwdCheck->copyFrom(*inputGpuGrad); - checkMatrixEqual(inputGrad, targetBwdCheck); -} - -void testAvgPoolFwdBwd(int numSamples, - int channels, - int imgSizeH, - int 
imgSizeW, - int ksizeH, - int ksizeW, - int strideH, - int strideW, - int padH, - int padW) { - int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); - int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); - - int inWidth = imgSizeH * imgSizeW * channels; - MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - - int outWidth = channels * outH * outW; - MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); - - input->randomizeUniform(); - target->randomizeUniform(); - inputGpu->copyFrom(*input); - targetGpu->copyFrom(*target); - - target->avgPoolForward(*input, - imgSizeH, - imgSizeW, - channels, - ksizeW, - ksizeH, - strideH, - strideW, - outH, - outW, - padH, - padW); - targetGpu->avgPoolForward(*inputGpu, - imgSizeH, - imgSizeW, - channels, - ksizeW, - ksizeH, - strideH, - strideW, - outH, - outW, - padH, - padW); - - TensorCheckErr(*target, *targetGpu); - - MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); - MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpuGrad = - GpuMatrix::create(numSamples, outWidth, false, true); - - inputGrad->randomizeUniform(); - targetGrad->randomizeUniform(); - inputGpuGrad->copyFrom(*inputGrad); - targetGpuGrad->copyFrom(*targetGrad); - - inputGrad->avgPoolBackward(*targetGrad, - imgSizeH, - imgSizeW, - ksizeW, - ksizeH, - strideH, - strideW, - outH, - outW, - 1.0, - 1.0, - padH, - padW); - inputGpuGrad->avgPoolBackward(*targetGpuGrad, - imgSizeH, - imgSizeW, - ksizeW, - ksizeH, - strideH, - strideW, - outH, - outW, - 1.0, - 1.0, - padH, - padW); - - TensorCheckErr(*inputGrad, *inputGpuGrad); -} - -// TODO(yi): I noticed many such blindly combinatorial tests in this -// file. 
They are no help to locate defects at all. -TEST(Matrix, PoolFwdBwd) { - for (auto numSamples : {1, 3}) { - for (auto channels : {1, 3}) { - for (auto imgSizeH : {13, 17}) { - for (auto imgSizeW : {17, 19}) { - for (auto sizeX : {2, 3}) { - for (auto sizeY : {2, 3}) { - for (auto sH : {1, 2}) { - for (auto sW : {1, 2}) { - for (auto pH : {0, (sizeY - 1) / 2}) { - for (auto pW : {0, (sizeX - 1) / 2}) { - VLOG(3) << " numSamples=" << numSamples - << " channels=" << channels - << " imgSizeH=" << imgSizeH - << " imgSizeW=" << imgSizeW << " sizeX=" << sizeX - << " sizeY=" << sizeY << " strideH=" << sH - << " strideW=" << sW << " padingH=" << pH - << " padingW=" << pW; - testMaxPoolFwdBwd(numSamples, - channels, - imgSizeH, - imgSizeW, - sizeX, - sizeY, - sH, - sW, - pH, - pW); - testAvgPoolFwdBwd(numSamples, - channels, - imgSizeH, - imgSizeW, - sizeX, - sizeY, - sH, - sW, - pH, - pW); - } - } - } - } - } - } - } - } - } - } -} - -void testMaxOutFwdBwd( - int numSamples, int imgSizeH, int imgSizeW, int channels, int groups) { - int inWidth = imgSizeH * imgSizeW * channels; - int outChannels = channels / groups; - int outWidth = imgSizeH * imgSizeW * outChannels; - - // forward - MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - - MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); - - IVectorPtr id = CpuIVector::create(numSamples * outWidth, false); - IVectorPtr idGpu = GpuIVector::create(numSamples * outWidth, true); - - input->randomizeUniform(); - inputGpu->copyFrom(*input); - - target->maxoutForward(*input, *id, outChannels, groups); - targetGpu->maxoutForward(*inputGpu, *idGpu, outChannels, groups); - - TensorCheckErr(*target, *targetGpu); - TensorCheckEqual(*id, *idGpu); - - // backward - MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); - 
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); - - MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpuGrad = - GpuMatrix::create(numSamples, outWidth, false, true); - - inputGrad->randomizeUniform(); - targetGrad->randomizeUniform(); - inputGpuGrad->copyFrom(*inputGrad); - targetGpuGrad->copyFrom(*targetGrad); - - inputGrad->maxoutBackward(*targetGrad, *id, outChannels, groups); - inputGpuGrad->maxoutBackward(*targetGpuGrad, *idGpu, outChannels, groups); - - TensorCheckErr(*inputGrad, *inputGpuGrad); -} - -TEST(Matrix, MaxOutFwdBwd) { - for (auto numSamples : {5, 10}) { - for (auto channels : {8, 16}) { - for (auto imgSizeH : {14, 28}) { - for (auto imgSizeW : {16, 30}) { - for (auto groups : {2, 4}) { - VLOG(3) << " numSamples=" << numSamples << " channels=" << channels - << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW - << " groups=" << groups; - testMaxOutFwdBwd(numSamples, imgSizeH, imgSizeW, channels, groups); - } - } - } - } - } -} - -TEST(CpuMatrix, copyFrom) { - const size_t height = 31; - const size_t width = 53; - CpuMatrix cpu(height, width); - GpuMatrix gpu(height, width); - CpuMatrix copy(height, width); - - cpu.randomizeUniform(); - gpu.copyFrom(cpu); - copy.copyFrom(gpu, HPPL_STREAM_DEFAULT); - - TensorCheckEqual(cpu, copy); -} - -void testBatch2seqPadding(int batchSize, int inputDim) { - MatrixPtr cpuInput = std::make_shared(batchSize, inputDim); - MatrixPtr gpuInput = std::make_shared(batchSize, inputDim); - cpuInput->randomizeUniform(); - gpuInput->copyFrom(*cpuInput); - - IVectorPtr cpuSequence; - generateSequenceStartPositions(batchSize, cpuSequence); - for (int i = 0; i < int(cpuSequence->getSize()); ++i) { - (cpuSequence->getData())[i] += 1; // so no way that maxSeqLen is 0; - } - - IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true); - gpuSequence->copyFrom(*cpuSequence); - - size_t numSeq = cpuSequence->getSize() - 1; - size_t maxSeqLen 
= *std::max_element(cpuSequence->getData(), - cpuSequence->getData() + numSeq); - - printf("numSeq = %ld, maxSeqLen = %ld\n", numSeq, maxSeqLen); - MatrixPtr cBatch = std::make_shared(numSeq * maxSeqLen, inputDim); - MatrixPtr gBatch = std::make_shared(numSeq * maxSeqLen, inputDim); - MatrixPtr cCheck = std::make_shared(numSeq * maxSeqLen, inputDim); - - // hl_sequence2batch_copy_padding(gBatch->getData(), - // gpuInput->getData(), - // cpuSequence->getData(), - // inputDim, - // maxSeqLen, - // numSeq, - // false, - // true); - // cCheck->copyFrom(*gBatch); - - // int* seqStart = cpuSequence->getData(); - // float* batchData = cBatch->getData(); - // float* seqData = cpuInput->getData(); - // for (size_t i = 0; i < maxSeqLen; i++) { - // for (size_t j = 0; j < numSeq; j++) { - // size_t sequenceStart = seqStart[j]; - // size_t sequenceLength = seqStart[j + 1] - seqStart[j]; - // if (i < sequenceLength) { - // memcpy(batchData + (i * numSeq + j) * inputDim, - // seqData + (sequenceStart + i) * inputDim, - // inputDim * sizeof(real)); - // } else { - // memset(batchData + (i * numSeq + j) * inputDim, - // 0, - // inputDim * sizeof(real)); - // } - // } - // } - - // TensorCheckErr(*cBatch, *cCheck); -} - -TEST(Matrix, warpCTC) { - for (auto batchSize : {1, 3, 17}) { - for (auto inputDim : {1, 3, 31}) { - VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim; - testBatch2seqPadding(batchSize, inputDim); - } - } -} - -void testMaxPool3DFwdBwd(int numSamples, - int channels, - int imgSizeD, - int imgSizeH, - int imgSizeW, - int ksizeD, - int ksizeH, - int ksizeW, - int strideD, - int strideH, - int strideW, - int padD, - int padH, - int padW) { - int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); - int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); - int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); - - int inWidth = channels * imgSizeD * imgSizeH * imgSizeW; - MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, 
false); - MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - - int outWidth = channels * outD * outH * outW; - MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); - MatrixPtr maxIdx = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr maxIdxGpu = GpuMatrix::create(numSamples, outWidth, false, true); - - input->randomizeUniform(); - target->randomizeUniform(); - inputGpu->copyFrom(*input); - targetGpu->copyFrom(*target); - - target->maxPool3DForward(*input, - *maxIdx, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - outD, - outH, - outW, - ksizeD, - ksizeH, - ksizeW, - strideD, - strideH, - strideW, - padD, - padH, - padW); - targetGpu->maxPool3DForward(*inputGpu, - *maxIdxGpu, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - outD, - outH, - outW, - ksizeD, - ksizeH, - ksizeW, - strideD, - strideH, - strideW, - padD, - padH, - padW); - MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); - targetCheck->copyFrom(*targetGpu); - checkMatrixEqual(target, targetCheck); - - MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); - MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpuGrad = - GpuMatrix::create(numSamples, outWidth, false, true); - - inputGrad->randomizeUniform(); - targetGrad->randomizeUniform(); - inputGpuGrad->copyFrom(*inputGrad); - targetGpuGrad->copyFrom(*targetGrad); - - inputGrad->maxPool3DBackward(*targetGrad, - *maxIdx, - imgSizeD, - imgSizeH, - imgSizeW, - outD, - outH, - outW, - ksizeD, - ksizeH, - ksizeW, - strideD, - strideH, - strideW, - padD, - padH, - padW, - 1.0, - 1.0); - inputGpuGrad->maxPool3DBackward(*targetGpuGrad, - *maxIdxGpu, - imgSizeD, - imgSizeH, - imgSizeW, - outD, - outH, - outW, - ksizeD, - ksizeH, - ksizeW, - 
strideD, - strideH, - strideW, - padD, - padH, - padW, - 1.0, - 1.0); - MatrixPtr targetBwdCheck = - CpuMatrix::create(numSamples, inWidth, false, false); - targetBwdCheck->copyFrom(*inputGpuGrad); - checkMatrixEqual(inputGrad, targetBwdCheck); -} - -void testAvgPool3DFwdBwd(int numSamples, - int channels, - int imgSizeD, - int imgSizeH, - int imgSizeW, - int ksizeD, - int ksizeH, - int ksizeW, - int strideD, - int strideH, - int strideW, - int padD, - int padH, - int padW) { - int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); - int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); - int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); - - int inWidth = imgSizeD * imgSizeH * imgSizeW * channels; - MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - - int outWidth = channels * outD * outH * outW; - MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); - - input->randomizeUniform(); - target->randomizeUniform(); - inputGpu->copyFrom(*input); - targetGpu->copyFrom(*target); - - target->avgPool3DForward(*input, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - outD, - outH, - outW, - ksizeD, - ksizeH, - ksizeW, - strideD, - strideH, - strideW, - padD, - padH, - padW); - - targetGpu->avgPool3DForward(*inputGpu, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - outD, - outH, - outW, - ksizeD, - ksizeH, - ksizeW, - strideD, - strideH, - strideW, - padD, - padH, - padW); - - TensorCheckErr(*target, *targetGpu); - - MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); - MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); - MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); - MatrixPtr targetGpuGrad = - GpuMatrix::create(numSamples, outWidth, false, true); - - 
inputGrad->randomizeUniform(); - targetGrad->randomizeUniform(); - inputGpuGrad->copyFrom(*inputGrad); - targetGpuGrad->copyFrom(*targetGrad); - - inputGrad->avgPool3DBackward(*targetGrad, - imgSizeD, - imgSizeH, - imgSizeW, - outD, - outH, - outW, - ksizeD, - ksizeH, - ksizeW, - strideD, - strideH, - strideW, - padD, - padH, - padW, - 1.0, - 1.0); - - inputGpuGrad->avgPool3DBackward(*targetGpuGrad, - imgSizeD, - imgSizeH, - imgSizeW, - outD, - outH, - outW, - ksizeD, - ksizeH, - ksizeW, - strideD, - strideH, - strideW, - padD, - padH, - padW, - 1.0, - 1.0); - TensorCheckErr(*inputGrad, *inputGpuGrad); -} - -// TODO(yi): I noticed many such blindly combinatorial tests in this -// file. They are no help to locate defects at all. -TEST(Matrix, Pool3DFwdBwd) { - for (auto numSamples : {1, 3}) { - for (auto channels : {3}) { - for (auto imgSizeD : {9, 16}) { - for (auto imgSizeH : {9, 32}) { - for (auto imgSizeW : {9, 32}) { - for (auto sizeX : {3}) { - for (auto sizeY : {3}) { - for (auto sizeZ : {3}) { - for (auto sD : {2}) { - for (auto sH : {2}) { - for (auto sW : {2}) { - for (auto pD : {0, (sizeZ - 1) / 2}) { - for (auto pH : {0, (sizeY - 1) / 2}) { - for (auto pW : {0, (sizeX - 1) / 2}) { - VLOG(3) << " numSamples=" << numSamples - << " channels=" << channels - << " imgSizeD=" << imgSizeD - << " imgSizeH=" << imgSizeH - << " imgSizeW=" << imgSizeW - << " sizeX=" << sizeX - << " sizeY=" << sizeY - << " sizeZ=" << sizeZ << " strideD=" << sD - << " strideH=" << sH << " strideW=" << sW - << " padingD=" << pD << " padingH=" << pH - << " padingW=" << pW; - - testMaxPool3DFwdBwd(numSamples, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - sizeX, - sizeY, - sizeZ, - sD, - sH, - sW, - pD, - pH, - pW); - testAvgPool3DFwdBwd(numSamples, - channels, - imgSizeD, - imgSizeH, - imgSizeW, - sizeX, - sizeY, - sizeZ, - sD, - sH, - sW, - pD, - pH, - pW); - } - } - } - } - } - } - } - } - } - } - } - } - } - } - - // for (auto numSamples : {1, 3}) { - // for (auto channels : {1, 
3}) { - // for (auto imgSizeD : {9,16}) { - // for (auto imgSizeH : {9, 32}) { - // for (auto imgSizeW : {9, 32}) { - // for (auto sizeX : {2, 3}) { - // for (auto sizeY : {2, 3}) { - // for (auto sizeZ : {2,3}){ - // for (auto sD : {1, 2}) { - // for (auto sH : {1, 2}) { - // for (auto sW : {1, 2}) { - // for (auto pD : {0, (sizeZ - 1) / 2}){ - // for (auto pH : {0, (sizeY - 1) / 2}) { - // for (auto pW : {0, (sizeX - 1) / 2}) { - // VLOG(3) << " numSamples=" << numSamples - // << " channels=" << channels - // << " imgSizeD=" << imgSizeD - // << " imgSizeH=" << imgSizeH - // << " imgSizeW=" << imgSizeW - // << " sizeX=" << sizeX - // << " sizeY=" << sizeY - // << " sizeZ=" << sizeZ - // << " strideD=" << sD - // << " strideH=" << sH - // << " strideW=" << sW - // << " padingD=" << pD - // << " padingH=" << pH - // << " padingW=" << pW; - // - // testMaxPool3DFwdBwd(numSamples, - // channels, - // imgSizeD, - // imgSizeH, - // imgSizeW, - // sizeX, - // sizeY, - // sizeZ, - // sD, - // sH, - // sW, - // pD, - // pH, - // pW); - // testAvgPool3DFwdBwd(numSamples, - // channels, - // imgSizeD, - // imgSizeH, - // imgSizeW, - // sizeX, - // sizeY, - // sizeZ, - // sD, - // sH, - // sW, - // pD, - // pH, - // pW); - // } - // } - // } - // } - // } - // } - // } - // } - // } - // } - // } - // } - // } - // } -} - -void testMatrixCol2Vol(int depth, int height, int width) { - int channel = 3; - int filterX = 3, filterY = 4, filterZ = 5; - int strideX = 2, strideY = 2, strideZ = 2; - int padX = 1, padY = 1, padZ = 1; - - MatrixPtr cpuImage = - std::make_shared(channel, depth * height * width); - MatrixPtr gpuImage = - std::make_shared(channel, depth * height * width); - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - - int outD = outputSize(depth, filterZ, padZ, strideZ, true); - int outH = outputSize(height, filterY, padY, strideY, true); - int outW = outputSize(width, filterX, padX, strideX, true); - - int colBufHeight = channel * filterZ * filterY * 
filterX; - int colBufWidth = outD * outH * outW; - MatrixPtr cpuColBuf = std::make_shared(colBufHeight, colBufWidth); - MatrixPtr gpuColBuf = std::make_shared(colBufHeight, colBufWidth); - cpuColBuf->vol2Col(cpuImage->getData(), - channel, - depth, - height, - width, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - gpuColBuf->vol2Col(gpuImage->getData(), - channel, - depth, - height, - width, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - TensorCheckEqual(*cpuColBuf, *gpuColBuf); - - cpuColBuf->randomizeUniform(); - gpuColBuf->copyFrom(*cpuColBuf); - cpuColBuf->col2Vol(cpuImage->getData(), - channel, - depth, - height, - width, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - gpuColBuf->col2Vol(gpuImage->getData(), - channel, - depth, - height, - width, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - TensorCheckErr(*cpuImage, *gpuImage); -} - -TEST(Matrix, col2Vol) { - for (auto depth : {9, 16, 64}) { - for (auto height : {9, 11, 128}) { - for (auto width : {9, 32, 128}) { - VLOG(3) << "depth=" << depth << " height=" << height - << " width=" << width; - testMatrixCol2Vol(depth, height, width); - } - } - } -} - -#endif diff --git a/paddle/legacy/math/tests/test_matrixUtil.h b/paddle/legacy/math/tests/test_matrixUtil.h deleted file mode 100644 index 58c93f746e7ef4e2f2f98d4f410c74909a723812..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_matrixUtil.h +++ /dev/null @@ -1,233 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include "paddle/legacy/math/SparseMatrix.h" - -namespace paddle { - -void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) { - ASSERT_EQ(a->getWidth(), b->getWidth()); - ASSERT_EQ(a->getHeight(), b->getHeight()); - ASSERT_EQ(a->isTransposed(), b->isTransposed()); - for (size_t r = 0; r < a->getHeight(); ++r) { - for (size_t c = 0; c < a->getWidth(); ++c) { - ASSERT_FLOAT_EQ(a->getElement(r, c), b->getElement(r, c)); - } - } -} - -void checkSMatrixEqual(const CpuSparseMatrix& a, const CpuSparseMatrix& b) { - ASSERT_EQ(a.getWidth(), b.getWidth()); - ASSERT_EQ(a.getHeight(), b.getHeight()); - ASSERT_EQ(a.isTransposed(), b.isTransposed()); - ASSERT_EQ(a.getFormat(), b.getFormat()); - ASSERT_EQ(a.getElementCnt(), b.getElementCnt()); - for (size_t r = 0; r < a.getElementCnt(); ++r) { - ASSERT_FLOAT_EQ(a.getValue()[r], b.getValue()[r]); - } -} - -void checkSMatrixEqual(const CpuSparseMatrixPtr& a, - const CpuSparseMatrixPtr& b) { - ASSERT_EQ(a->getWidth(), b->getWidth()); - ASSERT_EQ(a->getHeight(), b->getHeight()); - ASSERT_EQ(a->isTransposed(), b->isTransposed()); - ASSERT_EQ(a->getFormat(), b->getFormat()); - ASSERT_EQ(a->getElementCnt(), b->getElementCnt()); - for (size_t r = 0; r < a->getElementCnt(); ++r) { - ASSERT_FLOAT_EQ(a->getValue()[r], b->getValue()[r]); - } -} - -void checkSMatrixEqual2(const CpuSparseMatrixPtr& a, - const CpuSparseMatrixPtr& b) { - ASSERT_EQ(a->getWidth(), b->getWidth()); - ASSERT_EQ(a->getHeight(), b->getHeight()); - ASSERT_EQ(a->isTransposed(), b->isTransposed()); - ASSERT_EQ(a->getFormat(), 
b->getFormat()); - ASSERT_EQ(a->getValueType(), b->getValueType()); - ASSERT_EQ(a->getElementCnt(), b->getElementCnt()); - if (a->getFormat() == SPARSE_CSR) { - for (size_t r = 0; r < a->getElementCnt(); ++r) { - ASSERT_EQ(a->getCols()[r], b->getCols()[r]); - if (a->getValueType() == FLOAT_VALUE) { - ASSERT_FLOAT_EQ(a->getValue()[r], b->getValue()[r]); - } - } - for (size_t r = 0; r <= a->getHeight(); r++) { - ASSERT_EQ(a->getRows()[r], b->getRows()[r]); - } - } else { - for (size_t r = 0; r < a->getElementCnt(); ++r) { - ASSERT_EQ(a->getRows()[r], b->getRows()[r]); - if (a->getValueType() == FLOAT_VALUE) { - ASSERT_FLOAT_EQ(a->getValue()[r], b->getValue()[r]); - } - } - for (size_t r = 0; r <= a->getWidth(); r++) { - ASSERT_EQ(a->getCols()[r], b->getCols()[r]); - } - } -} - -void checkSMatrixEqual2Dense(const CpuSparseMatrix& a, const CpuMatrix& b) { - ASSERT_EQ(a.getWidth(), b.getWidth()); - ASSERT_EQ(a.getHeight(), b.getHeight()); - ASSERT_EQ(a.isTransposed(), b.isTransposed()); - - if (a.getFormat() == SPARSE_CSC) { - int* rows = a.getRows(); - for (size_t i = 0; i < a.getWidth(); i++) { - for (size_t j = a.getColStartIdx(i); j < a.getColStartIdx(i + 1); j++) { - if (a.getValueType() == FLOAT_VALUE) { - ASSERT_FLOAT_EQ(a.getValue()[j], b.getElement(rows[j], i)); - } else { - ASSERT_FLOAT_EQ(1.0, b.getElement(rows[j], i)); - } - } - } - } else { - int* cols = a.getCols(); - for (size_t i = 0; i < a.getHeight(); i++) { - for (size_t j = a.getRowStartIdx(i); j < a.getRowStartIdx(i + 1); j++) { - if (a.getValueType() == FLOAT_VALUE) { - ASSERT_FLOAT_EQ(a.getValue()[j], b.getElement(i, cols[j])); - } else { - ASSERT_FLOAT_EQ(1.0, b.getElement(i, cols[j])); - } - } - } - } -} - -void checkSMatrixEqual2Dense(const CpuSparseMatrixPtr& a, - const CpuMatrixPtr& b) { - ASSERT_EQ(a->getWidth(), b->getWidth()); - ASSERT_EQ(a->getHeight(), b->getHeight()); - ASSERT_EQ(a->isTransposed(), b->isTransposed()); - - if (a->getFormat() == SPARSE_CSC) { - int* rows = a->getRows(); - 
for (size_t i = 0; i < a->getWidth(); i++) { - for (size_t j = a->getColStartIdx(i); j < a->getColStartIdx(i + 1); j++) { - if (a->getValueType() == FLOAT_VALUE) { - ASSERT_FLOAT_EQ(a->getValue()[j], b->getElement(rows[j], i)); - } else { - ASSERT_FLOAT_EQ(1.0, b->getElement(rows[j], i)); - } - } - } - } else { - int* cols = a->getCols(); - for (size_t i = 0; i < a->getHeight(); i++) { - for (size_t j = a->getRowStartIdx(i); j < a->getRowStartIdx(i + 1); j++) { - if (a->getValueType() == FLOAT_VALUE) { - ASSERT_FLOAT_EQ(a->getValue()[j], b->getElement(i, cols[j])); - } else { - ASSERT_FLOAT_EQ(1.0, b->getElement(i, cols[j])); - } - } - } - } -} - -void checkSMatrixErr(const CpuSparseMatrixPtr& a, const CpuSparseMatrixPtr& b) { -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - ASSERT_EQ(a->getWidth(), b->getWidth()); - ASSERT_EQ(a->getHeight(), b->getHeight()); - ASSERT_EQ(a->isTransposed(), b->isTransposed()); - ASSERT_EQ(a->getFormat(), b->getFormat()); - ASSERT_EQ(a->getValueType(), b->getValueType()); - ASSERT_EQ(a->getElementCnt(), b->getElementCnt()); - int count = 0; - if (a->getFormat() == SPARSE_CSR) { - for (size_t r = 0; r < a->getElementCnt(); ++r) { - ASSERT_EQ(a->getCols()[r], b->getCols()[r]); - if (a->getValueType() == FLOAT_VALUE) { - real aVal = a->getValue()[r]; - real bVal = b->getValue()[r]; - if (std::abs(aVal - bVal) > err) { - if ((std::abs(aVal - bVal) / std::abs(aVal)) > (err / 10.0f)) { - LOG(INFO) << "a=" << aVal << "\t" - << "b=" << bVal; - count++; - } - } - } - } - for (size_t r = 0; r <= a->getHeight(); r++) { - ASSERT_EQ(a->getRows()[r], b->getRows()[r]); - } - } else { - for (size_t r = 0; r < a->getElementCnt(); ++r) { - ASSERT_EQ(a->getRows()[r], b->getRows()[r]); - if (a->getValueType() == FLOAT_VALUE) { - real aVal = a->getValue()[r]; - real bVal = b->getValue()[r]; - if (std::abs(aVal - bVal) > err) { - if ((std::abs(aVal - bVal) / std::abs(aVal)) > (err / 10.0f)) { - count++; - } - } - } - } - 
for (size_t r = 0; r <= a->getWidth(); r++) { - ASSERT_EQ(a->getCols()[r], b->getCols()[r]); - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -void checkMatrixErr(const Matrix& matrix1, const Matrix& matrix2) { - CHECK(matrix1.getHeight() == matrix2.getHeight()); - CHECK(matrix1.getWidth() == matrix2.getWidth()); -#ifndef PADDLE_TYPE_DOUBLE - real err = 1e-3; -#else - real err = 1e-10; -#endif - - int height = matrix1.getHeight(); - int width = matrix1.getWidth(); - const real* data1 = matrix1.getData(); - const real* data2 = matrix2.getData(); - int count = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - real a = data1[i * width + j]; - real b = data2[i * width + j]; - if (std::abs(a - b) > err) { - if ((std::abs(a - b) / std::abs(a)) > (err / 10.0f)) { - count++; - } - } - } - } - EXPECT_EQ(count, 0) << "There are " << count << " different element."; -} - -void checkDataEqual(const real* a, const real* b, size_t size) { - for (size_t i = 0; i < size; ++i) { - ASSERT_FLOAT_EQ(a[i], b[i]); - } -} - -} // namespace paddle diff --git a/paddle/legacy/math/tests/test_perturbation.cpp b/paddle/legacy/math/tests/test_perturbation.cpp deleted file mode 100644 index 969400666f12e4c6001f270be3ec144e7e4d0702..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_perturbation.cpp +++ /dev/null @@ -1,318 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_CUDA - -#include -#include -#include -#include -#include "hl_cuda.h" -#include "hl_perturbation_util.cuh" - -using namespace std; // NOLINT - -#define _USE_MATH_DEFINES - -const int NUM_IMAGES = 2; -const int SAMPLING_RATE = 2; -const int IMG_SIZE = 41; -const int TGT_SIZE = 21; -const int CHANNELS = 3; - -class PerturbationTest : public testing::Test { - protected: - virtual void SetUp() { generateTestImages(gpuImages_); } - - virtual void TearDown() {} - - void allocateMem(real*& gpuAngle, - real*& gpuScale, - int*& gpuCenterR, - int*& gpuCenterC) { - gpuAngle = (real*)hl_malloc_device(sizeof(real) * NUM_IMAGES); - gpuScale = (real*)hl_malloc_device(sizeof(real) * NUM_IMAGES); - gpuCenterR = - (int*)hl_malloc_device(sizeof(int) * NUM_IMAGES * SAMPLING_RATE); - gpuCenterC = - (int*)hl_malloc_device(sizeof(int) * NUM_IMAGES * SAMPLING_RATE); - } - - // Generate translation parameters for testing. - void generateTranslationParams(int*& gpuCenterR, - int*& gpuCenterC, - int imgSize) { - int cpuCenterR[NUM_IMAGES * SAMPLING_RATE]; - int cpuCenterC[NUM_IMAGES * SAMPLING_RATE]; - for (int i = 0; i < NUM_IMAGES * SAMPLING_RATE; ++i) { - cpuCenterR[i] = (imgSize - 1) / 2; - cpuCenterC[i] = (imgSize - 1) / 2 - 1; - } - - gpuCenterR = - (int*)hl_malloc_device(sizeof(int) * NUM_IMAGES * SAMPLING_RATE); - hl_memcpy_host2device( - gpuCenterR, cpuCenterR, sizeof(int) * NUM_IMAGES * SAMPLING_RATE); - - gpuCenterC = - (int*)hl_malloc_device(sizeof(int) * NUM_IMAGES * SAMPLING_RATE); - hl_memcpy_host2device( - gpuCenterC, cpuCenterC, sizeof(int) * NUM_IMAGES * SAMPLING_RATE); - } - - // Generate rotation parameters for testing. 
- void generateRotationParams(real*& gpuAngle) { - real cpuAngle[NUM_IMAGES]; - for (int i = 0; i < NUM_IMAGES; ++i) { - cpuAngle[i] = 90.0 * M_PI / 180.0; - } - gpuAngle = (real*)hl_malloc_device(sizeof(real) * NUM_IMAGES); - hl_memcpy_host2device(gpuAngle, cpuAngle, sizeof(real) * NUM_IMAGES); - } - - void generateScaleParams(real*& gpuScale) { - real cpuScale[NUM_IMAGES]; - for (int i = 0; i < NUM_IMAGES; ++i) { - cpuScale[i] = static_cast(TGT_SIZE - 2) / TGT_SIZE; - } - gpuScale = (real*)hl_malloc_device(sizeof(real) * NUM_IMAGES); - hl_memcpy_host2device(gpuScale, cpuScale, sizeof(real) * NUM_IMAGES); - } - - // Generate the test images, only the center regions are set to 1. - // The other parts are set to 0. - void generateTestImages(real*& gpuImages) { - const int IMAGE_MEM_SIZE = NUM_IMAGES * IMG_SIZE * IMG_SIZE * CHANNELS; - real cpuImages[IMAGE_MEM_SIZE]; - // Set the middle of each image to 1. - real* ptr = cpuImages; - for (int i = 0; i < NUM_IMAGES; ++i) { - for (int r = 0; r < IMG_SIZE; ++r) { - for (int c = 0; c < IMG_SIZE; ++c) { - for (int ch = 0; ch < CHANNELS; ++ch) { - if (r >= IMG_SIZE / 4 && r < IMG_SIZE - IMG_SIZE / 4 && - c >= IMG_SIZE / 4 && c < IMG_SIZE - IMG_SIZE / 4) { - *ptr = 1.0; - } else { - *ptr = 0.0; - } - ++ptr; - } - } - } - } - gpuImages = (real*)hl_malloc_device(sizeof(real) * IMAGE_MEM_SIZE); - hl_memcpy_host2device(gpuImages, cpuImages, sizeof(real) * IMAGE_MEM_SIZE); - } - - real* gpuImages_; -}; - -// Random perturbation. Only to make sure the code does not break. 
-TEST_F(PerturbationTest, random_perturb) { - real *gpuAngle, *gpuScaleRatio; - int *gpuCenterR, *gpuCenterC; - allocateMem(gpuAngle, gpuScaleRatio, gpuCenterR, gpuCenterC); - - real* targets = NULL; - const int TARGET_MEM_SIZE = - NUM_IMAGES * SAMPLING_RATE * TGT_SIZE * TGT_SIZE * CHANNELS; - targets = (real*)hl_malloc_device(sizeof(real) * TARGET_MEM_SIZE); - hl_conv_random_disturb(gpuImages_, - IMG_SIZE, - TGT_SIZE, - CHANNELS, - NUM_IMAGES, - 1.0, - 1.0, - SAMPLING_RATE, - gpuAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - 2, - true, - targets); - real cpuTargets[TARGET_MEM_SIZE]; - hl_memcpy_device2host(cpuTargets, targets, sizeof(real) * TARGET_MEM_SIZE); -} - -TEST_F(PerturbationTest, identity_perturb) { - real *gpuAngle, *gpuScaleRatio; - int *gpuCenterR, *gpuCenterC; - allocateMem(gpuAngle, gpuScaleRatio, gpuCenterR, gpuCenterC); - - real* targets = NULL; - const int TARGET_MEM_SIZE = - NUM_IMAGES * SAMPLING_RATE * TGT_SIZE * TGT_SIZE * CHANNELS; - targets = (real*)hl_malloc_device(sizeof(real) * TARGET_MEM_SIZE); - hl_conv_random_disturb(gpuImages_, - IMG_SIZE, - TGT_SIZE, - CHANNELS, - NUM_IMAGES, - 1.0, - 1.0, - SAMPLING_RATE, - gpuAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - 2, - false, - targets); - real cpuTargets[TARGET_MEM_SIZE]; - hl_memcpy_device2host(cpuTargets, targets, sizeof(real) * TARGET_MEM_SIZE); - for (int i = 0; i < TARGET_MEM_SIZE; ++i) { - EXPECT_FLOAT_EQ(1.0, cpuTargets[i]); - } -} - -TEST_F(PerturbationTest, translation_test) { - real *gpuAngle, *gpuScaleRatio; - int *gpuCenterR, *gpuCenterC; - allocateMem(gpuAngle, gpuScaleRatio, gpuCenterR, gpuCenterC); - hl_generate_disturb_params(gpuAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - NUM_IMAGES, - IMG_SIZE, - 0.0, - 0.0, - SAMPLING_RATE, - false); - generateTranslationParams(gpuCenterR, gpuCenterC, IMG_SIZE); - - real* targets = NULL; - const int TARGET_MEM_SIZE = - NUM_IMAGES * SAMPLING_RATE * TGT_SIZE * TGT_SIZE * CHANNELS; - targets = 
(real*)hl_malloc_device(sizeof(real) * TARGET_MEM_SIZE); - hl_conv_random_disturb_with_params(gpuImages_, - IMG_SIZE, - TGT_SIZE, - CHANNELS, - NUM_IMAGES, - SAMPLING_RATE, - gpuAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - 2, - targets); - - real cpuTargets[TARGET_MEM_SIZE]; - hl_memcpy_device2host(cpuTargets, targets, sizeof(real) * TARGET_MEM_SIZE); - for (int i = 0; i < SAMPLING_RATE * NUM_IMAGES; ++i) { - for (int p = 0; p < TGT_SIZE * TGT_SIZE * CHANNELS; ++p) { - const int offset = i * TGT_SIZE * TGT_SIZE * CHANNELS + p; - if (p < TGT_SIZE * CHANNELS) { - EXPECT_FLOAT_EQ(0.0, cpuTargets[offset]); - } else { - EXPECT_FLOAT_EQ(1.0, cpuTargets[offset]); - } - } - } -} - -TEST_F(PerturbationTest, rotation_test) { - real *gpuAngle, *gpuScaleRatio; - int *gpuCenterR, *gpuCenterC; - allocateMem(gpuAngle, gpuScaleRatio, gpuCenterR, gpuCenterC); - hl_generate_disturb_params(gpuAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - NUM_IMAGES, - IMG_SIZE, - 0.0, - 0.0, - SAMPLING_RATE, - false); - generateRotationParams(gpuAngle); - - real* targets = NULL; - const int TARGET_MEM_SIZE = - NUM_IMAGES * SAMPLING_RATE * TGT_SIZE * TGT_SIZE * CHANNELS; - targets = (real*)hl_malloc_device(sizeof(real) * TARGET_MEM_SIZE); - hl_conv_random_disturb_with_params(gpuImages_, - IMG_SIZE, - TGT_SIZE, - CHANNELS, - NUM_IMAGES, - SAMPLING_RATE, - gpuAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - 2, - targets); - - real cpuTargets[TARGET_MEM_SIZE]; - hl_memcpy_device2host(cpuTargets, targets, sizeof(real) * TARGET_MEM_SIZE); - for (int i = 0; i < TARGET_MEM_SIZE; ++i) { - EXPECT_FLOAT_EQ(1.0, cpuTargets[i]); - } -} - -TEST_F(PerturbationTest, scale_test) { - real *gpuAngle, *gpuScaleRatio; - int *gpuCenterR, *gpuCenterC; - allocateMem(gpuAngle, gpuScaleRatio, gpuCenterR, gpuCenterC); - hl_generate_disturb_params(gpuAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - NUM_IMAGES, - IMG_SIZE, - 0.0, - 0.0, - SAMPLING_RATE, - false); - generateScaleParams(gpuScaleRatio); - 
- real* targets = NULL; - const int TARGET_MEM_SIZE = - NUM_IMAGES * SAMPLING_RATE * TGT_SIZE * TGT_SIZE * CHANNELS; - targets = (real*)hl_malloc_device(sizeof(real) * TARGET_MEM_SIZE); - hl_conv_random_disturb_with_params(gpuImages_, - IMG_SIZE, - TGT_SIZE, - CHANNELS, - NUM_IMAGES, - SAMPLING_RATE, - gpuAngle, - gpuScaleRatio, - gpuCenterR, - gpuCenterC, - 2, - targets); - - real cpuTargets[TARGET_MEM_SIZE]; - hl_memcpy_device2host(cpuTargets, targets, sizeof(real) * TARGET_MEM_SIZE); - for (int i = 0; i < SAMPLING_RATE * NUM_IMAGES; ++i) { - for (int p = 0; p < TGT_SIZE * TGT_SIZE * CHANNELS; ++p) { - const int offset = i * TGT_SIZE * TGT_SIZE * CHANNELS + p; - int c = (p / CHANNELS) % TGT_SIZE; - int r = (p / CHANNELS) / TGT_SIZE; - if (r == 0 || r == TGT_SIZE - 1 || c == 0 || c == TGT_SIZE - 1) { - EXPECT_FLOAT_EQ(0.0, cpuTargets[offset]); - } else { - EXPECT_FLOAT_EQ(1.0, cpuTargets[offset]); - } - } - } -} - -#endif diff --git a/paddle/legacy/math/tests/test_sparseMatrixCompare.cpp b/paddle/legacy/math/tests/test_sparseMatrixCompare.cpp deleted file mode 100644 index 492aa0a689540dbb2c687326ff8a2919d89d2e6f..0000000000000000000000000000000000000000 --- a/paddle/legacy/math/tests/test_sparseMatrixCompare.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef PADDLE_WITH_CUDA -/// This unittest checks GpuSparseMatrix/CpuSparseMatrix get same result, -// so disable when -/// only cpu version. - -#include -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/utils/Util.h" -#include "test_matrixUtil.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -static inline int uniformRandom(int n) { return n == 0 ? 0 : rand() % n; } - -void testSpMatrixAddBias(int M, int N, real rate, real scale) { - int nnz = M * N * rate; - - MatrixPtr cpuA(new CpuSparseMatrix(M, N, nnz)); - MatrixPtr cpuB = std::make_shared(1, N); - - MatrixPtr gpuA(new GpuSparseMatrix(M, N, nnz)); - MatrixPtr gpuB = std::make_shared(1, N); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - - hl_stream_t stream(HPPL_STREAM_1); - gpuA->copyFrom(*cpuA, stream); - gpuB->copyFrom(*cpuB, stream); - hl_stream_synchronize(stream); - - cpuA->addBias(*cpuB, scale); - gpuA->addBias(*gpuB, scale); - - MatrixPtr outputCheck(new CpuSparseMatrix(M, N, nnz)); - outputCheck->copyFrom(*gpuA, stream); - hl_stream_synchronize(stream); - checkSMatrixEqual2(std::dynamic_pointer_cast(cpuA), - std::dynamic_pointer_cast(outputCheck)); -} - -void testSpMatrixAddDense(int M, int N, real rate) { // add3 - int nnz = M * N * rate; - - MatrixPtr cpuA(new CpuSparseMatrix(M, N, nnz)); - MatrixPtr cpuB = std::make_shared(M, N); - - MatrixPtr gpuA(new GpuSparseMatrix(M, N, nnz)); - MatrixPtr gpuB = std::make_shared(M, N); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - - hl_stream_t stream(HPPL_STREAM_3); - gpuA->copyFrom(*cpuA, stream); - gpuB->copyFrom(*cpuB, stream); - hl_stream_synchronize(stream); - - cpuA->add3(cpuB); - gpuA->add3(gpuB); - - MatrixPtr outputCheck(new CpuSparseMatrix(M, N, nnz)); - outputCheck->copyFrom(*gpuA, stream); - hl_stream_synchronize(stream); - checkSMatrixEqual2(std::dynamic_pointer_cast(cpuA), - std::dynamic_pointer_cast(outputCheck)); -} - -void testSpMatrixMul(int M, int N, int K, real rate) { 
- int nnz = M * N * rate; - - MatrixPtr cpuA = std::make_shared(M, K); - MatrixPtr cpuB = std::make_shared(N, K); - MatrixPtr cpuC(new CpuSparseMatrix(M, N, nnz)); - - MatrixPtr gpuA = std::make_shared(M, K); - MatrixPtr gpuB = std::make_shared(N, K); - MatrixPtr gpuC(new GpuSparseMatrix(M, N, nnz)); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - cpuC->randomizeUniform(); - - hl_stream_t stream(HPPL_STREAM_3); - gpuA->copyFrom(*cpuA, stream); - gpuB->copyFrom(*cpuB, stream); - gpuC->copyFrom(*cpuC, stream); - hl_stream_synchronize(stream); - - cpuC->mul(*cpuA, *cpuB->getTranspose(), 1, 1); - gpuC->mul(*gpuA, *gpuB->getTranspose(), 1, 1); - - MatrixPtr outputCheck(new CpuSparseMatrix(M, N, nnz)); - outputCheck->copyFrom(*gpuC, stream); - hl_stream_synchronize(stream); - checkSMatrixErr(std::dynamic_pointer_cast(cpuC), - std::dynamic_pointer_cast(outputCheck)); -} - -void testSpMatrixCollectBias(int M, int N, real rate) { - int nnz = M * N * rate; - LOG(INFO) << "nnz=" << nnz; - - MatrixPtr cpuA(new CpuSparseMatrix(M, N, nnz)); - MatrixPtr cpuB = std::make_shared(1, N); - - MatrixPtr gpuA(new GpuSparseMatrix(M, N, nnz)); - MatrixPtr gpuB = std::make_shared(1, N); - - cpuA->randomizeUniform(); - cpuB->randomizeUniform(); - - hl_stream_t stream(HPPL_STREAM_3); - gpuA->copyFrom(*cpuA, stream); - gpuB->copyFrom(*cpuB, stream); - hl_stream_synchronize(stream); - - cpuB->collectBias(*cpuA, 1); - gpuB->collectBias(*gpuA, 1); - - MatrixPtr outputCheck = std::make_shared(1, N); - outputCheck->copyFrom(*gpuB, stream); - hl_stream_synchronize(stream); - checkMatrixErr(*cpuB, *outputCheck); -} - -TEST(SMatrix, sMatrixOp) { - for (auto height : {1, 11, 200}) { - for (auto width : {200, 2048, 20480}) { - VLOG(3) << " height=" << height << " width=" << width; - for (auto rate : {0.02, 0.1}) { - testSpMatrixAddDense(height, width, rate); - testSpMatrixAddBias(height, width, rate, 1.0); - } - } - } -} - -TEST(SMatrix, sMatrixMul) { - for (auto M : {1, 40, 128, 200}) { - 
for (auto N : {100, 2000, 20480}) { - for (auto K : {100, 512, 1024}) { - VLOG(3) << " M=" << M << " N=" << N << " K=" << K; - testSpMatrixMul(M, N, K, 0.05); - } - } - } -} - -TEST(SMatrix, sMatrixCollectBias) { - for (auto height : {1, 128, 200}) { - for (auto width : {100, 2048, 20480}) { - VLOG(3) << " height=" << height << " width=" << width; - testSpMatrixCollectBias(height, width, 0.1); - } - } -} - -#endif diff --git a/paddle/legacy/optimizer/CMakeLists.txt b/paddle/legacy/optimizer/CMakeLists.txt deleted file mode 100644 index 7c80faa48ce960a3a7eb7d88eda4f2b09756410e..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -set(OPITMIZER_SRCS - adadelta_optimizer.cc - adagrad_optimizer.cc - adam_optimizer.cc - optimizer.cc - parameter_optimizer.cc - sgd_optimizer.cc - ) - -add_library(paddle_optimizer ${OPITMIZER_SRCS}) -target_link_libraries(paddle_optimizer paddle_proto glog) - -if (WITH_TESTING) - add_unittest(serialization_test serialization_test.cc) - add_unittest(parameter_optimizer_test parameter_optimizer_test.cc) -endif() diff --git a/paddle/legacy/optimizer/adadelta_optimizer.cc b/paddle/legacy/optimizer/adadelta_optimizer.cc deleted file mode 100644 index 1faeb0cd31e4a748331d5c5c3569df89bcdd4600..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/adadelta_optimizer.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "adadelta_optimizer.h" -#include -#include - -namespace paddle { -namespace optimizer { - -void AdadeltaOptimizer::Update(const Tensor* gradient) { - num_sample_passed_ += 1; - double learning_rate = lr_policy_->LearningRate(num_sample_passed_); - Tensor& param = *parameter_; - const Tensor& grad = *gradient; - Tensor& accum_g = *accum_gradient_; - Tensor& accum_d = *accum_delta_; - Tensor& update_d = *update_delta_; - for (size_t i = 0; i < param.size(); ++i) { - accum_g[i] = rho_ * accum_g[i] + (1.0 - rho_) * grad[i] * grad[i]; - - update_d[i] = std::sqrt(accum_d[i] + epsilon_) / - std::sqrt(accum_g[i] + epsilon_) * grad[i]; - - accum_d[i] = rho_ * accum_d[i] + (1.0 - rho_) * update_d[i] * update_d[i]; - - param[i] -= learning_rate * update_d[i] + learning_rate * decay_ * param[i]; - } -} - -std::string AdadeltaOptimizer::SerializeState() { - AdadeltaOptimizerState state; - state.set_num_sample_passed(num_sample_passed_); - std::string lr_str = this->lr_policy_->SerializeState(); - state.mutable_lr_state()->ParseFromString(lr_str); - - TensorToProto(*parameter_, state.mutable_parameter()); - TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); - TensorToProto(*accum_delta_, state.mutable_accum_delta()); - TensorToProto(*update_delta_, state.mutable_update_delta()); - return state.SerializeAsString(); -} - -void AdadeltaOptimizer::DeserializeState(const std::string& str) { - AdadeltaOptimizerState state; - state.ParseFromString(str); - auto lr_state = state.lr_state(); - this->lr_policy_->DeserializeState(lr_state.SerializeAsString()); - num_sample_passed_ = state.num_sample_passed(); - - ProtoToTensor(state.parameter(), parameter_); - ProtoToTensor(state.accum_gradient(), accum_gradient_); - ProtoToTensor(state.accum_delta(), accum_delta_); - ProtoToTensor(state.update_delta(), update_delta_); -} - -} // namespace optimizer -} // 
namespace paddle diff --git a/paddle/legacy/optimizer/adadelta_optimizer.h b/paddle/legacy/optimizer/adadelta_optimizer.h deleted file mode 100644 index 5beb62295a83ba4826e9a6b9caf21de78d2e8ced..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/adadelta_optimizer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "parameter_optimizer.h" - -namespace paddle { -namespace optimizer { - -class AdadeltaOptimizer : public ParameterOptimizer { - public: - AdadeltaOptimizer( - Tensor *parameter, LrPolicy *lr, double rho, double epsilon, double decay) - : ParameterOptimizer(parameter, lr), - accum_gradient_(new Tensor(parameter->size())), - accum_delta_(new Tensor(parameter->size())), - update_delta_(new Tensor(parameter->size())), - rho_(rho), - epsilon_(epsilon), - decay_(decay) {} - - ~AdadeltaOptimizer() { - if (accum_gradient_) delete accum_gradient_; - if (accum_delta_) delete accum_delta_; - if (update_delta_) delete update_delta_; - } - void Update(const Tensor *gradient); - std::string SerializeState(); - void DeserializeState(const std::string &state); - - private: - Tensor *accum_gradient_; - Tensor *accum_delta_; - Tensor *update_delta_; - double rho_; - double epsilon_; - double decay_; -}; - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/adagrad_optimizer.cc 
b/paddle/legacy/optimizer/adagrad_optimizer.cc deleted file mode 100644 index 5ac65dbd72092679575edf5e2eb357c0f6609e34..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/adagrad_optimizer.cc +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "adagrad_optimizer.h" - -namespace paddle { -namespace optimizer { - -void AdagradOptimizer::Update(const Tensor* gradient) { - num_sample_passed_ += 1; - double learning_rate = lr_policy_->LearningRate(num_sample_passed_); - Tensor& param = *parameter_; - Tensor& accum_g = *accum_gradient_; - const Tensor& grad = *gradient; - for (size_t i = 0; i < param.size(); ++i) { - accum_g[i] += grad[i] * grad[i]; - param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon_) + - learning_rate * decay_ * param[i]; - } -} -std::string AdagradOptimizer::SerializeState() { - AdagradOptimizerState state; - state.set_num_sample_passed(num_sample_passed_); - std::string lr_str = this->lr_policy_->SerializeState(); - state.mutable_lr_state()->ParseFromString(lr_str); - - TensorToProto(*parameter_, state.mutable_parameter()); - TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); - return state.SerializeAsString(); -} - -void AdagradOptimizer::DeserializeState(const std::string& str) { - AdagradOptimizerState state; - state.ParseFromString(str); - auto lr_state = state.lr_state(); - 
this->lr_policy_->DeserializeState(lr_state.SerializeAsString()); - - num_sample_passed_ = state.num_sample_passed(); - ProtoToTensor(state.parameter(), parameter_); - ProtoToTensor(state.accum_gradient(), accum_gradient_); -} - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/adagrad_optimizer.h b/paddle/legacy/optimizer/adagrad_optimizer.h deleted file mode 100644 index b6fc06739970984cf4bbd27d3e6e1e9066bc350f..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/adagrad_optimizer.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "parameter_optimizer.h" - -namespace paddle { -namespace optimizer { - -class AdagradOptimizer : public ParameterOptimizer { - public: - AdagradOptimizer(Tensor *parameter, - LrPolicy *lr, - double epsilon, - double decay) - : ParameterOptimizer(parameter, lr), - accum_gradient_(new Tensor(parameter->size())), - epsilon_(epsilon), - decay_(decay) {} - ~AdagradOptimizer() { - if (accum_gradient_) delete accum_gradient_; - } - void Update(const Tensor *gradient); - std::string SerializeState(); - void DeserializeState(const std::string &state); - - private: - Tensor *accum_gradient_; - double epsilon_; - double decay_; -}; - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/adam_optimizer.cc b/paddle/legacy/optimizer/adam_optimizer.cc deleted file mode 100644 index 9a4ff5ecc0f93c06b20dfd91d4fbcf6e81fa066c..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/adam_optimizer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "adam_optimizer.h" -#include - -namespace paddle { -namespace optimizer { - -void AdamOptimizer::Update(const Tensor *gradient) { - num_sample_passed_ += 1; - double learning_rate = lr_policy_->LearningRate(num_sample_passed_); - double coef1 = 1.0 - std::pow(beta_1_, num_sample_passed_); - double coef2 = 1.0 - std::pow(beta_2_, num_sample_passed_); - learning_rate *= std::sqrt(coef2) / coef1; - Tensor ¶m = *parameter_; - const Tensor &grad = *gradient; - Tensor &m = *momentums_; - Tensor &v = *velocitys_; - for (size_t i = 0; i < param.size(); ++i) { - m[i] = beta_1_ * m[i] + (1.0 - beta_1_) * grad[i]; - v[i] = beta_2_ * v[i] + (1.0 - beta_2_) * grad[i] * grad[i]; - param[i] -= - learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]); - } -} - -std::string AdamOptimizer::SerializeState() { - AdamOptimizerState state; - std::string lr_str = this->lr_policy_->SerializeState(); - state.mutable_lr_state()->ParseFromString(lr_str); - state.set_num_sample_passed(num_sample_passed_); - - TensorToProto(*parameter_, state.mutable_parameter()); - TensorToProto(*momentums_, state.mutable_momentums()); - TensorToProto(*velocitys_, state.mutable_velocitys()); - return state.SerializeAsString(); -} - -void AdamOptimizer::DeserializeState(const std::string &str) { - AdamOptimizerState state; - state.ParseFromString(str); - auto lr_state = state.lr_state(); - this->lr_policy_->DeserializeState(lr_state.SerializeAsString()); - num_sample_passed_ = state.num_sample_passed(); - - ProtoToTensor(state.parameter(), parameter_); - ProtoToTensor(state.momentums(), momentums_); - ProtoToTensor(state.velocitys(), velocitys_); -} -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/adam_optimizer.h b/paddle/legacy/optimizer/adam_optimizer.h deleted file mode 100644 index fce10960068364b40592b26a6b439494d75cfa03..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/adam_optimizer.h +++ /dev/null @@ -1,55 +0,0 
@@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "parameter_optimizer.h" - -namespace paddle { -namespace optimizer { - -class AdamOptimizer : public ParameterOptimizer { - public: - AdamOptimizer(Tensor *parameter, - LrPolicy *lr, - double beta_1, - double beta_2, - double epsilon, - double decay) - : ParameterOptimizer(parameter, lr), - momentums_(new Tensor(parameter->size())), - velocitys_(new Tensor(parameter->size())), - beta_1_(beta_1), - beta_2_(beta_2), - epsilon_(epsilon), - decay_(decay) {} - ~AdamOptimizer() { - if (momentums_) delete momentums_; - if (velocitys_) delete velocitys_; - } - void Update(const Tensor *gradient); - std::string SerializeState(); - void DeserializeState(const std::string &state); - - private: - Tensor *momentums_; - Tensor *velocitys_; - double beta_1_; - double beta_2_; - double epsilon_; - double decay_; -}; - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/lr_policy.h b/paddle/legacy/optimizer/lr_policy.h deleted file mode 100644 index d639c9f22c8ad77267f68e2c3b35257211bf90df..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/lr_policy.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include -#include "OptimizerConfig.pb.h" - -namespace paddle { -namespace optimizer { - -class LrPolicy { - public: - virtual ~LrPolicy() {} - virtual double LearningRate(const uint64_t num_sample_passed) = 0; - virtual std::string SerializeState() = 0; - virtual void DeserializeState(const std::string &state) = 0; -}; - -// constant learning rate policy -class ConstLr final : public LrPolicy { - public: - ConstLr(double lr) : learning_rate_(lr){}; - double LearningRate(const uint64_t num_sample_passed) { - return learning_rate_; - } - std::string SerializeState() { - LrPolicyState state; - state.set_learning_rate(learning_rate_); - return state.SerializeAsString(); - } - void DeserializeState(const std::string &str) { - LrPolicyState state; - state.ParseFromString(str); - learning_rate_ = state.learning_rate(); - } - - private: - double learning_rate_; -}; - -class LinearLr final : public LrPolicy { - public: - LinearLr(double lr, double lr_decay_a, double lr_decay_b) - : learning_rate_(lr), lr_decay_a_(lr_decay_a), lr_decay_b_(lr_decay_b) {} - double LearningRate(const uint64_t num_sample_passed) { - return std::max(learning_rate_ - lr_decay_a_ * num_sample_passed, - lr_decay_b_); - } - std::string SerializeState() { - LrPolicyState state; - state.set_learning_rate(learning_rate_); - state.set_lr_decay_a(lr_decay_a_); - state.set_lr_decay_b(lr_decay_b_); - return state.SerializeAsString(); - } - void DeserializeState(const std::string &str) { - LrPolicyState state; - state.ParseFromString(str); - learning_rate_ = 
state.learning_rate(); - lr_decay_a_ = state.lr_decay_a(); - lr_decay_b_ = state.lr_decay_b(); - } - - private: - double learning_rate_; - double lr_decay_a_; - double lr_decay_b_; -}; - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/optimizer.cc b/paddle/legacy/optimizer/optimizer.cc deleted file mode 100644 index e583aebd77a07b770bc6726393836c212e3f02fc..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/optimizer.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "optimizer.h" -#include -#include -#include -#include - -#include "parameter_optimizer.h" - -using paddle::optimizer::ParameterOptimizer; -using paddle::optimizer::Tensor; - -template -struct EnumToType {}; - -template -struct TypeToEnum {}; - -#define MATCH_ENUM_TYPE(TYPE, ENUM) \ - template <> \ - struct TypeToEnum { \ - static paddle_element_type v() { return ENUM; } \ - static constexpr TYPE value = ENUM; \ - }; \ - template <> \ - struct EnumToType { \ - typedef TYPE Type; \ - } - -MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32); -MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32); -MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64); -MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64); -MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32); -MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64); - -struct paddle_optimizer { - paddle::optimizer::ParameterOptimizer* impl; -}; - -paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto, - const int config_proto_len, - const paddle_element_type data_type, - void* param_buffer, - int num_bytes, - const char* state, - const int state_len) { - paddle_optimizer* optimizer = new paddle_optimizer; - std::string config(config_proto, config_proto + config_proto_len); - Tensor* parameter = new Tensor(reinterpret_cast(param_buffer), - num_bytes / sizeof(float)); - optimizer->impl = ParameterOptimizer::Create(config, parameter); - if (state != nullptr) { - std::string s(state, state + state_len); - optimizer->impl->DeserializeState(s); - } - return optimizer; -} - -int paddle_release_optimizer(paddle_optimizer* o) { - if (o != nullptr) delete o->impl; - return PADDLE_SUCCESS; -} - -int paddle_update_parameter(paddle_optimizer* o, - const paddle_element_type data_type, - const void* grad_buffer, - int num_bytes) { - // TOOD(zhihong): datatype not work. 
need to add the runtime datatype - auto grad_type = reinterpret_cast(grad_buffer); - Tensor* gradient = - new Tensor(const_cast(grad_type), num_bytes / sizeof(float)); - o->impl->Update(gradient); - return PADDLE_SUCCESS; -} - -int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer) { - int param_size = 0; - *param_buffer = (void*)o->impl->get_weight(¶m_size); - return param_size; -} - -int paddle_optimizer_get_state(paddle_optimizer* o, const char** state) { - std::string s = o->impl->SerializeState(); - int state_len = s.size(); - - if (state_len > 0) { - *state = (char*)std::malloc(state_len); - std::memcpy((void*)*state, (const void*)s.c_str(), state_len); - } - - return state_len; -} diff --git a/paddle/legacy/optimizer/optimizer.h b/paddle/legacy/optimizer/optimizer.h deleted file mode 100644 index c079de921fa1f77dca0b2c3da85decf67d627034..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/optimizer.h +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include - -/** - * @brief optimizer library in independent with other module - * which will be used in : - * Case A, the gradient optimized locally on the trainer. - * - * Case B, the gradient optimized on the parameter server. 
- */ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - PADDLE_ELEMENT_TYPE_INT32 = 0, - PADDLE_ELEMENT_TYPE_UINT32 = 1, - PADDLE_ELEMENT_TYPE_INT64 = 2, - PADDLE_ELEMENT_TYPE_UINT64 = 3, - PADDLE_ELEMENT_TYPE_FLOAT32 = 4, - PADDLE_ELEMENT_TYPE_FLOAT64 = 5, -} paddle_element_type; - -/** - * @brief execution status code - */ -const int32_t PADDLE_SUCCESS = 0; -const int32_t PADDLE_ERROR = -1; - -typedef struct paddle_optimizer paddle_optimizer; -/** - * this group interface called in order : - * 1. create optimizer with config - * 2. set weights - * 3. update_parameter - * 4. get_weights - * 5. release optimizer - */ - -/** - * @brief create optimizer with proto_config - * @param config_proto, optimizer protobuf, see OptimizerConfig.proto in detail - * @return return optimizer instance - */ -paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto, - const int config_proto_len, - const paddle_element_type data_type, - void* param_buffer, - int num_bytes, - const char* state, - const int state_len); - -/** - * @brief release optimizer - * @param optimizer - * @return return exec status - */ -int paddle_release_optimizer(paddle_optimizer* o); - -/** - * @brief optimizer instance - * @param datatype of gradient and parameter - * @param gradient, calculate by optimzizer caller. - * TODO(zhihong): just pass loss to reduce communicate overhead. 
- * Project Adam Ms'14 paper for detail - * @param num_bytes, gradient size - * @return return exec status - */ -int paddle_update_parameter(paddle_optimizer* o, - const paddle_element_type data_type, - const void* gradient, - int num_bytes); - -/** - * @brief optimizer for get parameter buffer - * @param param_buffer, initilized parameter buffer - * @return return content length - */ -int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer); - -/** - * @brief optimzizer for saving training state - * @param training state for receive SerializeState - * @return return state_buffer length - */ -int paddle_optimizer_get_state(paddle_optimizer* o, const char** state); - -#ifdef __cplusplus -} -#endif diff --git a/paddle/legacy/optimizer/parameter_optimizer.cc b/paddle/legacy/optimizer/parameter_optimizer.cc deleted file mode 100644 index f9474b315d519037be4beea2c6011d9e8366e0b1..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/parameter_optimizer.cc +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "adadelta_optimizer.h" -#include "adagrad_optimizer.h" -#include "adam_optimizer.h" -#include "lr_policy.h" -#include "sgd_optimizer.h" - -#include "parameter_optimizer.h" - -namespace paddle { -namespace optimizer { - -ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto, - Tensor *parameter) { - paddle::OptimizerConfig config; - CHECK(config.ParseFromString(config_proto) == true) - << "failed parse optimizer config"; - auto select_lr_policy = [=](const OptimizerConfig &config) -> LrPolicy * { - if (config.lr_policy() == OptimizerConfig::Const) - return new ConstLr(config.const_lr().learning_rate()); - if (config.lr_policy() == OptimizerConfig::Linear) - return new LinearLr(config.linear_lr().learning_rate(), - config.linear_lr().lr_decay_a(), - config.linear_lr().lr_decay_b()); - // default - LOG(WARNING) << " have not select any LrPolicy. use ConstLr in default"; - return new ConstLr(0.1); - }; - - LrPolicy *lr = select_lr_policy(config); - auto select_optimizer = [=]( - Tensor *parameter, - const OptimizerConfig &config) -> ParameterOptimizer * { - if (config.optimizer() == OptimizerConfig::SGD) { - LOG(INFO) << "creating SGD optimizer"; - return new SGDOptimizer(parameter, - lr, - config.sgd().momentum(), - config.sgd().decay(), - config.sgd().nesterov()); - } - if (config.optimizer() == OptimizerConfig::Adadelta) { - LOG(INFO) << "creating Adadelta optimizer"; - return new AdadeltaOptimizer(parameter, - lr, - config.adadelta().rho(), - config.adadelta().epsilon(), - config.adadelta().decay()); - } - if (config.optimizer() == OptimizerConfig::Adagrad) { - LOG(INFO) << "creating Adagrad optimizer"; - return new AdagradOptimizer( - parameter, lr, config.adagrad().epsilon(), config.adagrad().decay()); - } - if (config.optimizer() == OptimizerConfig::Adam) { - LOG(INFO) << "creating Adam optimizer"; - return new AdamOptimizer(parameter, - lr, - config.adam().beta_1(), - config.adam().beta_2(), - 
config.adam().epsilon(), - config.adam().decay()); - } - // default - LOG(WARNING) - << "have not select any Optimizer. use SGDOptimizer in default"; - return new SGDOptimizer(parameter, lr, 0.0, 0.0, false); - }; - return select_optimizer(parameter, config); -} - -float *ParameterOptimizer::get_weight(int *param_size) const { - *param_size = (int)parameter_->size(); - return parameter_->get_buffer(); -} - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/parameter_optimizer.h b/paddle/legacy/optimizer/parameter_optimizer.h deleted file mode 100644 index d5abca82d55c12aed0f4fca0c4c1f21d20586155..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/parameter_optimizer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include "OptimizerConfig.pb.h" -#include "lr_policy.h" -#include "serialization.h" -#include "tensor.h" - -namespace paddle { -namespace optimizer { - -class ParameterOptimizer { - public: - /** - * @brief update hook for algorithm need to traverse parameter more than - * once. 
- */ - ParameterOptimizer(Tensor *parameter, LrPolicy *lr) - : parameter_(parameter), lr_policy_(lr), num_sample_passed_(0) {} - virtual ~ParameterOptimizer() { - delete parameter_; - delete lr_policy_; - } - - static ParameterOptimizer *Create(const std::string &config_proto, - Tensor *parameter); - virtual void Update(const Tensor *gradient) = 0; - virtual float *get_weight(int *param_size) const; - virtual std::string SerializeState() = 0; - virtual void DeserializeState(const std::string &state) = 0; - - protected: - Tensor *parameter_; - // learning rate policy - LrPolicy *lr_policy_; - uint64_t num_sample_passed_; -}; - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/parameter_optimizer_test.cc b/paddle/legacy/optimizer/parameter_optimizer_test.cc deleted file mode 100644 index 1d9572999e9e0f10092eecbc1b41369a89629da7..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/parameter_optimizer_test.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "parameter_optimizer.h" -#include -#include -#include -#include "gtest/gtest.h" -#include "lr_policy.h" - -paddle::optimizer::Tensor* FillTensor(size_t size) { - paddle::optimizer::Tensor* param = new paddle::optimizer::Tensor(size); - paddle::optimizer::Tensor& p = *param; - for (size_t i = 0; i < p.size(); ++i) { - p[i] = (float)rand() / (float)RAND_MAX; - } - return param; -} - -paddle::optimizer::Tensor* FixedTensor(size_t size) { - paddle::optimizer::Tensor* param = new paddle::optimizer::Tensor(size); - paddle::optimizer::Tensor& p = *param; - for (size_t i = 0; i < p.size(); ++i) { - p[i] = i; - } - return param; -} - -class OptimizerTest : public testing::Test { - public: - virtual ~OptimizerTest() {} - // init paddle::optimizer::Tensor shape - const size_t kSize = 5; - - virtual void SetUp() { - CreateSGD(); - CreateAdam(); - } - virtual void TearDown() {} - - void CreateSGD() { - paddle::optimizer::Tensor* parameter = FixedTensor(kSize); - config_.set_optimizer(paddle::OptimizerConfig::SGD); - config_.mutable_sgd()->set_momentum(0.0); - config_.mutable_sgd()->set_decay(0.0); - config_.mutable_sgd()->set_nesterov(false); - config_.set_lr_policy(paddle::OptimizerConfig::Const); - config_.mutable_const_lr()->set_learning_rate(0.1); - std::string str = config_.SerializeAsString(); - paddle::optimizer::ParameterOptimizer* opt = - paddle::optimizer::ParameterOptimizer::Create(str, parameter); - opts_.push_back(opt); - } - - void CreateAdam() { - paddle::optimizer::Tensor* parameter = FixedTensor(kSize); - config_.set_optimizer(paddle::OptimizerConfig::Adam); - config_.mutable_adam()->set_beta_1(0.9); - config_.mutable_adam()->set_beta_2(0.1); - config_.mutable_adam()->set_epsilon(1e-3); - config_.mutable_adam()->set_decay(0.0); - config_.set_lr_policy(paddle::OptimizerConfig::Const); - config_.mutable_const_lr()->set_learning_rate(0.1); - std::string str = config_.SerializeAsString(); - paddle::optimizer::ParameterOptimizer* opt = - 
paddle::optimizer::ParameterOptimizer::Create(str, parameter); - opts_.push_back(opt); - } - - void TestGetWeight() { - paddle::optimizer::Tensor* p = FixedTensor(kSize); - for (size_t i = 0; i < opts_.size(); ++i) { - int s = 0; - float* newp = (float*)opts_[i]->get_weight(&s); - EXPECT_EQ(static_cast(s), kSize); - for (size_t j = 0; j < kSize; ++j) { - EXPECT_EQ(newp[j], (*p)[j]); - } - } - } - - void TestUpdate() { - paddle::optimizer::Tensor* g = FixedTensor(kSize); - for (size_t i = 0; i < opts_.size(); ++i) { - opts_[i]->Update(g); - } - } - - void TestCheckPoint() { - paddle::optimizer::Tensor* p = FixedTensor(kSize); - for (size_t i = 0; i < opts_.size(); ++i) { - auto state = opts_[i]->SerializeState(); - opts_[i]->DeserializeState(state); - auto state1 = opts_[i]->SerializeState(); - opts_[i]->DeserializeState(state); - EXPECT_EQ(state, state1); - - int s = 0; - float* newp = (float*)opts_[i]->get_weight(&s); - EXPECT_EQ(static_cast(s), kSize); - for (size_t j = 0; j < kSize; ++j) { - EXPECT_EQ(newp[j], (*p)[j]); - } - } - } - - private: - std::vector opts_; - paddle::OptimizerConfig config_; -}; - -TEST_F(OptimizerTest, TestGetWeight) { TestGetWeight(); } - -TEST_F(OptimizerTest, TestUpdate) { TestUpdate(); } - -TEST_F(OptimizerTest, TestCheckPoint) { TestCheckPoint(); } diff --git a/paddle/legacy/optimizer/serialization.h b/paddle/legacy/optimizer/serialization.h deleted file mode 100644 index 2067a8d8cff23bff975d23a4df4d0aa7df20b00f..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/serialization.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include "OptimizerConfig.pb.h" -#include "paddle/legacy/utils/Logging.h" -#include "tensor.h" - -namespace paddle { -namespace optimizer { - -static void TensorToProto(const Tensor& tensor, TensorProto* proto) { - proto->set_data_type(TensorProto::PADDLE_ELEMENT_TYPE_FLOAT32); - std::stringstream os; - for (size_t i = 0; i < tensor.size(); ++i) { - os << tensor[i]; - proto->add_content(os.str()); - os.str(std::string()); - } -} - -static void ProtoToTensor(const TensorProto& proto, Tensor* tensor) { - std::stringstream sin; - for (auto i = 0; i < proto.content_size(); ++i) { - sin << proto.content(i); - sin >> (*tensor)[i]; - sin.str(std::string()); - sin.clear(); - } -} - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/serialization_test.cc b/paddle/legacy/optimizer/serialization_test.cc deleted file mode 100644 index 93ee1f492f06d45614822cfa6acb41c962426df1..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/serialization_test.cc +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "serialization.h" -#include "gtest/gtest.h" - -TEST(TensorToProto, Case1) { - paddle::optimizer::Tensor t(3), t1(3); - for (size_t i = 0; i < t.size(); ++i) { - t[i] = i; - t1[i] = 10; - } - - paddle::TensorProto proto; - paddle::optimizer::TensorToProto(t, &proto); - paddle::optimizer::ProtoToTensor(proto, &t1); - for (size_t i = 0; i < t1.size(); ++i) { - EXPECT_EQ(t1[i], t[i]); - } -} - -TEST(TensorToProto, Case2) { - paddle::optimizer::Tensor t(1), t1(1); - for (size_t i = 0; i < t.size(); ++i) { - t[i] = i; - t1[i] = 10; - } - - paddle::TensorProto proto; - paddle::optimizer::TensorToProto(t, &proto); - paddle::optimizer::ProtoToTensor(proto, &t1); - for (size_t i = 0; i < t1.size(); ++i) { - EXPECT_EQ(t1[i], t[i]); - } -} diff --git a/paddle/legacy/optimizer/sgd_optimizer.cc b/paddle/legacy/optimizer/sgd_optimizer.cc deleted file mode 100644 index c1e2064de75f1c1be0503a4425fe4a691071731b..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/sgd_optimizer.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "sgd_optimizer.h" -#include "serialization.h" - -namespace paddle { -namespace optimizer { - -void SGDOptimizer::Update(const Tensor *gradient) { - num_sample_passed_ += 1; - double learning_rate = lr_policy_->LearningRate(num_sample_passed_); - float velocity = 0.0; - Tensor ¶m = *parameter_; - const Tensor &grad = *gradient; - Tensor &m = *momentums_; - for (size_t i = 0; i < param.size(); ++i) { - if (momentum_ == 0.0) { - velocity = -learning_rate * grad[i] - learning_rate * decay_ * param[i]; - } else { - m[i] = momentum_ * m[i] - learning_rate * grad[i] - - learning_rate * decay_ * param[i]; - velocity = m[i]; - } - if (nesterov_) { - param[i] += momentum_ * velocity - learning_rate * grad[i]; - } else { - param[i] += velocity; - } - } -} - -std::string SGDOptimizer::SerializeState() { - SGDOptimizerState state; - state.set_num_sample_passed(num_sample_passed_); - std::string lr_str = this->lr_policy_->SerializeState(); - state.mutable_lr_state()->ParseFromString(lr_str); - TensorToProto(*parameter_, state.mutable_parameter()); - if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums()); - return state.SerializeAsString(); -} - -void SGDOptimizer::DeserializeState(const std::string &str) { - SGDOptimizerState state; - state.ParseFromString(str); - auto lr_state = state.lr_state(); - this->lr_policy_->DeserializeState(lr_state.SerializeAsString()); - num_sample_passed_ = state.num_sample_passed(); - ProtoToTensor(state.parameter(), parameter_); - if (momentum_ != 0.0) ProtoToTensor(state.momentums(), momentums_); -} - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/sgd_optimizer.h b/paddle/legacy/optimizer/sgd_optimizer.h deleted file mode 100644 index a8957cde54abd6667143d2a8265d732c849294e3..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/sgd_optimizer.h +++ 
/dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "parameter_optimizer.h" - -namespace paddle { -namespace optimizer { - -class SGDOptimizer : public ParameterOptimizer { - public: - SGDOptimizer(Tensor* parameter, LrPolicy* lr, double m, double d, bool n) - : ParameterOptimizer(parameter, lr), - momentums_(nullptr), - momentum_(m), - decay_(d), - nesterov_(n) { - if (momentum_ != 0.0) { - size_t size = parameter->size(); - momentums_ = new Tensor(size); - } - } - virtual ~SGDOptimizer() { - if (momentums_) delete momentums_; - } - void Update(const Tensor* gradient); - std::string SerializeState(); - void DeserializeState(const std::string& state); - - private: - Tensor* momentums_; - double momentum_; - double decay_; - bool nesterov_; -}; - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/optimizer/tensor.h b/paddle/legacy/optimizer/tensor.h deleted file mode 100644 index 2e58577d4df7aabd8cd218dc13837461cc681ac6..0000000000000000000000000000000000000000 --- a/paddle/legacy/optimizer/tensor.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once -/** - * @brief tensor used by optimizer - */ - -#include -#include -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { -namespace optimizer { - -template -class TensorT { - public: - TensorT(size_t size) : height_(1), width_(size) { - // new T[size]() initializes all element to zero value. - data_ptr_ = std::shared_ptr(new T[size](), std::default_delete()); - data_ = data_ptr_.get(); - } - - TensorT(T* data, size_t size) - : height_(1), width_(size), data_ptr_(nullptr), data_(data) {} - - TensorT(T* data, size_t h, size_t w) - : height_(h), width_(w), data_ptr_(nullptr), data_(data) {} - - virtual ~TensorT() {} - - T* get_buffer() { return this->data_; } - - T& operator[](const size_t idx) { - CHECK(idx >= 0 && idx < this->width_) << "out of index range"; - return data_[idx]; - } - T& operator[](const size_t idx) const { - CHECK(idx >= 0 && idx < this->width_) << "out of index range"; - return data_[idx]; - } - // TODO: replace with tensorshape - size_t size() const { return this->width_ * this->height_; } - - protected: - size_t height_; - size_t width_; - std::shared_ptr data_ptr_; - T* data_; -}; - -// TODO(zhihong): design problem of dynamic datatype, need to fix it -typedef TensorT Tensor; - -} // namespace optimizer -} // namespace paddle diff --git a/paddle/legacy/parameter/Argument.cpp b/paddle/legacy/parameter/Argument.cpp deleted file mode 100644 index 3f1d599e901110a1c9390d76c45f8b4b1f4cab2a..0000000000000000000000000000000000000000 --- 
a/paddle/legacy/parameter/Argument.cpp +++ /dev/null @@ -1,707 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Argument.h" -#include "paddle/legacy/math/SparseMatrix.h" - -#include - -namespace paddle { -static void resizeAndCopy(MatrixPtr& dest, - const MatrixPtr& src, - bool useGpu, - hl_stream_t stream) { - if (src) { - if (!dest) { - dest = src->clone(0, 0, useGpu); - } else { - CHECK_EQ(dest->useGpu(), useGpu); - dest->resize(src->getHeight(), src->getWidth()); - } - dest->copyFrom(*src, stream); - } else { - dest.reset(); - } -} - -static void resizeAndCopy(IVectorPtr& dest, - const IVectorPtr& src, - bool useGpu, - hl_stream_t stream) { - if (src) { - IVector::resizeOrCreate(dest, src->getSize(), useGpu); - dest->copyFrom(*src, stream); - } else { - dest.reset(); - } -} - -static void resizeAndCopy(ICpuGpuVectorPtr& dest, - const ICpuGpuVectorPtr& src, - bool useGpu, - hl_stream_t stream) { - if (src) { - ICpuGpuVector::resizeOrCreate(dest, src->getSize(), useGpu); - dest->copyFrom(*src, stream); - } else { - dest.reset(); - } -} - -static void resizeAndCopy(MatrixPtr& dest, - const MatrixPtr& src, - int32_t startRow, - int32_t copySize, - bool useGpu, - hl_stream_t stream = HPPL_STREAM_DEFAULT) { - if (src) { - CHECK_LE((size_t)startRow + copySize, src->getHeight()); - int height = copySize; - int width = src->getWidth(); - if (!dest) { - dest = src->clone(height, width, useGpu); - } else { - 
CHECK_EQ(dest->useGpu(), useGpu); - dest->resize(height, width); - } - MatrixPtr submat = src->subMatrix(startRow, copySize); - if (dynamic_cast(dest.get())) { - // copy a subMatrix of CpuSparseMatrix to GpuSparseMatrix. - // First copy it to CPU, and then copy it to the GPU. - MatrixPtr tmp = src->clone(height, width, false); - tmp->copyFrom(*submat, stream); - dest->copyFrom(*tmp, stream); - } else { - dest->copyFrom(*submat, stream); - } - } else { - dest.reset(); - } -} - -static void resizeAndCopy(IVectorPtr& dest, - const IVectorPtr& src, - int32_t startPos, - int32_t copySize, - bool useGpu, - hl_stream_t stream = HPPL_STREAM_DEFAULT) { - if (src) { - CHECK_LE((size_t)startPos + copySize, src->getSize()); - - int height = copySize; - IVector::resizeOrCreate(dest, height, useGpu); - dest->copyFrom(src->getData() + startPos, height, stream); - } else { - dest.reset(); - } -} - -static void resizeAndCopy(ICpuGpuVectorPtr& dest, - const ICpuGpuVectorPtr& src, - int32_t startPos, - int32_t copySize, - bool useGpu, - hl_stream_t stream = HPPL_STREAM_DEFAULT) { - if (src) { - CHECK_LE((size_t)startPos + copySize, src->getSize()); - - ICpuGpuVector::resizeOrCreate(dest, copySize, useGpu); - dest->copyFrom(*src, startPos, copySize, useGpu, stream); - } else { - dest.reset(); - } -} - -static void resizeAndCopy(SVectorPtr& dest, - const SVectorPtr& src, - bool useGpu, - hl_stream_t stream) { - if (src) { - size_t height = src->size(); - if (!dest) { - dest = std::make_shared>(height); - } else { - dest->resize(height); - } - std::copy_n(src->begin(), height, dest->begin()); - } else { - dest.reset(); - } -} - -static void resizeAndCopy(SVectorPtr& dest, - const SVectorPtr& src, - int32_t startPos, - int32_t copySize, - bool useGpu, - hl_stream_t stream = HPPL_STREAM_DEFAULT) { - if (src) { - CHECK_LE((size_t)startPos + copySize, src->size()); - size_t height = copySize; - if (!dest) { - dest = std::make_shared>(height); - } else { - dest->resize(height); - } - 
std::copy_n(src->begin() + startPos, height, dest->begin()); - } else { - dest.reset(); - } -} - -void Argument::resizeAndCopyFrom(const Argument& src, bool useGpu) { - resizeAndCopyFrom(src, useGpu, HPPL_STREAM_DEFAULT); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); -} - -void Argument::resizeAndCopyFrom(const Argument& src, - bool useGpu, - hl_stream_t stream) { - dataId = src.dataId; - resizeAndCopy(value, src.value, useGpu, stream); - resizeAndCopy(grad, src.grad, useGpu, stream); - resizeAndCopy(in, src.in, useGpu, stream); - resizeAndCopy(ids, src.ids, useGpu, stream); - resizeAndCopy(sequenceStartPositions, - src.sequenceStartPositions, - false /* useGpu */, - stream); - if (src.hasSubseq()) { - resizeAndCopy(subSequenceStartPositions, - src.subSequenceStartPositions, - false /* useGpu */, - stream); - } - resizeAndCopy(strs, src.strs, useGpu, stream); - frameWidth = src.frameWidth; - frameHeight = src.frameHeight; - frameDepth = src.frameDepth; -} - -int32_t Argument::resizeAndCopyFrom(const Argument& src, - int32_t startSeq, - int32_t copySize, - bool useGpu) { - int32_t size = - resizeAndCopyFrom(src, startSeq, copySize, useGpu, HPPL_STREAM_DEFAULT); - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - return size; -} - -int32_t Argument::resizeAndCopyFrom(const Argument& src, - int32_t startSeq, - int32_t copySize, - bool useGpu, - hl_stream_t stream) { - dataId = src.dataId; - frameWidth = src.frameWidth; - frameHeight = src.frameHeight; - frameDepth = src.frameDepth; - - if (!src.sequenceStartPositions) { - // non-sequence input, copy samples directly - int32_t startRow = startSeq; - resizeAndCopy(in, src.in, startRow, copySize, useGpu, stream); - resizeAndCopy(value, src.value, startRow, copySize, useGpu, stream); - resizeAndCopy(grad, src.grad, startRow, copySize, useGpu, stream); - resizeAndCopy(ids, src.ids, startRow, copySize, useGpu, stream); - resizeAndCopy(strs, src.strs, startRow, copySize, useGpu, stream); - return copySize; - } else { - // 
sequence input - const int* sequence = src.sequenceStartPositions->getData(false); - int32_t startRow = sequence[startSeq]; // sample start from here - int32_t endRow = sequence[startSeq + copySize]; // sample end - int32_t copyFeatureSize = endRow - startRow; // num of samples - resizeAndCopy(in, src.in, startRow, copyFeatureSize, useGpu, stream); - resizeAndCopy(value, src.value, startRow, copyFeatureSize, useGpu, stream); - resizeAndCopy(grad, src.grad, startRow, copyFeatureSize, useGpu, stream); - resizeAndCopy(ids, src.ids, startRow, copyFeatureSize, useGpu, stream); - resizeAndCopy(sequenceStartPositions, - src.sequenceStartPositions, - startSeq, - copySize + 1, - false, - stream); - // modify new sequenceStartPositions - int* destSequences = sequenceStartPositions->getMutableData(false); - for (int i = 0; i < copySize + 1; i++) { - destSequences[i] -= startRow; - } - CHECK_EQ(destSequences[0], 0); - CHECK_EQ(destSequences[copySize], copyFeatureSize); - if (src.hasSubseq()) { - // sequence has sub-sequence - int* subSequence = src.subSequenceStartPositions->getMutableData(false); - int32_t subStartSeq = 0; - int32_t subEndSeq = 0; - int numSubSequences = src.getNumSubSequences(); - for (int i = 0; i < numSubSequences + 1; i++) { - if (subSequence[i] == startRow) { - subStartSeq = i; - } else if (subSequence[i] == endRow) { - subEndSeq = i; - break; - } - } - int32_t copySubSize = subEndSeq - subStartSeq; - resizeAndCopy(subSequenceStartPositions, - src.subSequenceStartPositions, - subStartSeq, - copySubSize + 1, - false, - stream); - // modify new subSequenceStartPositions - int* destSubSequences = subSequenceStartPositions->getMutableData(false); - for (int i = 0; i < copySubSize + 1; i++) { - destSubSequences[i] -= startRow; - } - CHECK_EQ(destSubSequences[0], 0); - CHECK_EQ(destSubSequences[copySubSize], copyFeatureSize); - } - resizeAndCopy(strs, src.strs, startRow, copySize, useGpu, stream); - return copyFeatureSize; - } -} - -void Argument::concat(const 
std::vector& args, - const std::vector& selectRows, - const std::vector& seqStartPos, - const std::vector& copySize, - bool useGpu, - hl_stream_t stream, - PassType passType) { - CHECK(!subSequenceStartPositions) - << "undefined behavior for subsequence positions"; - - size_t batchSize = 0; - for (size_t i = 0; i < copySize.size(); ++i) - batchSize += copySize[i] * (seqStartPos[i + 1] - seqStartPos[i]); - - auto copyArg = [batchSize, stream](MatrixPtr& dst, - MatrixPtr src, - int desStartRow, - int srcStartRow, - int size, - bool useGpu) { - if (!src) { - dst.reset(); - return; - } - size_t width = src->getWidth(); - if (!dst) { - dst = src->clone(batchSize, width, useGpu); - } else { - dst->resize(batchSize, width); - } - - MatrixPtr tmpMatrix = dst->subMatrix(desStartRow, size); - tmpMatrix->copyFrom(*src->subMatrix(srcStartRow, size), stream); - }; - - auto copyIds = [batchSize, stream](IVectorPtr& dst, - const IVectorPtr& src, - int desStartRow, - int srcStartRow, - int size, - bool useGpu) { - if (!src) { - dst.reset(); - return; - } - IVector::resizeOrCreate(dst, batchSize, useGpu); - dst->subVec(desStartRow, size) - ->copyFrom(*src->subVec(srcStartRow, size), stream); - }; - - auto copyStrs = [batchSize](SVectorPtr& dst, - const SVectorPtr& src, - int desStartRow, - int srcStartRow, - int size, - bool useGpu) { - if (!src) { - dst.reset(); - return; - } - if (!dst) { - dst = std::make_shared>(batchSize); - } else { - dst->resize(batchSize); - } - std::copy(src->begin() + srcStartRow, - src->begin() + srcStartRow + size, - dst->begin() + desStartRow); - }; - - dataId = args[0].dataId; - CHECK_NE(seqStartPos.size(), 0UL); - int desStartRow = 0; - for (size_t i = 0; i < copySize.size(); ++i) { - int startPos = seqStartPos[i]; - int endPos = seqStartPos[i + 1]; - CHECK_GE(args.size(), static_cast(endPos - startPos)); - for (int j = startPos; j < endPos; ++j) { - const Argument& arg = args[j - startPos]; - CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate 
should have " - << "the same dataId."; - const int srcStartRow = selectRows[j]; - copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu); - copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu); - if (passType != PASS_TEST) { - copyArg(grad, arg.grad, desStartRow, srcStartRow, copySize[i], useGpu); - } - copyIds(ids, arg.ids, desStartRow, srcStartRow, copySize[i], useGpu); - copyStrs(strs, arg.strs, desStartRow, srcStartRow, copySize[i], useGpu); - desStartRow += copySize[i]; - } - } - ICpuGpuVector::resizeOrCreate( - sequenceStartPositions, seqStartPos.size(), useGpu); - sequenceStartPositions->copyFrom( - seqStartPos.data(), seqStartPos.size(), useGpu); -} - -void Argument::concat(const std::vector& args, - bool useGpu, - hl_stream_t stream, - PassType passType) { - int32_t batchSize = 0; - int64_t numSequences = 0; - int64_t numSubSequences = 0; - for (auto& arg : args) { - batchSize += arg.getBatchSize(); - numSequences += arg.getNumSequences(); - numSubSequences += arg.getNumSubSequences(); - } - - auto copyArg = [batchSize, stream]( - MatrixPtr& dst, MatrixPtr src, int startRow, bool useGpu) { - if (!src) { - dst.reset(); - return; - } - size_t width = src->getWidth(); - if (!dst) { - dst = src->clone(batchSize, width, useGpu); - } else { - dst->resize(batchSize, width); - } - - MatrixPtr tmpMatrix = dst->subMatrix(startRow, src->getHeight()); - tmpMatrix->copyFrom(*src, stream); - }; - - auto copyIds = [batchSize, stream]( - IVectorPtr& dst, const IVectorPtr& src, int startRow, bool useGpu) { - if (!src) { - dst.reset(); - return; - } - IVector::resizeOrCreate(dst, batchSize, useGpu); - dst->subVec(startRow, src->getSize())->copyFrom(*src, stream); - }; - - auto copyStrs = [batchSize]( - SVectorPtr& dst, const SVectorPtr& src, int startRow, bool useGpu) { - if (!src) { - dst.reset(); - return; - } - if (!dst) { - dst = std::make_shared>(batchSize); - } else { - dst->resize(batchSize); - } - std::copy(src->begin(), src->end(), 
dst->begin() + startRow); - }; - - auto copySequencePos = [](ICpuGpuVectorPtr& dstSeq, - const ICpuGpuVectorPtr& srcSeq, - int dstNumSequences, - int srcNumSequences, - int& startSequences, - int startRow) { - if (srcSeq) { - ICpuGpuVector::resizeOrCreate(dstSeq, dstNumSequences + 1, false); - const int* src = srcSeq->getData(false); - int* dest = dstSeq->getMutableData(false); - for (int i = 0; i < srcNumSequences + 1; ++i) { - dest[i + startSequences] = src[i] + startRow; - } - startSequences += srcNumSequences; - } else { - dstSeq.reset(); - } - }; - - int startRow = 0; - int startSequences = 0; - int startSubSequences = 0; - dataId = args[0].dataId; - for (auto& arg : args) { - CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have" - << " same dataId"; - copyArg(in, arg.in, startRow, useGpu); - copyArg(value, arg.value, startRow, useGpu); - if (passType != PASS_TEST) copyArg(grad, arg.grad, startRow, useGpu); - copyIds(ids, arg.ids, startRow, useGpu); - copySequencePos(sequenceStartPositions, - arg.sequenceStartPositions, - numSequences, - arg.getNumSequences(), - startSequences, - startRow); - copySequencePos(subSequenceStartPositions, - arg.subSequenceStartPositions, - numSubSequences, - arg.getNumSubSequences(), - startSubSequences, - startRow); - copyStrs(strs, arg.strs, startRow, useGpu); - startRow += arg.getBatchSize(); - } -} - -void Argument::splitByDataId(const std::vector& argus, - std::vector>* arguGroups) { - arguGroups->clear(); - int lastDataId = -1; - for (const auto& argu : argus) { - if (argu.dataId == -1) { - // is -1, then create a new group - arguGroups->emplace_back(); - lastDataId = -1; - } else if (argu.dataId != lastDataId) { - // not -1, also not equal to last Argument, then create a new group - arguGroups->emplace_back(); - lastDataId = argu.dataId; - } else { - // not -1, and equal to last Argument, do nothing - } - arguGroups->back().push_back(argu); - } -} - -void Argument::getSeqInfo(std::vector* seqInfo) const { - 
const int* starts = sequenceStartPositions->getData(false); - const int* subStarts = - hasSubseq() ? subSequenceStartPositions->getData(false) : nullptr; - size_t numSequences = getNumSequences(); - seqInfo->reserve(numSequences); - int subSeqEnd = 0; - for (size_t i = 0; i < numSequences; ++i) { - SeqInfo info; - info.seqStart = starts[i]; - info.subLevelLength = starts[i + 1] - starts[i]; - info.seqId = i; - if (hasSubseq()) { - info.subSeqStart = subSeqEnd; - while (subStarts[subSeqEnd] < starts[i + 1]) { - ++subSeqEnd; - } - info.topLevelLength = subSeqEnd - info.subSeqStart; - } else { - info.topLevelLength = info.subLevelLength; - info.subSeqStart = 0; // not used - } - seqInfo->push_back(info); - } - std::sort( - seqInfo->begin(), seqInfo->end(), [](const SeqInfo& a, const SeqInfo& b) { - return a.topLevelLength > b.topLevelLength; - }); -} - -void Argument::checkSubset() const { - if (getNumSequences() > getNumSubSequences()) { - LOG(FATAL) << "numSubSequences is less than numSequences (" - << getNumSubSequences() << " vs. 
" << getNumSequences() << ")"; - } - const int* start = sequenceStartPositions->getData(false); - const int* subStart = subSequenceStartPositions->getData(false); - int seqId = 0; - int subSeqId = 0; - while (seqId < getNumSequences() && subSeqId < getNumSubSequences()) { - if (start[seqId] > subStart[subSeqId]) { - ++subSeqId; - } else if (start[seqId] == subStart[subSeqId]) { - ++subSeqId; - ++seqId; - } else { - LOG(FATAL) << "seqStartPositions is not subset of subSeqStartPositions"; - } - } - if (seqId < getNumSequences()) { - LOG(FATAL) << "seqStartPositions is not subset of subSeqStartPositions"; - } -} - -void Argument::degradeSequence(const Argument& input) { - CHECK_EQ(input.hasSubseq(), 1UL); - size_t numSequences = input.getNumSequences(); - size_t numSubSequences = input.getNumSubSequences(); - ICpuGpuVector::resizeOrCreate( - sequenceStartPositions, numSequences + 1, false); - int* tgtBuf = sequenceStartPositions->getMutableData(false); - const int* starts = input.sequenceStartPositions->getData(false); - const int* subStarts = input.subSequenceStartPositions->getData(false); - int seqId = 0; - for (size_t subSeqId = 0; subSeqId < numSubSequences; ++subSeqId) { - if (subStarts[subSeqId] == starts[seqId]) { - tgtBuf[seqId] = subSeqId; - seqId++; - } - } - tgtBuf[numSequences] = numSubSequences; -} - -void Argument::poolSequenceWithStride(const Argument& input, - size_t stride, - ICpuGpuVectorPtr* stridePostions, - bool reversed) { - // If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5, - // then sequenceStartPositions = [0, 2, 3, 4, 7]. 
- // If reversed = false, stridePostions = [0, 5, 9, 14, 17, 22, 27, 30]; - // else reversed = true, stridePostions = [0, 4, 9, 14, 17, 20, 25, 30] - - CHECK(input.sequenceStartPositions); - CHECK_EQ(input.hasSubseq(), 0UL); - CHECK_GT(stride, 0UL) << "stride must larger than 0"; - size_t numSequences = input.getNumSequences(); - ICpuGpuVector::resizeOrCreate( - sequenceStartPositions, numSequences + 1, false); - const int* starts = input.sequenceStartPositions->getData(false); - int* tgtBuf = sequenceStartPositions->getMutableData(false); - // first index of target sequence and stride positions are both 0 - tgtBuf[0] = 0; - std::vector stridePos; - for (size_t seqId = 0; seqId < numSequences; ++seqId) { - size_t seqLength = starts[seqId + 1] - starts[seqId]; - stridePos.emplace_back(starts[seqId]); - if (seqLength == 0) { - // empty sequence - tgtBuf[seqId + 1] = tgtBuf[seqId]; - } else { - int size = ceil((float)seqLength / stride); - tgtBuf[seqId + 1] = tgtBuf[seqId] + size; - for (int i = 0; i < size - 1; ++i) { - int cur = reversed ? 
starts[seqId + 1] - (size - 1 - i) * stride - : stridePos.back() + stride; - stridePos.emplace_back(cur); - } - } - } - stridePos.emplace_back(starts[numSequences]); - int size = stridePos.size(); - CHECK_EQ(size - 1, tgtBuf[numSequences]); - ICpuGpuVector::resizeOrCreate(*stridePostions, size, false); - (*stridePostions)->getMutableVector(false)->copyFrom(stridePos.data(), size); -} - -void Argument::getValueString( - std::unordered_map* out) const { - if (value) { - std::ostringstream os; - value->print(os); - out->insert({"value", os.str()}); - } - if (ids) { - std::ostringstream os; - ids->print(os, ids->getSize()); - out->insert({"ids", os.str()}); - } - if (sequenceStartPositions) { - std::ostringstream os; - sequenceStartPositions->getVector(false)->print( - os, sequenceStartPositions->getSize()); - out->insert({"sequence pos", os.str()}); - } - if (subSequenceStartPositions) { - std::ostringstream os; - subSequenceStartPositions->getVector(false)->print( - os, subSequenceStartPositions->getSize()); - out->insert({"sub-sequence pos", os.str()}); - } -} - -void Argument::printValueString(std::ostream& stream, - const std::string& prefix) const { - std::unordered_map out; - getValueString(&out); - for (auto field : {"value", "ids", "sequence pos", "sub-sequence pos"}) { - auto it = out.find(field); - if (it != out.end()) { - stream << prefix << field << ":\n" << it->second; - } - } -} - -void Argument::subArgFrom(const Argument& input, - size_t offset, - size_t height, - size_t width, - bool useGpu, - bool trans, - bool seqFlag, - size_t seqStart, - size_t seqSize) { - if (input.value) { - value = Matrix::create( - input.value->getData() + offset * width, height, width, trans, useGpu); - } - if (input.ids) { - ids = IVector::create(input.ids->getData() + offset, height, useGpu); - } - if (input.grad) { - grad = Matrix::create( - input.grad->getData() + offset * width, height, width, trans, useGpu); - } - if (seqFlag) { - sequenceStartPositions = 
std::make_shared( - *(input.sequenceStartPositions), seqStart, seqSize); - } -} - -void Argument::reorganizeSeqInfo( - const ICpuGpuVectorPtr seqStartPos, - const ICpuGpuVectorPtr subSeqStartPos, - std::vector>& reorganizedSeqInfo) { - CHECK(seqStartPos); - reorganizedSeqInfo.clear(); - - int seqNum = seqStartPos->getSize() - 1; - int* seqStarts = seqStartPos->getMutableData(false); - - if (subSeqStartPos) { - int* subSeqStarts = subSeqStartPos->getMutableData(false); - reorganizedSeqInfo.resize(seqNum, std::vector()); - int seqIdx = 0; - for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { - reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); - if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { - seqIdx++; - if (seqIdx == seqNum) return; - reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); - } - } - } else { - reorganizedSeqInfo.resize(1, std::vector(seqNum + 1, 0)); - memcpy(reorganizedSeqInfo[0].data(), - seqStarts, - sizeof(int) * seqStartPos->getSize()); - } -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/Argument.h b/paddle/legacy/parameter/Argument.h deleted file mode 100644 index ea8634896c18c7c3516c0d584aec4b475d626e61..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/Argument.h +++ /dev/null @@ -1,349 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "hl_gpu.h" - -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -typedef std::shared_ptr> SVectorPtr; - -struct Argument { - Argument() - : in(nullptr), - value(nullptr), - ids(nullptr), - grad(nullptr), - strs(nullptr), - frameHeight(0), - frameWidth(0), - frameDepth(0), - sequenceStartPositions(nullptr), - subSequenceStartPositions(nullptr), - cpuSequenceDims(nullptr), - deviceId(-1), - allCount(0), - valueCount(0), - gradCount(0), - dataId(0) {} - Argument(const Argument& argument) { - *this = argument; - valueCount = 0; - gradCount = 0; - dataId = argument.dataId; - } - ~Argument() {} - - void operator=(const Argument& argument) { - in = argument.in; - value = argument.value; - ids = argument.ids; - grad = argument.grad; - strs = argument.strs; - sequenceStartPositions = argument.sequenceStartPositions; - subSequenceStartPositions = argument.subSequenceStartPositions; - cpuSequenceDims = argument.cpuSequenceDims; - deviceId = argument.deviceId; - allCount = argument.allCount; - frameHeight = argument.frameHeight; - frameWidth = argument.frameWidth; - frameDepth = argument.frameDepth; - dataId = argument.dataId; - } - - MatrixPtr in; // used if needed - MatrixPtr value; - IVectorPtr ids; // a sequence of ids. Can be use for class id for costLayer - MatrixPtr grad; // If empty, gradient is not needed. - SVectorPtr strs; - - // A dataBatch includes batchSize frames, one frame maybe not only vector - size_t frameHeight; - size_t frameWidth; - size_t frameDepth; - - // If NULL, each position is treated independently. - // Otherwise, its size should be #NumberOfSequences + 1. - // The first position is always 0 and - // the last position should be equal to batchSize. - ICpuGpuVectorPtr sequenceStartPositions; - - // If NULL, each sequence has no subsequence. 
- // Otherwise, its size should be #NumberOfSubSequences + 1. - // The first position is always 0 and - // the last position should be equal to batchSize. - ICpuGpuVectorPtr subSequenceStartPositions; - - // dimension of sequence, stored only in CPU - IVectorPtr cpuSequenceDims; - - int deviceId; // the GPU device id which the argument in - int allCount; // the number of output layers using this argument - mutable int valueCount; // waiting this member when layer do forward - mutable int gradCount; // waiting this member when layer do backward - mutable LockedCondition valueReadyCond; - mutable LockedCondition gradReadyCond; - - int dataId; // dataProvider id - - /* Increase the reference count of the argument. */ - void countIncrement() { allCount++; } - - int getAllCount() const { return allCount; } - - void waitValueReady() const { - valueReadyCond.wait([this] { return (valueCount != 0); }); - - std::lock_guard guard(*valueReadyCond.mutex()); - valueCount--; - } - - void notifyValueReady() const { - valueReadyCond.notify_all([this] { valueCount = allCount; }); - } - - void waitGradReady() const { - gradReadyCond.wait([this] { return (gradCount == allCount); }); - gradCount = 0; - } - - void notifyGradReady() const { - gradReadyCond.notify_all([this] { gradCount++; }); - } - - int64_t getBatchSize() const { - if (value) return value->getHeight(); - if (ids) return ids->getSize(); - if (grad) return grad->getHeight(); - if (in) return in->getHeight(); - if (strs) return strs->size(); - return 0; - } - size_t getFrameHeight() const { return frameHeight; } - size_t getFrameWidth() const { return frameWidth; } - size_t getFrameDepth() const { return frameDepth; } - void setFrameHeight(size_t h) { frameHeight = h; } - void setFrameWidth(size_t w) { frameWidth = w; } - void setFrameDepth(size_t d) { frameDepth = d; } - - int64_t getNumSequences() const { - return sequenceStartPositions ? 
sequenceStartPositions->getSize() - 1 - : getBatchSize(); - } - - int64_t getNumSubSequences() const { - return subSequenceStartPositions ? subSequenceStartPositions->getSize() - 1 - : getBatchSize(); - } - - bool hasSeq() const { return sequenceStartPositions != nullptr; } - bool hasSubseq() const { return subSequenceStartPositions != nullptr; } - - const int* getCpuStartPositions() const { - return hasSubseq() ? subSequenceStartPositions->getData(false) - : sequenceStartPositions->getData(false); - } - - static inline real sum(const std::vector& arguments) { - real cost = 0; - for (auto& arg : arguments) { - if (arg.value) { - SetDevice device(arg.deviceId); - cost += arg.value->getSum(); - } - } - return cost; - } - - /** - * @brief (value, ids, grad, sequenceStartPositions) of output are subset of - * input. Note that, output share the same memory of input. - * - * @param input[in] input - * @param offset[in] offset in terms of rows - * @param height[in] height of output.value - * @param width[in] width of output.value - * @param useGpu[in] - * @param trans[in] whether input.value is transform - * @param seqFlag[in] whether input has sequenceStartPositions - * @param seqStart[in] offset of input.sequenceStartPositions - * @param seqSize[in] lenght of output.sequenceStartPositions - */ - void subArgFrom(const Argument& input, - size_t offset, - size_t height, - size_t width, - bool useGpu, - bool trans = false, - bool seqFlag = false, - size_t seqStart = 0, - size_t seqSize = 0); - /* - * for sequence input: - * startSeq: the sequence id of start - * copySize: how many sequences need to copy - * return value: how many samples are copied - * for non-sequence input: - * startSeq: the sample id of start - * copySize: how many samples need to copy - * return value: how many samples are copied - * Note that when specifying the stream explicitly in this case, - * synchronize should also be called somewhere after this function - */ - int32_t resizeAndCopyFrom(const 
Argument& src, - int32_t startSeq, - int32_t copySize, - bool useGpu, - hl_stream_t stream); - - /* - * same with the above function, except that the stream is - * HPPL_STREAM_DEFAULT and synchronize is automatically called - * inside it - */ - int32_t resizeAndCopyFrom(const Argument& src, - int32_t startSeq, - int32_t copySize, - bool useGpu = FLAGS_use_gpu); - - void resizeAndCopyFrom(const Argument& src, bool useGpu, hl_stream_t stream); - - /* - * same with the above function, except that the stream is - * HPPL_STREAM_DEFAULT and synchronize is automatically called - * inside it - */ - void resizeAndCopyFrom(const Argument& src, bool useGpu = FLAGS_use_gpu); - - /* - @brief Concatenate several arguments into one and put the result into it. - @param args : a vector of argument, each element of which is a frame in a - batch of sequences. - @param selectRows : select several row of args to concatenate - @param seqStartPos : sequence start positions in the final Argument - @param hl_stream_t : cuda stream - @param passTyoe : type of task, training or testing - */ - void concat(const std::vector& args, - const std::vector& selectRows, - const std::vector& seqStartPos, - const std::vector& copySize, - bool useGpu, - hl_stream_t stream, - PassType passType); - - /* - Concatenate several args into one and put the result into this. 
- */ - void concat(const std::vector& src, - bool useGpu = FLAGS_use_gpu, - hl_stream_t stream = HPPL_STREAM_DEFAULT, - PassType passType = PASS_TEST); - - /* - * split vector to several vectors according to dataId - */ - static void splitByDataId(const std::vector& argus, - std::vector>* arguGroups); - - struct SeqInfo { - // Equal to sequence length for sequence data - // Equal to number of subsequences for subsequence data - int topLevelLength; - - int seqStart; - int seqId; - - // Equal to topLevelLength for sequence data - // Equal to sum of the length of subsequences for subsequence data - int subLevelLength; - - // Only used for subsequence data, start position of this sequence - // is subSequenceStartPositions, i.e. - // subSequenceStartPositions[subSeqStart] == seqStart - int subSeqStart; - }; - /* - Get SeqInfo for each sequence of this argument - Elements in *seqInfo are sorted by topLevelLength in descending order - */ - void getSeqInfo(std::vector* segInfo) const; - - /* - Check Whether sequenceStartPositions is subset of - subSequenceStartPositions. - */ - void checkSubset() const; - - /* - sequence has sub-sequence degrades to a sequence. - */ - void degradeSequence(const Argument& input); - - /* - After pooling with stride n (n is smaller than sequence length), - a long sequence will be shorten. - This function is invalid for sequence having sub-sequence. - */ - void poolSequenceWithStride(const Argument& input, - size_t stride, - ICpuGpuVectorPtr* stridePositions, - bool reversed = false); - /** - * @brief getValueString will return the argument's output in string. There - * are several kinds of output. The keys of output dictionary are 'value', - * 'id', 'sequence pos', 'sub-sequence pos'. - * @param out [out]: the return values. - */ - void getValueString(std::unordered_map* out) const; - - /** - * @brief printValueString will print the argument's output in order of - * 'value', 'id', 'sequence pos', 'sub-sequence pos'. 
- * @param stream: Output stream - * @param prefix: line prefix for printing. - */ - void printValueString(std::ostream& stream, - const std::string& prefix = "") const; - - /** - * @brief reorganizeSeqInfo will reorganize sequenceStartPositions and - * subSequenceStartPositions into a 2 dimensional arrary: reorganizedSeqInfo. - * - * @param seqStartPos: sequenceStartPositions of an Argument. - * @param subSeqStartPos: subSequenceStartPositions of an Argument. - * @param the reorganized sequence start position information. - * - * Examples: - * seqStartPos: [0, 4, 15, 20, 28] - * subSeqStartPos: [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28] - * reorganizedSeqInfo: - * [ - * [0,3,4], - * [4,5,7,10,15], - * [15,20], - * [20,22,23,25,28] - * ] - */ - static void reorganizeSeqInfo( - const ICpuGpuVectorPtr seqStartPos, - const ICpuGpuVectorPtr subSeqStartPos, - std::vector>& reorganizedSeqInfo); -}; - -} // namespace paddle diff --git a/paddle/legacy/parameter/AverageOptimizer.cpp b/paddle/legacy/parameter/AverageOptimizer.cpp deleted file mode 100644 index 82a7fed6c6451b8908851f2d039f17b9dc513818..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/AverageOptimizer.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "AverageOptimizer.h" - -namespace paddle { - -// factory method to create an instance of AverageOptimizer -ParameterOptimizer* AverageOptimizer::create( - const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer, - bool isParameterSparse, - bool useParameterApply) { - if (optConfig.average_window() <= 0) { - return optimizer; - } - // disable average for embeded local updater - if (!useParameterApply && optConfig.num_batches_per_send_parameter() > 1) { - return optimizer; - } - if (isParameterSparse) { - return new AverageSparseOptimizer(optConfig, optimizer, useParameterApply); - } - return new AverageOptimizer(optConfig, optimizer, useParameterApply); -} - -AverageOptimizer::AverageOptimizer(const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer, - bool useParameterApply) - : ParameterOptimizer(optConfig), - optimizer_(optimizer), - useApply_(useParameterApply), - numUpdates_(0), - prevNumUpdates_(0), - numAccumulates_(0), - oldNumAccumulates_(0), - minAverageWindow_( - std::min(10000L, optConfig_.max_average_window())), - maxAverageWindow_(optConfig_.max_average_window()) { - parameterTypes_ = optimizer_->getParameterTypes(); - addParameterType(PARAMETER_SUM1); - addParameterType(PARAMETER_SUM2); - addParameterType(PARAMETER_SUM3); - if (useParameterApply) { - addParameterType(PARAMETER_APPLY); - } -} - -void AverageOptimizer::startBatch(int64_t numSamplesProcessed) { - optimizer_->startBatch(numSamplesProcessed); - learningRate_ = optimizer_->getLearningRate(); - - ++numUpdates_; - ++numAccumulates_; -} - -/* - After traversal, the averaged parameter can be obtained by - ((PARAMETER_SUM1 + PARAMETER_SUM2 + PARAMETER_SUM3) - / (numAccumulates_ + oldNumAccumulates_)) -*/ -ParameterOptimizer::TraverseCallback AverageOptimizer::needSpecialTraversal( - const ParameterConfig& config) const { - TraverseCallbackVec callbacks; - - if (auto callback = optimizer_->needSpecialTraversal(config)) { - callbacks.emplace_back(callback); 
- } - - if (numUpdates_ % kMaxNumAccumulates == 0) { - // Move the sum to a different buffer to avoid loss of precision - // due to too many sums. - callbacks.emplace_back([](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { - vecs[PARAMETER_SUM2]->add(*vecs[PARAMETER_SUM1]); - vecs[PARAMETER_SUM1]->zeroMem(); - }); - } - - if (isAverageWindowTooLong()) { - // Now the average window is too long, discard the old sum. - if (auto callback = this->startCatchUpWith()) { - callbacks.emplace_back(callback); - } - callbacks.emplace_back([](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { - vecs[PARAMETER_SUM3]->add(*vecs[PARAMETER_SUM1], *vecs[PARAMETER_SUM2]); - vecs[PARAMETER_SUM1]->zeroMem(); - vecs[PARAMETER_SUM2]->zeroMem(); - }); - } - - return composeCallbacks(callbacks); -} - -void AverageOptimizer::finishBatch() { - optimizer_->finishBatch(); - if (isAverageWindowTooLong()) { - this->finishCatchUpWith(); - oldNumAccumulates_ = numAccumulates_; - numAccumulates_ = 0; - } -} - -ParameterOptimizer::TraverseCallback AverageOptimizer::apply() { - if (numAccumulates_ + oldNumAccumulates_ == 0) { - return nullptr; - } - - real scale = 1. 
/ (numAccumulates_ + oldNumAccumulates_); - if (useApply_) { - return [scale](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { - vecs[PARAMETER_APPLY]->add3(*vecs[PARAMETER_SUM1], - *vecs[PARAMETER_SUM2], - *vecs[PARAMETER_SUM3], - scale, - scale, - scale); - }; - } else { - return [scale](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { - vecs[PARAMETER_GRADIENT]->copyFrom(*vecs[PARAMETER_VALUE]); - vecs[PARAMETER_VALUE]->add3(*vecs[PARAMETER_SUM1], - *vecs[PARAMETER_SUM2], - *vecs[PARAMETER_SUM3], - scale, - scale, - scale); - }; - } -} - -ParameterOptimizer::TraverseCallback AverageOptimizer::restore() { - if (numAccumulates_ + oldNumAccumulates_ == 0) { - return nullptr; - } - if (useApply_) { - return nullptr; - } - - return []( - const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) { - vecs[PARAMETER_VALUE]->copyFrom(*vecs[PARAMETER_GRADIENT]); - vecs[PARAMETER_GRADIENT]->zeroMem(); - }; -} - -void AverageSparseOptimizer::update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const { - optimizer_->update(vecs, paraConfig, sparseId); - - CHECK_LT(sparseId, t0Vec_.size()); - int timediff = timer_ + 1 - t0Vec_[sparseId]; - if (timediff > 0) { - vecs[PARAMETER_SUM1]->add(*vecs[PARAMETER_VALUE], timediff); - t0Vec_[sparseId] = timer_ + 1; - } -} - -ParameterOptimizer::TraverseCallback AverageSparseOptimizer::startCatchUpWith() - const { - TraverseCallbackVec callbacks; - - if (auto callback = optimizer_->startCatchUpWith()) { - callbacks.emplace_back(callback); - } - - if (timer_ > 0) { - callbacks.emplace_back( - [this](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { this->catchUpWith(vecs, config, sparseId); }); - } - - return composeCallbacks(callbacks); -} - -void AverageSparseOptimizer::catchUpWith(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const { - CHECK_LT(sparseId, t0Vec_.size()); - 
int timediff = timer_ - t0Vec_[sparseId]; - if (timediff > 0) { - vecs[PARAMETER_SUM1]->add(*vecs[PARAMETER_VALUE], timediff); - } -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/AverageOptimizer.h b/paddle/legacy/parameter/AverageOptimizer.h deleted file mode 100644 index f0fe2fd28e4be7df8ebc52fd9b9b5540f3d76949..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/AverageOptimizer.h +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "FirstOrderOptimizer.h" - -namespace paddle { - -// After Optimization, parameter values are further averaged within -// time range. 
-class AverageOptimizer : public ParameterOptimizer { - public: - // if *useParameterApply* set, use PARAMETER_APPLY to store averaged parameter - // else use PARAMETER_VALUE, and value backup in PARAMETER_GRADIENT - AverageOptimizer(const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer, - bool useParameterApply); - - static ParameterOptimizer* create(const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer, - bool isParameterSparse = false, - bool useParameterApply = false); - - virtual void init(size_t numRows, const ParameterConfig* config) { - optimizer_->init(numRows, config); - } - - virtual void startPass() { optimizer_->startPass(); } - virtual void finishPass() { - optimizer_->finishPass(); - updateAverageWindowLimit(); - } - - virtual void startBatch(int64_t numSamplesProcessed); - virtual void finishBatch(); - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const { - optimizer_->update(vecs, paraConfig, sparseId); - vecs[PARAMETER_SUM1]->add(*vecs[PARAMETER_VALUE], 1.0f); - } - - virtual TraverseCallback needSpecialTraversal( - const ParameterConfig& config) const; - - virtual TraverseCallback startCatchUpWith() const { - return optimizer_->startCatchUpWith(); - } - virtual void finishCatchUpWith() { return optimizer_->finishCatchUpWith(); } - - virtual TraverseCallback apply(); - virtual TraverseCallback restore(); - - virtual void setNoDecay() { optimizer_->setNoDecay(); } - - protected: - std::unique_ptr optimizer_; - bool useApply_; - - // should only be called from finishPass() - void updateAverageWindowLimit() { - if (!optConfig_.has_max_average_window()) { - // use the number of batches in the last pass as maxAverageWindow_ - CHECK_GT(numUpdates_, prevNumUpdates_); - maxAverageWindow_ = numUpdates_ - prevNumUpdates_; - prevNumUpdates_ = numUpdates_; - } - minAverageWindow_ = std::min(minAverageWindow_, numUpdates_); - } - - bool isAverageWindowTooLong() const { - return 
numAccumulates_ >= minAverageWindow_ && - numAccumulates_ >= - std::min(maxAverageWindow_, - numUpdates_ * optConfig_.average_window()); - } - - static const int64_t kMaxNumAccumulates = 16384; - int64_t numUpdates_; - int64_t prevNumUpdates_; - int64_t numAccumulates_; - int64_t oldNumAccumulates_; - int64_t minAverageWindow_; - int64_t maxAverageWindow_; -}; - -// Average Optimizer with Sparse support. -class AverageSparseOptimizer : public AverageOptimizer { - public: - AverageSparseOptimizer(const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer, - bool useParameterApply) - : AverageOptimizer(optConfig, optimizer, useParameterApply) {} - - virtual void init(size_t numRows, const ParameterConfig* config) { - AverageOptimizer::init(numRows, config); - - t0Vec_.resize(numRows); - - timer_ = 0; - t0Vec_.assign(t0Vec_.size(), 0); - } - virtual void finishBatch() { - AverageOptimizer::finishBatch(); - timer_++; - } - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const; - void catchUpWith(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const; - virtual TraverseCallback startCatchUpWith() const; - virtual void finishCatchUpWith() { - optimizer_->finishCatchUpWith(); - - timer_ = 0; - t0Vec_.assign(t0Vec_.size(), 0); - } - - protected: - /** - * counting batches, clear after catch up with - * t(timer_) is current time, - * t0(t0Vec_) are last occur time of i rows. - * if one block is update by multi threads, - * caller should hash sparse ids to avoid write conflict in t0Vec_. 
- */ - int timer_; - mutable std::vector t0Vec_; -}; - -} // namespace paddle diff --git a/paddle/legacy/parameter/CMakeLists.txt b/paddle/legacy/parameter/CMakeLists.txt deleted file mode 100644 index 19ae07e077e2b8f55ce4050566c9cf6aaa0efa0a..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# The utilities for paddle - -file(GLOB PARAMETERS_HEADERS . *.h) -file(GLOB PARAMETERS_SOURCES . *.cpp) - -add_library(paddle_parameter STATIC - ${PARAMETERS_SOURCES}) -add_dependencies(paddle_parameter paddle_proto ${external_project_dependencies}) -if(WITH_TESTING) - add_subdirectory(tests) -endif() diff --git a/paddle/legacy/parameter/FirstOrderOptimizer.cpp b/paddle/legacy/parameter/FirstOrderOptimizer.cpp deleted file mode 100644 index 4f82a115f7bb467737b53b9891d88d3c4f501faf..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/FirstOrderOptimizer.cpp +++ /dev/null @@ -1,330 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "FirstOrderOptimizer.h" -#include "paddle/legacy/math/TrainingAlgorithmOp.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/Util.h" - -#include - -DEFINE_bool(log_clipping, false, "enable log clipping or not"); - -namespace paddle { - -SparseMomentumParameterOptimizer::SparseMomentumParameterOptimizer( - const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig) { - addParameterType(PARAMETER_MOMENTUM); - addParameterType(PARAMETER_MOMENTUM_UT); - addParameterType(PARAMETER_MOMENTUM_VT); - alpha_ = 1; - beta_ = 1; - tau_ = -1; - threshold_ = 1e+06; -} - -void SparseMomentumParameterOptimizer::init(size_t numRows, - const ParameterConfig* config) { - isParameterSparse_ = numRows != 0; - t0Vec_.resize(numRows); - t0Vec_.assign(t0Vec_.size(), 0); - timer_ = 0; - momentum_ = config->momentum(); - decayRate_ = config->decay_rate(); - gamma_ = config->learning_rate(); -} - -void SparseMomentumParameterOptimizer::startBatch(int64_t numSamplesProcessed) { - learningRate_ = calcLearningRate(numSamplesProcessed, pass_); - if (isParameterSparse_) { - tau_ = tau_ + beta_ / alpha_; - alpha_ = alpha_ / momentum_; - beta_ = beta_ / (1 + decayRate_ * gamma_ * learningRate_); - } -} - -void SparseMomentumParameterOptimizer::update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const { - if (sparseId != -1LU) { - CHECK_LT(sparseId, t0Vec_.size()); - if (t0Vec_[sparseId] == 0) { - vecs[PARAMETER_MOMENTUM_VT]->assign(*vecs[PARAMETER_VALUE]); - t0Vec_[sparseId] = 1; - } - vecs[PARAMETER_MOMENTUM_UT]->add(*vecs[PARAMETER_GRADIENT], - -alpha_ * gamma_ * learningRate_); - vecs[PARAMETER_MOMENTUM_VT]->add(*vecs[PARAMETER_GRADIENT], - tau_ * alpha_ * gamma_ * learningRate_); - vecs[PARAMETER_VALUE]->add(*vecs[PARAMETER_MOMENTUM_UT], - tau_ / beta_ + 1.0 / alpha_, - *vecs[PARAMETER_MOMENTUM_VT], - 1.0 / beta_); - - } else { - vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT], - 
*vecs[PARAMETER_MOMENTUM], - learningRate_ * paraConfig.learning_rate(), - paraConfig.momentum(), - applyDecay_ ? paraConfig.decay_rate() : 0); - } -} - -ParameterOptimizer::TraverseCallback -SparseMomentumParameterOptimizer::needSpecialTraversal( - const ParameterConfig& config) const { - if (alpha_ > threshold_ && isParameterSparse_) { - // Restart to avoid large value multiplication - // 1. \alpha = 1, \beta = 1, \tau = 0 - // 2. Note that \tau * u_t + v_t = \beta \theta_t, therefore: - // u_t should be rescaled to u_t/alpha_ - // v_t should be reset to \theta_t - return [this](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { - vecs[PARAMETER_MOMENTUM_UT]->divScalar(alpha_); - vecs[PARAMETER_MOMENTUM_VT]->assign(*vecs[PARAMETER_VALUE]); - }; - } else { - return nullptr; - } -} - -void SparseMomentumParameterOptimizer::finishBatch() { - timer_++; - if (!isParameterSparse_) return; - if (alpha_ > threshold_) { - alpha_ = 1; - beta_ = 1; - tau_ = -1; - } -} - -void AdagradParameterOptimizer::update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - BaseMatrix& value = *vecs[PARAMETER_VALUE]; - BaseMatrix& grad = *vecs[PARAMETER_GRADIENT]; - BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM]; - BaseMatrix& accum_buffer = *vecs[PARAMETER_GRADIENT_SQURESUM]; - BaseMatrix& accum = *vecs[PARAMETER_GRADIENT_SQURESUM1]; - BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE]; - - real epsilon = optConfig_.ada_epsilon(); - real learningRate = learningRate_ * config.learning_rate(); - real momentum = config.momentum(); - real decayRate = applyDecay_ ? 
config.decay_rate() : 0; - - adagradApply(value, - grad, - mom, - accum_buffer, - accum, - lr, - epsilon, - learningRate, - momentum, - decayRate); -} - -ParameterOptimizer::TraverseCallback -AdagradParameterOptimizer::needSpecialTraversal( - const ParameterConfig& config) const { - if (numUpdates_ % kMaxNumAccumulates == 0) { - // Move the sum to a different buffer to avoid loss of precision - // due to too many sums. - return [](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { - vecs[PARAMETER_GRADIENT_SQURESUM]->add( - *vecs[PARAMETER_GRADIENT_SQURESUM1]); - vecs[PARAMETER_GRADIENT_SQURESUM1]->zeroMem(); - }; - } else { - return nullptr; - } -} - -void AdaDeltaParameterOptimizer::update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - CHECK(sparseId == -1LU) << "Sparse update is not supported"; - - BaseMatrix& value = *vecs[PARAMETER_VALUE]; - BaseMatrix& grad = *vecs[PARAMETER_GRADIENT]; - BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM]; - BaseMatrix& accum = *vecs[PARAMETER_GRADIENT_SQURESUM]; - BaseMatrix& accum_update = *vecs[PARAMETER_GRADIENT_SQURESUM1]; - BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE]; - - real learningRate = learningRate_ * config.learning_rate(); - real momentum = config.momentum(); - real decayRate = applyDecay_ ? 
config.decay_rate() : 0; - - adadeltaApply(value, - grad, - mom, - accum, - accum_update, - lr, - rou_, - epsilon_, - learningRate, - momentum, - decayRate); -} - -void RMSPropParameterOptimizer::update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - BaseMatrix& value = *vecs[PARAMETER_VALUE]; - BaseMatrix& grad = *vecs[PARAMETER_GRADIENT]; - BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM]; - BaseMatrix& sum = *vecs[PARAMETER_GRADIENT_SQURESUM]; - BaseMatrix& sum1 = *vecs[PARAMETER_GRADIENT_SQURESUM1]; - BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE]; - - real accumulatedRou = rou_; - bool firstTime = timer_ == 0; - if (sparseId != -1LU) { - CHECK_LT(sparseId, t0Vec_.size()); - accumulatedRou = std::pow(rou_, timer_ + 1 - t0Vec_[sparseId]); - firstTime = t0Vec_[sparseId] == 0; - t0Vec_[sparseId] = timer_ + 1; - } - - real epsilon = optConfig_.ada_epsilon(); - real learningRate = learningRate_ * config.learning_rate(); - real momentum = config.momentum(); - real decayRate = applyDecay_ ? 
config.decay_rate() : 0; - - rmspropApply(value, - grad, - mom, - sum, - sum1, - lr, - accumulatedRou, - rou_, - epsilon, - learningRate, - momentum, - decayRate, - firstTime); -} - -void DecayedAdagradParameterOptimizer::update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - BaseMatrix& value = *vecs[PARAMETER_VALUE]; - BaseMatrix& grad = *vecs[PARAMETER_GRADIENT]; - BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM]; - BaseMatrix& sum = *vecs[PARAMETER_GRADIENT_SQURESUM]; - BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE]; - - real accumulatedRou = rou_; - bool firstTime = timer_ == 0; - if (sparseId != -1LU) { - CHECK_LT(sparseId, t0Vec_.size()); - accumulatedRou = std::pow(rou_, timer_ + 1 - t0Vec_[sparseId]); - firstTime = t0Vec_[sparseId] == 0; - t0Vec_[sparseId] = timer_ + 1; - } - - real epsilon = optConfig_.ada_epsilon(); - real learningRate = learningRate_ * config.learning_rate(); - real momentum = config.momentum(); - real decayRate = applyDecay_ ? 
config.decay_rate() : 0; - - decayedAdagradApply(value, - grad, - mom, - sum, - lr, - accumulatedRou, - rou_, - epsilon, - learningRate, - momentum, - decayRate, - firstTime); -} - -void AdamParameterOptimizer::update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - CHECK(sparseId == -1UL) << "Sparse update is not supported"; - - real beta1_power = std::pow(beta1_, step_); - real beta2_power = std::pow(beta2_, step_); - real learningRate = config.learning_rate() * learningRate_; - - BaseMatrix& value = *vecs[PARAMETER_VALUE]; - BaseMatrix& grad = *vecs[PARAMETER_GRADIENT]; - BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM]; - BaseMatrix& v = *vecs[PARAMETER_SECOND_MOMENTUM]; - - adamApply(value, - grad, - mom, - v, - beta1_, - beta2_, - beta1_power, - beta2_power, - epsilon_, - learningRate); -} - -void AdamaxParameterOptimizer::update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - CHECK(sparseId == -1UL) << "Sparse update is not supported"; - real learningRate = config.learning_rate() * learningRate_; - - BaseMatrix& value = *vecs[PARAMETER_VALUE]; - BaseMatrix& grad = *vecs[PARAMETER_GRADIENT]; - BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM]; - BaseMatrix& u = *vecs[PARAMETER_WEIGHTED_INFINITY_NORM]; - - adamaxApply(value, grad, mom, u, beta1_, beta2_, step_, learningRate); -} - -void OptimizerWithGradientClipping::update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - real globalThreshold = optConfig_.gradient_clipping_threshold(); - real localThreshold = config.gradient_clipping_threshold(); - - // Use local gradient clipping threshold if it's enabled, - // otherwise using the global one. - real threshold = localThreshold > 0.0f ? localThreshold : globalThreshold; - std::string field = localThreshold > 0.0f ? 
"local" : "global"; - - real maxAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsMax(); - if (maxAbsGrad > threshold) { - if (FLAGS_log_clipping) { - real avgAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsSum() / - vecs[PARAMETER_GRADIENT]->getSize(); - LOG(INFO) << "parameter=" << config.name() << " need clipping by " - << field << " threshold=" << threshold - << ", max grad=" << maxAbsGrad << ", avg grad=" << avgAbsGrad; - } - vecs[PARAMETER_GRADIENT]->clip(-threshold, threshold); - } - optimizer_->update(vecs, config, sparseId); -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/FirstOrderOptimizer.h b/paddle/legacy/parameter/FirstOrderOptimizer.h deleted file mode 100644 index 86b9a591aff7a58aafa194c64cb09cd6636d0454..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/FirstOrderOptimizer.h +++ /dev/null @@ -1,381 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "ParameterOptimizer.h" -#include "ParameterUpdateFunctions.h" -#include "Regularizer.h" - -namespace paddle { - -// Plain SGD optimization. 
-class SgdOptimizer : public ParameterOptimizer { - public: - explicit SgdOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig) { - addParameterType(PARAMETER_MOMENTUM); - } - - virtual void startBatch(int64_t numSamplesProcessed) { - learningRate_ = calcLearningRate(numSamplesProcessed, pass_); - } - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const { - (void)sparseId; - real torch_learningRate = optConfig_.learning_method() == "torch_momentum" - ? 1.0 - paraConfig.momentum() - : 1.0; -#ifdef PADDLE_WITH_MKLDNN - sgdUpdate(learningRate_ * paraConfig.learning_rate() * - (firstTime_ ? 1.0 : torch_learningRate), - paraConfig.momentum(), - applyDecay_ ? paraConfig.decay_rate() : 0, - vecs[PARAMETER_VALUE].get(), - vecs[PARAMETER_GRADIENT].get(), - vecs[PARAMETER_MOMENTUM].get()); -#else - vecs[PARAMETER_VALUE]->sgdUpdate( - *vecs[PARAMETER_GRADIENT], - *vecs[PARAMETER_MOMENTUM], - learningRate_ * paraConfig.learning_rate() * - (firstTime_ ? 1.0 : torch_learningRate), - paraConfig.momentum(), - applyDecay_ ? paraConfig.decay_rate() : 0); -#endif - } - virtual void finishBatch() { firstTime_ = false; } -}; - -// SGD optimization with sparse support. 
-class SparseMomentumParameterOptimizer : public ParameterOptimizer { - /* sparse momentum optimizer - - update scheme: - - \alpha_t = \alpha_{t-1} / k - \beta_t = \beta_{t-1} / (1 + \lambda\gamma_t) - u_t = u_{t-1} - \alpha_t \gamma_t g_t - v_t = v_{t-1} + \tau_{t-1} \alpha_t \gamma_t g_t - \tau_t = \tau_{t-1} + \beta_t / \alpha_t - - where: - k: momentum - lambda: decay rate - \gamma_t: learning rate at the t'th step - */ - - public: - explicit SparseMomentumParameterOptimizer( - const OptimizationConfig& optConfig); - virtual void init(size_t numRows, const ParameterConfig* config); - virtual void startBatch(int64_t numSamplesProcessed); - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const; - virtual TraverseCallback needSpecialTraversal( - const ParameterConfig& config) const; - virtual void finishBatch(); - - private: - real alpha_; - real beta_; - real tau_; - real gamma_; - real threshold_; - real momentum_; - real decayRate_; - - protected: - int64_t timer_; - mutable std::vector t0Vec_; - bool isParameterSparse_; -}; - -/* - * AdaGrad optimization. 
- * http://www.magicbroom.info/Papers/DuchiHaSi10.pdf - */ -class AdagradParameterOptimizer : public ParameterOptimizer { - public: - explicit AdagradParameterOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig) { - addParameterType(PARAMETER_MOMENTUM); - addParameterType(PARAMETER_GRADIENT_SQURESUM); - addParameterType(PARAMETER_GRADIENT_SQURESUM1); - addParameterType(PARAMETER_LEARNING_RATE); - numUpdates_ = 0; - } - - virtual void startBatch(int64_t numSamplesProcessed) { - (void)numSamplesProcessed; - ++numUpdates_; - } - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - virtual TraverseCallback needSpecialTraversal( - const ParameterConfig& config) const; - - protected: - int64_t numUpdates_; - static const int64_t kMaxNumAccumulates = 16384; -}; - -/* - * AdaDelta Optimization. - * http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf - */ -class AdaDeltaParameterOptimizer : public ParameterOptimizer { - public: - explicit AdaDeltaParameterOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig) { - addParameterType(PARAMETER_MOMENTUM); - addParameterType(PARAMETER_GRADIENT_SQURESUM); - addParameterType(PARAMETER_GRADIENT_SQURESUM1); - addParameterType(PARAMETER_LEARNING_RATE); - rou_ = optConfig.ada_rou(); - epsilon_ = optConfig.ada_epsilon(); - } - - virtual void startBatch(int64_t numSamplesProcessed) { - learningRate_ = calcLearningRate(numSamplesProcessed, pass_); - } - - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - - protected: - real rou_; - real epsilon_; -}; - -// RMSProp Parameter Optimization. 
-class RMSPropParameterOptimizer : public ParameterOptimizer { - public: - explicit RMSPropParameterOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig) { - addParameterType(PARAMETER_MOMENTUM); - addParameterType(PARAMETER_GRADIENT_SQURESUM1); - addParameterType(PARAMETER_GRADIENT_SQURESUM); - addParameterType(PARAMETER_LEARNING_RATE); - rou_ = optConfig.ada_rou(); - epsilon_ = optConfig.ada_epsilon(); - } - - virtual void init(size_t numRows, const ParameterConfig* config) { - t0Vec_.resize(numRows); - t0Vec_.assign(t0Vec_.size(), 0); - timer_ = 0; - } - - virtual void startBatch(int64_t numSamplesProcessed) { - learningRate_ = calcLearningRate(numSamplesProcessed, pass_); - } - virtual void finishBatch() { timer_++; } - - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - - protected: - real rou_; - real epsilon_; - - /** - * counting batches, donot need catch up with - * t(timer_) is current time, - * t0(t0Vec_) are last occur time of i rows. - * if one block is update by multi threads, - * caller should hash sparse ids to avoid write conflict in t0Vec_. - */ - int64_t timer_; - mutable std::vector t0Vec_; -}; - -// Decayed AdaGrad Optimization. 
-class DecayedAdagradParameterOptimizer : public ParameterOptimizer { - public: - explicit DecayedAdagradParameterOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig) { - addParameterType(PARAMETER_MOMENTUM); - addParameterType(PARAMETER_GRADIENT_SQURESUM); - addParameterType(PARAMETER_LEARNING_RATE); - rou_ = optConfig.ada_rou(); - epsilon_ = optConfig.ada_epsilon(); - } - - virtual void init(size_t numRows, const ParameterConfig* config) { - t0Vec_.resize(numRows); - t0Vec_.assign(t0Vec_.size(), 0); - timer_ = 0; - } - - virtual void startBatch(int64_t numSamplesProcessed) { - learningRate_ = calcLearningRate(numSamplesProcessed, pass_); - } - virtual void finishBatch() { timer_++; } - - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - - protected: - real rou_; - real epsilon_; - - /** - * counting batches, donot need catch up with - * t(timer_) is current time, - * t0(t0Vec_) are last occur time of i rows. - * if one block is update by multi threads, - * caller should hash sparse ids to avoid write conflict in t0Vec_. - */ - int64_t timer_; - mutable std::vector t0Vec_; -}; - -/** - * Adam Optimizer. 
- * Reference Paper: http://arxiv.org/abs/1412.6980 Algorithm 1 - */ -class AdamParameterOptimizer : public ParameterOptimizer { - public: - explicit AdamParameterOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig), - beta1_(optConfig.adam_beta1()), - beta2_(optConfig.adam_beta2()), - epsilon_(optConfig.adam_epsilon()), - step_(1), - learningRate_(optConfig.learning_rate()) { - addParameterType(PARAMETER_MOMENTUM); - addParameterType(PARAMETER_SECOND_MOMENTUM); - } - - virtual void startBatch(int64_t numSamplesProcessed) { - learningRate_ = calcLearningRate(numSamplesProcessed, pass_); - } - - virtual void finishBatch() { ++step_; } - - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - - protected: - real beta1_; - real beta2_; - real epsilon_; - int64_t step_; - real learningRate_; -}; - -/** - * AdaMax Optimizer. - * Reference Paper: http://arxiv.org/abs/1412.6980 Algorithm 2 - */ -class AdamaxParameterOptimizer : public ParameterOptimizer { - public: - explicit AdamaxParameterOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig), - beta1_(optConfig.adam_beta1()), - beta2_(optConfig.adam_beta2()), - step_(1), - learningRate_(optConfig.learning_rate()) { - addParameterType(PARAMETER_MOMENTUM); - addParameterType(PARAMETER_WEIGHTED_INFINITY_NORM); - } - - virtual void finishBatch() { ++step_; } - - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - - protected: - real beta1_; - real beta2_; - int64_t step_; - real learningRate_; -}; - -// Used in pserver, -// when PARAMETER_DELTA stores in PARAMETER_GRADIENT. 
-class AddOptimizer : public ParameterOptimizer { - public: - explicit AddOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig) {} - - virtual void startBatch(int64_t numSamplesProcessed) { - // learningRate required by regularizer - learningRate_ = calcLearningRate(numSamplesProcessed, pass_); - } - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const { - vecs[PARAMETER_VALUE]->add(*vecs[PARAMETER_GRADIENT], - optConfig_.delta_add_rate()); - } -}; - -// A optimizer which does nothing. -class DummyOptimizer : public ParameterOptimizer { - public: - explicit DummyOptimizer(const OptimizationConfig& optConfig) - : ParameterOptimizer(optConfig) {} - - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - size_t sparseId) const {} -}; - -// Do gradient clipping before sgd update -class OptimizerWithGradientClipping : public ParameterOptimizer { - public: - OptimizerWithGradientClipping(const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer) - : ParameterOptimizer(optConfig), optimizer_(optimizer) { - parameterTypes_ = optimizer_->getParameterTypes(); - } - - virtual void init(size_t numRows, const ParameterConfig* config) { - optimizer_->init(numRows, config); - } - - virtual void startPass() { optimizer_->startPass(); } - virtual void finishPass() { optimizer_->finishPass(); } - - virtual void startBatch(int64_t numSamplesProcessed) { - optimizer_->startBatch(numSamplesProcessed); - learningRate_ = optimizer_->getLearningRate(); - } - virtual void finishBatch() { optimizer_->finishBatch(); } - - virtual TraverseCallback needSpecialTraversal( - const ParameterConfig& config) const { - return optimizer_->needSpecialTraversal(config); - } - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - - virtual void setNoDecay() { optimizer_->setNoDecay(); } - - protected: - std::unique_ptr optimizer_; -}; - 
-} // namespace paddle diff --git a/paddle/legacy/parameter/LearningRateScheduler.cpp b/paddle/legacy/parameter/LearningRateScheduler.cpp deleted file mode 100644 index 68c44a7ec49f64a1085609d906441c9ed4502888..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/LearningRateScheduler.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "LearningRateScheduler.h" -#include "paddle/legacy/utils/StringUtil.h" - -namespace paddle { - -ClassRegistrar - LearningRateScheduler::registrar_; - -LearningRateScheduler* LearningRateScheduler::create( - const OptimizationConfig& config) { - return registrar_.createByType(config.learning_rate_schedule(), config); -} - -// LRS stands for LearningRateScheduler - -class BaseLRS : public LearningRateScheduler { - public: - explicit BaseLRS(const OptimizationConfig& config) - : learningRate_(config.learning_rate()), - a_(config.learning_rate_decay_a()), - b_(config.learning_rate_decay_b()) {} - - protected: - real learningRate_; - real a_; - real b_; -}; - -class ConstLRS : public BaseLRS { - public: - explicit ConstLRS(const OptimizationConfig& config) : BaseLRS(config) {} - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - return learningRate_; - } -}; -REGISTER_LEARNING_RATE_SCHEDULER(constant, ConstLRS); - -class PolyLRS : public BaseLRS { - public: - explicit PolyLRS(const OptimizationConfig& 
config) : BaseLRS(config) {} - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - return learningRate_ * pow(1.0 + a_ * numSamplesProcessed, -b_); - } -}; -REGISTER_LEARNING_RATE_SCHEDULER(poly, PolyLRS); - -class CaffePolyLRS : public BaseLRS { - public: - explicit CaffePolyLRS(const OptimizationConfig& config) : BaseLRS(config) {} - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - if (numSamplesProcessed > a_) { - LOG_FIRST_N(WARNING, 1) - << "Using caffe_poly learning rate schedule, " - << "learning rate hits ZERO when " - << "numSamplesProcessed > config.learning_rate_decay_b(), " - << "training is over and you can stop it. " - << "See common/LearningRateScheduler.cpp for more info."; - return 0; - } else { - return learningRate_ * pow(1.0 - numSamplesProcessed / a_, b_); - } - } -}; -REGISTER_LEARNING_RATE_SCHEDULER(caffe_poly, CaffePolyLRS); - -class ExpLRS : public BaseLRS { - public: - explicit ExpLRS(const OptimizationConfig& config) : BaseLRS(config) {} - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - double decayRatio = (double)numSamplesProcessed / b_; - return learningRate_ * pow(a_, decayRatio); - } -}; -REGISTER_LEARNING_RATE_SCHEDULER(exp, ExpLRS); - -class DiscreteExpLRS : public BaseLRS { - public: - explicit DiscreteExpLRS(const OptimizationConfig& config) : BaseLRS(config) {} - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - int numDecays = floor(numSamplesProcessed / b_); - return learningRate_ * pow(a_, numDecays); - } -}; -REGISTER_LEARNING_RATE_SCHEDULER(discexp, DiscreteExpLRS); - -class LinearLRS : public BaseLRS { - public: - explicit LinearLRS(const OptimizationConfig& config) : BaseLRS(config) {} - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - return std::max(learningRate_ - a_ * numSamplesProcessed, b_); - } -}; -REGISTER_LEARNING_RATE_SCHEDULER(linear, LinearLRS); - -/* - specify learning rate 
through - learning_rate_args = 'seg0:rate0,seg1:rate1,...,segK:rateK' - if seg_{i-1} <= numSamples <= seg_i, - then learning_rate = learning_rate_base * rate_i -*/ -class ManualLRS : public BaseLRS { - public: - explicit ManualLRS(const OptimizationConfig& config) - : BaseLRS(config), currentSegment_(0), lastNum_(0) { - std::vector pieces; - str::split(config.learning_rate_args(), ',', &pieces); - rates_.reserve(pieces.size()); - std::string s1, s2; - - for (auto& piece : pieces) { - auto pos = piece.find(':'); - CHECK(pos != std::string::npos) << "Wrong format for learning_rate_args: " - << config.learning_rate_args(); - segments_.push_back(str::to(piece.substr(0, pos))); - rates_.push_back(str::to(piece.substr(pos + 1))); - } - } - - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - return calc(numSamplesProcessed); - } - - real calc(int64_t num) { - // We assume that num never decreases. - CHECK_LE(lastNum_, num); - lastNum_ = num; - while (currentSegment_ < rates_.size()) { - if (num <= segments_[currentSegment_]) { - return learningRate_ * rates_[currentSegment_]; - } - ++currentSegment_; - if (currentSegment_ < rates_.size()) { - LOG(INFO) << " learning_rate changes to " - << learningRate_ * rates_[currentSegment_]; - } - } - return learningRate_ * rates_.back(); - } - - protected: - std::vector rates_; - std::vector segments_; - size_t currentSegment_; - int64_t lastNum_; -}; - -REGISTER_LEARNING_RATE_SCHEDULER(manual, ManualLRS); - -class PassManualLRS : public ManualLRS { - public: - explicit PassManualLRS(const OptimizationConfig& config) - : ManualLRS(config) {} - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - return calc(pass); - } -}; - -REGISTER_LEARNING_RATE_SCHEDULER(pass_manual, PassManualLRS); -} // namespace paddle diff --git a/paddle/legacy/parameter/LearningRateScheduler.h b/paddle/legacy/parameter/LearningRateScheduler.h deleted file mode 100644 index 
fc7e380a6af58577f4ba319d85522535b8f93a45..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/LearningRateScheduler.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "TrainerConfig.pb.h" -#include "paddle/legacy/utils/ClassRegistrar.h" - -namespace paddle { -// NOLINTNEXTLINES_4 -#define REGISTER_LEARNING_RATE_SCHEDULER(__type_name, __class_name) \ - static InitFunction __reg_type_##__type_name([]() { \ - LearningRateScheduler::registrar_.registerClass<__class_name>( \ - #__type_name); \ - }) - -class LearningRateScheduler { - public: - static LearningRateScheduler* create(const OptimizationConfig& config); - virtual ~LearningRateScheduler() {} - virtual real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) = 0; - - static ClassRegistrar registrar_; -}; - -} // namespace paddle diff --git a/paddle/legacy/parameter/OptimizerFunctions.cpp b/paddle/legacy/parameter/OptimizerFunctions.cpp deleted file mode 100644 index b7f920b89ccc7d024079ac504819c10703eb550d..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/OptimizerFunctions.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "AverageOptimizer.h" -#include "FirstOrderOptimizer.h" -#include "OptimizerWithRegularizer.h" - -namespace paddle { - -// creator for AverageOptimizer -ParameterOptimizer* sgdOptimizerCreate(const OptimizationConfig& optConfig, - const ParameterConfig& paraConfig, - bool isParameterSparse, - bool inPserver) { - ParameterOptimizer* optimizer = OptimizerWithRegularizer::create( - optConfig, paraConfig, isParameterSparse, inPserver); - return AverageOptimizer::create( - optConfig, optimizer, isParameterSparse, inPserver /*useParameterApply*/); -} - -std::vector sgdOptimizerGetTypes( - const OptimizationConfig& optConfig, bool inPserver) { - std::unique_ptr optimizer; - optimizer.reset( - AverageOptimizer::create(optConfig, - ParameterOptimizer::create(optConfig, inPserver), - false /*isParameterSparse*/, - inPserver)); - CHECK(optimizer) << "fail to create optimizer: " - << optConfig.learning_method(); - return optimizer->getParameterTypes(); -} - -bool useApplyInPserver(const OptimizationConfig& optConfig) { - auto types = sgdOptimizerGetTypes(optConfig, true /*inPserver*/); - return types.end() != std::find(types.begin(), types.end(), PARAMETER_APPLY); -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/OptimizerFunctions.h b/paddle/legacy/parameter/OptimizerFunctions.h deleted file mode 100644 index 57f6fc9d40ec8e9d245a9d4b0835e16157ab0aae..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/OptimizerFunctions.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "FirstOrderOptimizer.h" - -namespace paddle { - -/* - * Factory function creates the corresponding SgdOptimizer - * according to the configuration in optConfig. - */ -ParameterOptimizer* sgdOptimizerCreate(const OptimizationConfig& optConfig, - const ParameterConfig& paraConfig, - bool isParameterSparse, - bool inPserver); - -/* - * Get the parameter types needed for the specific optimization - * algorithm specified in optConfig. - */ -std::vector sgdOptimizerGetTypes( - const OptimizationConfig& optConfig, bool inPserver); - -/* - * Whether trainer need call apply() in pserver and get result back. - * currently, only averager depend on this. - */ -bool useApplyInPserver(const OptimizationConfig& optConfig); - -} // namespace paddle diff --git a/paddle/legacy/parameter/OptimizerWithRegularizer.cpp b/paddle/legacy/parameter/OptimizerWithRegularizer.cpp deleted file mode 100644 index 9e914ae4ecebe23d0b19640a6d1cc86421f38df7..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/OptimizerWithRegularizer.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "OptimizerWithRegularizer.h" - -namespace paddle { - -ParameterOptimizer::TraverseCallback -OptimizerWithRegularizerEveryNumBatches::needSpecialTraversal( - const ParameterConfig& config) const { - TraverseCallbackVec callbacks; - - if (isRegularizationBatch(config)) { - callbacks.emplace_back( - [this](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { this->doTraversal(vecs, config); }); - } - - if (auto callback = optimizer_->needSpecialTraversal(config)) { - callbacks.emplace_back(callback); - } - - return composeCallbacks(callbacks); -} - -void OptimizerWithRegularizerEveryNumBatches::doTraversal( - const VectorPtr vecs[], const ParameterConfig& config) const { - int32_t base = - std::max(baseTimer_, (timer_ + 1 - config.num_batches_regularization())); - regularizer_->update( - vecs, config, optimizer_->getLearningRate(), base, timer_ + 1); -} - -ParameterOptimizer::TraverseCallback -OptimizerWithRegularizerEveryNumBatches::startCatchUpWith() const { - TraverseCallbackVec callbacks; - - if (auto callback = optimizer_->startCatchUpWith()) { - callbacks.emplace_back(callback); - } - - if (baseTimer_ < timer_) { - callbacks.emplace_back( - [this](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { this->catchUpWith(vecs, config, sparseId); }); - } - - return composeCallbacks(callbacks); -} - -void OptimizerWithRegularizerEveryNumBatches::catchUpWith( - const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - int32_t base = timer_ - timer_ % 
config.num_batches_regularization(); - regularizer_->update(vecs, - config, - optimizer_->getLearningRate(), - std::max(base, baseTimer_), - timer_); -} - -void OptimizerWithRegularizerSparse::init(size_t numRows, - const ParameterConfig* config) { - OptimizerWithRegularizer::init(numRows, config); - t0Vec_.resize(numRows); - - timer_ = 0; - t0Vec_.assign(t0Vec_.size(), 0); -} - -void OptimizerWithRegularizerSparse::update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - optimizer_->update(vecs, config, sparseId); - // para W(t0) -> W(t+1) - CHECK_LT(sparseId, t0Vec_.size()); - regularizer_->update(vecs, - config, - optimizer_->getLearningRate(), - t0Vec_[sparseId], - timer_ + 1); - t0Vec_[sparseId] = timer_ + 1; -} - -ParameterOptimizer::TraverseCallback -OptimizerWithRegularizerSparse::startCatchUpWith() const { - TraverseCallbackVec callbacks; - - if (auto callback = optimizer_->startCatchUpWith()) { - callbacks.emplace_back(callback); - } - - if (timer_ > 0) { - callbacks.emplace_back( - [this](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { this->catchUpWith(vecs, config, sparseId); }); - } - - return composeCallbacks(callbacks); -} - -void OptimizerWithRegularizerSparse::catchUpWith(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - // para W(t0) -> W(t+1) - CHECK_LT(sparseId, t0Vec_.size()); - regularizer_->update( - vecs, config, optimizer_->getLearningRate(), t0Vec_[sparseId], timer_); -} - -// factory method to create instance of OptimizerWithRegularizer -ParameterOptimizer* OptimizerWithRegularizer::create( - const OptimizationConfig& optConfig, - const ParameterConfig& paraConfig, - bool isParameterSparse, - bool inPserver) { - ParameterOptimizer* optimizer = - ParameterOptimizer::create(optConfig, inPserver); - if ((optConfig.gradient_clipping_threshold() > 0.0f || - paraConfig.gradient_clipping_threshold() > 0.0f) && - !dynamic_cast(optimizer)) { - 
optimizer = new OptimizerWithGradientClipping(optConfig, optimizer); - } - Regularizer* regularizer = - Regularizer::get(optimizer->getParameterTypes(), paraConfig); - if (!regularizer) { - return optimizer; - } - - if (paraConfig.num_batches_regularization() > 1) { - if (optConfig.num_batches_per_send_parameter() > 1) { - CHECK_EQ(optConfig.num_batches_per_send_parameter() % - paraConfig.num_batches_regularization(), - 0) - << "regularization should be apply in sending batch"; - } - CHECK(paraConfig.momentum() == 0.0f) << "Parameter cannot support momentum " - "if num_batches_regularization set"; - - if (optConfig.center_parameter_update_method() == "average" && - optConfig.num_batches_per_send_parameter() == - paraConfig.num_batches_regularization()) { - LOG(INFO) << "decay in pserver and no decay in trainer"; - if (inPserver) { // decay in pserver - optimizer->setNoDecay(); - return new OptimizerWithRegularizer(optConfig, optimizer, regularizer); - } - // no decay in trainer - optimizer->setNoDecay(); - return optimizer; - } - if (dynamic_cast(optimizer)) { - return optimizer; // normal average, no decay in pserver - } - // normal - optimizer->setNoDecay(); - return new OptimizerWithRegularizerEveryNumBatches( - optConfig, optimizer, regularizer); - } - if (isParameterSparse) { - CHECK(paraConfig.momentum() == 0.0f) - << "Parameter cannot support momentum if it's sparse."; - optimizer->setNoDecay(); - return new OptimizerWithRegularizerSparse( - optConfig, optimizer, regularizer); - } - // dense - if (paraConfig.decay_rate_l1() == 0.0f || - dynamic_cast(optimizer)) { - return optimizer; - } - CHECK(paraConfig.momentum() == 0.0f) - << "Parameter cannot support momentum if it use L1 decay."; - optimizer->setNoDecay(); - return new OptimizerWithRegularizer(optConfig, optimizer, regularizer); -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/OptimizerWithRegularizer.h b/paddle/legacy/parameter/OptimizerWithRegularizer.h deleted file mode 100644 index 
bd29b3966324b2e206cfe56cc15678539d1e870e..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/OptimizerWithRegularizer.h +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "FirstOrderOptimizer.h" - -namespace paddle { - -// add regularizer for objective function to do optimization -class OptimizerWithRegularizer : public ParameterOptimizer { - public: - static ParameterOptimizer* create(const OptimizationConfig& optConfig, - const ParameterConfig& paraConfig, - bool isParameterSparse, - bool inPserver); - - OptimizerWithRegularizer(const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer, - Regularizer* regularizer) - : ParameterOptimizer(optConfig), - optimizer_(optimizer), - regularizer_(regularizer) { - parameterTypes_ = optimizer_->getParameterTypes(); - } - - virtual void init(size_t numRows, const ParameterConfig* config) { - optimizer_->init(numRows, config); - } - - virtual void startPass() { - optimizer_->startPass(); - timer_ = 0; - } - - virtual void finishPass() { optimizer_->finishPass(); } - - virtual void startBatch(int64_t numSamplesProcessed) { - optimizer_->startBatch(numSamplesProcessed); - } - - virtual void finishBatch() { - optimizer_->finishBatch(); - ++timer_; - } - - virtual TraverseCallback needSpecialTraversal( - const ParameterConfig& config) const { - return optimizer_->needSpecialTraversal(config); - } - 
- virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - optimizer_->update(vecs, config, sparseId); - regularizer_->update(vecs, config, optimizer_->getLearningRate(), 0, 1); - } - - protected: - std::unique_ptr optimizer_; - Regularizer* regularizer_; - - /** - * counting batches, clear after catch up with - * t(timer_) is current time, - * t0(t0Vec_) are last occur time of i rows. - * if one block is update by multi threads, - * caller should hash sparse ids to avoid write conflict in t0Vec_. - */ - int timer_; -}; - -// Regularized Loss function for every num of batches -class OptimizerWithRegularizerEveryNumBatches - : public OptimizerWithRegularizer { - public: - OptimizerWithRegularizerEveryNumBatches(const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer, - Regularizer* regularizer) - : OptimizerWithRegularizer(optConfig, optimizer, regularizer) {} - - virtual void startPass() { - OptimizerWithRegularizer::startPass(); - baseTimer_ = 0; - } - - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const { - optimizer_->update(vecs, config, sparseId); - } - - virtual TraverseCallback needSpecialTraversal( - const ParameterConfig& config) const; - void doTraversal(const VectorPtr vecs[], const ParameterConfig& config) const; - - void catchUpWith(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - - virtual TraverseCallback startCatchUpWith() const; - virtual void finishCatchUpWith() { baseTimer_ = timer_; } - - protected: - bool isRegularizationBatch(const ParameterConfig& config) const { - return ((timer_ + 1) % config.num_batches_regularization() == 0); - } - - /** - * recored the timer_ value while catchUpWith called. 
- */ - int baseTimer_; -}; - -// Regularized Loss function with Sparse support -class OptimizerWithRegularizerSparse : public OptimizerWithRegularizer { - public: - OptimizerWithRegularizerSparse(const OptimizationConfig& optConfig, - ParameterOptimizer* optimizer, - Regularizer* regularizer) - : OptimizerWithRegularizer(optConfig, optimizer, regularizer) {} - - virtual void init(size_t numRows, const ParameterConfig* config); - - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - void catchUpWith(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) const; - virtual TraverseCallback startCatchUpWith() const; - virtual void finishCatchUpWith() { - timer_ = 0; - t0Vec_.assign(t0Vec_.size(), 0); - } - - protected: - /** - * t0Vec_ are last occur time of i rows - * if one block is update by multi threads, - * caller should hash sparse ids to avoid write conflict in t0Vec_. - */ - mutable std::vector t0Vec_; -}; - -} // namespace paddle diff --git a/paddle/legacy/parameter/Parameter.cpp b/paddle/legacy/parameter/Parameter.cpp deleted file mode 100644 index 666d808f0c13c5c828c51b2a36ee9d05f7f78c13..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/Parameter.cpp +++ /dev/null @@ -1,425 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Parameter.h" -#include -#include -#include "AverageOptimizer.h" -#include "FirstOrderOptimizer.h" -#include "OptimizerFunctions.h" -#include "OptimizerWithRegularizer.h" -#include "ParameterUpdateFunctions.h" -#include "ThreadLocalBuffer.h" -#include "hl_gpu.h" -#include "paddle/legacy/math/CpuSparseMatrix.h" -#include "paddle/legacy/math/MathUtils.h" -#include "paddle/legacy/math/SparseRowMatrix.h" -#include "paddle/legacy/utils/Logging.h" - -DEFINE_int32(enable_grad_share, - (100 * 1024 * 1024), - "threshold for enable gradient parameter share for batch " - "multi-cpu training"); -DEFINE_int32( - grad_share_block_num, - 64, - "block number of gradient parameter share for batch multi-cpu training"); - -namespace paddle { - -const std::string Parameter::kMissParameterFail = "fail"; -const std::string Parameter::kMissParameterRand = "rand"; -const std::string Parameter::kMissParameterZero = "zero"; - -Parameter::Parameter(const ParameterConfig& config, bool useGpu, bool doInit) - : config_(config), - useGpu_(useGpu), - deviceId_(-1), - sharedCount_(0), - updateCounter_(0), - updated_(false), - headerFormat_(PARAM_FORMAT_ORIGINAL) { - setID(-1); /* capture uninitialized id */ - if (useGpu_ && FLAGS_parallel_nn) { - /* gpu environment is specified by device property */ - deviceId_ = config_.device(); - if (deviceId_ < 0) { - useGpu_ = false; - } - } - - if (doInit) { - initialize(); - } - - for (int i = 0; i < config.update_hooks_size(); ++i) { - this->updaterHooks_.push_back(IParameterUpdaterHook::create(config, i)); - } -} - -void Parameter::initialize() { - SetDevice device(deviceId_); - - bufs_[PARAMETER_VALUE] = - Vector::createParallelVector(config_.size(), useGpu_); - bufs_[PARAMETER_VALUE]->zeroMem(); - - if (config_.is_sparse()) { - enableSparseParameter(); - } - - if (!isStatic()) { - bufs_[PARAMETER_GRADIENT] = - Vector::createParallelVector(config_.size(), useGpu_); - bufs_[PARAMETER_MOMENTUM] = - 
Vector::createParallelVector(config_.size(), useGpu_); - - bufs_[PARAMETER_GRADIENT]->zeroMem(); - bufs_[PARAMETER_MOMENTUM]->zeroMem(); - } -} - -void Parameter::randomize(const VectorPtr& value, - const ParameterConfig& config) { - if (PARAMETER_INIT_UNIFORM == config.initial_strategy()) { - // initialize the parameter as uniform distribution - real initial_min = config.initial_mean() - config.initial_std(); - real initial_max = config.initial_mean() + config.initial_std(); - value->uniform(initial_min, initial_max); - VLOG(1) << config.name() << ": initial_min=" << initial_min - << ", initial_max=" << initial_max; - } else if (PARAMETER_INIT_NORMAL == config.initial_strategy()) { - /* Initialize the parameters randomly */ - value->randnorm(config.initial_mean(), config.initial_std()); - VLOG(1) << config.name() << ": initial_mean=" << config.initial_mean() - << ", initial_std=" << config.initial_std(); - } else { - LOG(FATAL) << "not supported initial_strategy: " - << config.initial_strategy(); - } -} - -void Parameter::randomize() { - if (!bufs_[PARAMETER_VALUE]) return; - SetDevice device(deviceId_); - Parameter::randomize(bufs_[PARAMETER_VALUE], config_); - - if (config_.is_sparse()) { - if (format_ == SPARSE_CSC) { - sparseRand(intBufs_[PARAMETER_COLS]->getData(), - intBufs_[PARAMETER_ROWS]->getData(), - config_.size(), - config_.dims(1) + 1, - config_.dims(0), - useGpu_); - } else { - sparseRand(intBufs_[PARAMETER_ROWS]->getData(), - intBufs_[PARAMETER_COLS]->getData(), - config_.size(), - config_.dims(0) + 1, - config_.dims(1), - useGpu_); - } - } - setValueUpdated(); -} - -void Parameter::zeroMem() { - if (!bufs_[PARAMETER_VALUE]) return; - bufs_[PARAMETER_VALUE]->zeroMem(); - setValueUpdated(); - LOG(INFO) << getName() << " set to 0"; -} - -bool Parameter::isGradShared(size_t* blockNum) { - if (!useGpu_ && !isStatic() && FLAGS_enable_grad_share > 0 && - !isGradSparseUpdate() && - this->getSize() > (size_t)FLAGS_enable_grad_share) { - if (blockNum) { - 
*blockNum = (size_t)FLAGS_grad_share_block_num; - } - return true; - } - return false; -} - -bool Parameter::isValueShared() { - return !useGpu_ && config_.is_shared() && FLAGS_trainer_count > 1; -} - -bool Parameter::isGradSparseUpdate() const { - return !useGpu_ && !isStatic() && - (config_.sparse_update() || config_.sparse_remote_update()); -} - -void Parameter::setMat(ParameterType pType, int matType) { - CHECK(!mats_[pType]); - - if (config_.dims_size() == 0 && matType == MAT_NORMAL) { - return; - } - - CHECK_EQ((size_t)config_.dims_size(), 2LU); - size_t height = config_.dims(0); - size_t width = config_.dims(1); - if (matType == MAT_NORMAL) { - if (!config_.is_sparse()) { - CHECK_EQ(height * width, bufs_[pType]->getSize()); - mats_[pType] = - Matrix::create(bufs_[pType]->getMemoryHandle(), height, width); - } else { - size_t size = bufs_[pType]->getSize(); - CHECK_GE(height * width, size); - if (format_ == SPARSE_CSR) { - CHECK_EQ(height + 1, intBufs_[PARAMETER_ROWS]->getSize()); - CHECK_EQ(size, intBufs_[PARAMETER_COLS]->getSize()); - } else { - CHECK_EQ(width + 1, intBufs_[PARAMETER_COLS]->getSize()); - CHECK_EQ(size, intBufs_[PARAMETER_ROWS]->getSize()); - } - mats_[pType] = - Matrix::createSparseMatrix(bufs_[pType]->getData(), - intBufs_[PARAMETER_ROWS]->getData(), - intBufs_[PARAMETER_COLS]->getData(), - height, - width, - bufs_[pType]->getSize(), - FLOAT_VALUE, - format_, - false, - useGpu_); - } - } -#ifndef PADDLE_MOBILE_INFERENCE - // NOLINTNEXTLINE - else if (matType == MAT_NORMAL_SHARED) { - CHECK_EQ(height * width, bufs_[pType]->getSize()); - size_t blockNum = 0; - CHECK(isGradShared(&blockNum)); - mats_[pType] = std::make_shared( - blockNum, - std::dynamic_pointer_cast( - bufs_[pType]->getMemoryHandle()), - height, - width); - } else if (matType == MAT_VALUE_SHARED) { - CHECK_EQ(height * width, bufs_[pType]->getSize()); - mats_[pType] = std::make_shared( - std::dynamic_pointer_cast( - bufs_[pType]->getMemoryHandle()), - height, - width); - } 
else if (matType == MAT_SPARSE_ROW_IDS) { - CHECK_EQ(height * width, bufs_[pType]->getSize()); - mats_[pType] = std::make_shared( - std::dynamic_pointer_cast( - bufs_[pType]->getMemoryHandle()), - height, - width); - } else if (matType == MAT_SPARSE_ROW) { - auto valueMat = - std::dynamic_pointer_cast(mats_[PARAMETER_VALUE]); - SparseRowCpuMatrix::IndexDictPtr indexDict(nullptr); - if (pType != PARAMETER_VALUE) { - CHECK(valueMat) << "The matrix for PARAMETER_VALUE must be set " - << " and its type must be MAT_SPARSE_ROW," - << " MAT_SPARSE_ROW_PREFETCH or MAT_CACHE_ROW"; - indexDict = valueMat->getIndexDictHandle(); - } - auto mat = - std::make_shared(nullptr, - height, - width, - // grad share index with value - indexDict); - mats_[pType] = mat; - } else if (matType == MAT_CACHE_ROW) { - CHECK(isGradSparseUpdate()); - auto mat = std::make_shared(height, width); - mats_[pType] = mat; - } else if (matType == MAT_SPARSE_ROW_PREFETCH_FULL_SIZE || - matType == MAT_SPARSE_ROW_PREFETCH) { - auto mat = std::make_shared( - bufs_[pType] ? 
std::dynamic_pointer_cast( - bufs_[pType]->getMemoryHandle()) - : nullptr, - height, - width, - nullptr, // indexDictHandle - getGlobalSyncThreadPool()); - mats_[pType] = mat; - } else if (matType == MAT_SPARSE_ROW_AUTO_GROW) { - CHECK(isGradSparseUpdate()); - mats_[pType] = std::make_shared(height, width); - } -#endif - // NOLINTNEXTLINE - else { - LOG(FATAL) << "Unsupported mat type" << matType; - } -} - -void Parameter::incUpdate(const UpdateCallback& callback) { - // Static parameter is fixed, and does not need to be updated - if (isStatic()) { - return; - } - - ++updateCounter_; - if (isUpdatable()) { - if (callback) callback(this); - clearUpdate(); - } -} - -bool Parameter::save(const std::string& filename) const { - std::ofstream fs(filename, std::ios_base::binary); - CHECK(fs) << "Fail to open " << filename; - return save(fs); -} - -bool Parameter::save(std::ostream& s) const { - CpuVector vec(*bufs_[PARAMETER_VALUE].get()); - Header header; - header.format = headerFormat_; - header.valueSize = sizeof(real); - header.size = getSize(); - - CHECK_EQ(header.size, vec.getSize()); - - CHECK(s.write(reinterpret_cast(&header), sizeof(header))) - << "Fail to write parameter " << getName(); - - CHECK(s.write(reinterpret_cast(vec.getData()), - header.size * sizeof(real))) - << "Fail to write parameter " << getName(); - if (config_.is_sparse()) { - CpuIVector rows(*intBufs_[PARAMETER_ROWS].get()); - CpuIVector cols(*intBufs_[PARAMETER_COLS].get()); - CHECK(s.write(reinterpret_cast(rows.getData()), - rows.getSize() * sizeof(int))) - << "Fail to write parameter " << getName(); - CHECK(s.write(reinterpret_cast(cols.getData()), - cols.getSize() * sizeof(int))) - << "Fail to write parameter " << getName(); - } - - return true; -} - -/** - * Load parameter value from a file - */ -bool Parameter::load(const std::string& filename) { - std::ifstream fs(filename, std::ios_base::binary); - if (!fs) { - LOG(INFO) << "missing parameters [" << filename << "] while loading model."; 
- if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { - LOG(FATAL) << getName() << " missing, not allowed."; - return false; - } - if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to random."; - randomize(); - return true; - } - if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to zero."; - zeroMem(); - return true; - } - LOG(FATAL) << "unsupported load_missing_parameter_strategy: " - << FLAGS_load_missing_parameter_strategy; - return false; - } - return load(fs); -} - -bool Parameter::load(std::istream& s) { - CpuVector vec(*bufs_[PARAMETER_VALUE].get()); - Header header; - CHECK(s.read(reinterpret_cast(&header), sizeof(header))) - << "Fail to read parameter " << getName(); - CHECK(isHeaderFormatSupported(header.format)) << "Incorrect format version: " - << header.format; - headerFormat_ = header.format; - CHECK_EQ(header.size, getSize()) - << "The size (" << header.size << ") in the file does not match the size " - << "(" << getSize() << ") of the parameter: " << getName(); - CHECK_EQ(header.valueSize, sizeof(real)) - << "Unsupported valueSize " << header.valueSize << " at: " << getName(); - CHECK(s.read(reinterpret_cast(vec.getData()), - header.size * sizeof(real))); - - auto& tmp = *bufs_[PARAMETER_VALUE].get(); - if (typeid(tmp) == typeid(GpuVector)) { - bufs_[PARAMETER_VALUE]->copyFrom(vec); - } - - if (config_.is_sparse() && config_.need_compact()) { - // load from dense parameter with many zero - CHECK_EQ(config_.dims_size(), 2); - auto height = config_.dims(0); - auto width = config_.dims(1); - auto mat = Matrix::create(vec.getData(), height, width); - CpuSparseMatrix sparseMat(height, - width, - 0, - FLOAT_VALUE, - format_, - /*trans*/ false); - sparseMat.copyFrom(*mat, HPPL_STREAM_DEFAULT); - auto nnz = sparseMat.getElementCnt(); - size_t rowSize = (format_ == SPARSE_CSR) ? 
height + 1 : nnz; - size_t colSize = (format_ == SPARSE_CSR) ? nnz : width + 1; - - intBufs_[PARAMETER_ROWS]->copyFrom(sparseMat.getRows(), rowSize); - intBufs_[PARAMETER_COLS]->copyFrom(sparseMat.getCols(), colSize); - bufs_[PARAMETER_VALUE]->resize(nnz); // for setMat check - bufs_[PARAMETER_VALUE]->copyFrom(sparseMat.getValue(), nnz); - config_.set_size(nnz); - LOG(INFO) << "compact nnz=" << (1. * nnz / (height * width)) - << " name=" << config_.name(); - } else if (config_.is_sparse()) { - CpuIVector rows(*intBufs_[PARAMETER_ROWS].get()); - CpuIVector cols(*intBufs_[PARAMETER_COLS].get()); - size_t rowSize, colSize; - CHECK_EQ(config_.dims_size(), 2); - if (format_ == SPARSE_CSR) { - rowSize = config_.dims(0) + 1; - colSize = config_.size(); - } else { - rowSize = config_.size(); - colSize = config_.dims(1) + 1; - } - CHECK( - s.read(reinterpret_cast(rows.getData()), rowSize * sizeof(int))); - CHECK( - s.read(reinterpret_cast(cols.getData()), colSize * sizeof(int))); - auto& paramRows = *intBufs_[PARAMETER_ROWS].get(); - if (typeid(paramRows) == typeid(GpuIVector)) { - intBufs_[PARAMETER_ROWS]->copyFrom(rows); - } - auto& paramCols = *intBufs_[PARAMETER_COLS].get(); - if (typeid(paramCols) == typeid(GpuIVector)) { - intBufs_[PARAMETER_COLS]->copyFrom(cols); - } - } - - setValueUpdated(); - - return true; -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/Parameter.h b/paddle/legacy/parameter/Parameter.h deleted file mode 100644 index 43b567dad045ad786b1b3f2d3614072f58310527..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/Parameter.h +++ /dev/null @@ -1,380 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include -#include -#include - -#include "ParameterConfig.pb.h" -#include "TrainerConfig.pb.h" - -#include "ParameterUpdaterHook.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/GlobalConstants.h" -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/ThreadLocal.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -typedef enum { - /// The paddle original basic format - PARAM_FORMAT_ORIGINAL = 0, - - /// See mkldnn_memory_format_t in - /// https://github.com/01org/mkl-dnn/blob/master/include/mkldnn_types.h - /// for a detailed description. - /// 2D weights tensor in the format (output channels, input channels). 
- PARAM_FORMAT_MKLDNN_OI, - - /// The total format items numbers - PARAM_FORMAT_ITEMS, -} PARAM_FORMAT; - -class SparsePrefetchRowCpuMatrix; - -class Parameter; -typedef std::function UpdateCallback; -typedef std::function ParamInitCallback; - -class Parameter; -typedef std::shared_ptr ParameterPtr; - -class Parameter { - public: - Parameter(const ParameterConfig& config, bool useGpu, bool doInit = true); - const std::string& getName() const { return config_.name(); } - - size_t getSize() const { return config_.size(); } - - bool isFullSize() const { - if (bufs_[PARAMETER_VALUE]) { - return this->getSize() == bufs_[PARAMETER_VALUE]->getSize(); - } - return false; - } - - inline bool useGpu() const { return useGpu_; } - - int getDeviceId() const { return deviceId_; } - - void setDevice(int deviceId) { deviceId_ = deviceId; } - - /// The id ranges from 0 to the_total_number_of_parameters - 1 - size_t getID() const { return config_.para_id(); } - - /// ID is a implict value created until neural network is built. - void setID(size_t id) { config_.set_para_id(id); } - - bool isStatic() const { return config_.is_static(); } - - enum MatType { - MAT_NORMAL, - /// both value and grad are shared - MAT_NORMAL_SHARED, - - /// Now used in BatchNorm in CPU mode - MAT_VALUE_SHARED, - - /// sparse matrix, which has full size parameter - MAT_SPARSE_ROW_IDS, - /// sparse matrix, parameter size scale by sparse rates. 
- MAT_SPARSE_ROW_AUTO_GROW, - MAT_CACHE_ROW, - MAT_SPARSE_ROW, - - /// sparse matrix for prefetching parameter from pserver - MAT_SPARSE_ROW_PREFETCH, - /// same as above, but parameter has full size for saving parameter in local - MAT_SPARSE_ROW_PREFETCH_FULL_SIZE, - }; - - void enableSparseParameter() { - if (config_.is_sparse()) { - if (config_.format() == "csr") { - size_t height = config_.dims(0); - size_t nnz = config_.size(); - enableIntType(PARAMETER_ROWS, height + 1); - enableIntType(PARAMETER_COLS, nnz); - format_ = SPARSE_CSR; - } else { - size_t width = config_.dims(1); - size_t nnz = config_.size(); - enableIntType(PARAMETER_COLS, width + 1); - enableIntType(PARAMETER_ROWS, nnz); - format_ = SPARSE_CSC; - } - } - } - - /// allocate buffer for the give type - void enableType(ParameterType type, MatType matType = MAT_NORMAL) { - if (bufs_[type] || mats_[type]) { - return; - } - SetDevice device(deviceId_); - if (config_.dims_size() == 2) { - if (matType == MAT_NORMAL || matType == MAT_NORMAL_SHARED || - matType == MAT_SPARSE_ROW_PREFETCH_FULL_SIZE || - matType == MAT_VALUE_SHARED || matType == MAT_SPARSE_ROW_IDS) { - bufs_[type] = Vector::createParallelVector(config_.size(), useGpu_); - bufs_[type]->zeroMem(); - } else { - CHECK(isGradSparseUpdate()); - } - if (config_.is_sparse() && type == PARAMETER_VALUE) { - enableSparseParameter(); - } - setMat(type, matType); - } else { - bufs_[type] = Vector::createParallelVector(config_.size(), useGpu_); - bufs_[type]->zeroMem(); - } - } - - void enableBufType(ParameterType type) { - if (bufs_[type]) return; - bufs_[type] = Vector::createParallelVector(config_.size(), useGpu_); - bufs_[type]->zeroMem(); - } - - void enableIntType(ParameterType type, size_t intStoreSize = 0) { - if (!intBufs_[type]) { - SetDevice device(deviceId_); - size_t size = intStoreSize ? 
intStoreSize : config_.size(); - intBufs_[type] = IVector::create(size, useGpu_); - intBufs_[type]->zeroMem(); - } - } - - void enableSharedType(ParameterType type, - VectorPtr vec, - MatrixPtr mat = nullptr) { - if (!bufs_[type] && !mats_[type]) { - bufs_[type] = vec; - mats_[type] = mat; - } - } - - /// for batchGradientMachine: blockNum is number of partitions of the matrix. - bool isGradShared(size_t* blockNum = NULL); - - bool isValueShared(); - - // for AsgdSparseGradientMachine & SgdSparseGradientMachine: - // and MultiGradientMachine - bool isGradSparseUpdate() const; - - bool isSparseRemoteUpdate() const { - return config_.sparse_remote_update() && !useGpu(); - } - - const ParameterConfig& getConfig() const { return config_; } - - ParameterConfig& getConfig() { return config_; } - - bool hasType(ParameterType pType) const { - return bufs_[pType] || mats_[pType]; - } - - const VectorPtr& getBuf(ParameterType pType) const { - return this->bufs_[pType]; - } - - const VectorPtr* getBufs() const { return bufs_; } - - const MatrixPtr& getMat(ParameterType pType) const { return mats_[pType]; } - - void setValueUpdated() { updated_ = true; } - - void clearValueUpdated() { updated_ = false; } - - bool isValueUpdated() const { return updated_; } - - /** - * Save parameter value to a file - */ - bool save(const std::string& filename) const; - - /** - * Save parameter to ostream - */ - bool save(std::ostream& s) const; - - /** - * Load parameter value from a file - */ - bool load(const std::string& filename); - - /** - * Load parameter from istream - */ - bool load(std::istream& is); - - void incShared() { sharedCount_++; } - - /** - * After one of the parameter's gradient is merged - * You should call this function to do some additional processing, - */ - void incUpdate(const UpdateCallback& callbacks = NULL); - - void clearGradient() { - auto& mat = getMat(PARAMETER_GRADIENT); - if (mat) { - // zeroMem will also clear rows for SparseRowCpuMatrix - mat->zeroMem(); - 
} else { - auto& gradBuf = getBuf(PARAMETER_GRADIENT); - if (gradBuf) gradBuf->zeroMem(); - } - } - - void initialize(); - - /** - * Initialize the value according to config_: initial_mean, - * initial_std and initial_strategy. - */ - void randomize(); - static void randomize(const VectorPtr& value, const ParameterConfig& config); - - /// Initialize the value to 0 - void zeroMem(); - - /// file header structure - struct Header { - int32_t format; // = PARAM_FORMAT - uint32_t valueSize; // = sizeof(real) - uint64_t size; // = getSize() - }; - - /** - * @brief Is the header format supported. - */ - static bool isHeaderFormatSupported(int32_t fmt) { - return fmt < PARAM_FORMAT_ITEMS; - } - - /** - * @brief Get the format in header. - */ - int getHeaderFormat() { return headerFormat_; } - - /** - * @brief Set the format in header. - */ - void setHeaderFormat(int32_t fmt) { - CHECK(isHeaderFormatSupported(fmt)) << "Unsupported format version: " - << fmt; - headerFormat_ = fmt; - } - - /** - * @brief Parameter Update Hook. - * - * The parameter's update hook before ParameterUpdater::updateImpl - * It could modify gradient/momentum/etc here. Such as drop some gradient, - * etc. - */ - void updateHook() { - for (auto& hook : updaterHooks_) { - hook->update(this); - } - } - - /** - * @brief Initialize all updater hook. - * - * This method should be invoked in ParameterUpdater::init() only. - */ - void initHook() { - for (auto& hook : updaterHooks_) { - hook->init(this); - } - } - - protected: - /** - * @brief create matrix to matType. - * - * used by gradient machine which needs specify matrix type, - * instead of creating in weights.cpp. - * - * @note pType should be enabled already. 
- */ - void setMat(ParameterType pType, int matType); - - bool isUpdatable() { return (updateCounter_ == sharedCount_); } - - void clearUpdate() { updateCounter_ = 0; } - - protected: - ParameterConfig config_; - - bool useGpu_; - - int deviceId_; - - /** - * @brief bufs_ stores parameter value and gradient. - * - * Layer should use bufs_[PARAMETER_VALUE] to form weight matrix for - * calculation and stores gradient to bufs_[PARAMETER_GRADIENT]. - */ - VectorPtr bufs_[NUM_PARAMETER_TYPES]; - - /** - * @brief Weight matrix for bufs_. - * - * It's helpfull when parameter shared by multi-layers. - * Caller should check, if mats exist, do not create it again. - */ - MatrixPtr mats_[NUM_PARAMETER_TYPES]; - - /// Int vectors, used in some User defined parameter types - IVectorPtr intBufs_[NUM_PARAMETER_TYPES]; - - int sharedCount_; - int updateCounter_; - - bool updated_; - SparseFormat format_; - - /// The header format for saving or loading param - int32_t headerFormat_; - - std::vector> updaterHooks_; - - public: - void setSharedCount(int cnt) { sharedCount_ = cnt; } - int getSharedCount() { return sharedCount_; } - - bool isSparse() { return config_.is_sparse(); } - SparseFormat getFormat() { return format_; } - - static const std::string kMissParameterFail; - static const std::string kMissParameterRand; - static const std::string kMissParameterZero; -}; - -typedef std::map ParameterMap; - -} // namespace paddle diff --git a/paddle/legacy/parameter/ParameterOptimizer.cpp b/paddle/legacy/parameter/ParameterOptimizer.cpp deleted file mode 100644 index b9dffa5afb4c99314869c7ed547ea9711d718b6e..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ParameterOptimizer.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/utils/Logging.h" - -#include - -#include "AverageOptimizer.h" -#include "FirstOrderOptimizer.h" -#include "OptimizerFunctions.h" -#include "OptimizerWithRegularizer.h" -#include "ParameterOptimizer.h" -#include "hl_gpu.h" - -namespace paddle { - -ParameterOptimizer* ParameterOptimizer::create( - const OptimizationConfig& optConfig, bool inPserver) { - if (inPserver && optConfig.num_batches_per_send_parameter() > 1) { - return new AddOptimizer(optConfig); - } - if (optConfig.learning_method() == "momentum") { - return new SgdOptimizer(optConfig); - } - if (optConfig.learning_method() == "torch_momentum") { - return new SgdOptimizer(optConfig); - } - if (optConfig.learning_method() == "adagrad") { - return new AdagradParameterOptimizer(optConfig); - } - if (optConfig.learning_method() == "adadelta") { - return new AdaDeltaParameterOptimizer(optConfig); - } - if (optConfig.learning_method() == "rmsprop") { - return new RMSPropParameterOptimizer(optConfig); - } - if (optConfig.learning_method() == "decayed_adagrad") { - return new DecayedAdagradParameterOptimizer(optConfig); - } - if (optConfig.learning_method() == "adam") { - return new AdamParameterOptimizer(optConfig); - } - if (optConfig.learning_method() == "adamax") { - return new AdamaxParameterOptimizer(optConfig); - } - if (optConfig.learning_method() == "sparse_momentum") { - return new SparseMomentumParameterOptimizer(optConfig); - } - return nullptr; -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/ParameterOptimizer.h b/paddle/legacy/parameter/ParameterOptimizer.h 
deleted file mode 100644 index 019afa1358ae255fd096e84e5eb1d7b0b9d6859f..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ParameterOptimizer.h +++ /dev/null @@ -1,211 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "LearningRateScheduler.h" -#include "Parameter.h" - -namespace paddle { - -/** - * Some member functions are set to const for two reasons: - * - * 1. For sparse update thread safe: update(), traverse callback(const this) - * may be called many times, each time one row, and these function - * can be called parallelly by multi worker, to speed up large block. - * - * 2. For predicate functions, needSpecialTraversal(), startCatchUpWith() - * may be called many times, should be no state change between calls. 
- */ -class ParameterOptimizer { - public: - typedef std::function - TraverseCallback; - - public: - explicit ParameterOptimizer(const OptimizationConfig& optConfig) - : applyDecay_(true), - optConfig_(optConfig), - parameterTypes_{PARAMETER_VALUE, PARAMETER_GRADIENT}, - learningRate_(optConfig.learning_rate()), - learningRateScheduler_(LearningRateScheduler::create(optConfig)), - pass_(0), - firstTime_(true) {} - - real calcLearningRate(int64_t numSamplesProcessed, int64_t pass) { - return learningRateScheduler_->calcLearningRate(numSamplesProcessed, pass); - } - - virtual ~ParameterOptimizer() {} - - /** - * For sparse update, optimizer can maintain numRows of timer(t0). - * Some sparse optimizer depends on parameter config in functions - * such as startBatch(). Optimizer can get it here. But notice that, - * not all callers can pass config here, so the optimizer should check - * config passed in is not null ptr. - */ - virtual void init(size_t numRows, const ParameterConfig* config) {} - - virtual void startPass() {} - virtual void finishPass() { ++pass_; } - - /// called by Trainer before forward() of a batch. - virtual void startBatch(int64_t numSamplesProcessed) { - (void)numSamplesProcessed; - } - - /** - * following hooks useful for sparse update, - * because the traversal in block costs. - * called by Trainer after update and before finishBatch - * e.g. Trainer call like this: - * - * @code - * startBatch(); - * if (dense) { - * update(blockVec); - * } else {//sparse - * for (row : rows_in_block) {update(rowVec)} - * } - * auto callback = needSpecialTraversal(); - * if (callback) { - * // do traverse, maybe multi-thread - * if (dense) { - * callback(); - * } else {//sparse - * for (row : all_rows_in_block) {callback();} - * } - * } - * finishBatch(); - * @endcode - * - * @return callback if need traverse, - * else return nullptr. - * It should be no state change. 
- */ - virtual TraverseCallback needSpecialTraversal( - const ParameterConfig& config) const { - return nullptr; - } - - /// called by Trainer after backward() of a batch - virtual void finishBatch() {} - - /** - * between startBatch() and finishBatch(), update() will be called - * by the trainer multiple times, each time for updating one Parameter - * with its gradient in PARAMETER_GRADIENT. sparseId is row id, - * when sparseId set, update is sparse, each time one row. - */ - virtual void update(const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId = -1LU) const = 0; - - /** - * following hooks catch up with current time for sparse update, - * In the beginning, call startCatchUpWith() and check return. - * In the end, call finishCatchUpWith() to finish state. - * callback do the actual works, can call many times for sparse data. - * e.g. Trainer call like this: - * - * @code - * auto callback = startCatchUpWith(); - * if (callback) { - * // do catch up with, maybe multi-thread - * if (dense) { - * callback(); - * } else {//sparse - * for (row : rows_in_block) {callback();} - * } - * // finish catch up with, main thread - * finishCatchUpWith(); - * } - * @endcode - * - * @return callback if need catch up with, - * else return nullptr. - * It should be no state change. - */ - virtual TraverseCallback startCatchUpWith() const { return nullptr; } - virtual void finishCatchUpWith() {} - - /** - * following two hooks used by averager, - * apply to final parameter value (PARAMETER_VALUE or PARAMETER_APPLY). - * - * restore() will restore orginal value if it apply to PARAMETER_VALUE. - * Caller must ensure it's catched up with current time before apply. 
- * - * Use returned callback same way as callback returned by - * ParameterOptimizer::needSpecialTraversal() - */ - virtual TraverseCallback apply() { return nullptr; } - virtual TraverseCallback restore() { return nullptr; } - - /// return the parameter types used by this updater - const std::vector& getParameterTypes() const { - return parameterTypes_; - } - - void addParameterType(ParameterType type) { - for (auto t : parameterTypes_) { - if (t == type) return; - } - parameterTypes_.push_back(type); - } - - real getLearningRate() const { return learningRate_; } - - virtual void setNoDecay() { applyDecay_ = false; } - - static ParameterOptimizer* create(const OptimizationConfig& optConfig, - bool inPserver = false); - - protected: - typedef std::vector TraverseCallbackVec; - - static TraverseCallback composeCallbacks( - const TraverseCallbackVec& callbacks) { - if (callbacks.size() > 1LU) { - return [callbacks](const VectorPtr vecs[], - const ParameterConfig& config, - size_t sparseId) { - for (auto callback : callbacks) { - callback(vecs, config, sparseId); - } - }; - } - return (callbacks.size() == 1LU) ? 
callbacks[0] : nullptr; - } - - bool applyDecay_; - const OptimizationConfig& optConfig_; - std::vector parameterTypes_; - - /** - * global learning rate, init value is opt_config.learning_rate, - * sparse regularizer get this value per batch, after StartBatch() called - * so, if lr change in StartBatch, please assign to learningRate_ - */ - real learningRate_; - - std::unique_ptr learningRateScheduler_; - int64_t pass_; // current training pass (starting from 0) - bool firstTime_; -}; - -} // namespace paddle diff --git a/paddle/legacy/parameter/ParameterUpdateFunctions.cpp b/paddle/legacy/parameter/ParameterUpdateFunctions.cpp deleted file mode 100644 index 72c9841acf6d3eb1d28d631e1599a1a403175013..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ParameterUpdateFunctions.cpp +++ /dev/null @@ -1,300 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/Logging.h" -#ifdef __AVX__ -#include -#include -#endif - -#include "ParameterUpdateFunctions.h" - -namespace paddle { - -void sgdUpdateCpu(real learningRate, - real momentum, - real decayRate, - size_t size, - real* value, - const real* grad, - real* momentumVec) { - decayRate *= learningRate; -#ifdef PADDLE_WITH_MKLML -#pragma omp parallel for -#endif - for (size_t i = 0; i < size; ++i) { - momentumVec[i] = momentum * momentumVec[i] - learningRate * grad[i] - - decayRate * value[i]; - value[i] += momentumVec[i]; - } -} - -void sgdUpdate(real learningRate, - real momentum, - real decayRate, - Vector* value, - Vector* grad, - Vector* momentumVec) { - size_t size = value->getSize(); - real* val = value->getData(); - real* grd = grad->getData(); - real* mom = momentumVec->getData(); - if (typeid(*value) == typeid(CpuVector)) { - sgdUpdateCpu(learningRate, momentum, decayRate, size, val, grd, mom); - } else if (typeid(*value) == typeid(GpuVector)) { - value->sgdUpdate(*grad, *momentumVec, learningRate, momentum, decayRate); - } else { - LOG(FATAL) << "Wrong"; - } -} - -void sgdUpdateAvx(float learningRate, - float momentum, - float decayRate, - size_t size, - float* value, - const float* _grad, - float* momentumVec) { -#ifdef __AVX__ - float* grad = const_cast(_grad); // the gradient is not modified - // but when invoke simd functions - // need non-const pointer. 
- size_t gradientAlign = 0; - size_t gradientAlignHeader = (size_t)grad % sizeof(__m256); - CHECK_EQ(gradientAlignHeader, (size_t)momentumVec % sizeof(__m256)) - << "Gradent buffer didn't align with momentum buffer"; - CHECK_EQ(gradientAlignHeader, (size_t)value % sizeof(__m256)) - << "Gradent buffer didn't align with value buffer"; - if (0 != gradientAlignHeader) { - gradientAlignHeader = sizeof(__m256) - gradientAlignHeader; - gradientAlign = gradientAlignHeader / sizeof(real); - - // handle the unalign buffer - for (size_t i = 0; i < gradientAlign; i++) { - momentumVec[i] = momentum * momentumVec[i] - (learningRate * grad[i]) - - (decayRate * learningRate * value[i]); - value[i] += momentumVec[i]; - } - grad += gradientAlign; - momentumVec += gradientAlign; - value += gradientAlign; - } - - constexpr size_t kParallelNum = 8; - constexpr size_t nStepSize = (sizeof(__m256) / sizeof(real)) * kParallelNum; - size_t cntLoop = (size - gradientAlign) / nStepSize; - size_t cntRem = (size - gradientAlign) % nStepSize; - __m256 gradientTmp[kParallelNum]; - __m256 valueTmp[kParallelNum]; - __m256 lr, mom, dr; - std::function loopFun; - - learningRate *= -1; - lr = _mm256_set_ps(learningRate, - learningRate, - learningRate, - learningRate, - learningRate, - learningRate, - learningRate, - learningRate); - - if (0 != momentum) { - mom = _mm256_set_ps(momentum, - momentum, - momentum, - momentum, - momentum, - momentum, - momentum, - momentum); - } - - decayRate *= learningRate; - if (0 != decayRate) { - dr = _mm256_set_ps(decayRate, - decayRate, - decayRate, - decayRate, - decayRate, - decayRate, - decayRate, - decayRate); - } - - auto gradMulFun = [&](void) { - gradientTmp[0] = _mm256_mul_ps(*reinterpret_cast<__m256*>(grad), lr); - gradientTmp[1] = _mm256_mul_ps(*reinterpret_cast<__m256*>(grad + 8), lr); - gradientTmp[2] = _mm256_mul_ps(*reinterpret_cast<__m256*>(grad + 16), lr); - gradientTmp[3] = _mm256_mul_ps(*reinterpret_cast<__m256*>(grad + 24), lr); - gradientTmp[4] = 
_mm256_mul_ps(*reinterpret_cast<__m256*>(grad + 32), lr); - gradientTmp[5] = _mm256_mul_ps(*reinterpret_cast<__m256*>(grad + 40), lr); - gradientTmp[6] = _mm256_mul_ps(*reinterpret_cast<__m256*>(grad + 48), lr); - gradientTmp[7] = _mm256_mul_ps(*reinterpret_cast<__m256*>(grad + 56), lr); - }; - - auto valueMulFun = [&](void) { - valueTmp[0] = _mm256_mul_ps(*reinterpret_cast<__m256*>(value), dr); - valueTmp[1] = _mm256_mul_ps(*reinterpret_cast<__m256*>(value + 8), dr); - valueTmp[2] = _mm256_mul_ps(*reinterpret_cast<__m256*>(value + 16), dr); - valueTmp[3] = _mm256_mul_ps(*reinterpret_cast<__m256*>(value + 24), dr); - valueTmp[4] = _mm256_mul_ps(*reinterpret_cast<__m256*>(value + 32), dr); - valueTmp[5] = _mm256_mul_ps(*reinterpret_cast<__m256*>(value + 40), dr); - valueTmp[6] = _mm256_mul_ps(*reinterpret_cast<__m256*>(value + 48), dr); - valueTmp[7] = _mm256_mul_ps(*reinterpret_cast<__m256*>(value + 56), dr); - }; - - auto momentumMulFun = [&](void) { - *reinterpret_cast<__m256*>(momentumVec) = - _mm256_mul_ps(*reinterpret_cast<__m256*>(momentumVec), mom); - *reinterpret_cast<__m256*>(momentumVec + 8) = - _mm256_mul_ps(*reinterpret_cast<__m256*>(momentumVec + 8), mom); - *reinterpret_cast<__m256*>(momentumVec + 16) = - _mm256_mul_ps(*reinterpret_cast<__m256*>(momentumVec + 16), mom); - *reinterpret_cast<__m256*>(momentumVec + 24) = - _mm256_mul_ps(*reinterpret_cast<__m256*>(momentumVec + 24), mom); - *reinterpret_cast<__m256*>(momentumVec + 32) = - _mm256_mul_ps(*reinterpret_cast<__m256*>(momentumVec + 32), mom); - *reinterpret_cast<__m256*>(momentumVec + 40) = - _mm256_mul_ps(*reinterpret_cast<__m256*>(momentumVec + 40), mom); - *reinterpret_cast<__m256*>(momentumVec + 48) = - _mm256_mul_ps(*reinterpret_cast<__m256*>(momentumVec + 48), mom); - *reinterpret_cast<__m256*>(momentumVec + 56) = - _mm256_mul_ps(*reinterpret_cast<__m256*>(momentumVec + 56), mom); - }; - - auto momentumAddGradFun = [&](void) { - *reinterpret_cast<__m256*>(momentumVec) = - 
_mm256_add_ps(*reinterpret_cast<__m256*>(momentumVec), gradientTmp[0]); - *reinterpret_cast<__m256*>(momentumVec + 8) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 8), gradientTmp[1]); - *reinterpret_cast<__m256*>(momentumVec + 16) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 16), gradientTmp[2]); - *reinterpret_cast<__m256*>(momentumVec + 24) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 24), gradientTmp[3]); - *reinterpret_cast<__m256*>(momentumVec + 32) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 32), gradientTmp[4]); - *reinterpret_cast<__m256*>(momentumVec + 40) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 40), gradientTmp[5]); - *reinterpret_cast<__m256*>(momentumVec + 48) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 48), gradientTmp[6]); - *reinterpret_cast<__m256*>(momentumVec + 56) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 56), gradientTmp[7]); - }; - - auto momentumZeroFun = [&](void) { - *reinterpret_cast<__m256*>(momentumVec) = gradientTmp[0]; - *reinterpret_cast<__m256*>(momentumVec + 8) = gradientTmp[1]; - *reinterpret_cast<__m256*>(momentumVec + 16) = gradientTmp[2]; - *reinterpret_cast<__m256*>(momentumVec + 24) = gradientTmp[3]; - *reinterpret_cast<__m256*>(momentumVec + 32) = gradientTmp[4]; - *reinterpret_cast<__m256*>(momentumVec + 40) = gradientTmp[5]; - *reinterpret_cast<__m256*>(momentumVec + 48) = gradientTmp[6]; - *reinterpret_cast<__m256*>(momentumVec + 56) = gradientTmp[7]; - }; - - auto momentumAddValueFun = [&](void) { - *reinterpret_cast<__m256*>(momentumVec) = - _mm256_add_ps(*reinterpret_cast<__m256*>(momentumVec), valueTmp[0]); - *reinterpret_cast<__m256*>(momentumVec + 8) = - _mm256_add_ps(*reinterpret_cast<__m256*>(momentumVec + 8), valueTmp[1]); - *reinterpret_cast<__m256*>(momentumVec + 16) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 16), valueTmp[2]); - *reinterpret_cast<__m256*>(momentumVec + 24) = 
_mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 24), valueTmp[3]); - *reinterpret_cast<__m256*>(momentumVec + 32) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 32), valueTmp[4]); - *reinterpret_cast<__m256*>(momentumVec + 40) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 40), valueTmp[5]); - *reinterpret_cast<__m256*>(momentumVec + 48) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 48), valueTmp[6]); - *reinterpret_cast<__m256*>(momentumVec + 56) = _mm256_add_ps( - *reinterpret_cast<__m256*>(momentumVec + 56), valueTmp[7]); - }; - - auto valueAddMomentumFun = [&](void) { - *reinterpret_cast<__m256*>(value) = - _mm256_add_ps(*reinterpret_cast<__m256*>(value), - *reinterpret_cast<__m256*>(momentumVec)); - *reinterpret_cast<__m256*>(value + 8) = - _mm256_add_ps(*reinterpret_cast<__m256*>(value + 8), - *reinterpret_cast<__m256*>(momentumVec + 8)); - *reinterpret_cast<__m256*>(value + 16) = - _mm256_add_ps(*reinterpret_cast<__m256*>(value + 16), - *reinterpret_cast<__m256*>(momentumVec + 16)); - *reinterpret_cast<__m256*>(value + 24) = - _mm256_add_ps(*reinterpret_cast<__m256*>(value + 24), - *reinterpret_cast<__m256*>(momentumVec + 24)); - *reinterpret_cast<__m256*>(value + 32) = - _mm256_add_ps(*reinterpret_cast<__m256*>(value + 32), - *reinterpret_cast<__m256*>(momentumVec + 32)); - *reinterpret_cast<__m256*>(value + 40) = - _mm256_add_ps(*reinterpret_cast<__m256*>(value + 40), - *reinterpret_cast<__m256*>(momentumVec + 40)); - *reinterpret_cast<__m256*>(value + 48) = - _mm256_add_ps(*reinterpret_cast<__m256*>(value + 48), - *reinterpret_cast<__m256*>(momentumVec + 48)); - *reinterpret_cast<__m256*>(value + 56) = - _mm256_add_ps(*reinterpret_cast<__m256*>(value + 56), - *reinterpret_cast<__m256*>(momentumVec + 56)); - }; - - if (0 == decayRate && 0 == momentum) { - loopFun = [&](void) { - gradMulFun(); - momentumZeroFun(); - valueAddMomentumFun(); - }; - } else if (0 == decayRate && 0 != momentum) { - loopFun = 
[&](void) { - gradMulFun(); - momentumMulFun(); - momentumAddGradFun(); - valueAddMomentumFun(); - }; - } else if (0 != decayRate && 0 == momentum) { - loopFun = [&](void) { - gradMulFun(); - valueMulFun(); - momentumZeroFun(); - momentumAddValueFun(); - valueAddMomentumFun(); - }; - } else if (0 != decayRate && 0 != momentum) { - loopFun = [&](void) { - gradMulFun(); - valueMulFun(); - momentumMulFun(); - momentumAddGradFun(); - momentumAddValueFun(); - valueAddMomentumFun(); - }; - } - - for (size_t i = 0; i < cntLoop; i++) { - loopFun(); - grad += nStepSize; - momentumVec += nStepSize; - value += nStepSize; - } - - for (size_t i = 0; i < cntRem; i++) { - momentumVec[i] = momentum * momentumVec[i] + (learningRate * grad[i]) + - (decayRate * value[i]); - value[i] += momentumVec[i]; - } -#endif -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/ParameterUpdateFunctions.h b/paddle/legacy/parameter/ParameterUpdateFunctions.h deleted file mode 100644 index a7cc1c4c47b6c8723520221cb0efc2afb53a900c..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ParameterUpdateFunctions.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/Common.h" - -namespace paddle { - -/** - * Performs the following operations. 
- * - * momentumVec = momentum * momentumVec - * - learningRate * grad - * - learningRate * decayRate * value - * - * value = value + momentumVec - * momentum = 0 or decayRate = 0 are specially handled to avoid unnecessary - * computation. - */ -void sgdUpdate(real learningRate, - real momentum, - real decayRate, - Vector* value, - Vector* grad, - Vector* momentumVec); - -void sgdUpdateCpu(real learningRate, - real momentum, - real decayRate, - size_t size, - real* value, - const real* grad, - real* momentumVec); - -void sgdUpdateAvx(float learningRate, - float momentum, - float decayRate, - size_t size, - float* value, - const float* grad, - float* momentumVec); - -} // namespace paddle diff --git a/paddle/legacy/parameter/ParameterUpdaterBase.cpp b/paddle/legacy/parameter/ParameterUpdaterBase.cpp deleted file mode 100644 index 7d9d3fad63160b76d6de0932f39596a8643d0a8e..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ParameterUpdaterBase.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ParameterUpdaterBase.h" -#include -#include "hl_gpu.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -void ParameterUpdater::init(const std::vector& parameters) { - parameters_ = parameters; - for (ParameterType type : getParameterTypes()) { - for (auto& para : parameters) { - para->enableType(type); - } - } - for (size_t pid = 0; pid < parameters_.size(); ++pid) { - nonStaticParaIDMap_.insert( - std::pair(parameters_[pid]->getID(), pid)); - } - - for (auto& para : parameters) { - if (!para->isStatic()) { - para->initHook(); - } - } -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/ParameterUpdaterBase.h b/paddle/legacy/parameter/ParameterUpdaterBase.h deleted file mode 100644 index 493512886cad3ea9b74026d6dfcc4fc90f6aadb9..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ParameterUpdaterBase.h +++ /dev/null @@ -1,182 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "Parameter.h" - -namespace paddle { - -class ParameterOptimizer; - -class ParameterUpdater { - public: - ParameterUpdater() : parameterTypes_{PARAMETER_VALUE, PARAMETER_GRADIENT} {} - virtual ~ParameterUpdater() {} - - void addParameterType(ParameterType type) { - for (auto t : parameterTypes_) { - if (t == type) return; - } - parameterTypes_.push_back(type); - } - - virtual void init(const std::vector& parameters); - - // called by Trainer when starting a new pass - virtual void startPass() {} - - // called by Trainer then finishing a pass, ruturn true if pass accepted - virtual bool finishPass() { return true; } - - // called by Trainer before backward() of a batch - // Return the type of pass it needs. This pass type will be passed - // to GradientMachine::forward() by the caller. - virtual PassType startBatch(int64_t batchSize) { - (void)batchSize; - return PASS_TRAIN; - } - - // called by Trainer after backward() of a batch - // cost: the cost for this batch - virtual void finishBatch(real cost) { (void)cost; } - - // between startBatch() and finishBatch(), update() will be called - // by the trainer multiple times, each time for updating one Parameter - // with its gradient in PARAMETER_GRADIENT - void update(Parameter* para) { - SetDevice setDevice(para->getDeviceId()); - para->updateHook(); - this->updateImpl(para); - } - - // only get required sparse rows by default, - // get full matrix parameter if *fullSize* set - // get PARAMETER_APPLY on pserver if *apply* set - virtual void getParametersRemote(bool fullSize = false, bool apply = false) {} - - virtual void loadParametersRemote(const std::string& dirName) {} - virtual void saveParametersRemote(const std::string& dirName) {} - virtual void randParametersRemote() {} - - // something like regularization may be delayed apply - // trainer should catch up with before parameter is saved or sended. 
- virtual void catchUpWith() {} - - // following two hooks used by averager - // apply to final parameter value (PARAMETER_VALUE or PARAMETER_APPLY). - // restore() will restore orginal value if it apply to PARAMETER_VALUE. - virtual void apply() {} - virtual void restore() {} - - // return the parameter types used by this updater - const std::vector& getParameterTypes() const { - return parameterTypes_; - } - -#ifndef PADDLE_DISABLE_TIMER - virtual void setForwardbackwardTime(uint64_t delta) {} -#endif - - protected: - virtual void updateImpl(Parameter* para) = 0; - - std::vector parameterTypes_; - std::vector parameters_; - std::map nonStaticParaIDMap_; -}; - -// Composite of ParameterUpdaters, each ParameterUpdater handle -// part of all Parameters. It's useful when we need different -// update strategy for different Parameter. -class ParameterUpdaterComposite : public ParameterUpdater { - public: - ParameterUpdaterComposite() {} - virtual ~ParameterUpdaterComposite() {} - - virtual void init(const std::vector& parameters) = 0; - - virtual void startPass() { - syncThreadPool_->execPlusOwner( - [&](int tid, size_t numThreads) { updaters_[tid]->startPass(); }); - } - - virtual bool finishPass() { - syncThreadPool_->execPlusOwner( - [&](int tid, size_t numThreads) { updaters_[tid]->finishPass(); }); - return true; - } - - virtual PassType startBatch(int64_t batchSize) { - syncThreadPool_->execPlusOwner([&](int tid, size_t numThreads) { - updaters_[tid]->startBatch(batchSize); - }); - return PASS_TRAIN; - } - - virtual void finishBatch(real cost) { - syncThreadPool_->execPlusOwner( - [&](int tid, size_t numThreads) { updaters_[tid]->finishBatch(cost); }); - } - - virtual void getParametersRemote(bool fullSize, bool apply) { - syncThreadPool_->execPlusOwner([&](int tid, size_t numThreads) { - updaters_[tid]->getParametersRemote(fullSize, apply); - }); - } - virtual void loadParametersRemote(const std::string& dirName) { - syncThreadPool_->execPlusOwner([&](int tid, 
size_t numThreads) { - updaters_[tid]->loadParametersRemote(dirName); - }); - } - virtual void saveParametersRemote(const std::string& dirName) { - syncThreadPool_->execPlusOwner([&](int tid, size_t numThreads) { - updaters_[tid]->saveParametersRemote(dirName); - }); - } - virtual void randParametersRemote() { - syncThreadPool_->execPlusOwner([&](int tid, size_t numThreads) { - updaters_[tid]->randParametersRemote(); - }); - } - - virtual void catchUpWith() { - syncThreadPool_->execPlusOwner( - [&](int tid, size_t numThreads) { updaters_[tid]->catchUpWith(); }); - } - -#ifndef PADDLE_DISABLE_TIMER - virtual void setForwardbackwardTime(uint64_t delta) { - for (auto& updater : updaters_) { - updater->setForwardbackwardTime(delta); - } - } -#endif - - virtual void apply() { - syncThreadPool_->execPlusOwner( - [&](int tid, size_t numThreads) { updaters_[tid]->apply(); }); - } - virtual void restore() { - syncThreadPool_->execPlusOwner( - [&](int tid, size_t numThreads) { updaters_[tid]->restore(); }); - } - - protected: - virtual void updateImpl(Parameter* para) {} - std::vector> updaters_; - std::unique_ptr syncThreadPool_; -}; - -} // namespace paddle diff --git a/paddle/legacy/parameter/ParameterUpdaterHook.cpp b/paddle/legacy/parameter/ParameterUpdaterHook.cpp deleted file mode 100644 index bfb9769fb67fc71b6f96f09d44b2c108745eafa3..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ParameterUpdaterHook.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ParameterUpdaterHook.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -/** - * The static pruning hook - * Static means user specify a sparsity_ratio before training started, and the - * network will prune the parameters based on the sparsity_ratio. More details - * can be found https://arxiv.org/pdf/1506.02626.pdf. - */ - -class StaticPruningHook : public IParameterUpdaterHook { - public: - explicit StaticPruningHook(const ParameterUpdaterHookConfig &hookConfig) - : initCount_(0) { - sparsityRatio_ = hookConfig.sparsity_ratio(); - } - - static bool sortPairAscend(const std::pair &pair1, - const std::pair &pair2) { - return pair1.first > pair2.first; - } - - void update(Parameter *para) { - updateThreadChecker_.check(); - auto &vec = para->getBuf(PARAMETER_GRADIENT); - if (vec) { - vec->dotMul(*maskVec_); - } - } - - void generateMask(Parameter *para) { - VectorPtr maskTemp = Vector::create(para->getSize(), false); - maskTemp->zeroMem(); - real *maskTempData = maskTemp->getData(); - size_t nonZeroNum = para->getSize() * (1 - sparsityRatio_); - - VectorPtr paraVec = para->getBuf(PARAMETER_VALUE); - VectorPtr paraCpuCopy = Vector::create(para->getSize(), false); - - paraCpuCopy->copyFrom(*paraVec); - std::vector> param; - - for (size_t i = 0; i < para->getSize(); i++) - param.push_back(std::make_pair(fabs(paraCpuCopy->getData()[i]), i)); - - std::partial_sort( - param.begin(), param.begin() + nonZeroNum, param.end(), sortPairAscend); - for (size_t i = 0; i < nonZeroNum; i++) maskTempData[param[i].second] = 1.0; - - // Currently just use a mask vector for hack. 
- if (para->useGpu()) { - maskVec_ = Vector::create(para->getSize(), para->useGpu()); - maskVec_->copyFrom(*maskTemp); - } else { - maskVec_ = maskTemp; - } - } - - void init(Parameter *para) { - generateMask(para); - size_t initCount = this->initCount_.fetch_add(1); - CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must invoke " - "in same ParamterUpdater"; - VLOG(3) << "Initialize Parameter " << para; - SetDevice device(para->getDeviceId()); - - auto ¶Vec = para->getBuf(PARAMETER_VALUE); - paraVec->dotMul(*maskVec_); - } - - private: - SameThreadChecker updateThreadChecker_; - std::atomic initCount_; - VectorPtr maskVec_; - real sparsityRatio_; -}; - -IParameterUpdaterHook::IParameterUpdaterHook() {} - -IParameterUpdaterHook::~IParameterUpdaterHook() {} - -/** - * A Hasher used by g_hooks. - * - * Use the independent hasher intendedly. There is a hasher in PServer for hash - * ParameterBlock. But not to use same hasher to reduce dependency. - * - * May be extracted to Util.h to unify the hasher. - */ -class StringIntPairHasher { - public: - size_t operator()(const std::pair &k) const { - return intHasher_(strHasher_(k.first) + k.second); - } - - private: - std::hash strHasher_; - std::hash intHasher_; -}; - -static WeakKVCache, - IParameterUpdaterHook, - StringIntPairHasher> - g_hookCache_; - -/** - * ParameterUpdaterHook actually factory method. 
- */ -static IParameterUpdaterHook *createImpl( - const ParameterUpdaterHookConfig &config) { - auto &type = config.type(); - if (type == "pruning") { - return new StaticPruningHook(config); - } - - LOG(FATAL) << "Unknown Hook type: " << type; - return nullptr; -} - -std::shared_ptr IParameterUpdaterHook::create( - const ParameterConfig ¶mConfig, int idx) { - std::pair key = {paramConfig.name(), idx}; - return g_hookCache_.get( - key, [&] { return createImpl(paramConfig.update_hooks(idx)); }); -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/ParameterUpdaterHook.h b/paddle/legacy/parameter/ParameterUpdaterHook.h deleted file mode 100644 index cb96e4cf007572e9688c11719017a9d2771ecd51..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ParameterUpdaterHook.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include - -#include "ParameterConfig.pb.h" - -namespace paddle { - -class Parameter; - -/** - * The parameter updater hook interface. - * - * The Parameter Updater hooks is a group of methods invoke before - * ParameterUpdater::updateImpl. It can modify gradient/momentum/etc before - * parameter optimization. - */ -class IParameterUpdaterHook { - public: - virtual ~IParameterUpdaterHook(); - - /** - * Create A ParameterUpdaterHook. - * - * The same parameter shared the same hooks. So it returns shared_ptr. 
- * - * @param param_config The parameter config. - * @param idx The element index of param_config.updater_hooks() array. - */ - static std::shared_ptr create( - const ParameterConfig& paramConfig, int idx); - - /** - * The update hook method. Invoke before ParameterUpdater::updateImpl - */ - virtual void update(Parameter* para) = 0; - - /** - * The init hook method. Invoke in ParameterUpdater::init - */ - virtual void init(Parameter* para) = 0; - - protected: - /** - * Ctor. - */ - IParameterUpdaterHook(); -}; - -} // namespace paddle diff --git a/paddle/legacy/parameter/Regularizer.cpp b/paddle/legacy/parameter/Regularizer.cpp deleted file mode 100644 index c1d5f4fa68403408bb44341e1e28f2ce3beb2e4c..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/Regularizer.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Regularizer.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -Regularizer* Regularizer::get(const std::vector& types, - const ParameterConfig& paraConfig) { - bool useLearningRateVec = - std::find(types.begin(), types.end(), PARAMETER_LEARNING_RATE) != - types.end(); - if (paraConfig.decay_rate_l1() > 0.0f && - paraConfig.decay_rate() > 0.0f) { // use L1 and L2 - if (useLearningRateVec) { - static L1L2LrRegularizer regularizer_; - return ®ularizer_; - } - static L1L2Regularizer regularizer_; - return ®ularizer_; - } - if (paraConfig.decay_rate_l1() > 0.0f) { // use L1 only - if (useLearningRateVec) { - static L1LrRegularizer regularizer_; - return ®ularizer_; - } - static L1Regularizer regularizer_; - return ®ularizer_; - } - if (paraConfig.decay_rate() > 0.0f) { // use L2 only - if (useLearningRateVec) { - static L2LrRegularizer regularizer_; - return ®ularizer_; - } - static L2Regularizer regularizer_; - return ®ularizer_; - } - return nullptr; -} - -} // namespace paddle diff --git a/paddle/legacy/parameter/Regularizer.h b/paddle/legacy/parameter/Regularizer.h deleted file mode 100644 index fa5384e23251b918cc914df36c16ad790a5c59c5..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/Regularizer.h +++ /dev/null @@ -1,115 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "ParameterUpdaterBase.h" - -namespace paddle { - -// Regularizer function for parameter, e.g. L1/L2 -class Regularizer { - public: - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - real learningRate, // learningrate from optimizer - int t0, // last occurence time - int t) const = 0; // current time - virtual ~Regularizer() {} - - static Regularizer* get(const std::vector& types, - const ParameterConfig& paraConfig); -}; - -// L1 Regularizer, |w|_1 -class L1Regularizer : public Regularizer { - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - real learningRate, - int t0, - int t) const { - vecs[PARAMETER_VALUE]->applyL1(learningRate * paraConfig.learning_rate(), - paraConfig.decay_rate_l1() * (t - t0)); - } -}; - -// L1 Lr Regularizer -class L1LrRegularizer : public Regularizer { - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - real learningRate, - int t0, - int t) const { - vecs[PARAMETER_VALUE]->applyL1(*vecs[PARAMETER_LEARNING_RATE], - learningRate * paraConfig.learning_rate(), - paraConfig.decay_rate_l1() * (t - t0)); - } -}; - -// L2 Regularizer, |w|_2^2 -class L2Regularizer : public Regularizer { - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - real learningRate, - int t0, - int t) const { - vecs[PARAMETER_VALUE]->applyL2(learningRate * paraConfig.learning_rate(), - paraConfig.decay_rate() * (t - t0)); - } -}; - -// L2 Lr Regularizer -class L2LrRegularizer : public Regularizer { - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - real learningRate, - int t0, - int t) const { - vecs[PARAMETER_VALUE]->applyL2(*vecs[PARAMETER_LEARNING_RATE], - learningRate * paraConfig.learning_rate(), - paraConfig.decay_rate() * (t - t0)); - } -}; - -// L1 + L2 Regularizer, |w|_1 + |w|_2^2 -class L1L2Regularizer : public Regularizer { - virtual void update(const VectorPtr 
vecs[], - const ParameterConfig& paraConfig, - real learningRate, - int t0, - int t) const { - vecs[PARAMETER_VALUE]->applyL1(learningRate * paraConfig.learning_rate(), - paraConfig.decay_rate_l1() * (t - t0)); - vecs[PARAMETER_VALUE]->applyL2(learningRate * paraConfig.learning_rate(), - paraConfig.decay_rate() * (t - t0)); - } -}; - -// L1 + L2 Lr Regularizer -class L1L2LrRegularizer : public Regularizer { - virtual void update(const VectorPtr vecs[], - const ParameterConfig& paraConfig, - real learningRate, - int t0, - int t) const { - vecs[PARAMETER_VALUE]->applyL1(*vecs[PARAMETER_LEARNING_RATE], - learningRate * paraConfig.learning_rate(), - paraConfig.decay_rate_l1() * (t - t0)); - vecs[PARAMETER_VALUE]->applyL2(*vecs[PARAMETER_LEARNING_RATE], - learningRate * paraConfig.learning_rate(), - paraConfig.decay_rate() * (t - t0)); - } -}; - -} // namespace paddle diff --git a/paddle/legacy/parameter/ThreadLocalBuffer.cpp b/paddle/legacy/parameter/ThreadLocalBuffer.cpp deleted file mode 100644 index 550e41dfdaab98d3710a1141709206132ebecdce..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ThreadLocalBuffer.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ThreadLocalBuffer.h" -#include "Parameter.h" - -namespace paddle { -namespace parameter { - -static ThreadLocal> tlsTempBufs_; - -VectorPtr* getThreadLocalBuffer() { - std::vector& bufs = *tlsTempBufs_; - if (bufs.empty()) { - bufs.resize(NUM_PARAMETER_TYPES); - for (auto& vec : bufs) { - vec.reset(new CpuVector(0, nullptr)); - } - } - return bufs.data(); -} - -} // namespace parameter -} // namespace paddle diff --git a/paddle/legacy/parameter/ThreadLocalBuffer.h b/paddle/legacy/parameter/ThreadLocalBuffer.h deleted file mode 100644 index d360feeed6c98ee60e3bdae924434054080576b0..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/ThreadLocalBuffer.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/legacy/math/Vector.h" - -namespace paddle { -namespace parameter { -extern VectorPtr* getThreadLocalBuffer(); -} // namespace parameter -} // namespace paddle diff --git a/paddle/legacy/parameter/Weight.cpp b/paddle/legacy/parameter/Weight.cpp deleted file mode 100644 index 9d94050a5cd8c3570c286e8e82c2a1470c40e6db..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/Weight.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Weight.h" -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -Weight::Weight(size_t height, size_t width, ParameterPtr param) { - VectorPtr vPtr = param->getBuf(PARAMETER_VALUE); - VectorPtr gPtr = param->getBuf(PARAMETER_GRADIENT); - - // create a new weight - if (param->isSparse()) { - CHECK_LE(param->getSize(), width * height); - } else { - CHECK_EQ(param->getSize(), width * height); - } - - // weight_ - weight_ = param->getMat(PARAMETER_VALUE); - if (!weight_ && vPtr) { - weight_ = Matrix::create(vPtr->getMemoryHandle(), height, width); - } - if (weight_) { - CHECK_EQ(height, weight_->getHeight()); - CHECK_EQ(width, weight_->getWidth()); - } - - // weightGrad - weightGrad_ = param->getMat(PARAMETER_GRADIENT); - if (!weightGrad_ && gPtr) { - weightGrad_ = Matrix::create(gPtr->getMemoryHandle(), height, width); - } - if (weightGrad_) { - CHECK_EQ(height, weightGrad_->getHeight()); - CHECK_EQ(width, weightGrad_->getWidth()); - } - - parameter_ = param; -} - -Weight::Weight(size_t height, size_t width, ParameterPtr param, size_t offset) { - VectorPtr vPtr = param->getBuf(PARAMETER_VALUE); - VectorPtr gPtr = param->getBuf(PARAMETER_GRADIENT); - - // create a new weight - CHECK_LE(offset + width * height, param->getSize()); - - // weight_ - if (vPtr) { - weight_ = Matrix::create(vPtr->getData() + offset, - height, - width, - /* trans */ false, - param->useGpu()); - } - - // weightGrad - if (gPtr) { - weightGrad_ = Matrix::create(gPtr->getData() + offset, - height, - width, - /* trans */ false, - param->useGpu()); - } - - parameter_ = param; -} 
- -const ParameterPtr& Weight::getParameterPtr() { return parameter_; } -void Weight::setParameterPtr(ParameterPtr param) { parameter_ = param; } -} // namespace paddle diff --git a/paddle/legacy/parameter/Weight.h b/paddle/legacy/parameter/Weight.h deleted file mode 100644 index 241c8d829cd0c7b57964324d3378bdfcf09e6a70..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/Weight.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include -#include - -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/SparseRowMatrix.h" -#include "paddle/legacy/parameter/Parameter.h" - -namespace paddle { - -class Weight { - private: - MatrixPtr weight_; - MatrixPtr weightGrad_; - ParameterPtr parameter_; - - public: - Weight(size_t height, size_t width, ParameterPtr parameter); - Weight(size_t height, size_t width, ParameterPtr parameter, size_t offset); - - const MatrixPtr& getW() { return weight_; } - const MatrixPtr& getWGrad() { return weightGrad_; } - const ParameterPtr& getParameterPtr(); - - void incUpdate(const UpdateCallback& callback) { - getParameterPtr()->incUpdate(callback); - } - - void setParameterPtr(ParameterPtr param); -}; - -typedef std::vector> WeightList; - -} // namespace paddle diff --git a/paddle/legacy/parameter/tests/CMakeLists.txt b/paddle/legacy/parameter/tests/CMakeLists.txt deleted file mode 100644 index 181ccdc1f099e8d61a44c1741116abe7afe0f11d..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/tests/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_simple_unittest(test_common) -add_simple_unittest(test_argument) diff --git a/paddle/legacy/parameter/tests/test_argument.cpp b/paddle/legacy/parameter/tests/test_argument.cpp deleted file mode 100644 index 0c632e0cd10342431dfcada680a18d8f9eabeb9c..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/tests/test_argument.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -using namespace paddle; // NOLINT - -TEST(Argument, poolSequenceWithStride) { - Argument input, output; - ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false); - int* inStart = input.sequenceStartPositions->getMutableData(false); - inStart[0] = 0; - inStart[1] = 9; - inStart[2] = 14; - inStart[3] = 17; - inStart[4] = 30; - - int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30}; - int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30}; - - for (auto reversed : {false, true}) { - ICpuGpuVectorPtr stridePositions; - output.poolSequenceWithStride( - input, 5 /* stride */, &stridePositions, reversed); - - const int* outStart = output.sequenceStartPositions->getData(false); - CHECK_EQ(outStart[0], 0); - CHECK_EQ(outStart[1], 2); - CHECK_EQ(outStart[2], 3); - CHECK_EQ(outStart[3], 4); - CHECK_EQ(outStart[4], 7); - - CHECK_EQ(stridePositions->getSize(), 8UL); - auto result = reversed ? strideResultReversed : strideResult; - for (int i = 0; i < 8; i++) { - CHECK_EQ(stridePositions->getData(false)[i], result[i]); - } - } -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/parameter/tests/test_common.cpp b/paddle/legacy/parameter/tests/test_common.cpp deleted file mode 100644 index 8de9d6da983553c0b9e574ac27ae8fca14bea5b7..0000000000000000000000000000000000000000 --- a/paddle/legacy/parameter/tests/test_common.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include -#include -#include -#include -#include - -using namespace paddle; // NOLINT - -class CommonTest : public ::testing::Test { - protected: - CommonTest() : testStat_("test") {} - virtual ~CommonTest() {} - virtual void SetUp() { - const size_t buffSize[] = { - 100, 128, 500, 1024, 4096, 10240, 102400, 1000000}; - sizeVec_.resize(8); - memcpy(&sizeVec_[0], &buffSize[0], 8 * sizeof(size_t)); - valueUint_.resize(4); - valueUint_[0].first = 0.0; - valueUint_[0].second = 0.0; - valueUint_[1].first = 0.0; - valueUint_[1].second = 1.0; - valueUint_[2].first = 1.0; - valueUint_[2].second = 0.0; - valueUint_[3].first = 1.0; - valueUint_[3].second = 1.0; - learningRate_ = 1.0; - } - - void test_sgdUpadate(real* gradientBuffer, - real* valueBuffer, - real* momentumBuffer, - size_t size); - - virtual void TreaDown() { LOG(INFO) << "All Test Finished."; } - - protected: - std::vector> valueUint_; - std::vector sizeVec_; - real learningRate_; - StatSet testStat_; -}; - -void CommonTest::test_sgdUpadate(real* gradientBuffer, - real* valueBuffer, - real* momentumBuffer, - size_t size) { -// sgdUpdateAvx has no double version yet -#if defined(__AVX__) && !defined(PADDLE_TYPE_DOUBLE) - real valueSum1 = 0, valueSum2 = 0, momSum1 = 0, momSum2 = 0; - real* gradTmp = new real[size]; - real* valueTmp = new real[size]; - real* momentumTmp = new real[size]; - memcpy(gradTmp, gradientBuffer, size * sizeof(real)); - memcpy(valueTmp, valueBuffer, size * sizeof(real)); - memcpy(momentumTmp, momentumBuffer, size * sizeof(real)); - for (auto& arg : valueUint_) { - { - 
{ - struct timeval t; - REGISTER_TIMER("gettimeofday", 0, testStat_); - gettimeofday(&t, NULL); - } - REGISTER_TIMER("avxTimer", 0); - sgdUpdateAvx(learningRate_, - arg.first, - arg.second, - size, - valueBuffer, - gradientBuffer, - momentumBuffer); - } - for (size_t i = 0; i < size; i++) { - valueSum1 += valueBuffer[i]; - momSum1 += momentumBuffer[i]; - // std::cout << "[" - // << valueBuffer[i] - // << "," << momentumBuffer[i] - // << "," << gradientBuffer[i] << "],"; - } - { - REGISTER_TIMER("cpuTimer", 0); - sgdUpdateCpu(learningRate_, - arg.first, - arg.second, - size, - valueTmp, - gradTmp, - momentumTmp); - } - for (size_t i = 0; i < size; i++) { - valueSum2 += valueTmp[i]; - momSum2 += momentumTmp[i]; - // std::cout << "[" - // << valueTmp[i] - // << "," << momentumTmp[i] - // << "," << gradTmp[i] << "],"; - } - - VLOG(3) << "valueSum1 = " << valueSum1 << " ; valueSum2 = " << valueSum2; - VLOG(3) << "momSum1 = " << momSum1 << " ; momSum2 = " << momSum2; - ASSERT_EQ(valueSum1, valueSum2); - ASSERT_EQ(momSum1, momSum2); - } - delete[] gradTmp; - delete[] valueTmp; - delete[] momentumTmp; -#endif -} - -TEST_F(CommonTest, sgdUpdate) { - const size_t alignHeader[] = {0, 2, 3, 5, 7, 8}; - for (auto& size : sizeVec_) { - real *gradientBuffer, *valueBuffer, *momentumBuffer; - CHECK_EQ(posix_memalign((void**)&gradientBuffer, 32, sizeof(real) * size), - 0); - CHECK_EQ(posix_memalign((void**)&valueBuffer, 32, sizeof(real) * size), 0); - CHECK_EQ(posix_memalign((void**)&momentumBuffer, 32, sizeof(real) * size), - 0); - - for (size_t i = 0; i < size; i++) { - gradientBuffer[i] = 1.0; - valueBuffer[i] = 2.0; - momentumBuffer[i] = 3.0; - } - for (int i = 0; i < 6; i++) { - LOG(INFO) << "----------------------" << size << ":" << alignHeader[i] - << "-------------------------"; - test_sgdUpadate(&gradientBuffer[alignHeader[i]], - &valueBuffer[alignHeader[i]], - &momentumBuffer[alignHeader[i]], - size - alignHeader[i]); - } - free(gradientBuffer); - free(valueBuffer); - 
free(momentumBuffer); - } - globalStat.printAllStatus(); - testStat_.printAllStatus(); -} - -TEST_F(CommonTest, syncThreadPool) { - SyncThreadPool pool(10); - - std::vector nums; - nums.resize(10); - - pool.exec([&](int tid, size_t numThreads) { nums[tid] = tid; }); - for (size_t i = 0; i < nums.size(); ++i) { - EXPECT_EQ((int)i, nums[i]); - } - - pool.exec([&](int tid, size_t numThreads) { nums[tid] -= tid; }); - for (size_t i = 0; i < nums.size(); ++i) { - EXPECT_EQ((int)0, nums[i]); - } -} diff --git a/paddle/legacy/pserver/BaseClient.cpp b/paddle/legacy/pserver/BaseClient.cpp deleted file mode 100644 index 13bb8a1cc58580a8e0af31c23b420836c7422ad8..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/BaseClient.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "BaseClient.h" -#include -#include -#include -#include "paddle/legacy/utils/Stat.h" - -DECLARE_string(pservers); - -namespace paddle { - -BaseClient::BaseClient(bool separate, int numPorts) - : stopping_(false), numPorts_(numPorts), separateSendAndRecv_(separate) { - CHECK_GT(numPorts, 0); -} - -BaseClient::~BaseClient() {} - -void BaseClient::recvData() { recvSyncBarrier_->wait(); } - -void BaseClient::synchronize(SyncObject syncObjectId) { - SynchronizeRequest request; - request.set_sync_object_id(syncObjectId); - std::vector responses; - multiCall(__func__, request, &responses); -} - -void BaseClient::startThreads() { - if (!separateSendAndRecv_) { - return; - } - recvSyncBarrier_.reset(new ThreadBarrier(threadNum_ + 1)); - - sendThreads_.resize(threadNum_); - recvThreads_.resize(threadNum_); - sendJobQueue_.resize(threadNum_); - recvJobQueue_.resize(threadNum_); - - for (int i = 0; i < threadNum_; ++i) { - sendJobQueue_[i].reset(new SendQueue()); - recvJobQueue_[i].reset(new SendQueue()); - - sendThreads_[i].reset( - new std::thread([this](int id) { this->send(id); }, i)); - - recvThreads_[i].reset( - new std::thread([this](int id) { this->recv(id); }, i)); - } -} - -void BaseClient::finishThreads() { - if (!separateSendAndRecv_) { - return; - } - stopping_ = true; - for (int i = 0; i < threadNum_; i++) { - sendJobQueue_[i]->enqueue(nullptr); - } - for (auto& thread : sendThreads_) { - thread->join(); - } - for (auto& thread : recvThreads_) { - thread->join(); - } - stopping_ = false; -} -} // namespace paddle diff --git a/paddle/legacy/pserver/BaseClient.h b/paddle/legacy/pserver/BaseClient.h deleted file mode 100644 index 66e8f39cd60998122bb8958b12b23ee7142be94d..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/BaseClient.h +++ /dev/null @@ -1,311 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "ParameterService.pb.h" -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/pserver/ProtoServer.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/Queue.h" - -namespace paddle { - -/** - * it manages all connections to pservers. - * it exists two modes to manage connections to all pservers. Firstly, one - * connection owns two threads that separately manage to send and receive - * data. Secondly, each thread uses one connection for all activation in it. - * the first solution arms with sendThreads_/recvThreads_ and sendJobQueue_/ - * recvJobQueue_. the second solution use some shared thread pool to manage - * connections. 
- */ -class BaseClient { - protected: - typedef std::unique_ptr ThreadPtr; - typedef std::vector> InputIovs; - typedef std::vector SendRequest; - typedef std::vector SendDataRequestVec; - - // TODO(yanfei): - // refine data structure to unify parameter and features communication - struct SendJob { - /// store parameters related blocks data - InputIovs parallelInputIovs; - /// store protobuf request - SendRequest parallelRequests; - /// store data, such as features for metric learning - SendDataRequestVec parallelDataRequests; - }; - - public: - explicit BaseClient(bool separate = false, int numPorts = FLAGS_ports_num); - - virtual ~BaseClient(); - - typedef std::shared_ptr SendJobPtr; - typedef Queue SendQueue; - - /// send data to server, support only synchronize - template - void putData(int clientId, - SendDataType type, - DataType* datas, - size_t size, - DataUpdateMode mode) { - synchronize(SYNC_DATA); - sendData(clientId, type, mode, datas, size); - recvData(); - synchronize(SYNC_DATA); - } - - template - void putOwnData(int clientId, - SendDataType type, - DataType* datas, - size_t size) { - putData(clientId, type, datas, size, DATA_UPDATE_MODE_SET_OWN); - } - - template - void getAllData(int clientId, - SendDataType type, - DataType* datas, - size_t size) { - sendData(clientId, - type, - DATA_UPDATE_MODE_GET_ALL, - reinterpret_cast(NULL), - 0); - recvData(); - size_t dataOffset = 0; - for (auto& recvMem : recvDataMems_) { - CHECK_LE(dataOffset, size); - size_t memSize = std::min(recvMem.get()->getSize(), - sizeof(DataType) * (size - dataOffset)); - CHECK_EQ(memSize % sizeof(DataType), size_t(0)); - memcpy(datas + dataOffset, recvMem.get()->getBuf(), memSize); - dataOffset += memSize / sizeof(DataType); - } - CHECK_EQ(dataOffset, size); - } - - /** - * Reduces values on all clients. - * This reduce just support SUM. 
- * The results are saved in recvBuf of rootId client - */ - template - void reduce(DataType* sendBuf, - DataType* recvBuf, - size_t size, - int clientId, - int rootId) { - putOwnData(clientId, DATA_REDUCE_SUM, sendBuf, size); - if (rootId == clientId) { - getAllData(clientId, DATA_REDUCE_SUM, recvBuf, size); - } - } - - /** - * return trans data type according to the input type - */ - virtual TransDataType getTransDtype(const std::type_info& info) { - TransDataType dataType; - if (typeid(int*) == info) { // NOLINT - dataType = TRANS_INT32; - } else if (typeid(uint32_t*) == info) { // NOLINT - dataType = TRANS_UINT32_T; - } else if (typeid(int64_t*) == info) { // NOLINT - dataType = TRANS_INT64_T; - } else if (typeid(uint64_t*) == info) { // NOLINT - dataType = TRANS_UINT64_T; - } else if (typeid(float*) == info) { // NOLINT - dataType = TRANS_FLOAT; - } else if (typeid(double*) == info) { // NOLINT - dataType = TRANS_DOUBLE; - } else { - LOG(FATAL) << "not supported"; - } - return dataType; - } - - protected: - /// for a > 0, b > 0: - /// return the smallest x s.t. 
b*x >= a - static int divup(int a, int b) { return (a + b - 1) / b; } - - int calcClientId(int i, int serviceNum) { - return (i + FLAGS_trainer_id * numPorts_) % serviceNum; - } - - /// start threads in sendThreads_ and recvThreads_ - void startThreads(); - - /// finish threads in sendThreads_ and recvThreads_ - void finishThreads(); - - template - void prepareData(int clientId, - SendDataType type, - DataUpdateMode updateMode, - DataType* datas, - size_t size, - SendJob* sendJob) { - sendJob->parallelDataRequests.resize(serviceNum_); - sendJob->parallelInputIovs.resize(serviceNum_); - for (int i = 0; i < serviceNum_; ++i) { - auto& request = sendJob->parallelDataRequests[i]; - request.set_update_mode(updateMode); - request.set_type(type); - request.set_client_id(clientId); - request.set_server_id(i); - } - - /// split datas which need send to Server into serviceNum_ pieces - if (!datas) { - CHECK(!size) << "ownSize should be zero since datas is nullptr"; - } - size_t baseSize = size / serviceNum_; - size_t dataOffset = 0; - for (int i = 0; i < serviceNum_; ++i) { - auto& request = sendJob->parallelDataRequests[i]; - DataBlock* block = request.add_blocks(); - size_t ownSize = size_t(i) < size % serviceNum_ ? baseSize + 1 : baseSize; - size_t realSize = datas ? std::max(ownSize, size_t(1)) : 0; - block->set_total_size(realSize * sizeof(DataType)); - block->set_data_size(sizeof(DataType)); - // TODO(yuyang18): The getTransDtype can be rewritten as template method - // to reduce runtime overhead. - block->set_data_type(getTransDtype(typeid(DataType*))); // NOLINT - if (datas) { - sendJob->parallelInputIovs[i].push_back( - {datas + dataOffset, realSize * sizeof(DataType)}); - } - dataOffset += ownSize; - } - CHECK_EQ(dataOffset, size); - } - - /** - * @brief send data to all data servers - * - * @note each trainer sends all its data to all data servers - * it's for broadcast data synchronization, such as features - * synchronization in metric learning. 
- */ - template - void sendData(int clientId, - SendDataType type, - DataUpdateMode updateMode, - DataType* datas, - size_t size) { - SendJobPtr sendJob = std::make_shared(); - prepareData(clientId, type, updateMode, datas, size, sendJob.get()); - for (int i = 0; i < threadNum_; ++i) { - sendJobQueue_[i]->enqueue(sendJob); - } - } - - /** - * @brief recv data from all data servers - * - * @note synchronize all recv threads - */ - void recvData(); - - /// send request, and recv responses - template - void multiCall(const char* funcName, - const ProtoIn& request, - std::vector* responses) { - responses->resize(clients_.size()); - size_t numClients = clients_.size(); - for (size_t i = 0; i < numClients; ++i) { - clients_[i].send(funcName, request); - } - for (size_t i = 0; i < numClients; ++i) { - clients_[i].recv(&(*responses)[i]); - } - } - - /** - * @brief synchronize all trainers and pservers - * - * @note used to ensure that data of all trainers have been received - */ - void synchronize(SyncObject syncObjectId = SYNC_DEFAULT); - - /** - * @brief use multithread to separately send data - * - * @note each thread should read its own JobQueue to handle requests - * each thread should calcClientId() to retrieve connections - * managed by himself. - * send and recv are implemented in child class. - */ - virtual void send(int threadId) = 0; - - /** - * @brief use multithread to separately receive data - * - * @note almost same as send() - */ - virtual void recv(int threadId) = 0; - - protected: - bool stopping_; - /// nodes * ports that means the number of real pservers - int serviceNum_; - /** - * threads num for managing all services. Normally the - * number of pservers are relatively less than several - * hundreds so that using thread-based parallelization - * can benifit traffic performance and pserver's sgd - * optimization performance. 
- */ - int threadNum_; - /// the connection manager at client end - std::vector clients_; - /// send threads for parallelization - std::vector sendThreads_; - /// recv threads for parallelization - std::vector recvThreads_; - std::unique_ptr recvSyncBarrier_; - - // TODO(yanfei): - // current pserver's will return value until all parameters' - // optimization are finished so that recv are not overlapped - // in reality. More robust implimentation should be to pipeline - // all send/recv action based on parameter unit level, and - // it will benifits deep and larger model training in future, - // especially local node compution power surpasses inter-connection - // such as GPU cluster, even with BOX GPU cluster. - // queue for buffering send request - /** - * send/recv queue cooperates with each other to accomplish - * overlapping communication with forwardBackward action. - */ - std::vector> sendJobQueue_; - /// queue for buffering recv request - std::vector> recvJobQueue_; - /// specific for dserver - SendJob sendJob_; - /// port num for each node - int numPorts_; - /// if set, overlapped optimization is disabled - bool separateSendAndRecv_; - std::vector recvDataMems_; -}; -} // namespace paddle diff --git a/paddle/legacy/pserver/CMakeLists.txt b/paddle/legacy/pserver/CMakeLists.txt deleted file mode 100644 index 0ae9c6ef6afc6ec5a99a685b08883def0db51cf1..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/CMakeLists.txt +++ /dev/null @@ -1,56 +0,0 @@ -# parameter server package - -######################### paddle_network #################### -set(NETWORK_SOURCES - LightNetwork.cpp - SocketChannel.cpp - ProtoServer.cpp) - -set(NETWORK_HEADERS - LightNetwork.h - SocketChannel.h - ProtoServer.h) - -add_library(paddle_network STATIC - ${NETWORK_SOURCES}) - -add_dependencies(paddle_network paddle_proto ${external_project_dependencies}) - -################### paddle_pserver ###################### -set(PSERVER_SOURCES - BaseClient.cpp - 
ParameterClient2.cpp - ParameterServer2.cpp - SparseParameterDistribution.cpp - ParameterServerController.cpp) - -set(PSERVER_HEADERS - BaseClient.h - ParameterClient2.h - ParameterServer2.h - SparseParameterDistribution.h - ParameterServerController.h) - -add_library(paddle_pserver STATIC - ${PSERVER_SOURCES}) - -add_dependencies(paddle_pserver paddle_proto ${external_project_dependencies}) - -set(PSERVER_MAIN_SOURCES - ParameterServer2Main.cpp) - -if(WITH_TESTING) - add_subdirectory(test) -endif() - -if(NOT MOBILE_INFERENCE) - add_executable(paddle_pserver_main ${PSERVER_MAIN_SOURCES}) - link_paddle_exe(paddle_pserver_main) - - install(TARGETS paddle_pserver_main - RUNTIME DESTINATION opt/paddle/bin - PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) - - set_target_properties(paddle_pserver_main PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) -endif() diff --git a/paddle/legacy/pserver/LightNetwork.cpp b/paddle/legacy/pserver/LightNetwork.cpp deleted file mode 100644 index 469c95853ecdc02a6028417ca37b0020406eea09..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/LightNetwork.cpp +++ /dev/null @@ -1,459 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "LightNetwork.h" -#include "RDMANetwork.h" -#include "paddle/legacy/utils/StringUtil.h" -#include "paddle/legacy/utils/Util.h" - -/// quick ack can reduce the latency of small message -DEFINE_bool(small_messages, - false, - "if message size is small, recommend set it True to enable quick " - "ack and no delay"); - -/// reasonable sock_send_buf_size can control the traffic injected into switch -/// network. Injecting too many data into traffic could cause packets loss which -/// cause long latency and degrade the efficiency of communication. -DEFINE_int32(sock_send_buf_size, - 1024 * 1024 * 40, - "restrict sock send buff size, can reduce network congestion if " - "set carefully"); - -/// reasonable size can hold bursted packets and reduce packets loss -DEFINE_int32(sock_recv_buf_size, - 1024 * 1024 * 40, - "restrict sock recv buff size"); - -/// reasonable sock_listen_queue_size can control maximum pending connections. 
-DEFINE_int32(sock_listen_queue_size, - 1024, - "listen queue size when pserver listen a TCP port"); - -namespace paddle { - -/** - * @brief get ip address from interface name - * - * @param[in] device device interface name - */ -std::string getIpAddr(std::string &device) { - int sock; - struct sockaddr_in sin; - struct ifreq ifr; - - sock = socket(AF_INET, SOCK_DGRAM, 0); - CHECK(sock >= 0) << "Create socket error."; - - strncpy(ifr.ifr_name, device.c_str(), IFNAMSIZ); - ifr.ifr_name[IFNAMSIZ - 1] = 0; - - CHECK_GE(ioctl(sock, SIOCGIFADDR, &ifr), 0); - memcpy(&sin, &ifr.ifr_addr, sizeof(sin)); - close(sock); - return std::string(inet_ntoa(sin.sin_addr)); -} - -/** - * @brief set sock option - * - * @param[in] sockfd sock file descriptor - * - * @note adjust some default sock option for better performance - */ -void setOption(int sockfd) { -#if !defined(__APPLE__) && !defined(__OSX__) - int sendSize = FLAGS_sock_send_buf_size; - int recvSize = FLAGS_sock_recv_buf_size; - CHECK_GE( - setsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &recvSize, sizeof(recvSize)), - 0); - CHECK_GE( - setsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &sendSize, sizeof(sendSize)), - 0); -#endif - - if (FLAGS_small_messages) { - int optval = 1; - CHECK_GE( - setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, &optval, sizeof(optval)), - 0); -#ifdef TCP_QUICKACK - optval = 1; - CHECK_GE( - setsockopt(sockfd, IPPROTO_TCP, TCP_QUICKACK, &optval, sizeof(optval)), - 0); -#endif - } - int reuse = 1; - CHECK_GE(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)), - 0); -} - -/** - * @brief class constructor for SocketServer - * @param[in] addr sock bind address - * @param[in] port sock bind port - * @param[in] rdmaCpu rdma sock bind cpu core - * - * @note start one socket server which hosts parameter server process. - * rdmaCpu is passed to rdma deamon for better performance, and - * start tcp socket instead of rdma socket if rdmaCpu is equal - * to -1. 
Each trainer process starts one connection to one socket - * server, and use --ports_num to build more connections to harness - * fat communication channel if necessary. - * each connection is controlled by single thread with blocking - * read and write. - */ -SocketServer::SocketServer(const std::string &addr, int port, int rdmaCpu) - : port_(port), addr_(addr), stopping_(false) { - if (rdmaCpu == -1) { - tcpRdma_ = F_TCP; - socket_ = 0; - maxPendingConnections_ = FLAGS_sock_listen_queue_size; - } else { - tcpRdma_ = F_RDMA; - rdmaCpu_ = rdmaCpu; - rdmaSocket_ = 0; - - std::stringstream ss; - ss << port; - rdmaUri_ = "rdma://" + addr + ":" + ss.str(); - } - - /// trigger to initialize RDMA lib - CHECK(RdmaClientDaemons::get()) << "initilizate RDMA failed\n"; -} - -SocketServer::~SocketServer() { - stopping_ = true; - /// trigger accept thread to stop - { - SocketClient trigger(addr_.empty() ? "127.0.0.1" : addr_, port_, tcpRdma_); - } - this->join(); -} - -/** - * @brief start one tcp server which hosts parameter server - * - * @note do tcp socket bind and listen. it will spawn one thread - * for each connection - */ -void SocketServer::tcpServer() { - int newsockfd; - socklen_t clilen; - struct sockaddr_in serv_addr, cli_addr; - struct hostent *server; - - /// First call to socket() function - socket_ = socket(AF_INET, SOCK_STREAM, 0); - CHECK(socket_ >= 0) << "ERROR opening socket"; - - /// Initialize socket structure - bzero((char *)&serv_addr, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - serv_addr.sin_port = htons(port_); - if (!addr_.empty()) { - server = gethostbyname(addr_.c_str()); - CHECK(server) << "ERROR, no such host: " << addr_; - bcopy((char *)server->h_addr, - (char *)&serv_addr.sin_addr.s_addr, - server->h_length); - } else { - serv_addr.sin_addr.s_addr = INADDR_ANY; - } - - setOption(socket_); - - /// Now bind the host address using bind() call. 
- CHECK(bind(socket_, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) >= 0) - << "ERROR on binding " << addr_; - - /// Now start listening for the clients, here process will - /// go in sleep mode and will wait for the incoming connection - listen(socket_, maxPendingConnections_); - clilen = sizeof(cli_addr); - - while (true) { - /// Accept actual connection from the client - newsockfd = accept(socket_, (struct sockaddr *)&cli_addr, &clilen); - if (stopping_) { - break; - } - CHECK(newsockfd >= 0) << "ERROR on accept"; - constexpr int kPeerNameLen = 128; - char peerName[kPeerNameLen]; - CHECK(inet_ntop(AF_INET, &cli_addr.sin_addr, peerName, kPeerNameLen)); - - SocketWorker *worker = - new SocketWorker(createChannel(newsockfd, std::string(peerName)), this); - worker->start(); - worker->detach(); - } - close(socket_); - LOG(INFO) << "pserver accept thread finish, addr=" << addr_ - << " port=" << port_; -} - -/** - * @brief start one rdma server which hosts parameter server - * - * @note do rdma bind and listen, which calling self-defined socket - * like rdma library. 
it will spawn one thread for each connection - */ -void SocketServer::rdmaServer() { - struct sxi_sock *newsock; - - /// First call to socket() function - rdmaSocket_ = rdma::ssocket(rdmaCpu_); - CHECK(rdmaSocket_) << "ERROR opening RDMA socket"; - - CHECK(rdma::bind(rdmaSocket_, rdmaUri_.c_str()) == 0) - << "ERROR bind RDMA socket"; - - /// Now start listening for the clients, here process will - /// go in sleep mode and will wait for the incoming connection - CHECK(rdma::listen(rdmaSocket_) == 0) << "ERROR listen RDMA socket"; - - while (true) { - /// Accept actual connection from the client - newsock = rdma::accept(rdmaSocket_); - if (stopping_) { - break; - } - CHECK(newsock) << "ERROR on accept"; - - constexpr int kPeerNameLen = 128; - char peerName[kPeerNameLen]; - - struct sockaddr_in *saddr = rdma::getSourceAddress(newsock); - CHECK(inet_ntop(AF_INET, &saddr->sin_addr, peerName, kPeerNameLen)); - - SocketWorker *worker = - new SocketWorker(createChannel(newsock, std::string(peerName)), this); - worker->start(); - worker->detach(); - } - rdma::close(rdmaSocket_); - LOG(INFO) << "pserver accept thread finish, rdma uri=" << rdmaUri_; -} - -/** - * @brief start a socket server - * - * @note framework for starting socket server - */ -void SocketServer::run() { - if (tcpRdma_ == F_TCP) { - LOG(INFO) << "tcp server start "; - tcpServer(); - } else if (tcpRdma_ == F_RDMA) { - LOG(INFO) << "rdma server start "; - rdmaServer(); - } -} - -/** - * @brief class constructor for rdma client deamons - * - * @note automatically start several client deamons for better performance - */ -std::unique_ptr RdmaClientDaemons::daemons_ = nullptr; -std::once_flag RdmaClientDaemons::initDataFlag_; - -RdmaClientDaemons::RdmaClientDaemons() { - if (FLAGS_rdma_tcp == "rdma") { - rdma::init(); - - struct sxi_socket *socket; - onlineCpus_ = rdma::numCpus(); - for (auto i = 0; i < onlineCpus_; i++) { - socket = rdma::csocket(i); - CHECK(socket) << "ERROR open client socket daemon"; - - 
rdmaClientSocket_.push_back(socket); - } - LOG(INFO) << "RDMA client daemons started, onlineCpus_:" << onlineCpus_; - /// round robin scheduler for new connection - curCpu_ = 0; - /// wait daemons to start completely. - sleep(2); - } -} - -RdmaClientDaemons::~RdmaClientDaemons() { - if (FLAGS_rdma_tcp == "rdma") { - for (auto i = 0; i < onlineCpus_; i++) { - rdma::close(rdmaClientSocket_[i]); - } - LOG(INFO) << "RDMA client daemons is destoryed, onlineCpus_ " - << onlineCpus_; - } -} - -/** - * @brief worker thread main context - * - * @note each connection from client(trainer) is controlled by single worker - * thread, which is for handling all parameter server requests - */ -void SocketWorker::run() { - LOG(INFO) << "worker started, peer = " << channel_->getPeerName(); - - std::vector inputIovs; - - while (true) { - std::unique_ptr msgReader = channel_->readMessage(); - if (!msgReader) { - break; - } - - auto callback = [this](const std::vector &outputIovs) { - channel_->writeMessage(outputIovs); - }; - - server_->handleRequest(std::move(msgReader), callback); - } - - LOG(INFO) << "worker begin to finish, peer = " << channel_->getPeerName(); - delete this; -} - -/** - * @brief start one tcp connection to tcp server - * @param[in] serverAddr tcp server ip - * @param[in] serverPort tcp server port - * - * @note each object contains one channel which accept byte stream - */ -void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) { - struct sockaddr_in serv_addr; - struct hostent *server; - - int errRet; // temp for gethostbyname_r - - /// Create a socket point - int sockfd = socket(AF_INET, SOCK_STREAM, 0); - CHECK(sockfd >= 0) << "ERROR opening socket"; - -#if defined(__OSX__) || defined(__APPLE__) - server = getipnodebyname(serverAddr.c_str(), AF_INET, AI_DEFAULT, &errRet); - CHECK_NE(HOST_NOT_FOUND, errRet) << "ERROR, no such host: " << serverAddr - << " ret = " << errRet; - CHECK(server) << "getipnodebyname error!"; -#else - struct hostent 
hostinfo; - char buf[1024]; // temp for gethostbyname_r - CHECK_EQ( - 0, - gethostbyname_r( - serverAddr.c_str(), &hostinfo, buf, sizeof(buf), &server, &errRet)) - << "ERROR, no such host: " << serverAddr << " ret = " << errRet; - CHECK(server) << "gethostbyname_r error!"; -#endif - - bzero((char *)&serv_addr, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - bcopy((char *)server->h_addr, - (char *)&serv_addr.sin_addr.s_addr, - server->h_length); - serv_addr.sin_port = htons(serverPort); - - setOption(sockfd); - - /// Now connect to the server - int retry_count = 0; - do { - if (connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) == 0) { - break; - } - - if (errno == ECONNREFUSED) { - LOG(WARNING) << "connection refused by pserver, try again!"; - if (retry_count++ >= 7) { - LOG(FATAL) << "connection refused by pserver, maybe pserver failed!"; - } - std::this_thread::sleep_for(std::chrono::seconds(1)); - } else { - CHECK(errno != 0) << "ERROR connecting to " << serverAddr << ":" - << serverPort << "errorno: " << errno; - } - } while (errno == ECONNREFUSED); - - channel_.reset(new SocketChannel(sockfd, serverAddr)); - tcpRdma_ = F_TCP; -} - -/** - * @brief start one RDMA connection to rdma server - * @param[in] serverAddr rdma server ip - * @param[in] serverPort rdma server port - * - * @note each object contains one channel which accept byte stream - * for rdma, low level sock also provide byte stream api. 
- */ -void SocketClient::RdmaClient(const std::string &serverAddr, int serverPort) { - struct sxi_sock *sock; - - std::stringstream ss; - ss << serverPort; - - std::string rdmaUri = "rdma://" + serverAddr + ":" + ss.str(); - - RdmaClientDaemons *daemons = RdmaClientDaemons::daemons_->get(); - socketDaemon_ = daemons->selectDaemon(); - - /// connect to server with socket daemon - sock = rdma::connect(socketDaemon_, rdmaUri.c_str()); - CHECK(sock) << "ERROR connect to server" << rdmaUri; - - std::vector seg; - str::split(rdmaUri, '/', &seg); - std::string server = seg.at(seg.size() - 1); - channel_.reset(new SocketChannel(sock, server)); - tcpRdma_ = F_RDMA; -} - -/** - * @brief class constructor - * @param[in] serverAddr pserver ip address - * @param[in] serverPort pserver port - * @param[in] ChannelType F_TCP or F_RDMA - * - * @note responsible for building one connection to specified pserver port - */ -SocketClient::SocketClient(const std::string &serverAddr, - int serverPort, - enum ChannelType channelType) { - if (channelType == F_RDMA) - RdmaClient(serverAddr, serverPort); - else - TcpClient(serverAddr, serverPort); -} - -} // namespace paddle diff --git a/paddle/legacy/pserver/LightNetwork.h b/paddle/legacy/pserver/LightNetwork.h deleted file mode 100644 index 380f86832f5894fdf29588dde9a77068c624e066..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/LightNetwork.h +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "SocketChannel.h" - -#include -#include -#include -#include - -#include "paddle/legacy/utils/Thread.h" - -struct sxi_socket; - -namespace paddle { - -class SocketWorker; - -/** - * @brief class for holding all parameters processing for current port - * - * @note each parameter server inherits from one socket server, each - * server contains serveral woker threads which are to parallelize - * the processing of computation, but share some common datas stored - * in child class of socketserver. - */ -class SocketServer : public Thread { - // rdmaCpu controls the cpu affinity of RDMA server daemon, - // which could benifit performance. rdmaCpu = -1 means TCP - // is used instead of RDMA transport. - public: - SocketServer(const std::string& addr, int port, int rdmaCpu); - ~SocketServer(); - - virtual void run(); - - typedef std::function& outputIovs)> - ResponseCallback; - - protected: - // - // The derived class needs to implement this function - // to handle the request received by SocketWorker - // The request is encapsulated by MsgReader, which contains - // a set of blocks. 
- virtual void handleRequest(std::unique_ptr msgReader, - ResponseCallback callback) = 0; - - std::unique_ptr createChannel(int sock, - const std::string& peerName) { - return std::unique_ptr(new SocketChannel(sock, peerName)); - } - std::unique_ptr createChannel(struct sxi_sock* sock, - const std::string& peerName) { - return std::unique_ptr(new SocketChannel(sock, peerName)); - } - - friend class SocketWorker; - - private: - void rdmaServer(); - void tcpServer(); - - void detach() {} // detach accept thread is forbidden - - protected: - enum ChannelType tcpRdma_; - // for rdma - int rdmaCpu_; - std::string rdmaUri_; - sxi_socket* rdmaSocket_; - // for tcp - int port_; - std::string addr_; - int socket_; - int maxPendingConnections_; - bool stopping_; -}; - -/** - * @brief class for holding one connection from one trainer - * - * @note all parameter processing will run in the context of this worker - */ -class SocketWorker : public Thread { - public: - SocketWorker(std::unique_ptr&& channel, SocketServer* server) - : channel_(std::move(channel)), server_(server) {} - - virtual ~SocketWorker() {} - - virtual void run(); - - protected: - std::unique_ptr channel_; - SocketServer* server_; - enum ChannelType tcpRdma_; -}; - -/** - * @brief class for providing rdma client deamon thread - * - * @note the deamons are required by sock like rdam library. Here - * use singleton model for daemons. 
Each deamon hosts in - * single cpu core for better load balance performance - */ -class RdmaClientDaemons { - private: - RdmaClientDaemons(); - - static std::unique_ptr daemons_; - - public: - static RdmaClientDaemons* get() { - std::call_once(RdmaClientDaemons::initDataFlag_, - &RdmaClientDaemons::getInstance); - - return daemons_.get(); - } - - struct sxi_socket* selectDaemon() { - int cpu = curCpu_; - curCpu_ = (curCpu_ + 1) % onlineCpus_; - - LOG(INFO) << "select daemon " << cpu << "onlineCpus_ " << onlineCpus_; - return rdmaClientSocket_[cpu]; - } - - ~RdmaClientDaemons(); - - public: - friend class SocketClient; - - private: - static std::once_flag initDataFlag_; - static void getInstance() { - if (!daemons_.get()) daemons_.reset(new RdmaClientDaemons()); - } - - std::vector rdmaClientSocket_; - std::atomic curCpu_; - int onlineCpus_; -}; - -/** - * @brief management for client connection which are from trainers - * - * @note it contains one channel descriptor which used to write and - * read data - */ -class SocketClient { - public: - SocketClient(const std::string& serverAddr, - int serverPort, - enum ChannelType channelType); - - SocketChannel* getChannel() { return channel_.get(); } - - protected: - std::unique_ptr channel_; - struct sxi_socket* socketDaemon_; - enum ChannelType tcpRdma_; - - private: - void RdmaClient(const std::string& serverAddr, int serverPort); - void TcpClient(const std::string& serverAddr, int serverPort); -}; - -std::string getIpAddr(std::string& device); -void setOption(int sockfd); - -} // namespace paddle diff --git a/paddle/legacy/pserver/ParameterClient2.cpp b/paddle/legacy/pserver/ParameterClient2.cpp deleted file mode 100644 index 264faa791843b3dcaa5a41fbe7817dbf13430b7c..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ParameterClient2.cpp +++ /dev/null @@ -1,781 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "ParameterClient2.h" -#include "paddle/legacy/math/SparseRowMatrix.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/StringUtil.h" - -DEFINE_string(pservers, "127.0.0.1", "Comma separated addresses of pservers"); -DEFINE_int32(parallel_thread_num, 1, "Thread number for parameter send"); - -namespace paddle { - -template -void copyToRepeatedField(google::protobuf::RepeatedField* dest, - const T2* src, - size_t size) { - dest->Clear(); - dest->Reserve(size); - for (size_t i = 0; i < size; ++i) { - dest->AddAlreadyReserved(src[i]); - } -} - -ParameterClient2::ParameterClient2(bool separate, int port, int numPorts) - : BaseClient(separate, numPorts), port_(port) { -#ifndef PADDLE_DISABLE_TIMER - forwardbackwordTime_ = 0; -#endif -} - -int ParameterClient2::calcParameterBlockSize( - const std::vector& parameters, size_t serviceNum) { - size_t totalSize = 0; - for (auto& para : parameters) { - totalSize += para->getSize(); - } - size_t perServerSize = totalSize / serviceNum; - - int sizeBits = 64 - __builtin_clzl(perServerSize); - - /// 2^10 is min block size - /// 2^7 will be max number of blocks in one pserver - int blockSizeBits = std::max((sizeBits - 7), 10); - return 1 << blockSizeBits; -} - -void ParameterClient2::initThreads() { - threadNum_ = serviceNum_; - if (FLAGS_parallel_thread_num > 1) { - LOG(INFO) << "parallel_thread_num dosent need to set"; - 
} - syncThreadPool_.reset(new SyncThreadPool(threadNum_)); - startThreads(); -} - -bool ParameterClient2::init(const std::vector& parameters) { - destroy(); - - std::vector hosts; - str::split(FLAGS_pservers, ',', &hosts); - serviceNum_ = hosts.size() * numPorts_; - uint64_t denseBlockSize = calcParameterBlockSize(parameters, serviceNum_); - - /// setup prefetch matrix if exists - for (auto& para : parameters) { - /// set block size for each parameter - para->getConfig().set_parameter_block_size( - para->getConfig().sparse_remote_update() ? para->getConfig().dims(1) - : denseBlockSize); - } - - for (auto& para : parameters) { - CHECK_NE(-1UL, para->getID()) << "id in parameter is not initialized"; - parameterMap_[para->getID()] = para; - } - - allSegments_.reserve(parameters.size()); - - for (auto& para : parameters) { - ParameterSegments segments; - segments.name = para->getName(); - segments.id = para->getID(); - allSegments_.push_back(segments); - if (para->getConfig().sparse_remote_update()) { - CHECK_EQ(para->getConfig().parameter_block_size(), - para->getConfig().dims(1)) - << "For sparse remote update parameter," - << " block size is the width of each row."; - } - } - - /// init clients - clients_.reserve(serviceNum_); - recvDataMems_.resize(serviceNum_); - - for (size_t i = 0; i < hosts.size(); ++i) { - for (int j = 0; j < numPorts_; ++j) { - LOG(INFO) << "pserver " << i * numPorts_ + j << " " << hosts[i] << ":" - << port_ + j; - if (FLAGS_rdma_tcp == "rdma") { - clients_.emplace_back(hosts[i], port_ + j, F_RDMA); - } else { - clients_.emplace_back(hosts[i], port_ + j, F_TCP); - } - } - } - - sparseDistribution_.reset(new SparseParameterDistribution(serviceNum_)); - - sleep(2); - - initThreads(); - - return true; -} - -ParameterClient2::~ParameterClient2() { destroy(); } - -void ParameterClient2::destroy() { - if (clients_.empty()) { - /// this means not initialized. 
- return; - } - finishThreads(); - - parameterMap_.clear(); - allSegments_.clear(); - clients_.clear(); -} - -void ParameterClient2::sendParallel(int tid, - size_t numThreads, - ParameterType recvParameterType) { - int numMyClients = divup(serviceNum_ - tid, numThreads); - - for (int j = 0; j < numMyClients; ++j) { - REGISTER_TIMER("client_sendAndRecv_send"); - int i = numThreads * j + tid; - /// Try to make different clients to send data to different pservers - /// at the same time so that they will not flood data to the same - /// pserver. - i = calcClientId(i, serviceNum_); - clients_[i].send("sendParameter", - sendJob_.parallelRequests[i], - sendJob_.parallelInputIovs[i]); - - /// clear large structure - sendJob_.parallelRequests[i].Clear(); - sendJob_.parallelInputIovs[i].clear(); - } - - std::vector bufs; - SendParameterResponse response; - for (int j = 0; j < numMyClients; ++j) { - REGISTER_TIMER("client_sendAndRecv_recv"); - int i = numThreads * j + tid; - i = calcClientId(i, serviceNum_); - auto msgReader = clients_[i].recv(&response); - CHECK_EQ(msgReader->getNumBlocks(), (size_t)response.blocks_size()); - bufs.clear(); - bufs.reserve(response.blocks_size()); - for (auto& block : response.blocks()) { - auto it = parameterMap_.find(block.para_id()); - CHECK(it != parameterMap_.end()); - Parameter* parameter = it->second.get(); - real* buf = nullptr; - if (parameter->getBuf(recvParameterType)) { - buf = parameter->getBuf(recvParameterType)->getPoint(block.begin_pos()); - } else { - auto recvMat = dynamic_cast( - parameter->getMat(recvParameterType).get()); - CHECK(recvMat); - size_t width = parameter->getConfig().dims(1); - // TODO(wuyi): need add lock here? may also cause resize. - buf = recvMat->getLocalRow(block.begin_pos() / width); - } - /// sparse_id is not useful while receiving data since sparse data - /// storage is continuous, do commit recieved data as that of dense. 
- bufs.push_back(buf); - } - msgReader->readBlocks(bufs); - } -} - -void ParameterClient2::prepareSendData( - ParameterUpdateMode updateMode, - ParameterType parameterType, - const std::vector& parameterSegments, - int64_t numSamples, - real cost, - bool sendBackParameter, - ParameterType sendBackParameterType, - BatchStatus batchStatus, - SendJob* sendJob) { - sendJob->parallelRequests.resize(serviceNum_); - sendJob->parallelInputIovs.resize(serviceNum_); - - for (auto& request : sendJob->parallelRequests) { -#ifndef PADDLE_DISABLE_TIMER - if (updateMode == PSERVER_UPDATE_MODE_ADD_GRADIENT) { - request.set_forwardbackward_time(forwardbackwordTime_); - } -#endif - request.set_trainer_id(trainerId_); - request.set_update_mode(updateMode); - request.set_send_back_parameter(sendBackParameter); - request.set_send_back_parameter_type(sendBackParameterType); - request.set_num_samples(numSamples); - request.set_cost(cost); - request.set_batch_status(batchStatus); - CHECK_EQ(request.blocks_size(), 0); - VLOG(1) << "request: trainer_id: " << request.trainer_id() << " update_mode" - << request.update_mode() - << " send_back_parameter: " << request.send_back_parameter() - << " send_back_parameter_type: " - << request.send_back_parameter_type() - << " num_samples: " << request.num_samples() - << " cost: " << request.cost() - << " batch_status: " << request.batch_status(); - } - for (const auto& segments : parameterSegments) { - const auto it = parameterMap_.find(segments.id); - CHECK(it != parameterMap_.end()); - Parameter* parameter = it->second.get(); - CHECK(parameter != nullptr) << "parameter is nullptr"; - int64_t nameHash = std::hash()(segments.name); - bool sendingPara = !(updateMode == PSERVER_UPDATE_MODE_GET_PARAM || - updateMode == PSERVER_UPDATE_MODE_GET_PARAM_SPARSE || - updateMode == PSERVER_UPDATE_MODE_SET_PARAM_ZERO); - bool sparseUpdate = parameter->getConfig().sparse_remote_update() && - (updateMode == PSERVER_UPDATE_MODE_ADD_GRADIENT || - updateMode == 
PSERVER_UPDATE_MODE_ASYNC_SGD || - updateMode == PSERVER_UPDATE_MODE_GET_PARAM_SPARSE); - - const auto blockSize = parameter->getConfig().parameter_block_size(); - CHECK_GE(blockSize, 1LU) << "blockSize should > 0 " << blockSize; - const auto paraSize = parameter->getSize(); - if (sparseUpdate) { - auto prefetchMat = std::dynamic_pointer_cast( - parameter->getMat(PARAMETER_VALUE)); - CHECK(prefetchMat != nullptr) << "prefetchMat is nullptr"; - auto sendMat = dynamic_cast( - parameter->getMat(parameterType).get()); - CHECK(sendMat != nullptr) << "sendMat is nullptr"; - - syncThreadPool_->exec([&](int tid, size_t numThreads) { - std::lock_guard guard(sparseAutoGrowthMutex_); - const auto& localIndices = prefetchMat->getLocalIndices(); - /// num of sparse rows - size_t nLocalBlocks = localIndices.size(); - uint64_t beginDim = 0; - uint64_t endDim = 0; - - // HACK(typhoonzero): let it resize first - prefetchMat->getLocalRow(nLocalBlocks); - sendMat->getLocalRow(nLocalBlocks); - - for (size_t row = 0; row < nLocalBlocks; ++row) { - int64_t blockId = localIndices[row]; // local row -> sparse row - int serverId = std::abs((blockId + nameHash) % serviceNum_); - if (serverId % numThreads != (size_t)tid) { - continue; - } - - beginDim = blockId * blockSize; - endDim = std::min(beginDim + blockSize, paraSize); - - auto& request = sendJob->parallelRequests[serverId]; - ParameterBlock* block = request.add_blocks(); - block->set_para_id(segments.id); - /// global sparse row id - block->set_block_id(blockId); - /// local row offset - block->set_begin_pos(row * blockSize); - /// block len - block->set_block_size(endDim - beginDim); - if (sendingPara) { - sendJob->parallelInputIovs[serverId].push_back( - {sendMat->getLocalRow(row), sizeof(real) * (size_t)blockSize}); - /// detect sparse parameter distribution - sparseDistribution_->probeDistribution(serverId, - sizeof(real) * blockSize); - } - } - }); - - } else { /// parameter set for dense and sparse - real* buf = - sendingPara ? 
parameter->getBuf(parameterType)->getPoint(0) : nullptr; - uint64_t endDim = 0; - for (uint64_t beginDim = 0; beginDim < paraSize; beginDim = endDim) { - endDim = std::min(beginDim + blockSize, paraSize); - int64_t blockId = beginDim / blockSize; - int serverId = std::abs((blockId + nameHash) % serviceNum_); - - auto& request = sendJob->parallelRequests[serverId]; - ParameterBlock* block = request.add_blocks(); - block->set_para_id(segments.id); - block->set_block_id(blockId); - block->set_begin_pos(beginDim); - block->set_block_size(endDim - beginDim); - if (buf) { - sendJob->parallelInputIovs[serverId].push_back( - {buf + beginDim, sizeof(real) * ((size_t)(endDim - beginDim))}); - } - } - } - } // parameterSegments - - sparseDistribution_->checkAndResetDistribution(); -} - -void ParameterClient2::sendAndReceiveParameter( - ParameterUpdateMode updateMode, - ParameterType parameterType, - const std::vector& parameterSegments, - int64_t numSamples, - real cost, - bool sendBackParameter, - ParameterType sendBackParameterType, - ParameterType recvParameterType) { - prepareSendData(updateMode, - parameterType, - parameterSegments, - numSamples, - cost, - sendBackParameter, - sendBackParameterType, - /*batchStatus = */ BATCH_START_AND_FINISH, - &sendJob_); - - syncThreadPool_->exec([&](int tid, size_t numThreads) { - this->sendParallel(tid, numThreads, recvParameterType); - }); -} - -void ParameterClient2::sendParameter( - ParameterUpdateMode updateMode, - ParameterType parameterType, - const std::vector& parameterSegments, - int64_t numSamples, - real cost, - bool sendBackParameter, - BatchStatus batchStatus) { - SendJobPtr sendJob = std::make_shared(); - prepareSendData(updateMode, - parameterType, - parameterSegments, - numSamples, - cost, - sendBackParameter, - PARAMETER_VALUE, - batchStatus, - sendJob.get()); - - for (int i = 0; i < threadNum_; i++) { - sendJobQueue_[i]->enqueue(sendJob); - } -} - -void ParameterClient2::recvParameter() { recvSyncBarrier_->wait(); 
} - -void ParameterClient2::send(int threadId) { - int index = threadId; - LOG(INFO) << "send thread " << threadId << " started"; - int numMyClients = divup(serviceNum_ - index, threadNum_); - while (true) { - SendJobPtr recvJob = sendJobQueue_[index]->dequeue(); - if (stopping_) { - recvJobQueue_[index]->enqueue(recvJob); - break; - } - for (int j = 0; j < numMyClients; ++j) { - REGISTER_TIMER("client_send"); - int i = threadNum_ * j + index; - /// Try to make different clients to send data to different pservers - /// at the same time so that they will not flood data to the same - /// pserver. - i = calcClientId(i, serviceNum_); - if (recvJob->parallelRequests.size()) { - clients_[i].send("sendParameter", - recvJob->parallelRequests[i], - recvJob->parallelInputIovs[i]); - } else { - clients_[i].send("sendData", - recvJob->parallelDataRequests[i], - recvJob->parallelInputIovs[i]); - } - } - recvJobQueue_[index]->enqueue(recvJob); - } -} - -void ParameterClient2::recv(int threadId) { - LOG(INFO) << "recv thread " << threadId << " started"; - int index = threadId; - int numMyClients = divup(serviceNum_ - index, threadNum_); - while (true) { - std::vector bufs; - SendParameterResponse response; - SendDataResponse dataResponse; - SendJobPtr recvJob = recvJobQueue_[index]->dequeue(); - if (stopping_) break; - for (int j = 0; j < numMyClients; ++j) { - REGISTER_TIMER("client_recv"); - int i = threadNum_ * j + index; - i = calcClientId(i, serviceNum_); - if (recvJob->parallelRequests.size()) { - auto msgReader = clients_[i].recv(&response); - CHECK_EQ(msgReader->getNumBlocks(), (size_t)response.blocks_size()); - bufs.clear(); - bufs.reserve(response.blocks_size()); - for (auto& block : response.blocks()) { - auto it = parameterMap_.find(block.para_id()); - CHECK(it != parameterMap_.end()); - Parameter* parameter = it->second.get(); - real* buf = - parameter->getBuf(PARAMETER_VALUE)->getPoint(block.begin_pos()); - CHECK_EQ(msgReader->getBlockLength(bufs.size()), - 
sizeof(real) * (block.block_size())); - bufs.push_back(buf); - } - msgReader->readBlocks(bufs); - } else { - auto msgReader = clients_[i].recv(&dataResponse); - CHECK_EQ(msgReader->getNumBlocks(), (size_t)dataResponse.blocks_size()); - size_t totalLen = msgReader->getTotalLength(); - if (0 == totalLen) { - continue; - } - auto& recvMem = recvDataMems_[dataResponse.server_id()]; - CHECK_EQ(dataResponse.blocks_size(), 1) - << "Only one block currently support now!"; - auto& block = dataResponse.blocks(0); - CHECK_EQ(totalLen % sizeof(block.data_size()), 0U); - recvMem = std::make_shared(totalLen); - msgReader->readNextBlock(recvMem.get()->getBuf()); - } - } - recvSyncBarrier_->wait(); - } -} - -void ParameterClient2::waitPassStart() { - WaitPassStartRequest request; - std::vector responses; - multiCall(__func__, request, &responses); -} - -void ParameterClient2::waitPassFinish() { - WaitPassFinishRequest request; - std::vector responses; - multiCall(__func__, request, &responses); -} - -void ParameterClient2::synchronize(SyncObject syncObjectId) { - SynchronizeRequest request; - request.set_sync_object_id(syncObjectId); - std::vector responses; - multiCall(__func__, request, &responses); -} - -void ParameterClient2::asyncFinishPass(SyncObject syncObjectId) { - SynchronizeRequest request; - request.set_sync_object_id(syncObjectId); - request.set_trainer_id(trainerId_); - std::vector responses; - multiCall(__func__, request, &responses); -} - -void ParameterClient2::setConfig(const OptimizationConfig& optConfig, - const std::string& saveDir, - bool isSparseServer) { - SetConfigRequest request; - std::vector responses; - - for (auto& nameAndPara : parameterMap_) { - *request.add_param_configs() = nameAndPara.second->getConfig(); - } - - *request.mutable_opt_config() = optConfig; - request.set_save_dir(saveDir); - request.set_is_sparse_server(isSparseServer); - - std::vector requests; - requests.resize(clients_.size()); - for (size_t i = 0; i < requests.size(); ++i) { - 
requests[i].CopyFrom(request); - requests[i].set_server_id(i); - } - - responses.resize(clients_.size()); - size_t numClients = clients_.size(); - for (size_t i = 0; i < numClients; ++i) { - clients_[i].send(__func__, requests[i]); - } - for (size_t i = 0; i < numClients; ++i) { - clients_[i].recv(&responses[i]); - } -} - -bool ParameterClient2::inStatus(PServerStatus status) { - GetStatusRequest request; - std::vector responses; - - bool ok = true; - multiCall("getStatus", request, &responses); - for (auto& response : responses) { - if (response.status() != status) { - ok = false; - } - } - - return ok; -} - -void ParameterClient2::setStatus(PServerStatus status) { - SetStatusRequest request; - request.set_status(status); - std::vector responses; - multiCall(__func__, request, &responses); -} - -void ParameterClient2::waitForStatus(PServerStatus status) { - while (!inStatus(status)) { - sleep(1); - } -} - -template -static void validateResponses(const std::vector& responses) { - for (auto& response : responses) { - CHECK(response.return_message().empty()) - << "client" << &response - &responses[0] - << " error:" << response.return_message(); - } -} - -PServerVector ParameterClient2::createVector() { - CreateVectorRequest request; - std::vector responses; - int64_t handle = -1; - - multiCall(__func__, request, &responses); - validateResponses(responses); - - for (auto& response : responses) { - if (handle == -1) { - handle = response.handle(); - } else { - CHECK_EQ(handle, response.handle()) << "Inconsistent handle from client" - << &response - &responses[0] << " " - << handle << " " << response.handle(); - } - } - return PServerVector{handle}; -} - -void ParameterClient2::releaseVector(PServerVector handle) { - ReleaseVectorRequest request; - std::vector responses; - - request.set_handle(handle.handle); - multiCall(__func__, request, &responses); - validateResponses(responses); -} - -PServerMatrix ParameterClient2::createMatrix(int32_t numCols) { - 
CreateMatrixRequest request; - std::vector responses; - int64_t handle = -1; - - request.set_num_cols(numCols); - multiCall(__func__, request, &responses); - validateResponses(responses); - - for (auto& response : responses) { - if (handle == -1) { - handle = response.handle(); - } else { - CHECK_EQ(handle, response.handle()) << "Inconsistent handle from client" - << &response - &responses[0] << " " - << handle << " " << response.handle(); - } - } - return PServerMatrix{handle}; -} - -void ParameterClient2::releaseMatrix(PServerMatrix handle) { - ReleaseMatrixRequest request; - std::vector responses; - - request.set_handle(handle.handle); - multiCall(__func__, request, &responses); - validateResponses(responses); -} - -void PreparedOperations::addOperationHelper(Operation* op, CpuVectorPtr vec) { - ProtoVector& pvec = *op->add_vectors(); - size_t dim = vec->getSize(); - pvec.set_dim(dim); - copyToRepeatedField(pvec.mutable_values(), vec->getData(), vec->getSize()); -} - -void PreparedOperations::addOperationHelper(Operation* op, CpuMatrixPtr mat) { - ProtoMatrix& pmat = *op->add_matrices(); - pmat.set_num_cols(mat->getWidth()); - pmat.set_num_rows(mat->getHeight()); - copyToRepeatedField( - pmat.mutable_values(), mat->getData(), pmat.num_cols() * pmat.num_rows()); -} - -static inline real addTwo(real a, double b) { return a + b; } - -void ParameterClient2::doOperation(PreparedOperations& ops, - bool waitForGradient, - bool sendBackGradient, - bool releasePass) { - std::vector responses; - ops.request_.set_wait_for_gradient(waitForGradient); - ops.request_.set_send_back_parameter(sendBackGradient); - ops.request_.set_release_pass(releasePass); - multiCall(__func__, ops.request_, &responses); - validateResponses(responses); - size_t numPassFinishServers = 0; - - size_t numOps = ops.request_.operations_size(); - for (auto& response : responses) { - numPassFinishServers += response.pass_finish(); - CHECK_EQ(numOps, (size_t)response.results_size()); - for (size_t opId = 
0; opId < numOps; ++opId) { - const OperationResult& result = response.results(opId); - std::vector& resultScalars = ops.localResults_[opId].resultScalars; - std::vector& resultVectors = - ops.localResults_[opId].resultVectors; - std::vector& resultMatrices = - ops.localResults_[opId].resultMatrices; - - if (&response == &responses[0]) { - /// Initialize results to zero - - resultScalars.resize(result.scalars_size()); - for (auto p : resultScalars) { - if (!p) continue; - *p = 0; - } - size_t numVectors = result.vectors_size(); - resultVectors.resize(numVectors); - for (size_t i = 0; i < numVectors; ++i) { - if (!resultVectors[i]) continue; - resultVectors[i]->resize(result.vectors(i).dim()); - resultVectors[i]->zeroMem(); - } - size_t numMatrices = result.matrices_size(); - resultMatrices.resize(numMatrices); - for (size_t i = 0; i < numMatrices; ++i) { - if (!resultMatrices[i]) continue; - resultMatrices[i]->resize(result.matrices(i).num_rows(), - result.matrices(i).num_cols()); - resultMatrices[i]->zeroMem(); - } - } - - // aggregate results from each pserver to results - - CHECK_EQ(resultScalars.size(), (size_t)result.scalars_size()); - for (ssize_t i = 0; i < result.scalars_size(); ++i) { - real* rscalar = resultScalars[i]; - if (!rscalar) continue; - *rscalar += result.scalars(i); - } - - CHECK_EQ(resultVectors.size(), (size_t)result.vectors_size()); - for (auto& vec : result.vectors()) { - int i = &vec - &result.vectors(0); - CpuVectorPtr rvec = resultVectors[i]; - if (!rvec) continue; - CHECK_EQ(rvec->getSize(), (size_t)vec.dim()); - std::transform(rvec->getData(), - rvec->getData() + rvec->getSize(), - vec.values().data(), - rvec->getData(), - addTwo); - } - - CHECK_EQ(resultMatrices.size(), (size_t)result.matrices_size()); - for (auto& mat : result.matrices()) { - int i = &mat - &result.matrices(0); - CpuMatrixPtr rmat = resultMatrices[i]; - if (!rmat) continue; - CHECK_EQ(rmat->getHeight(), (size_t)mat.num_rows()); - CHECK_EQ(rmat->getWidth(), 
(size_t)mat.num_cols()); - - std::transform(rmat->getData(), - rmat->getData() + rmat->getElementCnt(), - mat.values().data(), - rmat->getData(), - addTwo); - } - } - } - passFinish_ = numPassFinishServers == clients_.size(); -} - -real ParameterClient2::vectorDotProduct(PServerVector u, PServerVector v) { - real result = 0.0; - PreparedOperations ops; - ops.addOperation(PSERVER_OP_utv, u, v)(&result); - doOperation(ops, false, false); - return result; -} - -void ParameterClient2::vectorScale(PServerVector u, real a) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_au, u, a); - doOperation(ops, false, false); -} - -void ParameterClient2::vectorCopy(PServerVector src, PServerVector dst) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_COPY, src, dst); - doOperation(ops, false, false); -} - -void ParameterClient2::vectorAddMult(PServerVector u, PServerVector v, real a) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_au_bv, v, u, a, (real)1); - doOperation(ops, false, false); -} - -void ParameterClient2::vectorAddMultInto(PServerVector u, - PServerVector v, - PServerVector w, - real a) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_au_bv_cw, v, w, u, (real)1, a, (real)0); - doOperation(ops, false, false); -} - -void ParameterClient2::vectorScaleInto(PServerVector u, - PServerVector v, - real a) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_au_bv, v, u, a, (real)0); - doOperation(ops, false, false); -} - -void ParameterClient2::loadValueVector(const std::string& dirName) { - LoadValueRequest request; - request.set_dir_name(dirName); - std::vector responses; - - multiCall(__func__, request, &responses); - validateResponses(responses); -} - -void ParameterClient2::saveValueVector(const std::string& dirName) { - SaveValueRequest request; - request.set_dir_name(dirName); - std::vector responses; - - multiCall(__func__, request, &responses); - validateResponses(responses); -} - -} // namespace paddle diff --git 
a/paddle/legacy/pserver/ParameterClient2.h b/paddle/legacy/pserver/ParameterClient2.h deleted file mode 100644 index 9320e19c4df6c5439266f89e5599b9496f145172..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ParameterClient2.h +++ /dev/null @@ -1,602 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include - -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/pserver/BaseClient.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/Queue.h" -#include "paddle/legacy/utils/Util.h" - -#include "ParameterService.pb.h" - -#include "ProtoServer.h" -#include "SparseParameterDistribution.h" - -DECLARE_int32(parallel_thread_num); - -namespace paddle { - -struct PServerMatrix { - int64_t handle; -}; - -struct PServerVector { - int64_t handle; -}; - -/** - * @brief A class to help to prepare server-side operations. - */ -class PreparedOperations { - protected: - class ResultsAdder; - struct LocalOperationResult; - - public: - /** - * Offers an easy way to prepare operations that will be performed on - * server-side. - * - * Usage: - * @code - * addOperation(optype, arguments...)(results...) - * @endcode - * - * Examples: - * 1. 
set pserver vector to 1: - * @code - * PServerVector u = parameterClient.createVector(); - * addOperation(PSERVER_OP_RESET, u, (real)1); - * @endcode - * - * 2. Compute inner product of to pserver vectors. - * @code - * PServerVector u = parameterClient.createVector(); - * PServerVector v = parameterClient.createVector(); - * real result; - * addOperation(PSERVER_OP_utv, u, v)(&result) - * @endcode - * - * @param[in] operation The operation that pserver will perform. - * @param[in] args Argument list of the operation - * @return A ResultsAdder object initialized with the last element of - * localResults_. - */ - template - ResultsAdder addOperation(MatrixVectorOperation operation, Args... args) { - Operation* op = request_.add_operations(); - op->set_operation(operation); - localResults_.emplace_back(); - addOperationHelper(op, args...); - return ResultsAdder(&localResults_.back()); - } - - protected: - void addOperationHelper(Operation* op) {} - - /** - * @brief Helper function to add an new operation that takes a PServerVector - * as an operand. - */ - void addOperationHelper(Operation* op, PServerVector arg) { - op->add_pvectors(arg.handle); - } - - /** - * @brief Helper function to add an new operation that takes a PServerMatrix - * as an operand. - */ - void addOperationHelper(Operation* op, PServerMatrix arg) { - op->add_pmatrices(arg.handle); - } - - /** - * @brief Helper function to add an new operation that takes a real valued - * scalar as an operand. - */ - void addOperationHelper(Operation* op, real arg) { op->add_scalars(arg); } - - /** - * @brief Helper function to add an new operation that takes a CpuVectorPtr - * as an operand. - * @note The array of CpuVectors that arg points to will be copied to - * op's vectors field. - */ - void addOperationHelper(Operation* op, CpuVectorPtr arg); - - /** - * @brief Helper function to add an new operation that takes a CpuMatrixPtr - * as an operand. 
- * @note The array of CpuMatrixs that arg points to will be copied to - * op's matrices field. - */ - void addOperationHelper(Operation* op, CpuMatrixPtr arg); - - /** - * @brief Helper function to add an new operation and prepare the operands. - * - * @tparam Arg An operand of the operation. - * @tparam Args A list of rest operands of the operation. - * @param op Pointer to an Operation object. - */ - template - void addOperationHelper(Operation* op, Arg arg, Args... args) { - addOperationHelper(op, arg); - addOperationHelper(op, args...); - } - - /** - * @brief ResultsAdder offers easy ways to quickly store operation results. - */ - class ResultsAdder { - public: - explicit ResultsAdder(LocalOperationResult* localResult) - : localResult_(localResult) {} - template - void operator()(Args... args) { - addResult(args...); - } - void addResult() {} - void addResult(real* arg) { localResult_->resultScalars.push_back(arg); } - void AddResult(CpuVectorPtr arg) { - localResult_->resultVectors.push_back(arg); - } - void AddResult(CpuMatrixPtr arg) { - localResult_->resultMatrices.push_back(arg); - } - template - void addResult(Arg arg, Args... args) { - addResult(arg); - addResult(args...); - } - - protected: - LocalOperationResult* localResult_; - }; - - protected: - DoOperationRequest request_; - std::vector inputIovs_; - struct LocalOperationResult { - std::vector resultScalars; - std::vector resultVectors; - std::vector resultMatrices; - }; - std::vector localResults_; - friend class ParameterClient2; -}; - -struct ParameterSegments { - std::string name; // name of the parameter - size_t id; // id of the parameter -}; - -/** - * The client interface for parameter server. 
ParameterClient2 supports 2 modes - * for managing connections to parameter servers, in the 1st mode one connection - * is shared by 2 threads that are separately responsible for sending and - * recieving activities, in the 2nd mode one connection is owned by only one - * thread, and all the sending and recieving activities run in that single - * thread. - * TODO(yanfei): - * Additional core idea to further optimizate pserver performance is - * to do sync-sgd based parameter level instead of pserver level. - * full-parallelization based parameter level for sync-sgd also can - * sense forwardbackward computation layer-by-layer for more deeper layer - * model. - * Firstly, pserver can do full-parallelization on all computation based - * parameter level instead of waiting for all gradients are finished and - * start to send back parameters value immediately if parameter is ready - * instead of waiting for all parameters value are ready - * Secondly, parameter client can write back parameters to GPU instead of - * waiting until all parameters are received to CPU host end. - */ -class ParameterClient2 : public BaseClient { - public: - /** Constructor. - * @param separate True if sending and recieving activities are separated - * into 2 threads, otherwise false. - * @param port Port number that parameter client runs on. - * @param numPorts Number of ports parameter clients occupies, - * numPorts * pserver number is the total number of - * connections the parameter client maintains. - */ - ParameterClient2(bool separate = false, - int port = FLAGS_port, - int numPorts = FLAGS_ports_num); - - ~ParameterClient2(); - - static int calcParameterBlockSize(const std::vector& parameters, - size_t serviceNum); - - public: - bool init(const std::vector& parameters); - - /// service functions - - /** - * @brief Sends the segments in parameter to parameter servers, then receives - * the response from the servers. 
- * @param[in] updateMode Indicates how parameters should be updated on the - * server side. - * @param[in] parameterType Type of parameter that will be sent. - * @param[in] segments Segments in the parameter that will be sent. - * @param[in] numSamples Number of samples this update is based on. - * @param[in] cost Cost of the batch, will be used to calculate global object - * value. - * @param[in] sendBackParameter True if the updated parameters should be sent - * back, otherwise false. - * @param[in] sendBackParameterType Send back parameter type on pserver, - * PARAMETER_VALUE by default - * @param[in] recvParameterType pserver[sendBackParameterType] will be copy to - * client[recvParameterType] - * @note Only parameterType will be sent. - */ - void sendAndReceiveParameter(ParameterUpdateMode updateMode, - ParameterType parameterType, - const std::vector& segments, - int64_t numSamples, - real cost, - bool sendBackParameter, - ParameterType sendBackParameterType, - ParameterType recvParameterType); - - /** - * @brief Sends all parameters to parameter servers, and receives the response - * from the servers. - */ - void sendAndReceiveParameter( - ParameterUpdateMode updateMode, - ParameterType parameterType, - int64_t numSamples, - real cost, - bool sendBackParameter, - ParameterType sendBackParameterType = PARAMETER_VALUE, - ParameterType recvParameterType = PARAMETER_VALUE) { - sendAndReceiveParameter(updateMode, - parameterType, - allSegments_, - numSamples, - cost, - sendBackParameter, - sendBackParameterType, - recvParameterType); - } - - /** - * @brief Sends the segments in parameter to parameter servers. Each - * sendParameter() must be paired with a recvParameter() in the future. - * Only parameterType will be sent. - * - * @param[in] updateMode Indicates how parameters should be updated on the - * server side. - * @param[in] parameterType Type of parameter that will be sent. - * @param[in] segments Segments in the parameter that will be sent. 
- * @param[in] numSamples Number of samples this update is based on. - * @param[in] cost Cost of the batch, will be used to calculate global object - * value. - * @param[in] sendBackParameter True if the updated parameters should be sent - * back, otherwise false. - * @param[in] batchStatus Status of the batch. - * @note This function is non-blocking. This means that parameter should - * not change between this call and recvParameter() - */ - void sendParameter(ParameterUpdateMode updateMode, - ParameterType parameterType, - const std::vector& segments, - int64_t numSamples, - real cost, - bool sendBackParameter, - BatchStatus batchStatus); - - void recvParameter(); - - /** - * Sends all parameters to parameter servers, recvParameter() have to be - * invoked - * afterwards. - * - * @note This function is non-blocking. This means that if parameter should - * not changes between this call and recvParameter() - */ - void sendParameter(ParameterUpdateMode updateMode, - ParameterType parameterType, - int64_t numSamples, - real cost, - bool sendBackParameter, - BatchStatus batchStatus) { - sendParameter(updateMode, - parameterType, - allSegments_, - numSamples, - cost, - sendBackParameter, - batchStatus); - } - - /// Get all parameters from parameter servers - void getParameter(ParameterType recvParameterType = PARAMETER_VALUE, - ParameterType sendBackParameterType = PARAMETER_VALUE) { - sendAndReceiveParameter(PSERVER_UPDATE_MODE_GET_PARAM, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - true, // sendBackParameter = true - sendBackParameterType, - recvParameterType); - } - - /// Get parameters by sparse row ids from parameter servers - void getParameterSparse( - ParameterType recvParameterType = PARAMETER_VALUE, - ParameterType sendBackParameterType = PARAMETER_VALUE) { - sendAndReceiveParameter(PSERVER_UPDATE_MODE_GET_PARAM_SPARSE, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - true, // sendBackParameter = true - sendBackParameterType, - 
recvParameterType); - } - - /// Set all parameters on parameter servers using the local parameters - void setParameter() { - sendAndReceiveParameter(PSERVER_UPDATE_MODE_SET_PARAM, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - false); // sendBackParameter = false - } - /** - * Set all parameters on parameter servers, values will be zero - * means do not sending local parameters - */ - void setParameterZero() { - sendAndReceiveParameter(PSERVER_UPDATE_MODE_SET_PARAM_ZERO, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - false); // sendBackParameter = false - } - - /** - * @brief Wait until all gradient servers start one pass. - * - * @note This is now only used by the gradient servers for "sgd" - * algorithm. Calling this function means that the calling gradient - * server is ready to start a new pass. - */ - void waitPassStart(); - - /** - * @brief Wait until all gradient servers finish one pass. - * - * @note This is now only used by the gradient servers for "sgd" algorithm. - * Calling this function means that the calling gradient server - * finishes one pass. - */ - void waitPassFinish(); - - /// Wait until all gradient servers call this function. - void synchronize(SyncObject syncObjectId = SYNC_DEFAULT); - - /// Called when async-sgd finish pass. - void asyncFinishPass(SyncObject syncObjectId = SYNC_DEFAULT); - - void asyncStartPass(SyncObject syncObjectId = SYNC_DEFAULT) { - return synchronize(syncObjectId); - } - - /** - * @brief Execute the prepared operations on pservers, fetch the results and - * aggregate results from different pservers. - * @param[in] ops Prepared operations that will be executed on pservers. - * @param[in] waitForGradient If true, wait for gradient to be ready before - * starting the operations. - * @param[in] sendBackParameter If true, send back the parameter to clients - * after the operations are finished. 
- * @param[in] If true, and if all clients call waitPassFinish, signal all - * clients finish the pass. - */ - void doOperation(PreparedOperations& ops, - bool waitForGradient, - bool sendBackParameter, - bool releasePass = true); - - /** - * Set the configuration of pserver, including parameter config and - * optimization config - */ - void setConfig(const OptimizationConfig& optConfig, - const std::string& saveDir = "", - bool isSparseServer = false); - - /// Return true if all pservers are in the given status - bool inStatus(PServerStatus status); - bool isPassFinish() { return passFinish_; } - - /// Set pserver status - void setStatus(PServerStatus status); - - /** - * @brief Wait until all pservers are at status - * @note This function is not suitable for frequent use, - * because it sleeps 1 second each time when condition is satisfied. - */ - void waitForStatus(PServerStatus status); - - /// Create a column vector. The size is the dimension of parameter. - PServerVector createVector(); - - /// Release the PServerVector given handle. - void releaseVector(PServerVector handle); - - /** - * Create a column major matrix. The number of rows is the dimension of - * parameter. The number of columns is specifed by numCols. - */ - PServerMatrix createMatrix(int32_t numCols); - - /// Release the PServerMatrix given handle. - void releaseMatrix(PServerMatrix handle); - - // Some basic algebra functions - /// Calculate the dot product of u and v - real vectorDotProduct(PServerVector u, PServerVector v); - - /// Scale u by a - void vectorScale(PServerVector u, real a); - - /// Copy from src to dest - void vectorCopy(PServerVector src, PServerVector dst); - - /// u += v * a - void vectorAddMult(PServerVector u, PServerVector v, real a); - - /// u = v + w * a - void vectorAddMultInto(PServerVector u, - PServerVector v, - PServerVector w, - real a); - /// u = v * a - void vectorScaleInto(PServerVector u, PServerVector v, real a); - - /// Return pserver parameter value. 
- PServerVector getPServerParameterValue() { - PServerVector vec; - vec.handle = PARAMETER_VALUE; - return vec; - } - - /// Return pserver parameter gradient. - PServerVector getPServerParameterGradient() { - PServerVector vec; - vec.handle = PARAMETER_GRADIENT; - return vec; - } - - /** - * Tell pservers to load value vector from file. - * - * @param[in] dirName The directory that contains the value vector file. - */ - void loadValueVector(const std::string& dirName); - - /// Tell pservers to save value vector to file. - void saveValueVector(const std::string& dirName); - - void setTrainerId(int trainerId) { trainerId_ = trainerId; } - -#ifndef PADDLE_DISABLE_TIMER - void setForwardbackwardTime(uint64_t delta) { forwardbackwordTime_ = delta; } -#endif - - protected: - template - void multiCall(const char* funcName, - const ProtoIn& request, - std::vector* responses) { - responses->resize(clients_.size()); - size_t numClients = clients_.size(); - for (size_t i = 0; i < numClients; ++i) { - clients_[i].send(funcName, request); - } - for (size_t i = 0; i < numClients; ++i) { - clients_[i].recv(&(*responses)[i]); - } - } - - private: - void destroy(); - - /** - * @brief management function for parallelizing send/recv all connections - * to all pservers. it is called under one SyncThreadPool. it - * supports to use N thread to control M connections. the receiving - * actions can be started until all sending action to all connections - * owned by current thread are finished. Different connections - * controlled - * by different threads can transfer data asynchronously. - */ - void sendParallel(int tid, - size_t numThreads, - ParameterType recvParameterType); - /// sending thread routine for asynchronously send data - void send(int threadId); - /// receiving thread routing for asynchronously receive data - void recv(int threadId); - - /** - * @brief main routine to build data for pserver - * - * @note it can prepare different kinds of parameter type data. 
it can - * be regarded as layer for bridging real parameters data and - * protobuf data for communication. - * TODO(yanfei): - * can abstract additional layer to encode and decode data to/from - * protobuf data. - */ - void prepareSendData( - ParameterUpdateMode updateMode, - ParameterType parameterType, // client send type - const std::vector& parameterSegments, - int64_t numSamples, - real cost, - bool sendBackParameter, - ParameterType sendBackParameterType, // send back type in pserver - BatchStatus batchStatus, - SendJob* sendJob); - - /// start necessary threads for threadPool - void initThreads(); - - protected: - /// start port number of pserver - /// it deduce all ports for dense and sparse with some rules - int port_; - /// identify the trainer id using this client - int trainerId_; - -#ifndef PADDLE_DISABLE_TIMER - uint64_t forwardbackwordTime_; -#endif - std::mutex sparseAutoGrowthMutex_; - - /// map id to parameter used for decoding protobuf data - std::unordered_map parameterMap_; - /// segments for all parameters that needed to sync - std::vector allSegments_; - - /// module for sensing sparse parameters distribution on all pservers - std::unique_ptr sparseDistribution_; - - /// thread pool for parallelizing all connections to pservers - std::unique_ptr syncThreadPool_; - - bool passFinish_; -}; - -} // namespace paddle diff --git a/paddle/legacy/pserver/ParameterServer2.cpp b/paddle/legacy/pserver/ParameterServer2.cpp deleted file mode 100644 index 8533a322d92d292ee613d44795cf60462082a11b..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ParameterServer2.cpp +++ /dev/null @@ -1,1401 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ParameterServer2.h" - -#include -#include - -#include "paddle/legacy/math/SIMDFunctions.h" -#include "paddle/legacy/parameter/AverageOptimizer.h" -#include "paddle/legacy/parameter/FirstOrderOptimizer.h" -#include "paddle/legacy/parameter/OptimizerFunctions.h" -#include "paddle/legacy/parameter/OptimizerWithRegularizer.h" -#include "paddle/legacy/parameter/ParameterOptimizer.h" -#include "paddle/legacy/parameter/ParameterUpdateFunctions.h" -#include "paddle/legacy/parameter/Regularizer.h" -#include "paddle/legacy/parameter/ThreadLocalBuffer.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/GlobalConstants.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/StringUtil.h" - -DEFINE_int32(pserver_num_threads, 1, "number of threads for sync op exec"); -DEFINE_double(async_lagged_ratio_min, - 1.0, - "control config_.async_lagged_grad_discard_ratio() min value"); -DEFINE_double( - async_lagged_ratio_default, - 1.5, - "if async_lagged_grad_discard_ratio is not set in trainer_config.conf" - "use it as defalut value"); - -namespace paddle { - -const std::string ParameterServer2::kRetMsgInvalidMatrixHandle = - "Invalid matrix handle"; -const std::string ParameterServer2::kRetMsgInvalidVectorHandle = - "Invalid vector handle"; -const std::string ParameterServer2::kRetMsgUnknownOperation = - "Unknown operation"; - -ParameterServer2::ParameterServer2(const std::string& addr, - int port, - int rdmaCpu) - : ProtoServer(addr, port, rdmaCpu), - dataSize_(0), - size_(0), - gradientReadyBarrier_(FLAGS_num_gradient_servers + 1), - 
parameterReadyBarrier_(FLAGS_num_gradient_servers + 1), - passBarrier_(FLAGS_num_gradient_servers + 1), - numPassFinishClients_(0), - allClientPassFinish_(false), - serverId_(-1), - batchId_(-1) { - /** - * register function for remote client calling, these functions - * will be mapped to a data structure for quick looking up. each - * request from trainer can contains one function name to indicate - * remote action. this architecture looks like rpc style for pserver. - */ - REGISTER_SERVICE_FUNCTION_EX(ParameterServer2, sendParameter); - REGISTER_SERVICE_FUNCTION_EX(ParameterServer2, sendData); - REGISTER_SERVICE_FUNCTION(ParameterServer2, setConfig); - REGISTER_SERVICE_FUNCTION(ParameterServer2, setStatus); - REGISTER_SERVICE_FUNCTION(ParameterServer2, getStatus); - REGISTER_SERVICE_FUNCTION(ParameterServer2, doOperation); - REGISTER_SERVICE_FUNCTION(ParameterServer2, createVector); - REGISTER_SERVICE_FUNCTION(ParameterServer2, releaseVector); - REGISTER_SERVICE_FUNCTION(ParameterServer2, createMatrix); - REGISTER_SERVICE_FUNCTION(ParameterServer2, releaseMatrix); - REGISTER_SERVICE_FUNCTION(ParameterServer2, waitPassStart); - REGISTER_SERVICE_FUNCTION(ParameterServer2, waitPassFinish); - REGISTER_SERVICE_FUNCTION(ParameterServer2, synchronize); - REGISTER_SERVICE_FUNCTION(ParameterServer2, asyncFinishPass); - REGISTER_SERVICE_FUNCTION(ParameterServer2, loadValueVector); - REGISTER_SERVICE_FUNCTION(ParameterServer2, saveValueVector); - - /// thread pool for parallelizing some computations - if (FLAGS_pserver_num_threads > 1) { - syncThreadPool_.reset(new SyncThreadPool(FLAGS_pserver_num_threads, false)); - } -} - -bool ParameterServer2::init() { - vectors_.resize(NUM_PARAMETER_TYPES); - configMap_.clear(); - - numSamplesProcessed_ = 0; - cost_ = 0; - char* mpienv = getenv("OMPI_COMM_WORLD_SIZE"); - if (mpienv != NULL) { - mpiSize_ = atoi(mpienv); - } else { - mpiSize_ = 1; - } - status_ = PSERVER_STATUS_NOT_SET; - dataMems_.resize(FLAGS_num_gradient_servers); - 
synchronizeBarriers_.resize(SyncObject_ARRAYSIZE); - for (auto& barrier : synchronizeBarriers_) { - barrier.reset(new ThreadBarrier(FLAGS_num_gradient_servers)); - } - - // initialization for dicarding lagging gradient - asyncUpdateSteps_ = 0; - asyncTrainerSteps_.resize(FLAGS_num_gradient_servers); - asyncTrainerSteps_.assign(asyncTrainerSteps_.size(), 0); - asyncLaggedGradientsNum_ = 0; - asyncUpdateStat_.resize(static_cast(FLAGS_num_gradient_servers * - FLAGS_async_lagged_ratio_default)); - asyncUpdateStat_.assign(asyncUpdateStat_.size(), 0); - asyncTrainerDiscardStat_.resize(FLAGS_num_gradient_servers); - asyncTrainerDiscardStat_.assign(asyncTrainerDiscardStat_.size(), 0); - asyncTrainerCommitStat_.resize(FLAGS_num_gradient_servers); - asyncTrainerCommitStat_.assign(asyncTrainerCommitStat_.size(), 0); - - return true; -} - -void ParameterServer2::getStatus(const GetStatusRequest& request, - ProtoResponseCallback callback) { - (void)request; - GetStatusResponse response; - response.set_status(status_); - callback(response); -} - -void ParameterServer2::setStatus(const SetStatusRequest& request, - ProtoResponseCallback callback) { - status_ = request.status(); - SetStatusResponse response; - callback(response); -} - -void ParameterServer2::setConfig(const SetConfigRequest& request, - ProtoResponseCallback callback) { - { - std::lock_guard guard(parameterMutex_); - - serverId_ = request.server_id(); - isSparseServer_ = request.is_sparse_server(); - - if (!request.save_dir().empty()) { - mkDir(request.save_dir().c_str()); - } - - for (const auto& config : request.param_configs()) { - CHECK(!configMap_.count(config.para_id())) - << "Duplicated parameter name: " << config.name(); - configMap_[config.para_id()] = config; - CHECK_EQ(config.sparse_remote_update(), isSparseServer_); - } - - config_ = request.opt_config(); - if (config_.algorithm() == TrainAlgorithm::AsyncSGD) { - auto asyncLaggedRatio = config_.async_lagged_grad_discard_ratio(); - if (asyncLaggedRatio <= 
FLAGS_async_lagged_ratio_min) { - LOG(INFO) << "WARNING: async_lagged_grad_discard_ratio is too small" - << "reset to default, async_lagged_grad_discard_ratio = " - << FLAGS_async_lagged_ratio_default; - asyncLaggedRatio = FLAGS_async_lagged_ratio_default; - } - asyncLaggedThreshold_ = - static_cast(FLAGS_num_gradient_servers * asyncLaggedRatio); - LOG(INFO) << "discard lagged async gradient ratio: " << asyncLaggedRatio - << " asyncLaggedhreshold: " << asyncLaggedThreshold_; - } - if (isSparseServer_ && config_.num_batches_per_send_parameter() > 1) { - /// sparse server must NOT use local update mode - config_.set_num_batches_per_send_parameter(1); - } - - if (config_.num_batches_per_send_parameter() > 1 && - config_.center_parameter_update_method() == "average") { - /// scaling L1/L2 decay rate as large as L1/L2 apply in trainer - /// if parameter regularization in pserver - for (auto& pair : configMap_) { - ParameterConfig& config = pair.second; - if (config_.num_batches_per_send_parameter() == - config.num_batches_regularization()) { - real scale = - config_.delta_add_rate() * config.num_batches_regularization(); - if (config_.algorithm() == "sgd") { - scale *= FLAGS_num_gradient_servers; - } - config.set_decay_rate(config.decay_rate() * scale); - if (config.decay_rate() > 0.1f) { - LOG(FATAL) << "L2 decay=" << config.decay_rate() - << " for parameter:" << config.name() - << " is too large after scale in pserver!"; - } - config.set_decay_rate_l1(config.decay_rate_l1() * scale); - if (config.decay_rate_l1() > 0.1f) { - LOG(FATAL) << "L1 decay=" << config.decay_rate_l1() - << " for parameter:" << config.name() - << " is too large after scale in pserver!"; - } - - LOG(INFO) << "parameter:" << config.name() - << " decay apply in pserver," - << " L1 decay=" << config.decay_rate_l1() - << " L2 decay=" << config.decay_rate(); - } - } - } - } - - SetConfigResponse response; - callback(response); -} - -real bufferSum(const std::vector& buffers) { - real sum = 0; - for 
(const auto buffer : buffers) { - for (size_t i = 0; i < buffer.size; ++i) { - sum += buffer.base[i]; - } - } - return sum; -} - -void ParameterServer2::mergeSegments(BlockSegments* segments) { - if (segments->empty()) { - return; - } - std::sort(segments->begin(), segments->end()); - auto curr = segments->begin(); - for (auto it = segments->begin(); it != segments->end(); ++it) { - if (it->first <= curr->second) { - curr->second = std::max(curr->second, it->second); - } else { - ++curr; - *curr = *it; - } - } - ++curr; - segments->erase(curr, segments->end()); -} - -void ParameterServer2::setParameter(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers) { - (void)response; - (void)outputBuffers; - LOG(INFO) << "pserver: setParameter"; - std::lock_guard guard(parameterMutex_); - - int64_t numBlocks = blockIdMap_.size(); - CHECK_EQ(blockIdMap_.size(), blockOffsetMap_.size()); - /// total bytes for all the added blocks - int64_t totalSize = size_; - std::vector offsets; - offsets.reserve(request.blocks_size()); - std::vector blockIds; - blockIds.reserve(request.blocks_size()); - int bufferIndex = 0; - - if (!request.blocks().size()) { - LOG(WARNING) - << "--ports_num or --ports_num_for_sparse might be too large, " - << "or total dense parameter size or sparse parameters size " - << "might be too small, this psever doesn't store any parameter."; - return; - } - - for (const auto& block : request.blocks()) { - /// block size for parameter(e.g. 
128 for sparse row, 1K for dense) - uint64_t blockSize = getParameterConfig(block).parameter_block_size(); - BlockKey key(block.para_id(), block.block_id()); - if (inputBuffers.size()) { // if !=PSERVER_UPDATE_MODE_SET_PARAM_ZERO - Buffer buffer = inputBuffers[bufferIndex]; - ++bufferIndex; - CHECK_EQ(buffer.size, block.block_size()) - << "data size is too big:" - << " block_size=" << block.block_size() - << " data_size=" << buffer.size; - } - - /// add a new block - if (blockIdMap_.count(key) == 0) { - blockOffsetMap_[key] = totalSize; - blockIdMap_[key] = numBlocks; - ++numBlocks; - totalSize += blockSize; - } - offsets.push_back(blockOffsetMap_[key]); - blockIds.push_back(blockIdMap_[key]); - } - - size_ = totalSize; - LOG(INFO) << "pserver: new cpuvector: size=" << size_; - if (!vectors_[PARAMETER_VALUE]) { - /// vectors_ - const auto types = sgdOptimizerGetTypes(config_, true /*inPserver*/); - for (const auto type : types) { - vectors_[type].reset(new CpuVector(size_)); - vectors_[type]->zeroMem(); - } - - blockInfos_.resize(numBlocks); - for (auto& info : blockInfos_) { - info.lock.reset(new std::mutex()); - } - } else { - CHECK_EQ((size_t)size_, vectors_[PARAMETER_VALUE]->getSize()) - << "Currently adding new blocks is not supported. " - << "All blocks must be added in one setParameter call"; - } - - VectorPtr buf = vectors_[PARAMETER_VALUE]; - usedSegments_.reserve(offsets.size()); - /// if offsets is empty, means parameter_block_size is too big or too many - /// nodes. 
- if (offsets.empty()) { - LOG(WARNING) << "in setParameter: offsets is empty"; - } - for (size_t i = 0; i < offsets.size(); ++i) { - size_t blockId = blockIds[i]; - BlockInfo& info = blockInfos_[blockId]; - const ParameterConfig& config = getParameterConfig(request.blocks(i)); - info.config = &config; - info.offset = offsets[i]; - info.optimizer.reset(sgdOptimizerCreate( - config_, config, config.sparse_remote_update(), true /*inPserver*/)); - if (config.sparse_remote_update()) { - size_t width = config.dims(1); - CHECK_EQ(config.parameter_block_size(), width) - << "block size: " << config.parameter_block_size() - << "width : " << width; - } - info.optimizer->init(1, info.config); - usedSegments_.push_back(std::make_pair( - offsets[i], offsets[i] + request.blocks(i).block_size())); - } - mergeSegments(&usedSegments_); - - if (request.update_mode() == PSERVER_UPDATE_MODE_SET_PARAM) { - /// copy param from trainer - for (size_t i = 0; i < offsets.size(); ++i) { - Buffer buffer = inputBuffers[i]; - real* start = buf->getPoint(offsets[i]); - CHECK_LE(offsets[i] + buffer.size, buf->getSize()); - memcpy(start, buffer.base, sizeof(real) * buffer.size); - } - } else { - CHECK(request.update_mode() == PSERVER_UPDATE_MODE_SET_PARAM_ZERO); - /// nothing to do, value vector zero mem already - } -} - -void ParameterServer2::addGradient(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers) { - VLOG(1) << "pserver: addGradient"; - - { - ReadLockGuard guard(parameterMutex_); - int bufferIndex = 0; - for (const auto& block : request.blocks()) { - int64_t offset = getBlockOffset(block); - CHECK_GE(offset, 0) << "Only existing parameter block is allowed: " - << " id=" << block.para_id() - << " block id=" << block.block_id(); - - int64_t blockId = getBlockId(block); - CHECK_GE(blockId, 0) << "Only existing parameter block is allowed: " - << " id=" << block.para_id() - << " block id=" << block.block_id(); - - 
Buffer buffer = inputBuffers[bufferIndex]; - ++bufferIndex; - - const real* gradientBuffer = buffer.base; - real* gradientSumBuffer = vectors_[PARAMETER_GRADIENT]->getPoint(offset); - - size_t size = buffer.size; - - BlockInfo& info = blockInfos_[blockId]; - const ParameterConfig& config = getParameterConfig(blockId); - if (config.sparse_remote_update()) { - CHECK_EQ(size, config.parameter_block_size()); - } else { // dense - CHECK_LE(size, config.parameter_block_size()); - } - std::lock_guard guard(*info.lock); - simd::addTo(gradientSumBuffer, gradientBuffer, size); - } - } - if (request.batch_status() == BATCH_FINISH || - request.batch_status() == BATCH_START_AND_FINISH) { - numSamplesProcessed_ += request.num_samples(); - cost_ += request.cost(); - VLOG(1) << "num samples: " << numSamplesProcessed_ - << ", new cost:" << cost_; - - /// notify doOperation gradient ready - gradientReadyBarrier_.wait(); - - /// wait doOperation finish - parameterReadyBarrier_.wait(); - VLOG(1) << "start send back"; - } -} - -bool ParameterServer2::asyncGrdientCommitCheckAndStat( - const SendParameterRequest& request) { - const auto trainerId = request.trainer_id(); - int64_t trainerSteps = asyncTrainerSteps_[trainerId]; - CHECK_GE(asyncUpdateSteps_, trainerSteps) - << " async update steps overflows " - << " trainer id: " << trainerId - << " async update steps in pserver: " << asyncUpdateSteps_ - << " async update steps in request: " << trainerSteps; - - asyncUpdateSteps_++; - bool commitGradient = true; - - int64_t delta = asyncUpdateSteps_ - trainerSteps; - if (delta >= asyncLaggedThreshold_) { - VLOG(1) << "discard Async Update: " - << " trainer id: " << trainerId - << " pserver steps: " << asyncUpdateSteps_ - << " request steps: " << trainerSteps; - asyncLaggedGradientsNum_++; - commitGradient = false; - } - /// stat on lagged steps, to get total discard distribution - if (static_cast(delta) < asyncUpdateStat_.size()) { - asyncUpdateStat_[delta]++; - } else { - 
asyncUpdateStat_[asyncUpdateStat_.size() - 1]++; - } - /// stat on trainerId and discard, to get trainer condition - if (commitGradient) { - asyncTrainerCommitStat_[trainerId]++; - } else { - asyncTrainerDiscardStat_[trainerId]++; - } - - return commitGradient; -} - -static ThreadLocal> localBlockBitset_; - -void ParameterServer2::asyncSGD(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers) { - int64_t numBlocks = blockIdMap_.size(); - auto& localBlockBitset = *localBlockBitset_; - - if (isSparseServer_) { - if (localBlockBitset.empty()) { - localBlockBitset.resize(numBlocks); - } - localBlockBitset.assign(numBlocks, false); - } - - ReadLockGuard guard(parameterMutex_); - - if (request.send_back_parameter()) { - outputBuffers->reserve(request.blocks_size()); - } - - bool commitGradient = asyncGrdientCommitCheckAndStat(request); - - VectorPtr* vecs = parameter::getThreadLocalBuffer(); - size_t bufferIndex = 0; - for (const auto& block : request.blocks()) { - int64_t offset = getBlockOffset(block); - CHECK_GE(offset, 0) << "Only existing parameter block is allowed: " - << " id=" << block.para_id() - << " block id=" << block.block_id(); - int64_t blockId = getBlockId(block); - CHECK_GE(blockId, 0) << "Only existing parameter block is allowed: " - << " id=" << block.para_id() - << " block id=" << block.block_id(); - Buffer buffer = inputBuffers[bufferIndex]; - ++bufferIndex; - - size_t size = buffer.size; - - BlockInfo& info = blockInfos_[blockId]; - const ParameterConfig& config = getParameterConfig(blockId); - - std::lock_guard guard(*info.lock); - /// gradients are too obsolete, will be discarded - if (commitGradient) { - info.optimizer->startBatch(numSamplesProcessed_); - - for (const auto type : info.optimizer->getParameterTypes()) { - vecs[type]->subVecFrom(*vectors_[type], offset, size); - } - vecs[PARAMETER_GRADIENT]->subVecFrom(buffer.base, 0, size); - info.optimizer->update(vecs, 
config, isSparseServer_ ? 0 : -1); - - if (auto callback = info.optimizer->needSpecialTraversal(config)) { - blockTraverse(info, config, offset, size, vecs, callback); - } - info.optimizer->finishBatch(); - } - - if (commitGradient && isSparseServer_) { - localBlockBitset[blockId] = true; - } - - if (!isSparseServer_ && request.send_back_parameter()) { // dense - int type = request.send_back_parameter_type(); - sendBackParameter(block, type, response, &buffer, outputBuffers); - } - } /// foreach block - - asyncTrainerSteps_[request.trainer_id()] = asyncUpdateSteps_; - - if (commitGradient && isSparseServer_) { - /// find blocks that trainer do not request update - for (int64_t blockId = 0; blockId < numBlocks; ++blockId) { - if (localBlockBitset[blockId]) { - continue; - } - - BlockInfo& info = blockInfos_[blockId]; - const ParameterConfig& config = *info.config; - size_t size = config.parameter_block_size(); - - std::lock_guard guard(*info.lock); - info.optimizer->startBatch(numSamplesProcessed_); - if (auto callback = info.optimizer->needSpecialTraversal(config)) { - blockTraverse(info, config, info.offset, size, vecs, callback); - } - info.optimizer->finishBatch(); - } - } - - if (commitGradient && (request.batch_status() == BATCH_FINISH || - request.batch_status() == BATCH_START_AND_FINISH)) { - numSamplesProcessed_ += request.num_samples(); - } - - /// show some performance log if needed - if (request.trainer_id() == 0) { - /// batchId_ is approximately equal to "real batchId_" - batchId_++; - } -} - -void ParameterServer2::getParameter(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers) { - (void)inputBuffers; - LOG(INFO) << "pserver: getParameter"; - ReadLockGuard guard(parameterMutex_); - for (const auto& block : request.blocks()) { - int type = request.send_back_parameter_type(); - sendBackParameter(block, type, response, outputBuffers); - } -} - -void 
ParameterServer2::getParameterSparse(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers) { - (void)inputBuffers; - auto& buffer = *readWriteBuffer_; - size_t numReals = 0; - for (const auto& block : request.blocks()) { - numReals += getParameterConfig(block).dims(1); - } - buffer.resize(numReals); - - VLOG(3) << "pserver: getParameterSparse, numReals=" << numReals; - - ReadLockGuard guard(parameterMutex_); - size_t offset = 0; - for (const auto& block : request.blocks()) { - size_t width = getParameterConfig(block).dims(1); - Buffer buf = {buffer.data() + offset, width}; - int type = request.send_back_parameter_type(); - sendBackParameterSparse(block, type, response, &buf, width, outputBuffers); - offset += width; - } -} - -void ParameterServer2::sendBackParameter(const ParameterBlock& block, - int parameterType, - SendParameterResponse* response, - std::vector* outputBuffers) { - ParameterBlock* returnBlock = response->add_blocks(); - returnBlock->set_para_id(block.para_id()); - returnBlock->set_block_id(block.block_id()); - returnBlock->set_begin_pos(block.begin_pos()); - returnBlock->set_block_size(block.block_size()); - - int64_t offset = getBlockOffset(block); - CHECK_GE(offset, 0) << "Only existing parameter block is allowed: " - << " id=" << block.para_id() - << " block id=" << block.block_id(); - - real* valueBuffer = vectors_[parameterType]->getPoint(offset); - outputBuffers->push_back({valueBuffer, (size_t)block.block_size()}); -} - -void ParameterServer2::sendBackParameter(const ParameterBlock& block, - int parameterType, - SendParameterResponse* response, - Buffer* buffer, - std::vector* outputBuffers) { - ParameterBlock* returnBlock = response->add_blocks(); - returnBlock->set_para_id(block.para_id()); - returnBlock->set_block_id(block.block_id()); - returnBlock->set_begin_pos(block.begin_pos()); - returnBlock->set_block_size(block.block_size()); - - int64_t offset = 
getBlockOffset(block); - CHECK_GE(offset, 0) << "Only existing parameter block is allowed: " - << " id=" << block.para_id() - << " block id=" << block.block_id(); - - size_t size = buffer->size; - real* valueBuffer = vectors_[parameterType]->getPoint(offset); - /// copy to second buffer to avoid to be polluted by other request - memcpy(buffer->base, valueBuffer, sizeof(real) * size); - outputBuffers->push_back({buffer->base, size}); -} - -void ParameterServer2::sendBackParameterSparse( - const ParameterBlock& block, - int parameterType, - SendParameterResponse* response, - Buffer* buffer, - size_t width, - std::vector* outputBuffers) { - ParameterBlock* returnBlock = response->add_blocks(); - returnBlock->set_para_id(block.para_id()); - returnBlock->set_block_id(block.block_id()); - returnBlock->set_begin_pos(block.begin_pos()); - returnBlock->set_block_size(block.block_size()); - int64_t offset = getBlockOffset(block); - CHECK_GE(offset, 0) << "Only existing parameter block is allowed: " - << " id=" << block.para_id() - << " block id=" << block.block_id(); - - real* valueBuffer = vectors_[parameterType]->getPoint(offset); - CHECK_EQ(buffer->size, width); - memcpy(buffer->base, valueBuffer, width * sizeof(real)); - outputBuffers->push_back(*buffer); -} - -void ParameterServer2::readAllBlocks( - MsgReader* msgReader, std::vector* buffers) { - auto& buffer = *readWriteBuffer_; - size_t numBlocks = msgReader->getNumBlocks(); - buffer.resizeWithAlignHints(msgReader->getTotalLength() / sizeof(real), - numBlocks); - std::vector bufs(numBlocks); - buffers->clear(); - buffers->reserve(numBlocks); - buffer.resetAlignAlloc(); - for (size_t i = 0; i < numBlocks; ++i) { - size_t len = msgReader->getBlockLength(i); - CHECK_EQ(len % sizeof(real), (size_t)0); - size_t size = len / sizeof(real); - bufs[i] = buffer.nextBlock(size); - buffers->push_back({(real*)bufs[i], size}); - } - msgReader->readBlocks(bufs); -} - -void ParameterServer2::sendParameter(const SendParameterRequest& 
request, - std::unique_ptr msgReader, - ProtoResponseCallbackEx callback) { - SendParameterResponse response; - std::vector inputBuffers; - std::vector outputBuffers; - readAllBlocks(msgReader.get(), &inputBuffers); - msgReader.reset(); - - switch (request.update_mode()) { - case PSERVER_UPDATE_MODE_SET_PARAM: - case PSERVER_UPDATE_MODE_SET_PARAM_ZERO: - setParameter(request, inputBuffers, &response, &outputBuffers); - break; - case PSERVER_UPDATE_MODE_GET_PARAM: - getParameter(request, inputBuffers, &response, &outputBuffers); - break; - case PSERVER_UPDATE_MODE_GET_PARAM_SPARSE: - getParameterSparse(request, inputBuffers, &response, &outputBuffers); - break; - case PSERVER_UPDATE_MODE_ASYNC_SGD: - asyncSGD(request, inputBuffers, &response, &outputBuffers); - break; - case PSERVER_UPDATE_MODE_ADD_GRADIENT: - addGradient(request, inputBuffers, &response, &outputBuffers); - break; - case PSERVER_UPDATE_MODE_AVERAGE_PARAMETER: - break; - } - switch (request.update_mode()) { - case PSERVER_UPDATE_MODE_ADD_GRADIENT: - (*requestVec_).push_back(request); - (*callbackVec_).push_back(callback); - if (request.batch_status() == BATCH_FINISH || - request.batch_status() == BATCH_START_AND_FINISH) { - for (size_t i = 0; i < (*requestVec_).size(); i++) { - ReadLockGuard guard(parameterMutex_); - SendParameterRequest& request = (*requestVec_)[i]; - SendParameterResponse responseTemp; - - std::vector outputIovs; - if (request.send_back_parameter()) { - CHECK(!isSparseServer_); - std::vector outputBuffersTemp; - for (const auto& block : request.blocks()) { - int type = request.send_back_parameter_type(); - sendBackParameter(block, type, &responseTemp, &outputBuffersTemp); - } - outputIovs.reserve(outputBuffersTemp.size()); - for (auto buffer : outputBuffersTemp) { - outputIovs.push_back({buffer.base, buffer.size * sizeof(real)}); - } - } - - ProtoResponseCallbackEx& callbackTemp = (*callbackVec_)[i]; - callbackTemp(responseTemp, outputIovs); - } - (*requestVec_).clear(); - 
(*callbackVec_).clear(); - } - break; - case PSERVER_UPDATE_MODE_SET_PARAM: - case PSERVER_UPDATE_MODE_SET_PARAM_ZERO: - case PSERVER_UPDATE_MODE_GET_PARAM: - case PSERVER_UPDATE_MODE_GET_PARAM_SPARSE: - case PSERVER_UPDATE_MODE_ASYNC_SGD: - case PSERVER_UPDATE_MODE_AVERAGE_PARAMETER: - std::vector outputIovs; - outputIovs.reserve(outputBuffers.size()); - for (auto buffer : outputBuffers) { - outputIovs.push_back({buffer.base, buffer.size * sizeof(real)}); - } - callback(response, outputIovs); - break; - } -} - -template -void ParameterServer2::reduceAndSendData(const SendDataRequest& request, - std::unique_ptr& msgReader, - ProtoResponseCallbackEx& callback) { - SendDataResponse response; - response.set_type(request.type()); - response.set_server_id(serverId_); - - auto sendData = reinterpret_cast(dataMems_[0].get()->getBuf()); - size_t rawMemSize = dataMems_[0].get()->getSize(); - CHECK_EQ(rawMemSize % sizeof(Dtype), 0U); - size_t dataMemSize = rawMemSize / sizeof(Dtype); - for (size_t i = 1; i < dataMems_.size(); ++i) { - CHECK_EQ(dataMems_[i].get()->getSize(), rawMemSize); - auto data = reinterpret_cast(dataMems_[i].get()->getBuf()); - for (size_t j = 0; j < dataMemSize; ++j) { - sendData[j] += data[j]; - } - } - std::vector outputIovs; - auto block = response.add_blocks(); - outputIovs.push_back({sendData, rawMemSize}); - block->set_total_size(rawMemSize); - block->set_data_size(sizeof(Dtype)); - callback(response, outputIovs); -} - -void ParameterServer2::templateReduceSum(const SendDataRequest& request, - std::unique_ptr& msgReader, - ProtoResponseCallbackEx& callback) { - const auto& block = request.blocks(0); - switch (block.data_type()) { - case TRANS_FLOAT: - reduceAndSendData(request, msgReader, callback); - break; - case TRANS_DOUBLE: - reduceAndSendData(request, msgReader, callback); - break; - case TRANS_INT32: - reduceAndSendData(request, msgReader, callback); - break; - case TRANS_UINT32_T: - reduceAndSendData(request, msgReader, callback); - 
break; - case TRANS_INT64_T: - reduceAndSendData(request, msgReader, callback); - break; - case TRANS_UINT64_T: - reduceAndSendData(request, msgReader, callback); - break; - default: - LOG(FATAL) << "not supported"; - break; - } -} - -void ParameterServer2::sendData(const SendDataRequest& request, - std::unique_ptr msgReader, - ProtoResponseCallbackEx callback) { - SendDataResponse response; - response.set_type(request.type()); - response.set_server_id(serverId_); - - switch (request.update_mode()) { - case DATA_UPDATE_MODE_SET_OWN: { - CHECK_EQ(msgReader->getNumBlocks(), (size_t)(request.blocks_size())); - size_t totalLen = msgReader->getTotalLength(); - if (totalLen > 0) { - CHECK_EQ(msgReader->getNumBlocks(), 1U) - << "Only one block currently support now!"; - const auto& block = request.blocks(0); - if (0 == dataSize_) { - dataSize_ = block.data_size(); - } else { - CHECK_EQ(dataSize_, block.data_size()); - } - int64_t serverId = request.server_id(); - if (serverId_ < 0) { - serverId_ = serverId; - } else { - CHECK_EQ(serverId_, serverId); - } - int64_t clientId = request.client_id(); - dataMems_[clientId] = std::make_shared(totalLen); - CHECK_EQ(totalLen % sizeof(block.data_size()), 0U); - msgReader->readNextBlock(dataMems_[clientId].get()->getBuf()); - } - msgReader.reset(); - std::vector outputIovs; - callback(response, outputIovs); - break; - } - case DATA_UPDATE_MODE_GET_ALL: { - /// Currently only support DATA_REDUCE_SUM - /// And their Operations are just add - CHECK(DATA_REDUCE_SUM == request.type()); - templateReduceSum(request, msgReader, callback); - break; - } - default: { LOG(FATAL) << "not supported"; } - } -} - -void ParameterServer2::clearUnusedSegments(CpuVector* vec) { - real* data = vec->getData(); - if (usedSegments_.empty()) { - return; - } - memset(data, 0, sizeof(real) * usedSegments_[0].first); - memset(data + usedSegments_.back().second, - 0, - sizeof(real) * (size_ - usedSegments_.back().second)); - size_t n = size_ - 
usedSegments_.back().second; - - for (size_t i = 1; i < usedSegments_.size(); ++i) { - memset( - data + usedSegments_[i - 1].second, - 0, - sizeof(real) * (usedSegments_[i].first - usedSegments_[i - 1].second)); - n += usedSegments_[i].first - usedSegments_[i - 1].second; - } -} - -void ParameterServer2::parallelExecForEachBlock(ExecFunc func) { - SyncThreadPool::execHelper( - syncThreadPool_.get(), [&](int tid, size_t numThreads) { - int64_t numBlocks = blockIdMap_.size(); - VectorPtr* vecs = parameter::getThreadLocalBuffer(); - for (int64_t blockId = tid; blockId < numBlocks; - blockId += numThreads) { - func(blockId, vecs); - } - }); -} - -void ParameterServer2::blockTraverse( - BlockInfo& info, - const ParameterConfig& config, - int64_t offset, - size_t size, - const VectorPtr vecs[], - const ParameterOptimizer::TraverseCallback& callback) { - /// setup sub bufs - for (const auto type : info.optimizer->getParameterTypes()) { - vecs[type]->subVecFrom(*vectors_[type], offset, size); - } - callback(vecs, config, config.sparse_remote_update() ? 0 : -1LU); -} - -void ParameterServer2::op_SGD(const Operation& operation, - OperationResult* result) { - (void)operation; - (void)result; - - if (allClientPassFinish_) { - /// when all clients signal pass finished, the update - /// is empty. - return; - } - - { - parallelExecForEachBlock([&](int64_t blockId, const VectorPtr vecs[]) { - BlockInfo& info = blockInfos_[blockId]; - const ParameterConfig& config = getParameterConfig(blockId); - int64_t offset = info.offset; - size_t size = config.parameter_block_size(); - - info.optimizer->startBatch(numSamplesProcessed_); - - for (const auto type : info.optimizer->getParameterTypes()) { - vecs[type]->subVecFrom(*vectors_[type], offset, size); - } - info.optimizer->update( - vecs, config, config.sparse_remote_update() ? 
0 : -1LU); - vecs[PARAMETER_GRADIENT]->zeroMem(); - - if (auto callback = info.optimizer->needSpecialTraversal(config)) { - blockTraverse(info, config, offset, size, vecs, callback); - } - info.optimizer->finishBatch(); - }); - } - - batchId_++; -} - -void ParameterServer2::op_start_pass(const Operation& operation, - OperationResult* result) { - (void)operation; - (void)result; - - parallelExecForEachBlock([&](int64_t blockId, const VectorPtr vecs[]) { - BlockInfo& info = blockInfos_[blockId]; - info.optimizer->startPass(); - }); -} - -void ParameterServer2::op_finish_pass(const Operation& operation, - OperationResult* result) { - (void)operation; - (void)result; - - parallelExecForEachBlock([&](int64_t blockId, const VectorPtr vecs[]) { - BlockInfo& info = blockInfos_[blockId]; - const ParameterConfig& config = getParameterConfig(blockId); - size_t size = config.parameter_block_size(); - - /// catch up with - if (auto callback = info.optimizer->startCatchUpWith()) { - blockTraverse(info, config, info.offset, size, vecs, callback); - info.optimizer->finishCatchUpWith(); - } - - /// finish pass - info.optimizer->finishPass(); - }); - batchId_ = 0; -} - -void ParameterServer2::op_apply(const Operation& operation, - OperationResult* result) { - (void)operation; - (void)result; - - parallelExecForEachBlock([&](int64_t blockId, const VectorPtr vecs[]) { - BlockInfo& info = blockInfos_[blockId]; - const ParameterConfig& config = getParameterConfig(blockId); - int64_t offset = info.offset; - size_t size = config.parameter_block_size(); - - // catch up with - if (auto callback = info.optimizer->startCatchUpWith()) { - blockTraverse(info, config, offset, size, vecs, callback); - info.optimizer->finishCatchUpWith(); - } - - // apply to PARAMETER_APPLY - if (auto callback = info.optimizer->apply()) { - blockTraverse(info, config, offset, size, vecs, callback); - } - }); -} - -void ParameterServer2::op_randomize(const Operation& operation, - OperationResult* result) { - 
LOG(INFO) << "ParameterServer2::op_randomize: serverId=" << serverId_; - - CpuVector& valueVec = *vectors_[PARAMETER_VALUE]; - - parallelExecForEachBlock([&](int64_t blockId, const VectorPtr vecs[]) { - BlockInfo& info = blockInfos_[blockId]; - const ParameterConfig& config = getParameterConfig(blockId); - size_t size = config.parameter_block_size(); - - vecs[PARAMETER_VALUE]->subVecFrom(valueVec, info.offset, size); - Parameter::randomize(vecs[PARAMETER_VALUE], config); - }); -} - -void ParameterServer2::loadValueVector(const LoadValueRequest& request, - ProtoResponseCallback callback) { - LoadValueResponse response; - LOG(INFO) << "ParameterServer2::loadValueVector: serverId=" << serverId_; - - constexpr int kBufLen = 100; - char buf[kBufLen]; - snprintf(buf, kBufLen, "/pserver.%04d", static_cast(serverId_)); - std::string filename = request.dir_name() + buf; - - std::ifstream fs(filename, std::ios_base::binary); - CHECK(fs) << "Fail to open " << filename; - - CpuVector& vec = *vectors_[PARAMETER_VALUE]; - Parameter::Header header; - CHECK(fs.read(reinterpret_cast(&header), sizeof(header))) - << "Fail to read parameters in pserver"; - CHECK(Parameter::isHeaderFormatSupported(header.format)) - << "Incorrect format version: " << header.format; - CHECK_EQ(header.size, (size_t)size_) - << "The size (" << header.size << ") in the file does not match the size " - << "(" << size_ << ") of the pserver: " << serverId_; - CHECK_EQ(header.valueSize, sizeof(real)) << "Unsupported valueSize " - << header.valueSize; - CHECK(fs.read(reinterpret_cast(vec.getData()), - header.size * sizeof(real))); - - callback(response); -} - -void ParameterServer2::saveValueVector(const SaveValueRequest& request, - ProtoResponseCallback callback) { - SaveValueResponse response; - LOG(INFO) << "ParameterServer2::SaveValueVector: serverId=" << serverId_; - - mkDir(request.dir_name().c_str()); - - constexpr int kBufLen = 100; - char buf[kBufLen]; - snprintf(buf, kBufLen, "/pserver.%04d", 
static_cast(serverId_)); - std::string filename = request.dir_name() + buf; - - std::ofstream fs(filename, std::ios_base::binary); - CHECK(fs) << "Fail to open " << filename; - - CpuVector& vec = vectors_[PARAMETER_APPLY] ? *vectors_[PARAMETER_APPLY] - : *vectors_[PARAMETER_VALUE]; - Parameter::Header header; - // TODO(TJ): save param headerFormat_ - header.format = PARAM_FORMAT_ORIGINAL; - header.valueSize = sizeof(real); - header.size = size_; - - CHECK_EQ(header.size, vec.getSize()); - - CHECK(fs.write(reinterpret_cast(&header), sizeof(header))) - << "Fail to write parameter in pserver: " << serverId_; - - CHECK(fs.write(reinterpret_cast(vec.getData()), - header.size * sizeof(real))) - << "Fail to write parameter in pserver: " << serverId_; - - callback(response); -} - -void ParameterServer2::op_RESET(const Operation& operation, - OperationResult* result) { - (void)result; - CpuVector* u = vectors_[operation.pvectors(0)].get(); - u->reset(operation.scalars(0)); - clearUnusedSegments(u); -} - -void ParameterServer2::op_utv(const Operation& operation, - OperationResult* result) { - real* u = vectors_[operation.pvectors(0)]->getData(); - real* v = vectors_[operation.pvectors(1)]->getData(); - int64_t size = size_; - double sum = 0; - for (int64_t i = 0; i < size; ++i) { - sum += (double)u[i] * (double)v[i]; - } - result->add_scalars(sum); -} - -void ParameterServer2::op_au_bv(const Operation& operation, - OperationResult* result) { - (void)result; - real* u = vectors_[operation.pvectors(0)]->getData(); - real* v = vectors_[operation.pvectors(1)]->getData(); - int64_t size = size_; - real a = operation.scalars(0); - real b = operation.scalars(1); - for (int64_t i = 0; i < size; ++i) { - v[i] = a * u[i] + b * v[i]; - } -} - -void ParameterServer2::op_COPY(const Operation& operation, - OperationResult* result) { - (void)result; - real* u = vectors_[operation.pvectors(0)]->getData(); - real* v = vectors_[operation.pvectors(1)]->getData(); - int64_t size = size_; - for 
(int64_t i = 0; i < size; ++i) { - v[i] = u[i]; - } -} - -void ParameterServer2::op_au(const Operation& operation, - OperationResult* result) { - (void)result; - real* u = vectors_[operation.pvectors(0)]->getData(); - int64_t size = size_; - real a = operation.scalars(0); - for (int64_t i = 0; i < size; ++i) { - u[i] *= a; - } -} - -void ParameterServer2::op_au_bv_cw(const Operation& operation, - OperationResult* result) { - (void)result; - real* u = vectors_[operation.pvectors(0)]->getData(); - real* v = vectors_[operation.pvectors(1)]->getData(); - real* w = vectors_[operation.pvectors(2)]->getData(); - int64_t size = size_; - real a = operation.scalars(0); - real b = operation.scalars(1); - real c = operation.scalars(2); - for (int64_t i = 0; i < size; ++i) { - w[i] = a * u[i] + b * v[i] + c * w[i]; - } -} - -void ParameterServer2::op_make_steepest_desc_dir(const Operation& operation, - OperationResult* result) { - (void)result; - real* dir = vectors_[operation.pvectors(0)]->getData(); - real* grad = vectors_[operation.pvectors(1)]->getData(); - real* x = vectors_[operation.pvectors(2)]->getData(); - int64_t size = size_; - real l1weight = operation.scalars(0); - for (int64_t i = 0; i < size; ++i) { - if (x[i] < 0) { - dir[i] = -grad[i] + l1weight; - } else if (x[i] > 0) { - dir[i] = -grad[i] - l1weight; - } else { - if (grad[i] < -l1weight) { - dir[i] = -grad[i] - l1weight; - } else if (grad[i] > l1weight) { - dir[i] = -grad[i] + l1weight; - } else { - dir[i] = 0; - } - } - } -} - -void ParameterServer2::op_fix_dir_signs(const Operation& operation, - OperationResult* result) { - (void)result; - real* dir = vectors_[operation.pvectors(0)]->getData(); - real* steepestDescDir = vectors_[operation.pvectors(1)]->getData(); - int64_t size = size_; - for (int64_t i = 0; i < size; ++i) { - if (dir[i] * steepestDescDir[i] <= 0) { - dir[i] = 0; - } - } -} - -void ParameterServer2::op_fix_omega_signs(const Operation& operation, - OperationResult* result) { - (void)result; 
- real* x = vectors_[operation.pvectors(0)]->getData(); - real* newx = vectors_[operation.pvectors(1)]->getData(); - int64_t size = size_; - for (int64_t i = 0; i < size; ++i) { - if (x[i] * newx[i] < 0) { - newx[i] = 0; - } - } -} - -void ParameterServer2::op_dir_deriv(const Operation& operation, - OperationResult* result) { - real* dir = vectors_[operation.pvectors(0)]->getData(); - real* grad = vectors_[operation.pvectors(1)]->getData(); - real* x = vectors_[operation.pvectors(2)]->getData(); - int64_t size = size_; - real l1weight = operation.scalars(0); - double sum = 0; - for (int64_t i = 0; i < size; ++i) { - if (dir[i] != 0) { - if (x[i] < 0) { - sum += dir[i] * (grad[i] - l1weight); - } else if (x[i] > 0) { - sum += dir[i] * (grad[i] + l1weight); - } else if (dir[i] < 0) { - sum += dir[i] * (grad[i] - l1weight); - } else if (dir[i] > 0) { - sum += dir[i] * (grad[i] + l1weight); - } - } - } - result->add_scalars(sum); -} - -void ParameterServer2::op_cost(const Operation& operation, - OperationResult* result) { - real* x = vectors_[operation.pvectors(0)]->getData(); - real* newgrad = vectors_[operation.pvectors(1)]->getData(); - int64_t size = size_; - real l1weight = operation.scalars(0); - real l2weight = operation.scalars(1); - double cost_real = cost_ / mpiSize_; - double sum_weight_l1 = 0; - double sum_weight_l2 = 0; - for (int64_t i = 0; i < size; ++i) { - sum_weight_l1 += std::abs(x[i]); - sum_weight_l2 += x[i] * x[i]; - newgrad[i] += 2.0 * l2weight * x[i]; - } - cost_real += l1weight * sum_weight_l1 + l2weight * sum_weight_l2; - result->add_scalars(cost_real); -} - -ParameterServer2::OperatorFunction ParameterServer2::opFuncs[] = { - nullptr, // PSERVER_OP_utu = 0; - &ParameterServer2::op_utv, // PSERVER_OP_utv = 1; - &ParameterServer2::op_au, // PSERVER_OP_au = 2; - &ParameterServer2::op_au_bv, // PSERVER_OP_au_bv = 3; - nullptr, // PSERVER_OP_aAx_bu = 4; - &ParameterServer2::op_SGD, // PSERVER_OP_SGD = 5; - &ParameterServer2::op_RESET, // 
PSERVER_OP_RESET = 6; - &ParameterServer2::op_COPY, // PSERVER_OP_COPY = 7; - &ParameterServer2::op_au_bv_cw, // PSERVER_OP_au_bv_cw = 8; - &ParameterServer2::op_make_steepest_desc_dir, - /// PSERVER_OP_MAKE_STEEPEST_DESC_DIR = 9; - &ParameterServer2::op_fix_dir_signs, // PSERVER_OP_FIX_SIGNS = 10; - &ParameterServer2::op_dir_deriv, // PSERVER_OP_DIR_DERIV = 11; - &ParameterServer2::op_fix_omega_signs, // PSERVER_OP_FIX_OMEGA_SIGNS = 12; - &ParameterServer2::op_cost, // PSERVER_OP_COST = 13 - &ParameterServer2::op_start_pass, // PSERVER_OP_START_PASS = 14 - &ParameterServer2::op_finish_pass, // PSERVER_OP_FINISH_PASS = 15 - &ParameterServer2::op_randomize, // PSERVER_OP_RANDOMIZE = 16 - &ParameterServer2::op_apply, // PSERVER_OP_APPLY = 17 -}; - -void ParameterServer2::doOperation(const DoOperationRequest& request, - ProtoResponseCallback callback) { - if (request.wait_for_gradient()) { - /// wait gradient update - gradientReadyBarrier_.wait(); - allClientPassFinish_ = numPassFinishClients_ == FLAGS_num_gradient_servers; - } - - DoOperationResponse response; - response.set_pass_finish(allClientPassFinish_); - - for (const auto& op : request.operations()) { - OperationResult* opResult = response.add_results(); - if (op.operation() >= ARRAYSIZE(opFuncs)) { - LOG(ERROR) << "Unknown operation " << op.operation(); - response.set_return_message(kRetMsgUnknownOperation); - } - OperatorFunction opFunc = opFuncs[op.operation()]; - if (!opFunc) { - LOG(ERROR) << "Operation not implemented: " << op.operation(); - response.set_return_message(kRetMsgUnknownOperation); - } - (this->*opFunc)(op, opResult); - } - - if (request.send_back_parameter()) { - /// clean current cost - cost_ = 0; - - if (allClientPassFinish_ && request.release_pass()) { - /// This signals that all clients finish one pass, so waitPassFinish() - /// will stop waiting. 
- numPassFinishClients_ = 0; - } - - /// notify addGradient() to send back parameter - parameterReadyBarrier_.wait(); - } - callback(response); -} - -void ParameterServer2::waitPassStart(const WaitPassStartRequest& request, - ProtoResponseCallback callback) { - passBarrier_.wait(); - callback(WaitPassStartResponse()); -} - -void ParameterServer2::waitPassFinish(const WaitPassFinishRequest& request, - ProtoResponseCallback callback) { - numPassFinishClients_ += 1; - - while (numPassFinishClients_ != 0) { - /// notify doOperation gradient ready - gradientReadyBarrier_.wait(); - /// wait doOperation finish - parameterReadyBarrier_.wait(); - } - - callback(WaitPassFinishResponse()); -} - -void ParameterServer2::synchronize(const SynchronizeRequest& request, - ProtoResponseCallback callback) { - synchronizeBarriers_[request.sync_object_id()]->wait(); - dataSize_ = 0; - callback(SynchronizeResponse()); -} - -void ParameterServer2::asyncFinishPass(const SynchronizeRequest& request, - ProtoResponseCallback callback) { - synchronizeBarriers_[request.sync_object_id()]->wait(); - callback(SynchronizeResponse()); - - if (request.trainer_id() == 0) { - batchId_ = 0; - } -} - -void ParameterServer2::createVector(const CreateVectorRequest& request, - ProtoResponseCallback callback) { - (void)request; - CreateVectorResponse response; - LOG(INFO) << "ParameterServer2::createVector: size=" << size_; - CpuVectorPtr vec = std::make_shared(size_); - int64_t handle = -1; - { - std::lock_guard guard(parameterMutex_); - handle = vectors_.size(); - vectors_.push_back(vec); - } - response.set_handle(handle); - callback(response); -} - -void ParameterServer2::releaseVector(const ReleaseVectorRequest& request, - ProtoResponseCallback callback) { - ReleaseVectorResponse response; - CpuVectorPtr vec; - { - std::lock_guard guard(parameterMutex_); - vec.swap(vectors_[request.handle()]); - } - callback(response); -} - -void ParameterServer2::createMatrix(const CreateMatrixRequest& request, - 
ProtoResponseCallback callback) { - CreateMatrixResponse response; - /// We need to create column major matrix of size_ * num_cols - /// Matrix is row majoar. Need to tranpose when use it. - CpuMatrixPtr mat = std::make_shared(request.num_cols(), size_); - int64_t handle = -1; - { - std::lock_guard guard(parameterMutex_); - handle = matrices_.size(); - matrices_.push_back(mat); - } - response.set_handle(handle); - callback(response); -} - -void ParameterServer2::releaseMatrix(const ReleaseMatrixRequest& request, - ProtoResponseCallback callback) { - ReleaseMatrixResponse response; - CpuMatrixPtr mat; - { - std::lock_guard guard(parameterMutex_); - mat.swap(matrices_[request.handle()]); - } - callback(response); -} - -} // namespace paddle diff --git a/paddle/legacy/pserver/ParameterServer2.h b/paddle/legacy/pserver/ParameterServer2.h deleted file mode 100644 index 069e730ea4ea4b253518d70142f0f242145cd326..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ParameterServer2.h +++ /dev/null @@ -1,696 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "paddle/legacy/math/Matrix.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/parameter/ParameterOptimizer.h" -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/ThreadLocal.h" - -#include "ParameterService.pb.h" - -#include "ProtoServer.h" - -DECLARE_int32(port); - -namespace paddle { - -// @TODO(yanfei): -// if armed with high density computation resource per node, pserver could also -// utilize GPU to reduce overhead. if this mechanism is used, it could pipeline -// network receiving and GPU computation to reduce the network overhead even -// further. the pipeline could help to accelerate BIG model training. -// @TODO:(yanfei) -// for cpu and less/low gpu machine, the time exhausted by forward and backward -// could be larger than optimization at pserver. However, if armed with lots of -// gpus per node and if the model size is so large enough that limited cpu -// computation causes big optmization latency, the GPU may be required by -// pserver. - -/** - * Client interface for the parameter server - * - * it implements several rpc API for remote parameter client usage. - * for sync-sgd, client needs one controller thread to build connections - * to all pservers, these controller connections do barriers - * synchronization with these connections used for transfering data. - * each data connection uses block based fine grained synchronization - * to gain better scalability. Merging gradients from different trainers - * are concurrently executed with block units, so that some network - * overhead will be hidden in merging gradient. 
- * for async-sgd, the difference is that pserver will do optimization - * immediately if the gradients are ready, so that pserver needs to - * prepare separate buffer to store value for sending back to trainer - * to prevent from being polluted. - */ -class ParameterServer2 : public ProtoServer { - protected: - /// parameter_ mutex. - RWLock parameterMutex_; - - typedef std::pair BlockKey; - struct BlockKeyHash { - size_t operator()(const BlockKey& key) const { - return std::hash()(key.first) + key.second; - } - }; - - // TODO(yanfei): - // if index data structure is based on parameters instead of blocks, the - // lookup performance could be better. In addition, the block memory - // access almost exhibits good locality, so index data structure and - // block data structure can be refined further, especially if gpu is used - // for pserver. - /** - * all parameters are stored in CpuVector with a blockMap_ data structure - * to index block data required by requests. - */ - typedef std::unordered_map BlockMap; - /// <(para, block), global offset(byte) in all parameters> - BlockMap blockOffsetMap_; - /// <(para, block), global idx [0, nBlocksInAllParameters]> - BlockMap blockIdMap_; - - std::vector vectors_; - std::vector matrices_; - std::vector dataMems_; - - // TODO(yanfei): - // if storing sparse_remote_update() flag in request instead of - // reading configMap_, and storing config within new block wise - // overview data structure, the config mapping, block mapping - // can be unified in single clean data structure. Use para_id - // to index parameters, use offset to index block within parameter - // and keep two index into single one. - /** - * mapping between parameter and config - * different parameter allows different config, such as decay_rate. - * for each request, it need to read config for adding gradient - * and optmization. 
- */ - std::unordered_map configMap_; - - /** - * to parallelize the multi-thread and multi-connnection - * computation at pserver, it use block unit to reduce - * the contention for computation, even further use block - * level optimizater control for each block for some special - * reason annotated below. - */ - struct BlockInfo { - const ParameterConfig* config; - std::unique_ptr lock; - /// global offset for all parameters - uint64_t offset; - /** - * - * Async sgd in pserver is very different from sync sgd. - * Each trainer follows startBatch, update*, finishBatch as in - * sync sgd, but all these actions are almost executed by - * multi-core and multi-thread simutaneously, so that async - * sgd optimization is based on block level in reality, then - * per block optimization is necessary indeed. In addition, - * per block optimization is also perfered for performance - * with multithreads. - */ - std::unique_ptr optimizer; - }; - std::vector blockInfos_; - - typedef std::vector> BlockSegments; - /// Because some blocks might not be fully used. We keep a - /// record of which segments are used. - BlockSegments usedSegments_; - - /// record pserver status, all status defined in ParameterService.pb - PServerStatus status_; - /// record all samples processed which could be used by optimizater - std::atomic numSamplesProcessed_; - double cost_; - int mpiSize_; - int dataSize_; - /// configuration for current parameter optimizer - OptimizationConfig config_; - - /** - * The ReadWriteBuffer is based on std::vector, but aligned for avx/sse - * compute. And add some helper method to allocate memory aligned blocks. - * - * @param T type of element. - * @param AlignBytes the memory aligned bytes for allocated blocks. 
- */ - template - class ReadWriteBuffer - : public std::vector> { - public: - static_assert(sizeof(T) % AlignBytes == 0 || AlignBytes % sizeof(T) == 0, - "Type T must be able to aligned."); - - /** - * @brief IsTLargerThanAlign compiled time calculated constant for is type - * T larger than alignments. - */ - constexpr static bool IsTLargerThanAlign = sizeof(T) >= AlignBytes; - - static_assert(std::is_pod::value, "T must be POD type."); - - /** - * @brief if AlignBytes > sizeof(T), then will calcuate how many elements - * can be stored in AlignBytes. - */ - constexpr static size_t AlignElementCount = AlignBytes / sizeof(T); - - static_assert(AlignElementCount == - (AlignElementCount & -AlignElementCount) || - AlignBytes > sizeof(T), - "AlignElementCount should be exp of 2"); - - /** - * @brief Resize Buffer, with block count that will be allocated. Each block - * will be memory aligned in AlignBytes. - * @param size The element count in all blocks. - * @param alignBlockCount The block count that will be allocated. - */ - void resizeWithAlignHints(size_t size, size_t alignBlockCount = 1) { - if (IsTLargerThanAlign) { //! So, each elements is memory aligned. - this->resize(size); - } else { - //! at most, we need such elements in buffer to make sure each block is - //! aligned. - this->resize(size + alignBlockCount * (AlignElementCount - 1)); - } - } - - /** - * @brief reset aligned allocate blocks. - */ - void resetAlignAlloc() { this->curOffset_ = 0; } - - /** - * @brief get next aligned block address. - * @param blockSize is the element count in each block. - * @return Aligned block address. 
- */ - T* nextBlock(size_t blockSize) { - T* r = &this->operator[](curOffset_); - curOffset_ += blockSize; - - if (!IsTLargerThanAlign) { - curOffset_ = - (curOffset_ + AlignElementCount - 1) & ~(AlignElementCount - 1); - } - return r; - } - - private: - size_t curOffset_; - }; - - /// to buffer the data from network for further processing to - /// reduce redundant memory allocation. - ThreadLocal> readWriteBuffer_; - - /// size of the parameter - int64_t size_; - - /// for synchronized training, check details in addGradient() - /// and doOperation() - ThreadBarrier gradientReadyBarrier_; - ThreadBarrier parameterReadyBarrier_; - ThreadBarrier passBarrier_; - ThreadLocal> requestVec_; - ThreadLocal> callbackVec_; - - std::atomic numPassFinishClients_; - bool allClientPassFinish_; - - std::vector> synchronizeBarriers_; - std::atomic serverId_; - - /** - * - * for lagged async gradient gradient commit control in Async Sgd. - * discard lagged gradients from too slow nodes, whose gradients - * exhibits bad quality. - * Algorithm: - * pserver: - * 1. initial asyncUpdaterSteps = 0, asyncTrainerSteps_[N] = 0. - * syncUpdaterSteps means - * the version of parameter value. - * 2. when pull arrives, record asyncUpdateSteps_ into - * syncTrainerSteps_[trainer_id] - * 3. when push arrives, compare asyncUpdateSteps_ with - * syncTrainerSteps_[trainer_id] - * if delta > threshold, discard current gradient, else commit - * gradient. - * 4. reset asyncUpdaterSteps_ and asyncTrainerSteps_[N] when pass - * finished - * Note: - * it can not discard all lag-gradient strictly in some special - * condition. part of gradients could be discarded if - * ConcurrentRemoteParameterUpdater is sed. 
- * this algorithm is implemented in asynSGD() - */ - int64_t asyncLaggedThreshold_; - std::atomic asyncUpdateSteps_; - std::vector asyncTrainerSteps_; - size_t asyncLaggedGradientsNum_; - /// stat all async update - std::vector asyncUpdateStat_; - /// stat per trainer_id - std::vector asyncTrainerDiscardStat_; - /// stat per trainer_id - std::vector asyncTrainerCommitStat_; - - /// only used by controller and other control cmd from trainer number 0 - std::unique_ptr syncThreadPool_; - - /// pserver for sparse remote update parameters - bool isSparseServer_; - - /// barrier performance tuning sync-sgd required - std::atomic batchId_; - - public: - struct Buffer { - real* base; - size_t size; - }; - - protected: - /// async gradient commit control - bool asyncGrdientCommitCheckAndStat(const SendParameterRequest& request); - - public: - /// disable default parameter for overloading - /// @rdmaCpu:the id of cpu core hosting RDMA server(0-N) - /// -1 means using TCP transport instead of RDMA - ParameterServer2(const std::string& addr, int port, int rdmaCpu = -1); - - ~ParameterServer2() {} - - static const std::string kRetMsgInvalidMatrixHandle; - static const std::string kRetMsgInvalidVectorHandle; - static const std::string kRetMsgUnknownOperation; - - /// service functions - template - void reduceAndSendData(const SendDataRequest& request, - std::unique_ptr& msgReader, - ProtoResponseCallbackEx& callback); - - void templateReduceSum(const SendDataRequest& request, - std::unique_ptr& msgReader, - ProtoResponseCallbackEx& callback); - - /** - * @brief framework for sending parameters - * - * @note different parameter data type can be sent to pserver. - * in most case, the api is used to send gradients from - * trainer to pserver. 
- * it also can be used to retrieve parameters from pserver - */ - void sendParameter(const SendParameterRequest& request, - std::unique_ptr msgReader, - ProtoResponseCallbackEx callback); - - void sendData(const SendDataRequest& request, - std::unique_ptr msgReader, - ProtoResponseCallbackEx callback); - - /** - * @brief send config to pserver - * - * @note it can help pserver to understand the configuration for - * optimization, - * logging control, duplicated initialization, etc. - */ - void setConfig(const SetConfigRequest& request, - ProtoResponseCallback callback); - - /** - * @brief get status for pserver - * - * @note used to check if parameters are ready at pserver - */ - void getStatus(const GetStatusRequest& request, - ProtoResponseCallback callback); - - /** - * @brief set status for pserver - * - * @note used to check if parameters are ready at pserver, since parameters - * at pserver are initialized by trainer - */ - void setStatus(const SetStatusRequest& request, - ProtoResponseCallback callback); - - /** - * @brief framework for doing some operation at pserver end - * - * @note if sync-sgd is used, controller will calling op_SGD action - * for gradient optimization. - * check avaiable operations in opFuncs[] - */ - void doOperation(const DoOperationRequest& request, - ProtoResponseCallback callback); - - /// Create a column vector. The size is the dimension of parameter - void createVector(const CreateVectorRequest& request, - ProtoResponseCallback callback); - - void releaseVector(const ReleaseVectorRequest& request, - ProtoResponseCallback callback); - - /// Create a column major matrix. The number of rows is the dimension of - /// parameter. The number of columns is specifed by num_cols. 
- void createMatrix(const CreateMatrixRequest& request, - ProtoResponseCallback callback); - - void releaseMatrix(const ReleaseMatrixRequest& request, - ProtoResponseCallback callback); - /** - * @brief stateful control for indicationg sync pass start - * - * @note it is valuable for logging and state control, - * especially for sync-sgd control - */ - void waitPassStart(const WaitPassStartRequest& request, - ProtoResponseCallback callback); - - /** - * @brief stateful control for indicationg sync pass end - * - * @note it is valuable for logging and state control, - * especially for sync-sgd control - */ - void waitPassFinish(const WaitPassFinishRequest& request, - ProtoResponseCallback callback); - - /** - * @brief synchronize all distributed trainers - * - * @note it's general api for synchronizing trainer and pserver - */ - void synchronize(const SynchronizeRequest& request, - ProtoResponseCallback callback); - - /** - * @brief stateful control for indicating async pass is finished - * - * @note it is valuable for logging control, state reset, etc. - */ - void asyncFinishPass(const SynchronizeRequest& request, - ProtoResponseCallback callback); - - void loadValueVector(const LoadValueRequest& request, - ProtoResponseCallback callback); - - void saveValueVector(const SaveValueRequest& request, - ProtoResponseCallback callback); - - public: - /** - * @brief initialize parameter server - */ - bool init(); - - /** - * @brief set parameters at pserver - * - * @note do parameter initialization if neccessy. - */ - void setParameter(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers); - - /** - * @brief receive gradients and do optimization for async-sgd - * - * @note this api asynchronizately receives all data from all - * trainers, and immediately do optimization and return - * optimizated value for trainer. 
- * this above routine are block based atomic updating, - * which means different block could based different stale - * gradient. - * it will discard some lagged gradients by default for - * better convergence. - */ - void asyncSGD(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers); - - /** - * @brief merge gradients from all trainer - * - * @note this api use block based parallelization as fine grained - * parallelization which benifits lock contention and latency - * hidden for communication, also can harness multi-core - * efficiently. - * it also implements the synchronization for sync-sgd - */ - void addGradient(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers); - - /** - * @brief get dense parameters from pserver - * - * @note for some specified condition, trainer will get parameters from - * pservers. - * e.g. - * if all parameters are stored at perver end for big model training - * trainer can use it to retrieve all parameters if necessary. - */ - void getParameter(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers); - - /** - * @brief get sparse value from parameter server - * - * @note with sparse enabled, pservers own all latest value - * while trainer only retrieve value that only are needed. - * e.g. - * trainer will do prefetch action to retrieve necessary latest - * value from pserver for sparse calculation. 
- */ - void getParameterSparse(const SendParameterRequest& request, - std::vector& inputBuffers, - SendParameterResponse* response, - std::vector* outputBuffers); - - protected: - void mergeSegments(BlockSegments* segments); - - /// set the unused segments to zero - void clearUnusedSegments(CpuVector* vec); - - // TODO(yanfei): - // if read data and do optimization interleavely block by block, - // the performance could be better for gaining less network congestion. - /// read all data from connection and store it in static pre-allocated buffer - void readAllBlocks(MsgReader* msgReader, - std::vector* buffers); - - const ParameterConfig& getParameterConfig(const ParameterBlock& block) { - CHECK_LT(block.para_id(), -1UL) << "invalid parameter id:" - << block.para_id(); - const auto it = configMap_.find(block.para_id()); - CHECK(it != configMap_.end()) << "can not find parameter id: " - << block.para_id(); - return it->second; - } - - /// it implictly check blockOffsetMap_ while retrieving blockId - const ParameterConfig& getParameterConfig(int64_t blockId) const { - CHECK(blockId >= 0 && blockId < (int64_t)blockInfos_.size()) - << "block idx out of range, id: " << blockId - << " info size: " << blockInfos_.size(); - return *(blockInfos_[blockId].config); - } - - template - bool isValidVectorHandle(int64_t handle, Response* response) { - if (handle < 0 || (size_t)handle >= vectors_.size()) { - LOG(ERROR) << "Invalid vector handle " << handle; - response->set_return_message(kRetMsgInvalidVectorHandle); - return false; - } - return true; - } - - template - bool isValidMatrixHandle(int64_t handle, Response* response) { - if (handle < 0 || (size_t)handle >= matrices_.size()) { - LOG(ERROR) << "Invalid matrix handle " << handle; - response->set_return_message(kRetMsgInvalidMatrixHandle); - return false; - } - return true; - } - - /** - * @brief get block offset - * - * @note block.begin_dim is added to the block offset. 
- * return -1 if block cannot be found - */ - int64_t getBlockOffset(const ParameterBlock& block) const { - BlockKey key(block.para_id(), block.block_id()); - auto it = blockOffsetMap_.find(key); - if (it == blockOffsetMap_.end()) { - return -1; - } - return it->second; - } - - /// return -1 if block cannot be found - int64_t getBlockId(const ParameterBlock& block) const { - BlockKey key(block.para_id(), block.block_id()); - auto it = blockIdMap_.find(key); - if (it == blockIdMap_.end()) { - return -1; - } - return it->second; - } - - /** - * @brief prepare data for sending back - * - * @note modify reponse and outputBuffers for sending parameter - * back to client. The buffer for socket sending uses - * vectors_[parameterType] directly - * for dense with sync-sgd - */ - void sendBackParameter(const ParameterBlock& block, - int parameterType, - SendParameterResponse* response, - std::vector* outputBuffers); - - /** - * @brief prepare data for sending back - * - * @note modify response and outputBuffers for sending parameter - * back to client. The buffer for socket sending uses buffer->base - * The parameter values are copied from vectors_[parameterType] - * to buffer->base. - * for dense with async-sgd - */ - void sendBackParameter(const ParameterBlock& block, - int parameterType, - SendParameterResponse* response, - Buffer* buffer, - std::vector* outputBuffers); - /** - * @brief prepare data for sending back - * - * @note specified for sparse - */ - void sendBackParameterSparse(const ParameterBlock& block, - int parameterType, - SendParameterResponse* response, - Buffer* buffer, - size_t width, - std::vector* outputBuffers); - - /** - * framework routine for block parallelization - * e.g. - * for optimization on all blocks at pserver end, this routine can facilitize - * the parallelize of do optimization on all blocks with multithreads. 
- */ - typedef std::function ExecFunc; - void parallelExecForEachBlock(ExecFunc func); - void blockTraverse(BlockInfo& info, - const ParameterConfig& config, - int64_t offset, - size_t size, - const VectorPtr vecs[], - const ParameterOptimizer::TraverseCallback& callback); - - public: - typedef void (ParameterServer2::*OperatorFunction)(const Operation& operation, - OperationResult* result); - - /** - * doOperation will call following operations indirectly - * e.g. - * for sync-sgd control, the controller in remote updater will send op_SGD - * command to pserver, then send sendParameter request to pserver immediately. - * the two function at pserver end will do cooperation to achieve the sync-sgd - * gradient merge and optimization. - * the most following operations are specified for owlqn, all operations are - * under the context of doOperation function - */ - static OperatorFunction opFuncs[]; - - void op_SGD(const Operation& operation, OperationResult* result); - - void op_RESET(const Operation& operation, OperationResult* result); - - void op_utv(const Operation& operation, OperationResult* result); - - void op_au_bv(const Operation& operation, OperationResult* result); - - void op_COPY(const Operation& operation, OperationResult* result); - - void op_au(const Operation& operation, OperationResult* result); - - void op_au_bv_cw(const Operation& operation, OperationResult* result); - - void op_make_steepest_desc_dir(const Operation& operation, - OperationResult* result); - - void op_fix_dir_signs(const Operation& operation, OperationResult* result); - - void op_dir_deriv(const Operation& operation, OperationResult* result); - - void op_fix_omega_signs(const Operation& operation, OperationResult* result); - - void op_cost(const Operation& operation, OperationResult* result); - - void op_start_pass(const Operation& operation, OperationResult* result); - void op_finish_pass(const Operation& operation, OperationResult* result); - - void op_apply(const Operation& 
operation, OperationResult* result); - - void op_randomize(const Operation& operation, OperationResult* result); - - void op_load(const Operation& operation, OperationResult* result); - void op_save(const Operation& operation, OperationResult* result); -}; - -} // namespace paddle diff --git a/paddle/legacy/pserver/ParameterServer2Main.cpp b/paddle/legacy/pserver/ParameterServer2Main.cpp deleted file mode 100644 index dfbae0cd0f58faa58d9c7110050144226affdfed..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ParameterServer2Main.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "ParameterServerController.h" - -using namespace paddle; // NOLINT - -int main(int argc, char** argv) { - initMain(argc, argv); - - std::unique_ptr parameterServerPtr( - paddle::ParameterServerController::createFromGflags()); - parameterServerPtr->start(); - parameterServerPtr->wait(); - - return 0; -} diff --git a/paddle/legacy/pserver/ParameterServerController.cpp b/paddle/legacy/pserver/ParameterServerController.cpp deleted file mode 100644 index 2a7dcc15aa63e39704a523202d3559765b709702..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ParameterServerController.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ParameterServerController.h" - -namespace paddle { - -ParameterServerController::ParameterServerController( - const ParameterServerConfig& config) { - // round robin to load balance RDMA server ENGINE - std::vector devices; - int rdmaCpu = 0; - int onlineCpus = rdma::numCpus(); - int numPorts = config.ports_num() + config.ports_num_for_sparse(); - - if (config.nics().empty()) { - parameterServers_.resize(numPorts); - for (int i = 0; i < numPorts; ++i) { - if (config.rdma_tcp() == "rdma") { - parameterServers_[i].reset( - new ParameterServer2(std::string(), config.port() + i, rdmaCpu++)); - rdmaCpu = rdmaCpu % onlineCpus; - } else { - parameterServers_[i].reset( - new ParameterServer2(std::string(), config.port() + i)); - } - CHECK(parameterServers_[i]->init()) << "Fail to initialize parameter " - "server on port " - << config.port() + i; - } - } else { - str::split(config.nics(), ',', &devices); - parameterServers_.resize(devices.size() * numPorts); - for (int i = 0; i < numPorts; ++i) { - for (size_t j = 0; j < devices.size(); ++j) { - if (config.rdma_tcp() == "rdma") { - parameterServers_[i * devices.size() + j].reset(new ParameterServer2( - getIpAddr(devices[j]), config.port() + i, rdmaCpu++)); - rdmaCpu = rdmaCpu % onlineCpus; - } else { - parameterServers_[i * devices.size() + j].reset( - new ParameterServer2(getIpAddr(devices[j]), config.port() + i)); - } - CHECK(parameterServers_[i * devices.size() + j]->init()) - << 
"Fail to initialize parameter server with device " << devices[j] - << config.port() + i; - } - } - } -} - -ParameterServerController::~ParameterServerController() { this->wait(); } - -ParameterServerController* ParameterServerController::createFromGflags() { - ParameterServerConfig config; - - config.set_nics(FLAGS_nics); - config.set_rdma_tcp(FLAGS_rdma_tcp); - config.set_port(FLAGS_port); - config.set_ports_num(FLAGS_ports_num); - config.set_ports_num_for_sparse(FLAGS_ports_num_for_sparse); - - return create(config); -} - -ParameterServerController* ParameterServerController::create( - const ParameterServerConfig& config) { - return new ParameterServerController(config); -} - -void ParameterServerController::start() { - LOG(INFO) << "number of parameterServer instances: " - << parameterServers_.size(); - int i = 0; - for (const auto& parameterServer : parameterServers_) { - LOG(INFO) << "Starting parameterServer[" << i << "]"; - parameterServer->start(); - i++; - } -} - -void ParameterServerController::wait() { - int i = 0; - for (const auto& parameterServer : parameterServers_) { - LOG(INFO) << "Waiting parameterServer[" << i << "]"; - parameterServer->join(); - i++; - } -} - -} // namespace paddle diff --git a/paddle/legacy/pserver/ParameterServerController.h b/paddle/legacy/pserver/ParameterServerController.h deleted file mode 100644 index b90d0cbceaa879b8cb281867b5326ff50c1e311a..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ParameterServerController.h +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "ParameterServer2.h" -#include "ParameterServerConfig.pb.h" -#include "RDMANetwork.h" -#include "paddle/legacy/utils/StringUtil.h" - -namespace paddle { - -/** - * @brief ParameterServerController is used for create, init and manage multi - * parameter server instances. The num of the instances is decided by port - * num(the ports number for parameter send) and network devices configured - * by gflags or proto. - */ -class ParameterServerController final { - public: - DISABLE_COPY(ParameterServerController); - - /** - * @brief Ctor, Create a ParameterServerController from ParameterServerConfig. - */ - explicit ParameterServerController(const ParameterServerConfig& config); - - /** - * @brief Dtor. - */ - ~ParameterServerController(); - - /** - * @brief create ParameterServerController from gflags, this is used for - * compatibility with the old usage of configuration by gflags. - */ - static ParameterServerController* createFromGflags(); - - /** - * @brief create ParameterServerController with ParameterServerConfig, remove - * gflags from ParameterServer. Init all ParameterServer2 instances according - * to - * the config. - */ - static ParameterServerController* create(const ParameterServerConfig& config); - - /** - * @brief start all ParameterServer2 instances in this - * ParameterServerController. - */ - void start(); - - /** - * @brief join and wait for all ParameterServer2 instances thread in this - * ParameterServerController. 
- */ - void wait(); - - private: - std::vector> parameterServers_; -}; - -} // namespace paddle diff --git a/paddle/legacy/pserver/ProtoServer.cpp b/paddle/legacy/pserver/ProtoServer.cpp deleted file mode 100644 index 6b7948a7d0aba262360b201690b53b58be87be08..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ProtoServer.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ProtoServer.h" - -namespace paddle { - -void ProtoServer::handleRequest(std::unique_ptr msgReader, - ResponseCallback callback) { - /// 0 for funcName - /// 1 for proto - CHECK_GE(msgReader->getNumBlocks(), (size_t)2); - - std::string funcName(msgReader->getNextBlockLength(), 0); - /// read function name string - msgReader->readNextBlock(&funcName[0]); - /// looking up rpc wrapped callback function - auto it = nameToFuncMap_.find(funcName); - if (it != nameToFuncMap_.end()) { -#ifndef PADDLE_DISABLE_TIMER - gettimeofday(&(*(handleRequestBegin_)), nullptr); -#endif - it->second(std::move(msgReader), callback); - } else { - LOG(ERROR) << "Unknown funcName: " << funcName; - std::vector iovs; - callback(iovs); - } -} - -void ProtoServer::registerServiceFunctionImp(const std::string& funcName, - ServiceFunction func) { - CHECK(!nameToFuncMap_.count(funcName)) << "Duplicated registration: " - << funcName; - nameToFuncMap_[funcName] = func; -} - -void ProtoClient::send(const char* funcName, - const 
google::protobuf::MessageLite& proto, - const std::vector& userIovs) { - std::string protoStr; - CHECK(proto.SerializeToString(&protoStr)); - std::vector iovs; - iovs.reserve(iovs.size() + 2); - /// sending function name string, protobuf data and user additional data - iovs.push_back({(void*)funcName, strlen(funcName)}); - iovs.push_back({&protoStr[0], protoStr.size()}); - iovs.insert(iovs.end(), userIovs.begin(), userIovs.end()); - channel_->writeMessage(iovs); -} - -std::unique_ptr ProtoClient::recv( - google::protobuf::MessageLite* proto) { - std::vector iovs; - std::unique_ptr msgReader = channel_->readMessage(); - CHECK_GE(msgReader->getNumBlocks(), (size_t)1); - std::string str(msgReader->getNextBlockLength(), 0); - msgReader->readNextBlock(&str[0]); - CHECK(proto->ParseFromString(str)); - return msgReader; -} - -} // namespace paddle diff --git a/paddle/legacy/pserver/ProtoServer.h b/paddle/legacy/pserver/ProtoServer.h deleted file mode 100644 index 2943867de5885ab1af1aa0f69e93a931092b28e3..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/ProtoServer.h +++ /dev/null @@ -1,267 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "LightNetwork.h" - -#include - -#include - -namespace paddle { - -/** - * - * It implements the rpc framework, which launchs one thread for each - * connection. 
Here define one parameter server as single TCP server - * binding on single port. All connections share single tcp ProtoServer - * object, each connection handles all requests from specified trainer - * within single worker thread. - * to accelerate bandwidth efficiency and harness multicore for pserver - * optimization to reduce pserver latency, you could launch more port - * for single NIC hardward with --port=N(N>1) for small cluster job. - */ -class ProtoServer : public SocketServer { - public: - /// rdmaCpu controls the cpu affinity of RDMA server daemon, - /// which could benifit performance. rdmaCpu = -1 means TCP - /// is used instead of RDMA transport. - ProtoServer(const std::string& addr, int port, int rdmaCpu = -1) - : SocketServer(addr, port, rdmaCpu) {} - - typedef std::function& outputIovs)> - ProtoResponseCallbackEx; - - typedef std::function - ProtoResponseCallback; - - /** - * Register a service function for this server - * void(const ProtoIn& request, - * ProtoResponseCallback callback) - * The service function process the request and call the callback - * after it finishes the request. - - * Use macro REGISTER_SERVICE_FUNCTION as a helper - * to simplify the use. - */ - template - void registerServiceFunction( - const std::string& funcName, - std::function func); - - /** - * Register a service function for this server - * The signature of the service function is - * void(const ProtoIn&, - * std::unique_ptr msgReader, - * ProtoResponseCallbackEx callback) - * The service function process the request and call the callback - * after it finishes the request. - * The extended service function can take extra input blocks from - * the communication channel by reading msgReader. It can also - * send extra blocks to the communication channel by providing - * outputIovs as the argument for the callback function. - - * Use macro REGISTER_SERVICE_FUNCTION_EX as a helper - * to simplify the use. 
- */ - template - void registerServiceFunctionEx( - const std::string& funcName, - std::function msgReader, - ProtoResponseCallbackEx callback)> func); - - protected: - /** - * @brief handle rpc request - * @param[in] msgReader Message reader for reading data from connection - * @param[in] callback equal to channel->writeMessage - * - * @note it lookups rpc function mapping table to find function pointer, - * then call this function with further reading data from connection - */ - virtual void handleRequest(std::unique_ptr msgReader, - ResponseCallback callback); - - typedef std::function msgReader, - ResponseCallback callback)> - ServiceFunction; - - /** - * @brief register one RPC function in function mapping - * @param[in] funcName function name string - * @param[in] func rpc function wrapped with reading and writing data - */ - void registerServiceFunctionImp(const std::string& funcName, - ServiceFunction func); - - protected: - /// Tuning bare network overhead: the beginning of receiving request - ThreadLocal handleRequestBegin_; - - /// mapping to find rpc function while handling request - std::map nameToFuncMap_; -}; - -class ProtoClient : public SocketClient { - public: - ProtoClient(const std::string& serverAddr, - int serverPort, - enum ChannelType channelType = F_TCP) - : SocketClient(serverAddr, serverPort, channelType) {} - - /** - * @brief Make a request to the server. - * @param[in] funcName request rpc function name string - * @param[in] proto protobuf data for sending to pserver - * @param[in] iov additional iov data for sending to pserver - * - * @note iov provides additional blocks which need to be written to the - * communication channel - */ - void send(const char* funcName, - const google::protobuf::MessageLite& proto, - const std::vector& iov = std::vector()); - - /** - * @brief receive the response from the server. - * @param[in] proto proto binary buffer - * - * @note this must be paired with a corresponding send() call. 
The - * returned MsgReader allows the caller to receive additional - * blocks from the communication channel. - */ - std::unique_ptr recv(google::protobuf::MessageLite* proto); - - /// combines send() and recv() - std::unique_ptr sendAndRecv( - const char* funcName, - const google::protobuf::MessageLite& protoIn, - google::protobuf::MessageLite* protoOut) { - send(funcName, protoIn); - return recv(protoOut); - } - - /// combines send() and recv() - std::unique_ptr sendAndRecv( - const char* funcName, - const google::protobuf::MessageLite& protoIn, - const std::vector& iov, - google::protobuf::MessageLite* protoOut) { - send(funcName, protoIn, iov); - return recv(protoOut); - } -}; - -template -struct service_arg_type; -/// helper class for obtaining the argument type of a service function -template -struct service_arg_type { - typedef Arg1 _1; -}; - -template -struct service_arg_type, - Arg2)> { - typedef Arg1 _1; -}; - -/// register a service function to the ProtoServer -/// This should only be used within a member function of className -#define REGISTER_SERVICE_FUNCTION(className, funcName) \ - registerServiceFunction< \ - service_arg_type::_1>( \ - #funcName, \ - std::bind(&className::funcName, \ - this, \ - std::placeholders::_1, \ - std::placeholders::_2)) - -/// register a service function to the ProtoServer -/// This should only be used within a member function of className -#define REGISTER_SERVICE_FUNCTION_EX(className, funcName) \ - registerServiceFunctionEx< \ - service_arg_type::_1>( \ - #funcName, \ - std::bind(&className::funcName, \ - this, \ - std::placeholders::_1, \ - std::placeholders::_2, \ - std::placeholders::_3)) - -/// create wrapper function for parameter server high level function and -/// register the wrapper function into function mapping. 
-template -void ProtoServer::registerServiceFunctionEx( - const std::string& funcName, - std::function msgReader, - ProtoResponseCallbackEx callback)> func) { - auto f = [func](std::unique_ptr msgReader, - ResponseCallback callback) { - ProtoIn request; - std::string str(msgReader->getNextBlockLength(), 0); - msgReader->readNextBlock(&str[0]); - CHECK(request.ParseFromString(str)); - auto pcob = [callback](const google::protobuf::MessageLite& response, - const std::vector& outputIovs) { - std::string out; - CHECK(response.SerializeToString(&out)); - std::vector iovs; - iovs.push_back({&out[0], out.size()}); - iovs.insert(iovs.end(), outputIovs.begin(), outputIovs.end()); - callback(iovs); - }; - - func(request, std::move(msgReader), pcob); - }; - - registerServiceFunctionImp(funcName, f); -} - -template -void ProtoServer::registerServiceFunction( - const std::string& funcName, - std::function func) { - auto f = [func](std::unique_ptr msgReader, - ResponseCallback callback) { - ProtoIn request; - std::string str(msgReader->getNextBlockLength(), 0); - msgReader->readNextBlock(&str[0]); - CHECK(request.ParseFromString(str)); - msgReader.reset(); - - auto pcob = [callback](const google::protobuf::MessageLite& response) { - std::string out; - CHECK(response.SerializeToString(&out)); - std::vector iovs; - iovs.push_back({&out[0], out.size()}); - callback(iovs); - }; - - func(request, pcob); - }; - - registerServiceFunctionImp(funcName, f); -} - -} // namespace paddle diff --git a/paddle/legacy/pserver/RDMANetwork.h b/paddle/legacy/pserver/RDMANetwork.h deleted file mode 100644 index c87056f72c56647c827cdbd7bdd6a992b4bb1cf6..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/RDMANetwork.h +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#ifndef PADDLE_DISABLE_RDMA -#include "sxi_sock.h" -#else -#define PROMPT_ERR() LOG(FATAL) << "Paddle is not compiled with rdma" -#endif -#include "paddle/legacy/utils/Logging.h" - -#include -struct sxi_sock; -struct sxi_socket; - -#ifndef MAX_VEC_SIZE -// define default MAX_VEC_SIZE -#define MAX_VEC_SIZE (1UL << 16) -#endif - -namespace paddle { -/// Namespace rdma is adaptors for sxi_sock.h. Make paddle not depend on it -/// when disable rdma support -namespace rdma { -inline int numCpus() { -#ifndef PADDLE_DISABLE_RDMA - return sxi_num_configured_cpus(); -#else - return 0; -#endif -} - -inline sxi_socket* ssocket(int cpuId) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_ssocket(cpuId); -#else - PROMPT_ERR(); -#endif -} - -inline int listen(sxi_socket* s) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_listen(s); -#else - PROMPT_ERR(); -#endif -} - -inline int bind(sxi_socket* s, const char* str) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_bind(s, str); -#else - PROMPT_ERR(); -#endif -} - -inline sxi_sock* accept(sxi_socket* s) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_accept(s); -#else - PROMPT_ERR(); -#endif -} - -inline sockaddr_in* getSourceAddress(sxi_sock* sock) { -#ifndef PADDLE_DISABLE_RDMA - return reinterpret_cast(&sock->sa); -#else - PROMPT_ERR(); -#endif -} - -inline int close(sxi_socket* sock) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_socket_close(sock); -#else - PROMPT_ERR(); -#endif -} - -inline int close(sxi_sock* sock) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_sock_close(sock); -#else - PROMPT_ERR(); -#endif -} - -inline void init() 
{ -#ifndef PADDLE_DISABLE_RDMA - sxi_module_init(); -#else - PROMPT_ERR(); -#endif -} - -inline sxi_socket* csocket(int cpuId) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_csocket(cpuId); -#else - PROMPT_ERR(); -#endif -} - -inline ssize_t read(sxi_sock* channel, void* data, size_t len) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_read(channel, data, len); -#else - PROMPT_ERR(); -#endif -} - -inline ssize_t write(sxi_sock* channel, void* data, size_t len) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_write(channel, data, len); -#else - PROMPT_ERR(); -#endif -} - -inline ssize_t readv(sxi_sock* channel, iovec* iov, int count) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_readv(channel, iov, count); -#else - PROMPT_ERR(); -#endif -} - -inline ssize_t writev(sxi_sock* channel, iovec* iov, int count) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_writev(channel, iov, count); -#else - PROMPT_ERR(); -#endif -} - -inline sxi_sock* connect(sxi_socket* socket, const char* url) { -#ifndef PADDLE_DISABLE_RDMA - return sxi_connect(socket, url); -#else - PROMPT_ERR(); -#endif -} - -} // namespace rdma -} // namespace paddle diff --git a/paddle/legacy/pserver/SocketChannel.cpp b/paddle/legacy/pserver/SocketChannel.cpp deleted file mode 100644 index 79c763c62ba845067c7729eafb5b218fc7b91482..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/SocketChannel.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "SocketChannel.h" - -#include -#include -#include -#include -#include -#include -#include "RDMANetwork.h" - -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -/** - * UIO_MAXIOV is documented in writev(2), but only - * declares it on osx/ios if defined(KERNEL) - */ -#ifndef UIO_MAXIOV -#define UIO_MAXIOV 512 -#endif - -SocketChannel::~SocketChannel() { - if (tcpRdma_ == F_TCP) - close(tcpSocket_); - else - rdma::close(rdmaSocket_); - LOG(INFO) << "destory connection in socket channel, peer = " << peerName_; -} - -size_t SocketChannel::read(void* buf, size_t size) { - size_t total = 0; - while (total < size) { - ssize_t len; - if (tcpRdma_ == F_TCP) - len = ::read(tcpSocket_, (char*)buf + total, size - total); - else - len = rdma::read(rdmaSocket_, (char*)buf + total, size - total); - - CHECK(len >= 0) << " peer=" << peerName_; - if (len <= 0) { - return total; - } - total += len; - } - return total; -} - -size_t SocketChannel::write(const void* buf, size_t size) { - size_t total = 0; - while (total < size) { - ssize_t len; - if (tcpRdma_ == F_TCP) - len = ::write(tcpSocket_, (const char*)buf + total, size - total); - else - len = rdma::write(rdmaSocket_, (char*)buf + total, size - total); - - CHECK(len >= 0) << " peer=" << peerName_; - if (len <= 0) { - return total; - } - total += len; - } - return total; -} - -template -static size_t readwritev(IOFunc iofunc, - SocketType socket, - iovec* iovs, - int iovcnt, - int maxiovs, - const std::string& peerName) { - int curIov = 0; - size_t total = 0; - - for (int i = 0; i < iovcnt; ++i) { - total += iovs[i].iov_len; - } - - size_t size = 0; - size_t curIovSizeDone = 0; - - while (size < total) { - ssize_t len = - iofunc(socket, &iovs[curIov], std::min(iovcnt - curIov, maxiovs)); - CHECK(len > 0) << " peer=" << peerName << " curIov=" << curIov - << " iovCnt=" << iovcnt - << " iovs[curIov].base=" 
<< iovs[curIov].iov_base - << " iovs[curIov].iov_len=" << iovs[curIov].iov_len; - size += len; - - /// restore iovs[curIov] to the original value - iovs[curIov].iov_base = - (void*)((char*)iovs[curIov].iov_base - curIovSizeDone); - iovs[curIov].iov_len += curIovSizeDone; - - len += curIovSizeDone; - - while (curIov < iovcnt) { - if ((size_t)len < iovs[curIov].iov_len) break; - len -= iovs[curIov].iov_len; - ++curIov; - } - if (curIov < iovcnt) { - curIovSizeDone = len; - iovs[curIov].iov_base = (void*)((char*)iovs[curIov].iov_base + len); - iovs[curIov].iov_len -= len; - } - } - return size; -} - -/// rdma::readv and rdma::writev can take advantage of RDMA blocking offload -/// transfering -size_t SocketChannel::writev(const std::vector& iovs) { - if (tcpRdma_ == F_TCP) - return readwritev(::writev, - tcpSocket_, - const_cast(&iovs[0]), - iovs.size(), - UIO_MAXIOV, - peerName_); - else - return readwritev(rdma::writev, - rdmaSocket_, - const_cast(&iovs[0]), - iovs.size(), - MAX_VEC_SIZE, - peerName_); -} - -size_t SocketChannel::readv(std::vector* iovs) { - if (tcpRdma_ == F_TCP) - return readwritev(::readv, - tcpSocket_, - const_cast(&(*iovs)[0]), - iovs->size(), - UIO_MAXIOV, - peerName_); - else - return readwritev(rdma::readv, - rdmaSocket_, - const_cast(&(*iovs)[0]), - iovs->size(), - MAX_VEC_SIZE, - peerName_); -} - -void SocketChannel::writeMessage(const std::vector& userIovs) { - MessageHeader header; - header.numIovs = userIovs.size(); - - std::vector iovLengths; - iovLengths.reserve(userIovs.size()); - for (auto& iov : userIovs) { - iovLengths.push_back(iov.iov_len); - } - - std::vector iovs; - iovs.reserve(userIovs.size() + 2); - iovs.push_back({&header, sizeof(header)}); - iovs.push_back({&iovLengths[0], - static_cast(sizeof(iovLengths[0]) * header.numIovs)}); - iovs.insert(iovs.end(), userIovs.begin(), userIovs.end()); - - header.totalLength = 0; - for (auto& iov : iovs) { - header.totalLength += iov.iov_len; - } - - CHECK(writev(iovs) == 
(size_t)header.totalLength); -} - -std::unique_ptr SocketChannel::readMessage() { - MessageHeader header; - - size_t len = read(&header, sizeof(header)); - if (len == 0) { - return nullptr; - } - - CHECK(len == sizeof(header)); - - std::unique_ptr msgReader(new MsgReader(this, header.numIovs)); - - CHECK_EQ(msgReader->getTotalLength() + sizeof(header) + - msgReader->getNumBlocks() * sizeof(size_t), - (size_t)header.totalLength) - << " totalLength=" << msgReader->getTotalLength() - << " numBlocks=" << msgReader->getNumBlocks(); - return msgReader; -} - -MsgReader::MsgReader(SocketChannel* channel, size_t numBlocks) - : channel_(channel), blockLengths_(numBlocks), currentBlockIndex_(0) { - size_t size = numBlocks * sizeof(blockLengths_[0]); - CHECK(channel_->read(&blockLengths_[0], size) == size); -} - -void MsgReader::readBlocks(const std::vector& bufs) { - CHECK_LE(currentBlockIndex_ + bufs.size(), blockLengths_.size()); - std::vector iovs; - iovs.reserve(bufs.size()); - size_t totalLength = 0; - for (void* buf : bufs) { - iovs.push_back({buf, getNextBlockLength()}); - totalLength += getNextBlockLength(); - ++currentBlockIndex_; - } - - CHECK(channel_->readv(&iovs) == totalLength); -} - -void MsgReader::readNextBlock(void* buf) { - CHECK_LT(currentBlockIndex_, blockLengths_.size()); - CHECK(channel_->read(buf, getNextBlockLength()) == getNextBlockLength()); - ++currentBlockIndex_; -} - -} // namespace paddle diff --git a/paddle/legacy/pserver/SocketChannel.h b/paddle/legacy/pserver/SocketChannel.h deleted file mode 100644 index a7b3cd42f0aa32c3a74e14f87dbfe64d25473254..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/SocketChannel.h +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/legacy/utils/Util.h" - -#include - -#include -#include - -struct sxi_sock; - -namespace paddle { - -class SocketChannel; -enum ChannelType { - F_TCP = 1, - F_RDMA = 2, -}; - -/// reading a set of blocks of data from SocketChannel. -class MsgReader { - public: - MsgReader(SocketChannel* channel, size_t numIovs); - ~MsgReader() { - /// ensure all data blocks have been processed - CHECK_EQ(currentBlockIndex_, blockLengths_.size()); - } - /** - * @brief number of remaining parts - */ - size_t getNumBlocks() const { - return blockLengths_.size() - currentBlockIndex_; - } - - /** - * @brief lenght of next block - */ - size_t getNextBlockLength() const { return getBlockLength(0); } - - /** - * @brief get the total length of all the remaining blocks - */ - size_t getTotalLength() const { - size_t total = 0; - for (size_t i = currentBlockIndex_; i < blockLengths_.size(); ++i) { - total += blockLengths_[i]; - } - return total; - } - - /** - * @brief Get the length for block currentBlockIndex + i - */ - size_t getBlockLength(size_t i) const { - return blockLengths_[currentBlockIndex_ + i]; - } - - /** - * @brief read blocks data and store it to buf - */ - void readBlocks(const std::vector& bufs); - void readNextBlock(void* buf); - - protected: - SocketChannel* channel_; - std::vector blockLengths_; - size_t currentBlockIndex_; -}; - -/// APIs for reading and writing byte stream data or naive iov data -/// from the APIs both RDMA and TCP exhibits byte stream style -class SocketChannel { - public: - SocketChannel(int socket, const std::string& 
peerName) - : tcpSocket_(socket), peerName_(peerName) { - tcpRdma_ = F_TCP; - } - SocketChannel(struct sxi_sock* socket, const std::string& peerName) - : rdmaSocket_(socket), peerName_(peerName) { - tcpRdma_ = F_RDMA; - } - - ~SocketChannel(); - - const std::string& getPeerName() const { return peerName_; } - - /** - * @brief read size bytes. - * - * @note keep reading until getting size bytes or sock is closed - * is closed - */ - size_t read(void* buf, size_t size); - - /** - * @brief write size bytes. - * - * @note keep writing until writing size bytes or sock is closed - */ - size_t write(const void* buf, size_t size); - - /** - * @brief write a set of buffers. - * - * @note keep writing until all buffers are written or sock is closed - */ - size_t writev(const std::vector& iov); - - /** - * @brief read a set of buffers. - * - * @note keep reading until all buffers are full or sock is closed. - */ - size_t readv(std::vector* iov); - - /** - * @brief write a set of buffers. - * - * @note keep writing until all buffers are passed or sock is closed - */ - void writeMessage(const std::vector& iov); - - /// return null to indicate socket is closed - std::unique_ptr readMessage(); - - protected: - struct MessageHeader { - int64_t totalLength; /// include the header - int64_t numIovs; - int64_t iovLengths[0]; - }; - - int tcpSocket_; - struct sxi_sock* rdmaSocket_; - const std::string peerName_; - enum ChannelType tcpRdma_; -}; - -} // namespace paddle diff --git a/paddle/legacy/pserver/SparseParameterDistribution.cpp b/paddle/legacy/pserver/SparseParameterDistribution.cpp deleted file mode 100644 index 3f17b228f0e5fd33b7e7db2afe1fb9421acc69c5..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/SparseParameterDistribution.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/legacy/utils/Logging.h" - -#include "paddle/legacy/utils/Flags.h" - -#include "SparseParameterDistribution.h" - -DEFINE_bool(check_sparse_distribution_in_pserver, - false, - "check whether sparse parameter exhibts balanced distribution at " - "all pservers"); -DEFINE_bool(show_check_sparse_distribution_log, - false, - "show logs details for sparse parameter distribution in pserver"); -DEFINE_int32(check_sparse_distribution_batches, - 100, - "run sparse parameter distribution check for N batches"); -DEFINE_double( - check_sparse_distribution_ratio, - 0.6, - "if parameters dispatched to different pservers exhibit unbalanced " - " distribution for check_sparse_distribution_ratio * " - " check_sparse_distribution_batches times, crash program"); -DEFINE_double(check_sparse_distribution_unbalance_degree, - 2.0, - "the ratio of maximum data size and minimun data size for " - "different pserver"); - -namespace paddle { - -SparseParameterDistribution::SparseParameterDistribution(size_t serviceNum) { - totBytes_ = 0; - data_.resize(serviceNum); - - batchPassed_ = 0; - unbalanceCnt_ = 0; -} - -void SparseParameterDistribution::probeDistribution(int serverId, - size_t dataSize) { - if (!FLAGS_check_sparse_distribution_in_pserver || - batchPassed_ > FLAGS_check_sparse_distribution_batches) { - return; - } - - CHECK_LT((size_t)serverId, data_.size()) - << "invalid sparse parameter distribution probe"; - - data_[serverId] 
+= dataSize; - totBytes_ += dataSize; -} - -void SparseParameterDistribution::checkAndResetDistribution() { - if (!FLAGS_check_sparse_distribution_in_pserver || - batchPassed_ >= FLAGS_check_sparse_distribution_batches) { - return; - } - - /// at runtime, prepareSendData is called by many contexts, - /// so need to check if data is avaiable. - if (!totBytes_) { - return; - } - - /// check if distribution is balanced - auto avgSize = totBytes_ / data_.size(); - auto unbalanceDegree = FLAGS_check_sparse_distribution_unbalance_degree; - for (auto& dataSize : data_) { - if (dataSize > unbalanceDegree * avgSize || - dataSize * unbalanceDegree < avgSize) { - unbalanceCnt_++; - break; - } - } - - auto printData = [&]() { - std::stringstream ss; - for (auto& dataSize : data_) { - ss << dataSize * 0.001 << "KB "; - } - ss << std::endl; - LOG(INFO) << ss.str(); - }; - - /// show all sparse data size for different pserver - if (FLAGS_show_check_sparse_distribution_log) { - LOG(INFO) << "sparse distribution:"; - printData(); - } - - totBytes_ = 0; - batchPassed_++; - - if (batchPassed_ == FLAGS_check_sparse_distribution_batches) { - LOG(INFO) << "show last parameter distribution sample:"; - printData(); - LOG(INFO) << "total unbalanced batches: " << unbalanceCnt_ - << " in passed batches: " << batchPassed_; - CHECK_LE((float)unbalanceCnt_ / (float)batchPassed_, - FLAGS_check_sparse_distribution_ratio) - << "unbalanced sparse parameter distribution for different pserver. 
" - << "it could be caused by unbalanced sparse ids distribution, try " - << "to shuffle dimensions in input samples"; - } - - std::fill(data_.begin(), data_.end(), 0); -} -} // namespace paddle diff --git a/paddle/legacy/pserver/SparseParameterDistribution.h b/paddle/legacy/pserver/SparseParameterDistribution.h deleted file mode 100644 index ee78029958f675d07ec0aba2d0c1ea92d664e8fd..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/SparseParameterDistribution.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include - -#include -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -/* - * if sparse_remote_updater is used, different ParameterServer could - * be assigned with unbalanced gradients. the parameter value from - * ParameterServer also be not balanced. the distribution of different - * dimensions of sparse ids determines the unbalanced degree of data - * distributed among all ParameterServers. Even distribution will - * benifits cluster efficiency. - * do check the unbalanced degree of gradients at runtime, crash program - * if unbalanced distribution exhibts by default. 
- */ -class SparseParameterDistribution { - public: - /// serviceNum means the number of ParameterServers - explicit SparseParameterDistribution(size_t serviceNum); - ~SparseParameterDistribution() {} - /// collect data - void probeDistribution(int serverId, size_t data); - void checkAndResetDistribution(); - - private: - std::vector data_; - std::atomic totBytes_; - - /// after some batches, stop to check - int batchPassed_; - - /// stat on unbalanced distribution found - int unbalanceCnt_; -}; -} // namespace paddle diff --git a/paddle/legacy/pserver/test/.gitignore b/paddle/legacy/pserver/test/.gitignore deleted file mode 100644 index aeb58c5b562c61d472466b2579067a40971ddd6e..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/test/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -log -test_ParameterServer -test_ParameterServer2 -socket_test -test_ProtoServer diff --git a/paddle/legacy/pserver/test/CMakeLists.txt b/paddle/legacy/pserver/test/CMakeLists.txt deleted file mode 100644 index b66a00ba0652dfe1afbb877eca06cacdfe2ca343..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/test/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -######################### socket_test ######################## -add_unittest_without_exec(socket_test - SocketTest.cpp) - -add_test(NAME socket_test - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port - ${CMAKE_CURRENT_BINARY_DIR}/socket_test --loop_time=10) - -####################### test_ProtoServer #################### -add_unittest_without_exec(test_ProtoServer - test_ProtoServer.cpp) - -IF(NOT ON_TRAVIS) - add_test(NAME test_ProtoServer - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port - ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) -ENDIF(NOT ON_TRAVIS) - -# TODO(yuyang18): Run test_ProtoServer when with rdma -# add_test(NAME test_ProtoServerRDMA -# COMMAND ...) 
- -#################### test_ParameterServer2 #################### -add_unittest_without_exec(test_ParameterServer2 - test_ParameterServer2.cpp) -add_test(NAME test_ParameterServer2 - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port -n 4 - ${CMAKE_CURRENT_BINARY_DIR}/test_ParameterServer2) diff --git a/paddle/legacy/pserver/test/SocketTest.cpp b/paddle/legacy/pserver/test/SocketTest.cpp deleted file mode 100644 index 3a781fcbf655b554e79fc753f3409d12f10f6646..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/test/SocketTest.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/Util.h" - -#include -#include -#include -#include -#include - -#include - -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/utils/Logging.h" - -struct MessageHeader { - int64_t dataLength; -}; - -class Thread { - public: - void start(); - virtual void run() = 0; - virtual ~Thread() {} - - protected: - std::unique_ptr thread_; -}; - -void Thread::start() { - thread_.reset(new std::thread([this]() { this->run(); })); -} - -class SocketChannel { - public: - explicit SocketChannel(int socket) : socket_(socket) {} - int getSocketFd() const { return socket_; } - uint64_t readAll(void* buf, size_t size); - uint64_t writeAll(const void* buf, size_t size); - - protected: - int socket_; -}; - -uint64_t SocketChannel::readAll(void* buf, size_t size) { - uint64_t total = 0; - while (total < size) { - int64_t len = read(socket_, (char*)buf + total, size - total); - if (len <= 0) { - return total; - } - total += len; - } - return total; -} - -uint64_t SocketChannel::writeAll(const void* buf, size_t size) { - uint64_t total = 0; - while (total < size) { - int64_t len = write(socket_, (const char*)buf + total, size - total); - if (len <= 0) { - return total; - } - total += len; - } - return total; -} - -class SocketWorker : public Thread { - public: - explicit SocketWorker(int socket) : channel_(socket) {} - virtual void run(); - - // read n bytes. 
- int64_t readAll(char* buf, size_t n); - - // write n bytes - - protected: - SocketChannel channel_; - std::string buffer_; -}; - -class SocketServer : public Thread { - public: - explicit SocketServer(int port) - : port_(port), socket_(0), maxPendingConnections_(100) {} - - virtual void run(); - - protected: - int port_; - int socket_; - int maxPendingConnections_; -}; - -void SocketServer::run() { - int newsockfd; - socklen_t clilen; - struct sockaddr_in serv_addr, cli_addr; - - /* First call to socket() function */ - socket_ = socket(AF_INET, SOCK_STREAM, 0); - CHECK(socket_ >= 0) << "ERROR opening socket"; - - /* Initialize socket structure */ - bzero((char*)&serv_addr, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - serv_addr.sin_addr.s_addr = INADDR_ANY; - serv_addr.sin_port = htons(port_); - - /* Now bind the host address using bind() call.*/ - CHECK(bind(socket_, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) >= 0) - << "ERROR on binding"; - - /* Now start listening for the clients, here process will - * go in sleep mode and will wait for the incoming connection - */ - listen(socket_, maxPendingConnections_); - clilen = sizeof(cli_addr); - - while (true) { - /* Accept actual connection from the client */ - newsockfd = accept(socket_, (struct sockaddr*)&cli_addr, &clilen); - CHECK(newsockfd >= 0) << "ERROR on accept"; - - SocketWorker* worker = new SocketWorker(newsockfd); - worker->start(); - } -} - -void SocketWorker::run() { - MessageHeader header; - - while (true) { - int64_t n = channel_.readAll(&header, sizeof(header)); - CHECK(n == sizeof(header)) << "ERROR reading from socket"; - - buffer_.resize(header.dataLength); - n = channel_.readAll(&buffer_[0], header.dataLength); - CHECK(n == header.dataLength) << "ERROR reading from socket"; - - /* Write a response to the client */ - n = channel_.writeAll(&header, sizeof(header)); - CHECK(n == sizeof(header)) << "ERROR reading from socket"; - n = channel_.writeAll(buffer_.data(), buffer_.size()); 
- CHECK(n == header.dataLength) << "ERROR writing to socket"; - } -} - -class SocketClient { - public: - SocketClient(const std::string& serverAddr, int serverPort); - SocketChannel* getChannel() const { return channel_.get(); } - - protected: - std::unique_ptr channel_; -}; - -SocketClient::SocketClient(const std::string& serverAddr, int serverPort) { - struct sockaddr_in serv_addr; - struct hostent* server; - - // char buffer[256]; - - /* Create a socket point */ - int sockfd = socket(AF_INET, SOCK_STREAM, 0); - CHECK(sockfd >= 0) << "ERROR opening socket"; - server = gethostbyname(serverAddr.c_str()); - CHECK(server) << "ERROR, no such host: " << serverAddr; - - bzero((char*)&serv_addr, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - bcopy((char*)server->h_addr, - (char*)&serv_addr.sin_addr.s_addr, - server->h_length); - serv_addr.sin_port = htons(serverPort); - - /* Now connect to the server */ - CHECK(connect(sockfd, (sockaddr*)&serv_addr, sizeof(serv_addr)) >= 0) - << "ERROR connecting"; - - channel_.reset(new SocketChannel(sockfd)); -} - -DEFINE_string(server_addr, "127.0.0.1", "Server address"); -DEFINE_int64(dim, 10000000, "Data size"); -DEFINE_int32(loop_time, 100000, "test loop time"); - -using namespace paddle; // NOLINT - -int main(int argc, char** argv) { - paddle::initMain(argc, argv); - SocketServer server(FLAGS_port); - server.start(); - sleep(1); - - SocketClient client(FLAGS_server_addr, FLAGS_port); - - SocketChannel* channel = client.getChannel(); - - MessageHeader header; - - uint64_t dataSize = FLAGS_dim * sizeof(real); - -#ifdef PADDLE_WITH_CUDA - GpuVector gpuParam(FLAGS_dim); - GpuVector gpuGrad(FLAGS_dim); -#else - CpuVector gpuParam(FLAGS_dim); - CpuVector gpuGrad(FLAGS_dim); -#endif - CpuVector cpuParam(FLAGS_dim); - CpuVector cpuGrad(FLAGS_dim); - - gpuParam.rand(); - gpuGrad.rand(); - cpuParam.rand(); - cpuGrad.rand(); - - for (int i = 0; i < FLAGS_loop_time; ++i) { - cpuGrad.copyFrom(gpuGrad); - - header.dataLength = 
dataSize; - CHECK(channel->writeAll(&header, sizeof(header)) == sizeof(header)) - << "Client write header error"; - - CHECK(channel->writeAll(cpuGrad.getData(), dataSize) == dataSize) - << "Client write data error"; - - /* Now read server response */ - CHECK(channel->readAll(&header, sizeof(header)) == sizeof(header)) - << "Client read header error"; - - CHECK_EQ((uint64_t)header.dataLength, dataSize); - CHECK(channel->readAll(cpuParam.getData(), dataSize) == dataSize) - << "Client read data error"; - - gpuParam.copyFrom(cpuParam); - - LOG_EVERY_N(INFO, 100) << "i=" << i; - } - exit(0); -} diff --git a/paddle/legacy/pserver/test/test_ParameterServer2.cpp b/paddle/legacy/pserver/test/test_ParameterServer2.cpp deleted file mode 100644 index 542e80e046972be38d403bc3223f7e7fcd15e3f0..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/test/test_ParameterServer2.cpp +++ /dev/null @@ -1,624 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include -#include -#include - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_int32(num_gradient_servers); -DEFINE_string(server_addr, "127.0.0.1", "assign server address"); -DEFINE_int32(server_cpu, 0, "assign server cpu"); - -class ParameterServer2Tester : public ParameterServer2 { - public: - ParameterServer2Tester(std::string serverAddr, - int port, - int rdmaCpu = -1, - bool sepSendAndRecv = false) - : ParameterServer2(serverAddr, port, rdmaCpu), client_(sepSendAndRecv) {} - virtual ~ParameterServer2Tester() {} - void setup() { - CHECK(ParameterServer2::init()); - - parameters_.clear(); - clientConfigs_.clear(); - - clientConfigs_.resize(2); - { - ParameterConfig& config = clientConfigs_[0]; - config.set_name("para0"); - config.set_para_id(0); - config.set_size(10000); - config.set_device(-1); - config.set_learning_rate(1.0); - config.set_momentum(0.9); - } - - { - ParameterConfig& config = clientConfigs_[1]; - config.set_name("para1"); - config.set_para_id(1); - config.set_size(5000); - config.set_device(-1); - config.set_learning_rate(0.5); - config.set_momentum(0.4); - } - - for (auto& config : clientConfigs_) { - parameters_.emplace_back(new Parameter(config, /* useGpu= */ false)); - } - - size_t id = 0; - for (auto& para : parameters_) { - para->setID(id++); - } - - CHECK(client_.init(parameters_)); - OptimizationConfig optConfig; - optConfig.set_algorithm("async_sgd"); - optConfig.set_batch_size(100); - optConfig.set_learning_rate(0.1); - client_.setConfig(optConfig); - client_.setParameter(); - } - - void setConfigTest(); - void setStatusTest(); - void sendParameterTest(); - void sendDataTest(SendDataType type, size_t size); - void operationTest(); - void mergeBlockSegmentTest(); - void checkSegments(const BlockSegments& expected, const BlockSegments& segs); - void waitPassFinishTest(); - void synchronizeTest(); - - protected: - ParameterClient2 client_; - vector clientConfigs_; - vector 
parameters_; -}; - -std::unique_ptr g_server; - -void ParameterServer2Tester::setConfigTest() { - setup(); - - for (auto& config : clientConfigs_) { - auto it = configMap_.find(config.para_id()); - EXPECT_TRUE(it != configMap_.end()); - auto& serverConfig = it->second; - EXPECT_EQ(config.name(), serverConfig.name()); - EXPECT_EQ(config.size(), serverConfig.size()); - EXPECT_EQ(config.learning_rate(), serverConfig.learning_rate()); - EXPECT_EQ(config.momentum(), serverConfig.momentum()); - } -} - -void ParameterServer2Tester::setStatusTest() { - setup(); - EXPECT_TRUE(client_.inStatus(PSERVER_STATUS_NOT_SET)); - client_.setStatus(PSERVER_STATUS_PARAMETER_READY); - EXPECT_EQ(PSERVER_STATUS_PARAMETER_READY, status_); - EXPECT_TRUE(client_.inStatus(PSERVER_STATUS_PARAMETER_READY)); -} - -real sumVector(const CpuVector& vec) { - const real* data = vec.getData(); - size_t dim = vec.getSize(); - real sum = 0; - for (size_t i = 0; i < dim; ++i) { - sum += data[i]; - } - return sum; -} - -void ParameterServer2Tester::sendParameterTest() { - setup(); - - client_.sendAndReceiveParameter(PSERVER_UPDATE_MODE_SET_PARAM, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - false); // sendBackParameter = false - - vector parameterCopies; - - for (auto& parameter : parameters_) { - parameterCopies.emplace_back( - new Parameter(parameter->getConfig(), /* useGpu= */ false)); - parameterCopies.back() - ->getBuf(PARAMETER_VALUE) - ->copyFrom(*parameter->getBuf(PARAMETER_VALUE)); - } - - client_.sendAndReceiveParameter(PSERVER_UPDATE_MODE_GET_PARAM, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - true); // sendBackParameter = true - - for (size_t i = 0; i != parameters_.size(); ++i) { - real* v1 = parameters_[i]->getBuf(PARAMETER_VALUE)->getData(); - real* v2 = parameterCopies[i]->getBuf(PARAMETER_VALUE)->getData(); - EXPECT_EQ(parameters_[i]->getSize(), parameterCopies[i]->getSize()); - size_t size = parameters_[i]->getSize(); - real sum1 = 0, sum2 = 0; - for 
(size_t j = 0; j < size; ++j) { - sum1 += v1[j]; - sum2 += v2[j]; - } - EXPECT_EQ(sum1, sum2); - } -} - -void ParameterServer2Tester::sendDataTest(SendDataType type, size_t size) { - ParameterClient2 client1(true); - client1.init(parameters_); - ParameterClient2 client2(true); - client2.init(parameters_); - ParameterClient2 client3(true); - client3.init(parameters_); - - ThreadWorker worker1; - ThreadWorker worker2; - ThreadWorker worker3; - - double* testData1 = new double[size]; - double* testData2 = new double[size]; - double* testData3 = new double[size]; - double* getDataExpect = new double[size]; - double* getDataReal = new double[size]; - for (size_t i = 0; i < size; ++i) { - testData1[i] = rand(); // NOLINT TODO(yuyang18): Use rand_r instead. - testData2[i] = rand(); // NOLINT - testData3[i] = rand(); // NOLINT - getDataExpect[i] = testData1[i] + testData2[i] + testData3[i]; - } - - auto put1 = [&]() { - LOG(INFO) << "putOwnData1 start"; - client1.putOwnData(0, type, testData1, size); - LOG(INFO) << "putOwnData1 finish"; - }; - - auto get1 = [&]() { - LOG(INFO) << "sendData1 get all start"; - client1.getAllData(0, type, getDataReal, size); - for (size_t i = 0; i < size; ++i) { - CHECK_EQ(getDataReal[i], getDataExpect[i]); - } - LOG(INFO) << "sendData1 get all finish"; - }; - - auto put2 = [&]() { - LOG(INFO) << "putOwnData2 start"; - client2.putOwnData(1, type, testData2, size); - LOG(INFO) << "putOwnData2 finish"; - }; - - auto put3 = [&]() { - LOG(INFO) << "putOwnData3 start"; - client3.putOwnData(2, type, testData3, size); - LOG(INFO) << "putOwnData3 finish"; - }; - - worker1.addJob(put1); - worker1.addJob(get1); - worker2.addJob(put2); - worker3.addJob(put3); - - worker1.addJob(put1); - worker2.addJob(put2); - worker3.addJob(put3); - worker1.addJob(get1); - - worker1.wait(); - worker2.wait(); - worker3.wait(); - free(testData1); - free(testData2); - free(testData3); - free(getDataExpect); - free(getDataReal); -} - -void 
ParameterServer2Tester::operationTest() { - PServerVector v1, v2; - v1 = client_.createVector(); - EXPECT_EQ(NUM_PARAMETER_TYPES, v1.handle); - - v2 = client_.createVector(); - EXPECT_EQ(NUM_PARAMETER_TYPES + 1, v2.handle); - - PreparedOperations ops; - ops.addOperation(PSERVER_OP_RESET, v1, (real)1); - ops.addOperation(PSERVER_OP_RESET, v2, (real)2); - - real res1, res2, res3; - ops.addOperation(PSERVER_OP_utv, v1, v2)(&res1); - - ops.addOperation(PSERVER_OP_au_bv, v1, v2, (real)-1, (real)1); - ops.addOperation(PSERVER_OP_utv, v1, v2)(&res2); - - ops.addOperation(PSERVER_OP_au_bv, v1, v2, (real)-1, (real)1); - ops.addOperation(PSERVER_OP_utv, v1, v2)(&res3); - client_.doOperation(ops, false, false); - - EXPECT_EQ(30000, res1); - EXPECT_EQ(15000, res2); - EXPECT_EQ(0, res3); - - PServerMatrix m1, m2; - m1 = client_.createMatrix(4); - EXPECT_EQ(0, m1.handle); - m2 = client_.createMatrix(8); - EXPECT_EQ(1, m2.handle); - - // TODO(yuyang18): add tests for other operations OP_COPY, OP_au - - client_.releaseVector(v1); - client_.releaseVector(v2); - client_.releaseMatrix(m1); - client_.releaseMatrix(m2); -} - -void ParameterServer2Tester::checkSegments(const BlockSegments& expected, - const BlockSegments& segs) { - EXPECT_EQ(expected.size(), segs.size()); - if (expected.size() != segs.size()) { - return; - } - for (size_t i = 0; i < expected.size(); ++i) { - EXPECT_EQ(expected[i], segs[i]); - } -} - -void ParameterServer2Tester::mergeBlockSegmentTest() { - { - BlockSegments segs{{10, 20}, {30, 45}, {50, 70}}; - mergeSegments(&segs); - checkSegments({{10, 20}, {30, 45}, {50, 70}}, segs); - } - { - BlockSegments segs{{30, 45}, {50, 70}, {10, 20}}; - mergeSegments(&segs); - checkSegments({{10, 20}, {30, 45}, {50, 70}}, segs); - } - { - BlockSegments segs{{30, 45}, {50, 70}, {10, 30}}; - mergeSegments(&segs); - checkSegments({{10, 45}, {50, 70}}, segs); - } - { - BlockSegments segs{{30, 45}, {10, 70}, {10, 30}}; - mergeSegments(&segs); - checkSegments({{10, 70}}, segs); - } 
- { - BlockSegments segs{{30, 45}, {50, 70}, {10, 35}}; - mergeSegments(&segs); - checkSegments({{10, 45}, {50, 70}}, segs); - } - { - BlockSegments segs{{30, 45}, {50, 70}, {10, 60}}; - mergeSegments(&segs); - checkSegments({{10, 70}}, segs); - } - { - BlockSegments segs{{30, 45}, {50, 70}, {30, 47}}; - mergeSegments(&segs); - checkSegments({{30, 47}, {50, 70}}, segs); - } -} - -void ParameterServer2Tester::waitPassFinishTest() { - ParameterClient2 client1; - ParameterClient2 client2; - ParameterClient2 client3; - - ThreadWorker worker1; - ThreadWorker worker2; - ThreadWorker worker3; - - auto init1 = [&]() { - LOG(INFO) << "init1 start"; - client1.init(parameters_); - LOG(INFO) << "init1 finish"; - }; - - auto init2 = [&]() { - LOG(INFO) << "init2 start"; - client2.init(parameters_); - LOG(INFO) << "init2 finish"; - }; - - auto init3 = [&]() { - LOG(INFO) << "init3 start"; - client3.init(parameters_); - LOG(INFO) << "init3 finish"; - }; - - auto update1 = [&]() { - LOG(INFO) << "update1 start"; - client1.sendAndReceiveParameter(PSERVER_UPDATE_MODE_ADD_GRADIENT, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - true); // sendBackParameter = false - LOG(INFO) << "update1 finish"; - }; - - auto wait1 = [&]() { - LOG(INFO) << "wait1 start"; - client1.waitPassFinish(); - LOG(INFO) << "wait1 finish"; - }; - - auto update2 = [&]() { - LOG(INFO) << "update2 start"; - client2.sendAndReceiveParameter(PSERVER_UPDATE_MODE_ADD_GRADIENT, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - true); // sendBackParameter = false - LOG(INFO) << "update2 finish"; - }; - - auto wait2 = [&]() { - LOG(INFO) << "wait2 start"; - client2.waitPassFinish(); - LOG(INFO) << "wait2 finish"; - }; - - auto op3 = [&]() { - LOG(INFO) << "op3 start"; - PreparedOperations ops; - ops.addOperation(PSERVER_OP_SGD); - client3.doOperation(ops, - /* waitForGradient= */ true, - /* sendBackarameter= */ true); - LOG(INFO) << "op3 finish"; - }; - - worker1.addJob(init1); - 
worker2.addJob(init2); - worker3.addJob(init3); - - worker1.addJob(update1); - worker2.addJob(update2); - worker3.addJob(op3); - - worker3.addJob(op3); - worker3.addJob(op3); - worker2.addJob(update2); - worker2.addJob(update2); - worker1.addJob(wait1); - - worker2.addJob(wait2); - worker3.addJob(op3); - - worker1.wait(); - worker2.wait(); - worker3.wait(); - - LOG(INFO) << "Pass 1 finished"; - - worker1.addJob(update1); - worker2.addJob(update2); - worker3.addJob(op3); - - worker1.wait(); - worker2.wait(); - worker3.wait(); - - worker3.addJob(op3); - worker3.addJob(op3); - worker1.addJob(update1); - worker1.addJob(wait1); - worker2.addJob(wait2); - - worker1.wait(); - worker2.wait(); - worker3.wait(); - - LOG(INFO) << "Pass 2 finished"; -} - -void ParameterServer2Tester::synchronizeTest() { - ParameterClient2 client1; - ParameterClient2 client2; - - ThreadWorker worker1; - ThreadWorker worker2; - - FLAGS_log_period_server = 2; - - auto init1 = [&]() { - LOG(INFO) << "init1 start"; - client1.init(parameters_); - client1.setTrainerId(0); - LOG(INFO) << "init1 finish"; - }; - - auto init2 = [&]() { - LOG(INFO) << "init2 start"; - client2.init(parameters_); - client2.setTrainerId(1); - LOG(INFO) << "init2 finish"; - }; - - auto update1 = [&]() { - LOG(INFO) << "update1 start"; - client1.sendAndReceiveParameter(PSERVER_UPDATE_MODE_ASYNC_SGD, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - true); // sendBackParameter = false - LOG(INFO) << "update1 finish"; - }; - - auto wait1 = [&]() { - LOG(INFO) << "wait1 start"; - client1.asyncFinishPass(); - LOG(INFO) << "wait1 finish"; - }; - - auto update2 = [&]() { - LOG(INFO) << "update2 start"; - client2.sendAndReceiveParameter(PSERVER_UPDATE_MODE_ASYNC_SGD, - PARAMETER_VALUE, - 0, // numSamples = 0 - 0, // cost = 0 - true); // sendBackParameter = false - LOG(INFO) << "update2 finish"; - }; - - auto wait2 = [&]() { - LOG(INFO) << "wait2 start"; - client2.asyncFinishPass(); - LOG(INFO) << "wait2 finish"; - }; - - 
worker1.addJob(init1); - worker2.addJob(init2); - // call wait to reset some stats at pserver - worker1.addJob(wait1); - worker2.addJob(wait2); - - worker1.addJob(update1); - worker2.addJob(update2); - - worker2.addJob(update2); - worker2.addJob(update2); - worker1.addJob(wait1); - - worker2.addJob(wait2); - - worker1.wait(); - worker2.wait(); - LOG(INFO) << "Pass 1 finished"; - - worker1.addJob(update1); - worker2.addJob(update2); - - worker1.wait(); - worker2.wait(); - - worker1.addJob(update1); - worker2.addJob(update2); - worker1.addJob(update1); - worker1.addJob(update1); - worker1.addJob(update1); - worker1.addJob(update1); - worker1.addJob(update1); - worker1.addJob(update1); - worker1.addJob(wait1); - worker2.addJob(wait2); - - worker1.wait(); - worker2.wait(); - LOG(INFO) << "Pass 2 finished"; -} - -TEST(ParameterServer2, sendParameter) { g_server->sendParameterTest(); } - -TEST(ParameterServer2, setConfig) { g_server->setConfigTest(); } - -TEST(ParameterServer2, setStatus) { g_server->setStatusTest(); } - -TEST(ParameterServer2, operation) { g_server->operationTest(); } - -TEST(ParameterServer2, mergeBlockSegment) { g_server->mergeBlockSegmentTest(); } - -TEST(ParameterServer2, waitPassFinish) { g_server->waitPassFinishTest(); } - -TEST(ParameterServer2, synchronize) { g_server->synchronizeTest(); } - -TEST(ParameterServer2, sendData) { - // Set gserver and pserver all 3, so that the test is sufficient. 
- int oldFlagsPortsNUm = FLAGS_ports_num; - int oldFlagsNumGradientServers = FLAGS_num_gradient_servers; - int oldFlagsPort = FLAGS_port; - FLAGS_ports_num = 3; - FLAGS_num_gradient_servers = 3; - FLAGS_port = FLAGS_port + 1; - std::unique_ptr g_server1; - std::unique_ptr g_server2; - std::unique_ptr g_server3; - if (FLAGS_rdma_tcp == "rdma") { - g_server1.reset(new ParameterServer2Tester( - FLAGS_server_addr, FLAGS_port, FLAGS_server_cpu)); - g_server1->start(); - g_server2.reset(new ParameterServer2Tester( - FLAGS_server_addr, FLAGS_port + 1, FLAGS_server_cpu + 1)); - g_server2->start(); - g_server3.reset(new ParameterServer2Tester( - FLAGS_server_addr, FLAGS_port + 2, FLAGS_server_cpu + 2)); - g_server3->start(); - } else { // tcp - g_server1.reset(new ParameterServer2Tester(FLAGS_server_addr, FLAGS_port)); - g_server1->start(); - g_server2.reset( - new ParameterServer2Tester(FLAGS_server_addr, FLAGS_port + 1)); - g_server2->start(); - g_server3.reset( - new ParameterServer2Tester(FLAGS_server_addr, FLAGS_port + 2)); - g_server3->start(); - } - - g_server2->init(); - g_server3->init(); - sleep(2); - g_server1->setup(); - g_server1->sendDataTest(DATA_REDUCE_SUM, 1 << 24); - sleep(2); - g_server1->sendDataTest(DATA_REDUCE_SUM, 2); - sleep(2); - g_server1.reset(); - g_server2.reset(); - g_server3.reset(); - - FLAGS_ports_num = oldFlagsPortsNUm; - FLAGS_num_gradient_servers = oldFlagsNumGradientServers; - FLAGS_port = oldFlagsPort; -} - -int main(int argc, char** argv) { - paddle::initMain(argc, argv); - testing::InitGoogleTest(&argc, argv); - - FLAGS_num_gradient_servers = 2; - - if (FLAGS_rdma_tcp == "rdma") { - g_server.reset(new ParameterServer2Tester( - FLAGS_server_addr, FLAGS_port, FLAGS_server_cpu)); - } else { - g_server.reset(new ParameterServer2Tester(FLAGS_server_addr, FLAGS_port)); - } - - g_server->start(); - - sleep(2); - - int ret = RUN_ALL_TESTS(); - - g_server.reset(); - - exit(ret); -} diff --git a/paddle/legacy/pserver/test/test_ProtoServer.cpp 
b/paddle/legacy/pserver/test/test_ProtoServer.cpp deleted file mode 100644 index f7ab2e8af45f97a6537d41ca1afe51a4d3270b80..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/test/test_ProtoServer.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include "ParameterService.pb.h" -#include "paddle/legacy/math/Vector.h" -#include "paddle/legacy/pserver/ProtoServer.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -DEFINE_string(server_addr, "127.0.0.1", "Server address"); -DEFINE_int64(dim, 50000000, "Data size"); -DEFINE_bool(test_proto_server, true, "whether to test ProtoServer"); -DEFINE_bool(benchmark, false, "Do benchmark. 
Skip some tests"); - -using namespace paddle; // NOLINT - -class MyServer : public ProtoServer { - public: - explicit MyServer(int port, int rdmaCpu = -1) - : ProtoServer(FLAGS_server_addr, port, rdmaCpu), - status_(PSERVER_STATUS_NOT_SET) { - REGISTER_SERVICE_FUNCTION(MyServer, getStatus); - REGISTER_SERVICE_FUNCTION(MyServer, setStatus); - REGISTER_SERVICE_FUNCTION_EX(MyServer, getStatusEx); - } - void getStatus(const GetStatusRequest& request, - ProtoResponseCallback callback) { - (void)request; - GetStatusResponse response; - response.set_status(status_); - callback(response); - } - - void getStatusEx(const GetStatusRequest& request, - std::unique_ptr msgReader, - ProtoResponseCallbackEx callback) { - (void)request; - GetStatusResponse response; - response.set_status(status_); - buffer_.resize(msgReader->getNextBlockLength()); - msgReader->readNextBlock(&buffer_[0]); - callback(response, {{&buffer_[0], buffer_.size()}}); - } - - void setStatus(const SetStatusRequest& request, - ProtoResponseCallback callback) { - SetStatusResponse response; - status_ = request.status(); - callback(response); - } - - protected: - PServerStatus status_; - std::string buffer_; -}; - -TEST(ProtoServer, regular) { - ProtoClient* client; - if (FLAGS_rdma_tcp == "rdma") - client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_RDMA); - else - client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_TCP); - { - GetStatusRequest request; - GetStatusResponse response; - auto msgReader = client->sendAndRecv("getStatus", request, &response); - EXPECT_EQ(response.status(), PSERVER_STATUS_NOT_SET); - EXPECT_EQ(msgReader->getNumBlocks(), (size_t)0); - } - - { - SetStatusRequest request; - SetStatusResponse response; - request.set_status(PSERVER_STATUS_PARAMETER_READY); - client->sendAndRecv("setStatus", request, &response); - } - - { - GetStatusRequest request; - GetStatusResponse response; - client->sendAndRecv("getStatus", request, &response); - EXPECT_EQ(response.status(), 
PSERVER_STATUS_PARAMETER_READY); - } - - delete client; -} - -TEST(ProtoServer, extended) { -#ifdef PADDLE_WITH_CUDA - ProtoClient* client; - if (FLAGS_rdma_tcp == "rdma") - client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_RDMA); - else - client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_TCP); - int64_t dataSize = FLAGS_dim * sizeof(real); - - GpuVector gpuParam(FLAGS_dim); - GpuVector gpuGrad(FLAGS_dim); - CpuVector cpuParam(FLAGS_dim); - CpuVector cpuGrad(FLAGS_dim); - - gpuParam.rand(); - gpuGrad.rand(); - cpuParam.rand(); - cpuGrad.rand(); - - for (int k = 0; k < 4; ++k) { - for (int i = 0; i < 10; ++i) { - cpuGrad.copyFrom(gpuGrad); - if (FLAGS_test_proto_server) { - GetStatusRequest request; - GetStatusResponse response; - { - REGISTER_TIMER("sendAndRecv"); - auto msgReader = - client->sendAndRecv("getStatusEx", - request, - {{cpuGrad.getData(), (size_t)dataSize}}, - &response); - - EXPECT_EQ(msgReader->getNumBlocks(), (size_t)1); - EXPECT_EQ(msgReader->getNextBlockLength(), (size_t)dataSize); - msgReader->readNextBlock(cpuParam.getData()); - } - if (!FLAGS_benchmark) { - real* v1 = cpuGrad.getData(); - real* v2 = cpuParam.getData(); - real sum1 = 0, sum2 = 0; - for (int j = 0; j < FLAGS_dim; ++j) { - sum1 += v1[j]; - sum2 += v2[j]; - } - EXPECT_EQ(sum1, sum2); - } - } - gpuParam.copyFrom(cpuParam); - - LOG_EVERY_N(INFO, 10) << "i=" << i; - } - globalStat.printAllStatus(); - globalStat.reset(); - } - - delete client; -#endif -} - -int main(int argc, char** argv) { - paddle::initMain(argc, argv); - testing::InitGoogleTest(&argc, argv); - MyServer server(FLAGS_port, FLAGS_rdma_tcp == "rdma" ? 
0 : -1); - server.start(); - usleep(10000); - - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/pserver/test/test_ProtoServer.sh b/paddle/legacy/pserver/test/test_ProtoServer.sh deleted file mode 100755 index 1439350847308cc5590329b0fe2a6d2c77d04409..0000000000000000000000000000000000000000 --- a/paddle/legacy/pserver/test/test_ProtoServer.sh +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -x -for ((port=12340;port<=12360;port++)) -do - port_used_num=`netstat -a |grep $port|wc -l` - if [ $port_used_num -eq 0 ] - then - echo $port; - legacy/pserver/test/test_ProtoServer --port=$port - if [ $? 
-eq 0 ] - then - exit 0 - else - echo "test_ProtoServer run wrong" - exit 1 - fi -fi -done -echo "test_ProtoServer port not found" -exit 1 diff --git a/paddle/legacy/trainer/CMakeLists.txt b/paddle/legacy/trainer/CMakeLists.txt deleted file mode 100644 index 6192de4388c8c3f5165fb88b443d372748f7a17e..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/CMakeLists.txt +++ /dev/null @@ -1,73 +0,0 @@ -# paddle trainer package - -set(TRAINER_SOURCES - ParameterUpdater.cpp - ParamUtil.cpp - RemoteParameterUpdater.cpp - NewRemoteParameterUpdater.cpp - Tester.cpp - Trainer.cpp - TrainerInternal.cpp - TrainerBenchmark.cpp - ThreadParameterUpdater.cpp - TrainerInternalConfig.cpp - TrainerConfigHelper.cpp) - -set(TRAINER_HEADERS - ParameterUpdater.h - ParamUtil.h - RemoteParameterUpdater.h - NewRemoteParameterUpdater.h - Tester.h - TesterConfig.h - Trainer.h - TrainerInternal.h - TrainerInternalConfig.h - ThreadParameterUpdater.h - TrainerConfigHelper.h) - -if(NOT WITH_GOLANG) - list(REMOVE_ITEM TRAINER_SOURCES - NewRemoteParameterUpdater.cpp) - list(REMOVE_ITEM TRAINER_HEADERS - NewRemoteParameterUpdater.h) -endif() - -add_library(paddle_trainer_lib STATIC - ${TRAINER_SOURCES}) - -add_dependencies(paddle_trainer_lib - paddle_proto - ${external_project_dependencies}) - -macro(add_paddle_exe TARGET_NAME) - add_executable(${TARGET_NAME} ${ARGN}) - link_paddle_exe(${TARGET_NAME}) -endmacro() - -if(WITH_TESTING) - add_subdirectory(tests) -endif() - -if(NOT MOBILE_INFERENCE) - add_paddle_exe(paddle_trainer TrainerMain.cpp) - add_paddle_exe(paddle_merge_model MergeModel.cpp) - - install(TARGETS paddle_trainer paddle_merge_model - RUNTIME DESTINATION opt/paddle/bin - PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) - - set_target_properties(paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) - set_target_properties(paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) -endif() - -if(APPLE) - 
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") -endif() - -if(WITH_GOLANG) - add_dependencies(paddle_trainer_lib paddle_pserver_cclient) - target_link_libraries(paddle_trainer_lib paddle_pserver_cclient) - target_link_libraries(paddle_trainer paddle_pserver_cclient) -endif(WITH_GOLANG) diff --git a/paddle/legacy/trainer/MergeModel.cpp b/paddle/legacy/trainer/MergeModel.cpp deleted file mode 100644 index 8a3601f192224a43687191527374149d99285ae0..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/MergeModel.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "ParamUtil.h" -#include "Trainer.h" -#include "paddle/legacy/pserver/ParameterServer2.h" -#include "paddle/legacy/utils/PythonUtil.h" - -DEFINE_string(model_dir, "", "Directory for separated model files"); -DEFINE_string(config_file, "", "Config file for the model"); -DEFINE_string(model_file, "", "File for merged model file"); - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -int main(int argc, char** argv) { - initMain(argc, argv); - initPython(argc, argv); - - if (FLAGS_model_dir.empty() || FLAGS_config_file.empty() || - FLAGS_model_file.empty()) { - LOG(INFO) << "Usage: ./paddle_merge_model --model_dir=pass-00000 " - "--config_file=config.py --model_file=out.paddle"; - return 0; - } - - string confFile = FLAGS_config_file; -#ifndef PADDLE_WITH_CUDA - FLAGS_use_gpu = false; -#endif - auto config = std::make_shared(confFile); - unique_ptr gradientMachine(GradientMachine::create(*config)); - gradientMachine->loadParameters(FLAGS_model_dir); - - ofstream os(FLAGS_model_file); - - string buf; - config->getConfig().SerializeToString(&buf); - int64_t size = buf.size(); - os.write((char*)&size, sizeof(size)); - CHECK(os) << "Fail to write to " << FLAGS_model_file; - os.write(buf.data(), buf.size()); - vector& parameters = gradientMachine->getParameters(); - for (auto& para : parameters) { - para->save(os); - CHECK(os) << "Fail to write to " << FLAGS_model_file; - } - os.close(); - - return 0; -} diff --git a/paddle/legacy/trainer/NewRemoteParameterUpdater.cpp b/paddle/legacy/trainer/NewRemoteParameterUpdater.cpp deleted file mode 100644 index cdd832acd16e5c259a7f6463aac537e4e6537c97..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/NewRemoteParameterUpdater.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "NewRemoteParameterUpdater.h" -#include "Trainer.h" -#include "paddle/legacy/utils/Stat.h" - -DECLARE_int32(trainer_id); -DECLARE_string(save_dir); - -namespace paddle { -NewRemoteParameterUpdater::NewRemoteParameterUpdater( - const OptimizationConfig &config, const std::string pserverSpec) - : trainerConfig_(config), - parameterClient_(-1), - newParameters_(nullptr), - newGradients_(nullptr), - pserverSpec_(pserverSpec) {} - -NewRemoteParameterUpdater::NewRemoteParameterUpdater( - const OptimizationConfig &config, - const std::string pserverSpec, - const bool useEtcd) - : trainerConfig_(config), - parameterClient_(-1), - newParameters_(nullptr), - newGradients_(nullptr), - pserverSpec_(pserverSpec), - useEtcd_(useEtcd) {} - -void NewRemoteParameterUpdater::init( - const std::vector ¶meters) { - ParameterUpdater::init(parameters); - - // create parameter server client. - if (useEtcd_) { - parameterClient_ = - paddle_new_etcd_pserver_client((char *)pserverSpec_.c_str()); - } else { - parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), - FLAGS_trainer_id == 0); - } - - // init new parameter and gradient. - newParameters_ = initNewParameter(PARAMETER_VALUE); - newGradients_ = initNewParameter(PARAMETER_GRADIENT); - - // init parameter, one trainer will get the opportunity to int parameter and - // send them to parameter server. 
Others will get the initialized parameter - // from parameter server - if (paddle_begin_init_params(parameterClient_)) { - LOG(INFO) << "paddle_begin_init_params start"; - // NOTE: convert V1 OptimizatioinConfig proto to V2 OptimizerConfig. - // This makes golang pserver compatible with handy V1 demos. - // TODO(wuyi): Refine or remove these ugly converting lines - OptimizerConfig optimizerConfigV2; - if (trainerConfig_.learning_method() == "momentum") { - optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); - } else if (trainerConfig_.learning_method() == "adagrad") { - optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); - optimizerConfigV2.mutable_adagrad()->set_epsilon( - trainerConfig_.ada_epsilon()); - } else if (trainerConfig_.learning_method() == "adadelta") { - optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); - optimizerConfigV2.mutable_adadelta()->set_epsilon( - trainerConfig_.ada_epsilon()); - optimizerConfigV2.mutable_adadelta()->set_rho(trainerConfig_.ada_rou()); - } else if (trainerConfig_.learning_method() == "adam") { - optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adam); - optimizerConfigV2.mutable_adam()->set_beta_1(trainerConfig_.adam_beta1()); - optimizerConfigV2.mutable_adam()->set_beta_2(trainerConfig_.adam_beta2()); - optimizerConfigV2.mutable_adam()->set_epsilon( - trainerConfig_.adam_epsilon()); - } else { - LOG(ERROR) << "got unsupported v1 optimizer config: " - << trainerConfig_.learning_method(); - optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); - } - - if (trainerConfig_.learning_rate_schedule() == "constant") { - optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); - optimizerConfigV2.mutable_const_lr()->set_learning_rate( - trainerConfig_.learning_rate()); - } else if (trainerConfig_.learning_rate_schedule() == "linear") { - optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Linear); - optimizerConfigV2.mutable_linear_lr()->set_learning_rate( - 
trainerConfig_.learning_rate()); - optimizerConfigV2.mutable_linear_lr()->set_lr_decay_a( - trainerConfig_.learning_rate_decay_a()); - optimizerConfigV2.mutable_linear_lr()->set_lr_decay_b( - trainerConfig_.learning_rate_decay_b()); - } else { - LOG(ERROR) << "got unsupported v1 learning_rate_schedule config: " - << trainerConfig_.learning_rate_schedule() << ", set to const"; - optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); - optimizerConfigV2.mutable_const_lr()->set_learning_rate( - trainerConfig_.learning_rate()); - } - - // overwrite optimizerConfigV2 for per-parameter(layer) configs - for (int i = 0; i < parameterSize(); ++i) { - // FIXME(typhoonzero): paramConfig always have default values, - // how to check if it's default? - // TODO(typhoonzero): log output: optimizerConfigV2.DebugString(); - LOG(INFO) << "trainerConfig_: " << trainerConfig_.DebugString(); - // send param and config to pserver - std::string bytes = optimizerConfigV2.SerializeAsString(); - const char *array = bytes.data(); - int size = (int)bytes.size(); - paddle_init_param( - parameterClient_, *newParameters_[i], (void *)array, size); - } - paddle_finish_init_params(parameterClient_); - LOG(INFO) << "paddle_begin_init_params done"; - } else { - paddle_get_params(parameterClient_, newParameters_, parameterSize()); - } - - LOG(INFO) << "NewRemoteParameterUpdater initialized"; -} - -void NewRemoteParameterUpdater::updateImpl(Parameter *para) {} - -void NewRemoteParameterUpdater::finishBatch(real cost) { - // send gradient to parameter server. - paddle_send_grads(parameterClient_, newGradients_, parameterSize()); - // get the updated parameter from parameterClient. - paddle_get_params(parameterClient_, newParameters_, parameterSize()); - - // clear gradient after update parameter. 
- for (auto ¶ : parameters_) { - para->getBuf(PARAMETER_GRADIENT)->zeroMem(); - } -} - -void NewRemoteParameterUpdater::startPass() {} - -bool NewRemoteParameterUpdater::finishPass() { return true; } -} // namespace paddle diff --git a/paddle/legacy/trainer/NewRemoteParameterUpdater.h b/paddle/legacy/trainer/NewRemoteParameterUpdater.h deleted file mode 100644 index 707e9ceb9b6a22d265f9bf7b02af7f3002930fd4..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/NewRemoteParameterUpdater.h +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "OptimizerConfig.pb.h" -#include "ParameterUpdater.h" -#include "libpaddle_pserver_cclient.h" -#include "paddle/legacy/pserver/ParameterClient2.h" -#include "paddle/legacy/utils/Queue.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -/** - * New remote parameter updater for dense parameters that use cclient of go. 
- */ -class NewRemoteParameterUpdater : public ParameterUpdater { - public: - NewRemoteParameterUpdater(const OptimizationConfig& config, - const std::string pserverSpec); - NewRemoteParameterUpdater(const OptimizationConfig& config, - const std::string pserverSpec, - const bool useEtcd); - ~NewRemoteParameterUpdater() { - releaseNewParameter(newParameters_); - releaseNewParameter(newGradients_); - if (parameterClient_ >= 0) paddle_pserver_client_release(parameterClient_); - } - - /** - * initialize the internal parameter client and itself. - */ - virtual void init(const std::vector& parameters); - /** - * @brief start batch - * - * @note one batch training exhibits stateful feature to help - * to do performance tuning, sgd optimization if necessary. - */ - virtual PassType startBatch(int64_t batchSize) { return PASS_TRAIN; } - - /** - * send parameters to pservers and get returned parameters - * from all pservers if necessary. - */ - virtual void finishBatch(real cost); - virtual void startPass(); - virtual bool finishPass(); - - protected: - /** - * work need to do after finishBatch - */ - virtual void updateImpl(Parameter* para); - - private: - int parameterSize() { return (int)parameters_.size(); } - - /** - * init parameter of go paddle pserver cclient. 
- * @param new_params - * @param type - */ - paddle_parameter** initNewParameter(ParameterType type) { - paddle_parameter** new_params = - (paddle_parameter**)malloc(sizeof(paddle_parameter*) * parameterSize()); - for (int i = 0; i < parameterSize(); ++i) { - new_params[i] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); - memset(new_params[i], 0, sizeof(paddle_parameter)); - } - - for (int i = 0; i < parameterSize(); ++i) { - ParameterPtr param = parameters_[i]; - new_params[i]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - new_params[i]->name = (char*)param->getName().c_str(); - new_params[i]->content = - (unsigned char*)(param->getBuf(type).get()->getData()); - new_params[i]->content_len = - (int)param->getBuf(type).get()->getSize() * sizeof(real); - } - return new_params; - } - - void releaseNewParameter(paddle_parameter** newParams) { - if (newParams != nullptr) { - for (int i = 0; i < parameterSize(); ++i) { - free(newParams[i]); - } - free(newParams); - } - } - - protected: - const OptimizationConfig& trainerConfig_; - /// internal parameter client object for exchanging data with pserver - paddle_pserver_client parameterClient_; - /// the parameters for new pserver client - paddle_parameter** newParameters_; - /// the gradinets for new pserver client - paddle_parameter** newGradients_; - /// the specification of parameter server "host1:port,host1:port" - std::string pserverSpec_; - /// true if pserverSpec_ is etcd endpoint, else pserverSpec_ is pserver addr - bool useEtcd_; -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/ParamUtil.cpp b/paddle/legacy/trainer/ParamUtil.cpp deleted file mode 100644 index b5aba32dee1d07015ae3fce1cc76242b8ae80fe5..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/ParamUtil.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ParamUtil.h" - -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include "paddle/legacy/utils/GlobalConstants.h" -#include "paddle/legacy/utils/PythonUtil.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -#include "TesterConfig.h" -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/legacy/gserver/layers/ValidationLayer.h" - -namespace paddle { - -ParameterUtil::ParameterUtil( - const std::shared_ptr &config, - std::unique_ptr &&intconfig, - const GradientMachinePtr &gradientMachine, - const std::shared_ptr ¶meterUpdater) { - config_ = config; - intConfig_ = std::move(intconfig); - gserver_ = gradientMachine; - pUpdater_ = parameterUpdater; -} - -bool ParameterUtil::loadParameters(int passId, bool local, bool remote) { - constexpr int kBufLen = 100; - char buf[kBufLen]; - snprintf(buf, kBufLen, "pass-%05d", passId); - std::string doneFile = path::join(config_->getSaveDir(), buf, "done"); - if (!fileExist(doneFile.c_str())) return false; - loadParametersWithPath(path::join(config_->getSaveDir(), buf), local, remote); - return true; -} - -void ParameterUtil::loadParametersWithPath(const std::string &dir, - bool local, - bool remote) { - if (local) { - gserver_->loadParameters(dir); - } - if (remote && pUpdater_) { - pUpdater_->loadParametersRemote(dir); - } -} - -void ParameterUtil::saveParametersOnePass(int passId, int 
passInnerId) { - pUpdater_->apply(); - saveParameters(passId, passInnerId); - if (intConfig_->save_only_one_ && passId >= intConfig_->saving_period_) { - deleteParameters(passId - intConfig_->saving_period_); - } - pUpdater_->restore(); -} - -void ParameterUtil::saveParameters(int passId, int passInnerId) { - constexpr int kBufLen = 100; - char buf[kBufLen]; - if (passInnerId > 0) { - snprintf(buf, kBufLen, "pass-%05d-%03d", passId, passInnerId); - } else { - snprintf(buf, kBufLen, "pass-%05d", passId); - } - - std::string basePath = config_->getSaveDir(); - if (basePath.find('/') == std::string::npos) { - basePath = "./" + basePath; - } - mkDirRecursively(basePath.c_str()); - - std::string saveDir = path::join(basePath, buf); - mkDir(saveDir.c_str()); - if (!intConfig_->load_save_param_pserver_) { - pUpdater_->getParametersRemote(true /*full parameter*/, - true /*after apply*/); - } - - gserver_->saveParameters(saveDir); - if (intConfig_->load_save_param_pserver_) { - pUpdater_->saveParametersRemote(saveDir); - } - std::string doneFile = path::join(saveDir, "done"); - touchFile(doneFile.c_str()); - std::ofstream out(doneFile); - version::printVersion(out); - out.close(); - VLOG(1) << "save dir " << saveDir; - saveConfigWithPath(saveDir); -} - -void ParameterUtil::deleteParameters(int passId, int passInnerId) { - constexpr int kBufLen = 100; - char buf[kBufLen]; - const std::string &saveDir = config_->getSaveDir(); - if (passInnerId > 0) { - snprintf(buf, - kBufLen, - "%s/pass-%05d-%03d", - saveDir.c_str(), - passId, - passInnerId); - } else { - snprintf(buf, kBufLen, "%s/pass-%05d", saveDir.c_str(), passId); - } - mkDir(saveDir.c_str()); - LOG(INFO) << "delete dir " << buf; - rmDir(buf); -} - -void ParameterUtil::saveConfigWithPath(const std::string &path) { - std::string src; - // save config in some path - if (!intConfig_->config_.empty()) { - src = intConfig_->config_; - } else { - bool ok; - src = config_->getConfigName(&ok); - if (!ok) { - return; - } - } - 
copyFileToPath(src, path); - - // save other import config file name to path.txt - std::string ss = path::join(path, "path.txt"); - std::ofstream os(ss); - std::string fileName = path::basename(src); - CHECK(os.write(fileName.c_str(), fileName.length())) - << "Fail to write config file name " << ss; - VLOG(1) << "fileName " << fileName; - os.close(); - - // copy other import config files - for (int i = 0; i < config_->getConfig().config_files_size(); ++i) { - copyFileToPath(config_->getConfig().config_files(i), path); - } -} - -} // namespace paddle diff --git a/paddle/legacy/trainer/ParamUtil.h b/paddle/legacy/trainer/ParamUtil.h deleted file mode 100644 index 07786967762a7b9267d190de5275f0f94bbd21ef..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/ParamUtil.h +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/legacy/utils/Util.h" - -#include - -#include "hl_gpu.h" -#include "paddle/legacy/gserver/dataproviders/DataProvider.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" - -#include -#include -#include "ParameterUpdater.h" -#include "TrainerConfig.pb.h" -#include "TrainerConfigHelper.h" - -namespace paddle { - -/** - * Configuration for parameter utils. 
- */ -struct ParameterUtilConfig { - DISABLE_COPY(ParameterUtilConfig); - - ParameterUtilConfig(bool save_only_one, - int saving_period, - bool load_save_parameters_in_pserver, - std::string config) - : save_only_one_(save_only_one), - saving_period_(saving_period), - load_save_param_pserver_(load_save_parameters_in_pserver), - config_(config) {} - - bool save_only_one_; - int saving_period_; - bool load_save_param_pserver_; - std::string config_; -}; - -/** - * ParameterUtil - * Utility class for loading and saving parameters - */ -class ParameterUtil { - public: - /** - * Ctor. - * - * @param config - * @param intconfig - * @param gradientMachine - * @param parameterUpdater - * @return - */ - ParameterUtil(const std::shared_ptr &config, - std::unique_ptr &&intconfig, - const GradientMachinePtr &gradientMachine, - const std::shared_ptr ¶meterUpdater); - - /// Load parameter from the saved parameter file as pass passId - /// if loadsave_parameters_in_pserver is set, some parameters MUST - /// load in pserver, which is "remote". - /// loadParameters can choose to load local/remote parameter, or both. 
- bool loadParameters(int passId, bool local = true, bool remote = false); - - /// load parameters given path info - void loadParametersWithPath(const std::string &dir, - bool local = true, - bool remote = false); - - /// Save parameter to dist for pass passId - /// passInnerId means saving times in one pass, some users want to - /// save parameters when have processed some batches in one pass - /// passInnerId = 0 means do not need to save in one inner pass - void saveParameters(int passId, int passInnerId = 0); - - /// save parameters for one pass, when passInnerId > 0 means saving - /// the passInnerId times in one pass - void saveParametersOnePass(int passId, int passInnerId = 0); - - /// delete parameter from disk via passId - void deleteParameters(int passId, int passInnerId = 0); - - /// save config given path info - void saveConfigWithPath(const std::string &path); - - /** - * Try to load parameter from config. - * @return true if can load from trainer config. - */ - inline bool tryLoadParametersFromConfig() { - auto &c = config_->getConfig(); - if (!c.init_model_path().empty()) { - loadParametersWithPath(c.init_model_path()); - return true; - } else if (c.start_pass() > 0) { - CHECK(loadParameters(c.start_pass() - 1)); - return true; - } else { - return false; - } - } - - private: - std::shared_ptr config_; - std::unique_ptr intConfig_; - GradientMachinePtr gserver_; - std::shared_ptr pUpdater_; -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/ParameterUpdater.cpp b/paddle/legacy/trainer/ParameterUpdater.cpp deleted file mode 100644 index 549fb0332da78053a261928b5558beb1ffbc79c5..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/ParameterUpdater.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ParameterUpdater.h" - -#include "paddle/legacy/utils/Logging.h" - -#include "paddle/legacy/utils/Thread.h" - -namespace paddle { - -static const hl_stream_t kDeviceToHostStream = HPPL_STREAM_1; -static const hl_stream_t kHostToDeviceStream = HPPL_STREAM_2; - -SgdUpdaterWithCpuAverager::SgdUpdaterWithCpuAverager( - const OptimizationConfig& optConfig) - : SgdLocalUpdater(optConfig, false /*with averager*/) { - CHECK(FLAGS_use_gpu && optConfig.do_average_in_cpu()); - averager_.reset(AverageOptimizer::create(optConfig, - new DummyOptimizer(optConfig), - false /*sparse*/, - true /*apply*/)); - updateWorker_.addJob([]() { hl_set_device(FLAGS_gpu_id); }); -} - -void SgdUpdaterWithCpuAverager::init( - const std::vector& parameters) { - SgdLocalUpdater::init(parameters); - averager_->init(parameters_.size(), nullptr); - copyEvents_.resize(parameters_.size()); - for (auto& parameter : parameters) { - SetDevice device(parameter->getDeviceId()); - cpuParameters_.emplace_back(new Parameter(parameter->getConfig(), - /* useGpu= */ false, - /* doInit= */ false)); - if (parameter->useGpu()) { - cpuParameters_.back()->enableType(PARAMETER_APPLY); - } else { - cpuParameters_.back()->enableSharedType( - PARAMETER_APPLY, parameter->getBuf(PARAMETER_VALUE)); - } - for (ParameterType type : averager_->getParameterTypes()) { - cpuParameters_.back()->enableType(type); - } - - hl_create_event(©Events_[nonStaticParaIDMap_[parameter->getID()]]); - } -} - -SgdUpdaterWithCpuAverager::~SgdUpdaterWithCpuAverager() { - for (auto& event : copyEvents_) { - hl_destroy_event(event); - } -} - 
-void SgdUpdaterWithCpuAverager::updateImpl(Parameter* para) { - SgdLocalUpdater::updateImpl(para); - - if (para->useGpu()) { - size_t pid = nonStaticParaIDMap_[para->getID()]; - Parameter* cpuPara = cpuParameters_[pid].get(); - cpuPara->getBuf(PARAMETER_VALUE) - ->copyFrom(*para->getBuf(PARAMETER_VALUE), kDeviceToHostStream); - hl_stream_record_event(kDeviceToHostStream, copyEvents_[pid]); - } - - updateWorker_.addJob( - std::bind(&SgdUpdaterWithCpuAverager::updateFunc, this, para)); -} - -void SgdUpdaterWithCpuAverager::updateFunc(Parameter* para) { - SetDevice setDevice(para->getDeviceId()); - size_t pid = nonStaticParaIDMap_[para->getID()]; - Parameter* cpuPara = cpuParameters_[pid].get(); - if (para->useGpu()) { - hl_event_synchronize(copyEvents_[pid]); - } - averager_->update(cpuPara->getBufs(), cpuPara->getConfig(), -1LU); -} - -void SgdUpdaterWithCpuAverager::finishBatch(real cost) { - SgdLocalUpdater::finishBatch(cost); - - updateWorker_.wait(); - for (auto para : cpuParameters_) { - if (auto callback = averager_->needSpecialTraversal(para->getConfig())) { - callback(para->getBufs(), para->getConfig(), -1LU); - } - } - averager_->finishBatch(); -} - -void SgdUpdaterWithCpuAverager::apply() { - // backup gpu value - for (auto& para : parameters_) { - SetDevice setDevice(para->getDeviceId()); - para->getBuf(PARAMETER_GRADIENT) - ->copyFrom(*para->getBuf(PARAMETER_VALUE), kHostToDeviceStream); - } - - // apply on cpu parameter - if (auto callback = averager_->apply()) { - for (auto para : cpuParameters_) { - callback(para->getBufs(), para->getConfig(), -1LU); - } - } - - // copy to gpu value - for (auto& para : parameters_) { - SetDevice setDevice(para->getDeviceId()); - size_t pid = nonStaticParaIDMap_[para->getID()]; - Parameter* cpuPara = cpuParameters_[pid].get(); - if (parameters_[pid]->useGpu()) { - para->getBuf(PARAMETER_VALUE) - ->copyFrom(*cpuPara->getBuf(PARAMETER_APPLY), kHostToDeviceStream); - } - } - hl_stream_synchronize(kHostToDeviceStream); - 
for (auto& para : parameters_) { - para->setValueUpdated(); - } -} - -void SgdUpdaterWithCpuAverager::restore() { - // restore on cpu parameter - if (auto callback = averager_->restore()) { - for (auto para : cpuParameters_) { - callback(para->getBufs(), para->getConfig(), -1LU); - } - } - - // restore gpu value - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - para->getBuf(PARAMETER_VALUE)->copyFrom(*para->getBuf(PARAMETER_GRADIENT)); - para->getBuf(PARAMETER_GRADIENT)->zeroMem(); - para->setValueUpdated(); - } -} - -} // namespace paddle diff --git a/paddle/legacy/trainer/ParameterUpdater.h b/paddle/legacy/trainer/ParameterUpdater.h deleted file mode 100644 index acddc3702d78fdb198973f70a8642c5192af992b..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/ParameterUpdater.h +++ /dev/null @@ -1,265 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "paddle/legacy/utils/Thread.h" -#include "paddle/legacy/utils/Util.h" - -#include "paddle/legacy/parameter/AverageOptimizer.h" -#include "paddle/legacy/parameter/FirstOrderOptimizer.h" -#include "paddle/legacy/parameter/OptimizerFunctions.h" -#include "paddle/legacy/parameter/OptimizerWithRegularizer.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/parameter/ParameterUpdaterBase.h" - -#include "TrainerConfig.pb.h" -#include "paddle/legacy/gserver/layers/Layer.h" - -#include -#include - -namespace paddle { - -/** - * @brief Parameter Updater for SGD, and local(not cluster) run. - */ -class SgdLocalUpdater : public ParameterUpdater { - public: - /** - * @brief Ctor. Initialize optimizer locally by optConfig. - * @param optConfig optimization config. - * @param withAverager with average optimizer or not, default is true. - */ - explicit SgdLocalUpdater(const OptimizationConfig& optConfig, - bool withAverager = true) - : numSamplesProcessed_(0) { - auto baseOptimizer = ParameterOptimizer::create(optConfig); - optimizer_.reset(withAverager - ? AverageOptimizer::create(optConfig, baseOptimizer) - : baseOptimizer); - CHECK(optimizer_) << "fail to create optimizer: " - << optConfig.learning_method(); - auto types = optimizer_->getParameterTypes(); - for (auto type : types) { - addParameterType(type); - } - } - - /** - * @brief Initialize parameters and optimizer_. - * For example, - * If optimizer need hassien vector, then parameter's hassien will - * be initialized. - * @param parameters The parameter need to be initialized. 
- */ - virtual void init(const std::vector& parameters) { - ParameterUpdater::init(parameters); - optimizer_->init(parameters_.size(), nullptr); - // check no L1 decay in parameter configs - CHECK(std::find_if(parameters.begin(), - parameters.end(), - [](const ParameterPtr& para) { - return para->getConfig().decay_rate_l1() > 0.0f; - }) == parameters.end()) - << "SgdLocalUpdater cannot support L1 decay in parameter"; - } - - /** - * @brief Start a batch with current mini-batch size - * @param current mini-batch size. - * @return Always PASS_TRAIN. - */ - virtual PassType startBatch(int64_t batchSize) { - numSamplesProcessed_ += batchSize; - optimizer_->startBatch(numSamplesProcessed_); - return PASS_TRAIN; - } - - /** - * @brief finish a mini-batch. - */ - virtual void finishBatch(real cost) { optimizer_->finishBatch(); } - - /** - * @brief start a pass. - */ - virtual void startPass() { optimizer_->startPass(); } - - /** - * @brief finish a pass. - * @param cost sum cost during one pass. - * @return true if accept (used for owlqn). - */ - virtual bool finishPass() { - optimizer_->finishPass(); - return ParameterUpdater::finishPass(); - } - - /** - * @brief apply model average. - */ - virtual void apply() { - if (auto callback = optimizer_->apply()) { - for (auto para : parameters_) { - SetDevice device(para->getDeviceId()); - callback(para->getBufs(), para->getConfig(), -1UL); - } - } - } - - /** - * @brief restore parameter value before model average - */ - virtual void restore() { - if (auto callback = optimizer_->restore()) { - for (auto para : parameters_) { - SetDevice device(para->getDeviceId()); - callback(para->getBufs(), para->getConfig(), -1UL); - } - } - } - - protected: - /** - * @brief update method. Update value from gradient. - * @param para parameter that will be updated. 
- */ - virtual void updateImpl(Parameter* para) { - optimizer_->update(para->getBufs(), para->getConfig()); - if (auto callback = optimizer_->needSpecialTraversal(para->getConfig())) { - callback(para->getBufs(), para->getConfig(), -1UL); - } - - para->setValueUpdated(); - para->getBuf(PARAMETER_GRADIENT)->zeroMem(); - } - - std::unique_ptr optimizer_; - - /** - * @brief total number of samples processed. - */ - int64_t numSamplesProcessed_; -}; - -/** - * @brief SgdCpuUpdater is used only in recursive neural network - * @deprecated - */ -class SgdCpuUpdater : public SgdLocalUpdater, public Deprecated { - public: - explicit SgdCpuUpdater(const OptimizationConfig& optConfig) - : SgdLocalUpdater(optConfig), - Deprecated( - "SgdCpuUpdater is used only in recursive neural network, " - "and recursive neural network is deprecated in paddle. " - "Use it all by your own.") {} - - /** - * @brief update all parameter on finish batch. - * @param cost - */ - virtual void finishBatch(real cost) { - for (auto para : parameters_) { - SgdLocalUpdater::update(para.get()); - } - optimizer_->finishBatch(); - } - - protected: - /** - * @brief do nothing. - * @param para - */ - virtual void updateImpl(Parameter* para) {} -}; - -/** - * @brief Sgd Local Updater With average in cpu. - * - * It will do model average in cpu to reduce gpu memory comsuption. - */ -class SgdUpdaterWithCpuAverager : public SgdLocalUpdater { - public: - /** - * @brief Ctor. - * - * SgdUpdaterWithCpuAverager will do everything as a - * SgdLocalUpdater, then copy parameter from GPU to CPU, and do model - * average in cpu. - */ - explicit SgdUpdaterWithCpuAverager(const OptimizationConfig& optConfig); - ~SgdUpdaterWithCpuAverager(); - - /** - * @brief init. Initialize cpu parameters, model average optimizer. 
- * @param parameters - */ - virtual void init(const std::vector& parameters); - - virtual PassType startBatch(int64_t batchSize) { - averager_->startBatch(-1UL); - return SgdLocalUpdater::startBatch(batchSize); - } - virtual void finishBatch(real cost); - - virtual void startPass() { - averager_->startPass(); - SgdLocalUpdater::startPass(); - } - virtual bool finishPass() { - averager_->finishPass(); - return SgdLocalUpdater::finishPass(); - } - - /// apply the averaged parameter to PARAMETER_VALUE - /// use PARAETER_GRADIENT for backing up PARAMETER_VALUE - virtual void apply(); - - /** - * @brief Restore parameter before apply(). - */ - virtual void restore(); - - protected: - virtual void updateImpl(Parameter* para); - - void updateFunc(Parameter* para); - - protected: - std::unique_ptr averager_; - - /** - * @brief The thread worker which do model average. - * - * For each parameter, GPU->CPU parameter is async, and do model average in - * another thread. Because the training process don't need model average while - * training, and model average only used in evaluation stage and saving stage. - * So the model average is totally async. - */ - ThreadWorker updateWorker_; - - /** - * @brief The parameter mirror in cpu. - */ - std::vector cpuParameters_; - - /** - * @brief GPU -> CPU copy event. Model average will wait after copy done. - */ - std::vector copyEvents_; -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/RemoteParameterUpdater.cpp b/paddle/legacy/trainer/RemoteParameterUpdater.cpp deleted file mode 100644 index 5de1cc7827aa8f219de60fe9da67fbb0595eb1d5..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/RemoteParameterUpdater.cpp +++ /dev/null @@ -1,843 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "RemoteParameterUpdater.h" -#include "Trainer.h" -#include "paddle/legacy/utils/GlobalConstants.h" -#include "paddle/legacy/utils/Stat.h" - -DECLARE_int32(trainer_id); -DECLARE_string(save_dir); - -namespace paddle { - -static const hl_stream_t kDeviceToHostStream = HPPL_STREAM_1; -static const hl_stream_t kHostToDeviceStream = HPPL_STREAM_2; -static const int kFinishBatchPid = -1; - -const std::string RemoteParameterUpdater::kAverage = "average"; -const std::string RemoteParameterUpdater::kElasticAverage = "elastic_average"; - -RemoteParameterUpdater::RemoteParameterUpdater( - const OptimizationConfig& config, - int expectedPassCount, - std::unique_ptr&& localUpdater) - : config_(config), - localUpdater_(std::move(localUpdater)), - numBatches_(0), - passCount_(0), - expectedPassCount_(expectedPassCount), - separateSendAndRecv_(false), - isFirstPass_(true), - useApplyInPserver_(false) { - addParameterType(PARAMETER_MOMENTUM); -} - -void RemoteParameterUpdater::init(const std::vector& parameters) { - ParameterUpdater::init(parameters); - - if (localUpdater_) { - localUpdater_->init(parameters); - - for (auto& parameter : parameters) { - parameter->enableType(PARAMETER_DELTA); - } - - CHECK(config_.center_parameter_update_method() == kAverage || - config_.center_parameter_update_method() == kElasticAverage) - << "unknown center_parameter_update_method"; - - // modify delta_add_rate - CHECK_GT(FLAGS_num_gradient_servers, 1) - << "FLAGS_num_gradient_servers should be set in trainer args."; - real delta_add_rate = config_.delta_add_rate() / 
FLAGS_num_gradient_servers; - config_.set_delta_add_rate(delta_add_rate); - LOG(INFO) << "center parameter in pserver," - << " modify delta_add_rate=" << delta_add_rate; - } - - if (!FLAGS_use_gpu) { - cpuParameters_ = parameters; - } else { - for (auto& parameter : parameters) { - cpuParameters_.emplace_back(new Parameter(parameter->getConfig(), - /* useGpu= */ false)); - cpuParameters_.back()->setID(parameter->getID()); - if (localUpdater_) { - cpuParameters_.back()->enableType(PARAMETER_DELTA); - } - } - } - - parameterClient_.reset(new ParameterClient2(separateSendAndRecv_)); - parameterClient_->init(cpuParameters_); - parameterClient_->setTrainerId(FLAGS_trainer_id); - - if (FLAGS_trainer_id == 0) { - parameterClient_->setConfig(config_); - copyParametersFromDevice(PARAMETER_VALUE); - parameterClient_->setParameter(); - parameterClient_->setStatus(PSERVER_STATUS_PARAMETER_READY); - } else { - parameterClient_->waitForStatus(PSERVER_STATUS_PARAMETER_READY); - parameterClient_->getParameter(); - copyParametersToDevice(PARAMETER_VALUE); - } - if (FLAGS_trainer_id == 0 && - (config_.algorithm() != TrainAlgorithm::AsyncSGD)) { - startController(); - useApplyInPserver_ = useApplyInPserver(config_); - } -} - -void RemoteParameterUpdater::startController() { - controllerThread_.reset(new std::thread([this]() { this->controller(); })); -} - -void RemoteParameterUpdater::controller() { - ParameterClient2 client(false); - client.init(cpuParameters_); - while (true) { - /*start pass*/ { - client.waitPassStart(); - - PreparedOperations ops; - ops.addOperation(PSERVER_OP_START_PASS); - client.doOperation(ops, - /* waitForGradient= */ false, - /* sendBackarameter= */ false, - /* releasePass= */ false); - } - - while (true) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_SGD); - client.doOperation(ops, - /* waitForGradient= */ true, - /* sendBackarameter= */ true, - /* releasePass= */ false); - if (client.isPassFinish()) { - break; - } - } - - /*finish pass*/ { - 
PreparedOperations ops; - ops.addOperation(PSERVER_OP_FINISH_PASS); - client.doOperation(ops, - /* waitForGradient= */ true, - /* sendBackarameter= */ true, - /* releasePass= */ true); - } - - passCount_++; - if (passCount_ == expectedPassCount_) { - break; - } - } -} - -void RemoteParameterUpdater::copyParametersToDevice( - ParameterType parameterType) { - if (!FLAGS_use_gpu) { - return; - } - int numParameters = cpuParameters_.size(); - for (int i = 0; i < numParameters; ++i) { - parameters_[i] - ->getBuf(parameterType) - ->copyFrom(*cpuParameters_[i]->getBuf(parameterType)); - if (parameterType == PARAMETER_VALUE) { - parameters_[i]->setValueUpdated(); - } - } -} - -void RemoteParameterUpdater::copyParametersFromDevice( - ParameterType parameterType) { - if (!FLAGS_use_gpu) { - return; - } - int numParameters = cpuParameters_.size(); - for (int i = 0; i < numParameters; ++i) { - cpuParameters_[i] - ->getBuf(parameterType) - ->copyFrom(*parameters_[i]->getBuf(parameterType)); - } -} - -void RemoteParameterUpdater::updateImpl(Parameter* para) { - REGISTER_TIMER("update"); - if (localUpdater_) { - localUpdater_->update(para); - } -} - -void RemoteParameterUpdater::finishBatch(real cost) { - if (localUpdater_) { - localUpdater_->finishBatch(cost); - } - - const std::string& algorithm = config_.algorithm(); - ParameterUpdateMode mode; - if (algorithm == TrainAlgorithm::AsyncSGD) { - mode = PSERVER_UPDATE_MODE_ASYNC_SGD; - } else if (algorithm == TrainAlgorithm::SGD) { - mode = PSERVER_UPDATE_MODE_ADD_GRADIENT; - } else { - LOG(FATAL) << "Unknown algorithm: " << algorithm; - } - - ParameterType sendType; - bool sendBackParameter = true; - if (localUpdater_) { - ++numBatches_; - if (numBatches_ % config_.num_batches_per_send_parameter() != 0) { - return; - } - - if (config_.center_parameter_update_method() == kElasticAverage) { - parameterClient_->getParameter(PARAMETER_DELTA); - copyParametersToDevice(PARAMETER_DELTA); - sendBackParameter = false; // no need send back 
after send - - // calc delta - for (auto& para : parameters_) { - // DELTA = LOCAL_VALUE - CENTER_VALUE/*store in DELTA*/ - para->getBuf(PARAMETER_DELTA) - ->add(*para->getBuf(PARAMETER_VALUE), -1.0f, 1.0f); - - // when delta send to pserver, pserver will do: - // CENTER_VALUE += alpha * (LOCAL_VALUE - CENTER_VALUE) - } - } else { - // calc delta - for (auto& para : parameters_) { - // DELTA = NEW_VALUE - OLD_VALUE/*store in DELTA*/ - para->getBuf(PARAMETER_DELTA) - ->add(*para->getBuf(PARAMETER_VALUE), -1.0f, 1.0f); - } - } - - sendType = PARAMETER_DELTA; - - } else { - // In this case, we perform SGD on pserver. - sendType = PARAMETER_GRADIENT; - } - - copyParametersFromDevice(sendType); - - { - REGISTER_TIMER("sendAndRecv_dense"); - parameterClient_->sendAndReceiveParameter(mode, - sendType, - batchSize_, - 0, // cost = 0 - sendBackParameter); - } - - if (sendBackParameter) { - copyParametersToDevice(PARAMETER_VALUE); - } - - if (localUpdater_) { - if (config_.center_parameter_update_method() == kElasticAverage) { - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - // LOCAL_VALUE += -alpha * (LOCAL_VALUE - CENTER_VALUE) - para->getBuf(PARAMETER_VALUE) - ->add(*para->getBuf(PARAMETER_DELTA), -config_.delta_add_rate()); - } - - } else { // average - // copy value to delta - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - para->getBuf(PARAMETER_DELTA)->copyFrom(*para->getBuf(PARAMETER_VALUE)); - } - } - } else { - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - para->getBuf(sendType)->zeroMem(); - } - } -} - -void RemoteParameterUpdater::startPass() { - if (config_.algorithm() == TrainAlgorithm::SGD) { - parameterClient_->waitPassStart(); - } else { - // sync could benifits reducing lagged trainer for async-sgd - // even if sync could not remove all lagged trainer for the - // sake of file loading, buffer etc. 
- parameterClient_->asyncStartPass(); - } - - if (localUpdater_) { - localUpdater_->startPass(); - numBatches_ = 0; - - if (config_.center_parameter_update_method() == kElasticAverage) { - if (!isFirstPass_) { - // restore local value from delta - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - para->getBuf(PARAMETER_VALUE) - ->copyFrom(*para->getBuf(PARAMETER_DELTA)); - } - } - } else { // average - // copy value to delta - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - para->getBuf(PARAMETER_DELTA)->copyFrom(*para->getBuf(PARAMETER_VALUE)); - } - } - } -} - -bool RemoteParameterUpdater::finishPass() { - if (localUpdater_) { - localUpdater_->finishPass(); - } - - if (config_.algorithm() == TrainAlgorithm::SGD) { - parameterClient_->waitPassFinish(); - } else { - parameterClient_->asyncFinishPass(); - } - if (localUpdater_) { - if (config_.center_parameter_update_method() == kElasticAverage) { - // backup local value to delta as we will get - // the remote parameter for saving/testing - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - para->getBuf(PARAMETER_DELTA)->copyFrom(*para->getBuf(PARAMETER_VALUE)); - } - } - } - parameterClient_->getParameter(); - copyParametersToDevice(PARAMETER_VALUE); - - isFirstPass_ = false; - return true; -} - -void RemoteParameterUpdater::apply() { - if (useApplyInPserver_) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_APPLY); - parameterClient_->doOperation(ops, - /* waitForGradient= */ false, - /* sendBackarameter= */ false); - parameterClient_->getParameter( - /* recvParameterType= */ PARAMETER_VALUE, - /* sendBackParameterType= */ PARAMETER_APPLY); - copyParametersToDevice(PARAMETER_VALUE); - } -} - -void RemoteParameterUpdater::restore() { - if (useApplyInPserver_) { - parameterClient_->getParameter(); - copyParametersToDevice(PARAMETER_VALUE); - } -} - -ConcurrentRemoteParameterUpdater::ConcurrentRemoteParameterUpdater( - 
OptimizationConfig config, - int passCount, - std::unique_ptr&& localUpdater) - : RemoteParameterUpdater(config, passCount, std::move(localUpdater)) { - sendThread_.reset(new std::thread([this]() { this->send(); })); - recvThread_.reset(new std::thread([this]() { this->recv(); })); - - stopping_ = false; - oneBatchFinished_ = false; - separateSendAndRecv_ = true; -} - -ConcurrentRemoteParameterUpdater::~ConcurrentRemoteParameterUpdater() { - stopping_ = true; - sendQueue_.enqueue(0); - sendThread_->join(); - recvQueue_.enqueue(0); - recvThread_->join(); -} - -void ConcurrentRemoteParameterUpdater::finishBatch(real cost) { - if (localUpdater_) { - localUpdater_->finishBatch(cost); - - if (!needToUpdateRemotely()) { - ++numBatches_; - return; - } - } - - sendQueue_.enqueue(kFinishBatchPid); - - finishBatchCond_.wait([this]() { return oneBatchFinished_; }); - oneBatchFinished_ = false; - { - REGISTER_TIMER("sync_hostToDeviceStream"); - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - hl_stream_synchronize(kHostToDeviceStream); - } - } - - if (localUpdater_) { - ++numBatches_; - } -} - -// Use para=NULL to signal the end of one batch -void ConcurrentRemoteParameterUpdater::send(Parameter* para) { - const std::string& algorithm = config_.algorithm(); - ParameterUpdateMode mode; - if (algorithm == TrainAlgorithm::AsyncSGD) { - mode = PSERVER_UPDATE_MODE_ASYNC_SGD; - } else if (algorithm == TrainAlgorithm::SGD) { - mode = PSERVER_UPDATE_MODE_ADD_GRADIENT; - } else { - LOG(FATAL) << "Unknown algorithm: " << algorithm; - } - ParameterType sendType; - if (localUpdater_) { - sendType = PARAMETER_DELTA; - } else { - // In this case, we perform SGD on pserver. 
- sendType = PARAMETER_GRADIENT; - } - std::vector paraSegment; - if (para == NULL) { - parameterClient_->sendParameter( - mode, - sendType, - paraSegment, - batchSize_, - 0, // cost=0 - true, // sendBackParameter = true - batchStatus_); // batchStatus_ = BATCH_FINISH - - } else { - ParameterSegments paraSegTemp; - paraSegment.reserve(1); - paraSegTemp.name = para->getName(); - paraSegTemp.id = para->getID(); - paraSegment.push_back(paraSegTemp); - { - SetDevice device(para->getDeviceId()); - REGISTER_TIMER("copySingleParaFromDevice"); - copySingleParaFromDevice(para, sendType); - hl_stream_synchronize(kDeviceToHostStream); - } - parameterClient_->sendParameter(mode, - sendType, - paraSegment, - batchSize_, - 0, // cost=0 - true, // sendBackParameter = true - batchStatus_); - if (batchStatus_ == BATCH_START) batchStatus_ = BATCH_ON; - } -} -void ConcurrentRemoteParameterUpdater::recv(Parameter* para) { - parameterClient_->recvParameter(); - if (para != NULL) { - REGISTER_TIMER("copySingleParaToDevice"); - SetDevice device(para->getDeviceId()); - copySingleParaToDevice(para, PARAMETER_VALUE); - - if (localUpdater_) { - para->getBuf(PARAMETER_DELTA)->copyFrom(*para->getBuf(PARAMETER_VALUE)); - } else { - // if cpu, parameter should not changes until recvParameter(). 
- // if gpu, zero mem when send finish - if (!FLAGS_use_gpu) { - para->getBuf(PARAMETER_GRADIENT)->zeroMem(); - } - } - } -} - -void ConcurrentRemoteParameterUpdater::recv() { - if (FLAGS_use_gpu) hl_set_device(FLAGS_gpu_id); - StatPtr stat = getStat("recv"); - FOR_TIMING(Timer timer); - while (true) { - int pid; - { - REGISTER_TIMER("recv_dequeue"); - pid = recvQueue_.dequeue(); - } - if (pid == kFinishBatchPid) { - Parameter* para = NULL; - FOR_TIMING(timer.start()); - recv(para); - FOR_TIMING(timer.stop()); - FOR_TIMING(stat->addSample(timer.get())); - FOR_TIMING(timer.reset()); - finishBatchCond_.notify_all([this] { oneBatchFinished_ = true; }); - } else { - if (stopping_) break; - Parameter* para = parameters_[pid].get(); - FOR_TIMING(timer.start()); - recv(para); - FOR_TIMING(timer.stop()); - oneBatchFinished_ = false; - } - } -} - -void ConcurrentRemoteParameterUpdater::send() { - if (FLAGS_use_gpu) hl_set_device(FLAGS_gpu_id); - StatPtr stat = getStat("send"); - FOR_TIMING(Timer timer); - while (true) { - int pid; - { - REGISTER_TIMER("send_dequeue"); - pid = sendQueue_.dequeue(); - } - if (pid == kFinishBatchPid) { - batchStatus_ = BATCH_FINISH; - if (!localUpdater_) { - // if cpu, parameter should not changes until recvParameter(). - // if gpu, zeroMem() at the end of batch so that it won't - // interfere with computation. 
- if (FLAGS_use_gpu) { - REGISTER_TIMER("para_zeroMem"); - for (auto& para : parameters_) { - SetDevice device(para->getDeviceId()); - para->getBuf(PARAMETER_GRADIENT)->zeroMem(); - } - } - } - Parameter* para = NULL; - FOR_TIMING(timer.start()); - send(para); - FOR_TIMING(timer.stop()); - FOR_TIMING(stat->addSample(timer.get())); - FOR_TIMING(timer.reset()); - recvQueue_.enqueue(pid); - } else { - if (stopping_) break; - Parameter* para = parameters_[pid].get(); - if (localUpdater_) { - // DELTA = NEW_VALUE - OLD_VALUE/*store in DELTA*/ - para->getBuf(PARAMETER_DELTA) - ->add(*para->getBuf(PARAMETER_VALUE), -1.0f, 1.0f); - } - FOR_TIMING(timer.start()); - send(para); - FOR_TIMING(timer.stop()); - recvQueue_.enqueue(nonStaticParaIDMap_[para->getID()]); - } - } -} - -void ConcurrentRemoteParameterUpdater::updateImpl(Parameter* para) { - REGISTER_TIMER("update"); - if (localUpdater_) { - localUpdater_->update(para); - if (!needToUpdateRemotely()) { - return; - } - } - sendQueue_.enqueue(nonStaticParaIDMap_[para->getID()]); -} - -void ConcurrentRemoteParameterUpdater::copySingleParaToDevice( - Parameter* para, ParameterType parameterType) { - if (!FLAGS_use_gpu) { - return; - } - int i = nonStaticParaIDMap_[para->getID()]; - para->getBuf(parameterType) - ->copyFrom(*cpuParameters_[i]->getBuf(parameterType), - kHostToDeviceStream); - if (parameterType == PARAMETER_VALUE) { - para->setValueUpdated(); - } -} - -void ConcurrentRemoteParameterUpdater::copySingleParaFromDevice( - Parameter* para, ParameterType parameterType) { - if (!FLAGS_use_gpu) { - return; - } - int i = nonStaticParaIDMap_[para->getID()]; - cpuParameters_[i] - ->getBuf(parameterType) - ->copyFrom(*para->getBuf(parameterType), kDeviceToHostStream); -} - -SparseRemoteParameterUpdater::SparseRemoteParameterUpdater( - const OptimizationConfig& config, int expectedPassCount, bool testing) - : config_(config), - passCount_(0), - expectedPassCount_(expectedPassCount), - testing_(testing), - 
useApplyInPserver_(false) {} - -void SparseRemoteParameterUpdater::init( - const std::vector& parameters) { - ParameterUpdater::init(parameters); - - parameterClient_.reset(new ParameterClient2( - false, FLAGS_port + FLAGS_ports_num, FLAGS_ports_num_for_sparse)); - parameterClient_->init(parameters_); - parameterClient_->setTrainerId(FLAGS_trainer_id); - - if (FLAGS_trainer_id == 0) { - parameterClient_->setConfig( - config_, FLAGS_save_dir, true /*is_sparse_server*/); - if (parameters[0]->isFullSize()) { - parameterClient_->setParameter(); - } else { // init in pserver - parameterClient_->setParameterZero(); - } - } - if (FLAGS_trainer_id == 0 && !testing_ && - config_.algorithm() == TrainAlgorithm::SGD) { - startController(); - useApplyInPserver_ = useApplyInPserver(config_); - } -} - -void SparseRemoteParameterUpdater::startController() { - controllerThread_.reset(new std::thread([this]() { this->controller(); })); -} - -void SparseRemoteParameterUpdater::controller() { - ParameterClient2 client( - false, FLAGS_port + FLAGS_ports_num, FLAGS_ports_num_for_sparse); - client.init(parameters_); - - while (true) { - /*start pass*/ { - client.waitPassStart(); - - PreparedOperations ops; - ops.addOperation(PSERVER_OP_START_PASS); - client.doOperation(ops, - /* waitForGradient= */ false, - /* sendBackarameter= */ false, - /* releasePass= */ false); - } - - while (true) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_SGD); - client.doOperation(ops, - /* waitForGradient= */ true, - /* sendBackarameter= */ true, - /* releasePass= */ false); - if (client.isPassFinish()) { - break; - } - } - - /*finish pass*/ { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_FINISH_PASS); - client.doOperation(ops, - /* waitForGradient= */ true, - /* sendBackarameter= */ true, - /* releasePass= */ true); - } - - passCount_++; - if (passCount_ == expectedPassCount_) { - break; - } - } -} - -PassType SparseRemoteParameterUpdater::startBatch(int64_t batchSize) { - batchSize_ = 
batchSize; - return PASS_TRAIN; -} - -void SparseRemoteParameterUpdater::finishBatch(real cost) { - const std::string& algorithm = config_.algorithm(); - ParameterUpdateMode mode; - if (algorithm == TrainAlgorithm::AsyncSGD) { - mode = PSERVER_UPDATE_MODE_ASYNC_SGD; - } else if (algorithm == TrainAlgorithm::SGD) { - mode = PSERVER_UPDATE_MODE_ADD_GRADIENT; - } else { - LOG(FATAL) << "Unknown algorithm: " << algorithm; - } - - ParameterType sendType = PARAMETER_GRADIENT; - - REGISTER_TIMER("sendSparseParam"); - parameterClient_->sendAndReceiveParameter(mode, - sendType, - batchSize_, - 0, // cost = 0 - false); // sendBackParameter - - // grad zero move to sgd grad machine, before merge grad sparse remote -} - -void SparseRemoteParameterUpdater::startPass() { - if (config_.algorithm() == TrainAlgorithm::SGD) { - parameterClient_->waitPassStart(); - } else { - if (FLAGS_trainer_id == 0) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_START_PASS); - parameterClient_->doOperation(ops, - /* waitForGradient= */ false, - /* sendBackarameter= */ false); - } - parameterClient_->asyncStartPass(); - } -} - -bool SparseRemoteParameterUpdater::finishPass() { - if (config_.algorithm() == TrainAlgorithm::SGD) { - parameterClient_->waitPassFinish(); - } else { - if (FLAGS_trainer_id == 0) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_FINISH_PASS); - parameterClient_->doOperation(ops, - /* waitForGradient= */ false, - /* sendBackarameter= */ false); - } - parameterClient_->asyncFinishPass(); - } - - return true; -} - -// Trainer will call getParametersRemote at batch start or before save, -// so we do not get values in apply() and restore(). 
-void SparseRemoteParameterUpdater::apply() { - if (useApplyInPserver_) { - PreparedOperations ops; - ops.addOperation(PSERVER_OP_APPLY); - parameterClient_->doOperation(ops, - /* waitForGradient= */ false, - /* sendBackarameter= */ false); - } -} - -void SparseRemoteParameterUpdater::restore() {} - -void SparseRemoteParameterUpdater::getParametersRemote(bool fullSize, - bool apply) { - ParameterType sendBackParameterType = - (useApplyInPserver_ && apply) ? PARAMETER_APPLY : PARAMETER_VALUE; - std::function getParams; - std::function applyL1; - if (fullSize) { - getParams = [&] { - parameterClient_->getParameter( - /* recvParameterType= */ PARAMETER_VALUE, sendBackParameterType); - }; - applyL1 = [](Parameter& para, real decayRate) { - para.getBuf(PARAMETER_VALUE)->applyL1(/*lr=*/1.0f, decayRate); - }; - } else { - getParams = [&] { - parameterClient_->getParameterSparse( - /* recvParameterType= */ PARAMETER_VALUE, sendBackParameterType); - }; - applyL1 = [](Parameter& para, real decayRate) { - para.getMat(PARAMETER_VALUE)->applyL1(/*lr=*/1.0f, decayRate); - }; - } - { - REGISTER_TIMER("getParamDenseAndSparse"); - getParams(); - if (config_.shrink_parameter_value() > 0) { - for (auto& para : parameters_) { - if (para->getConfig().decay_rate_l1() > 0) { - applyL1(*para, config_.shrink_parameter_value()); - } - } - } - } -} - -void SparseRemoteParameterUpdater::randParametersRemote() { - CHECK_EQ(FLAGS_trainer_id, 0); - - PreparedOperations ops; - ops.addOperation(PSERVER_OP_RANDOMIZE); - parameterClient_->doOperation(ops, - /* waitForGradient= */ false, - /* sendBackarameter= */ false); -} - -void SparseRemoteParameterUpdater::loadParametersRemote( - const std::string& dirName) { - if (FLAGS_trainer_id == 0) { - parameterClient_->loadValueVector(dirName); - } - - if (testing_) { - // we do not use synchronize() here, - // because test mode may run only one tester - if (FLAGS_trainer_id == 0) { - parameterClient_->setStatus(PSERVER_STATUS_PARAMETER_READY); - } else { 
- parameterClient_->waitForStatus(PSERVER_STATUS_PARAMETER_READY); - } - } -} - -void SparseRemoteParameterUpdater::saveParametersRemote( - const std::string& dirName) { - if (FLAGS_trainer_id == 0) { - parameterClient_->saveValueVector(dirName); - } -} - -void SparseRemoteParameterUpdaterComposite::init( - const std::vector& parameters) { - parameters_ = parameters; - - std::vector parametersArray[NUMBER_UPDATERS]; - - for (auto& para : parameters_) { - if (para->isSparseRemoteUpdate()) { - parametersArray[UPDATER_SPARSE_REMOTE].push_back(para); - } else { - parametersArray[UPDATER_NORMAL].push_back(para); - } - } - CHECK(!parametersArray[UPDATER_SPARSE_REMOTE].empty()); - CHECK(!parametersArray[UPDATER_NORMAL].empty()); - - syncThreadPool_->execPlusOwner([&](int tid, size_t numThreads) { - updaters_[tid]->init(parametersArray[tid]); - }); - - parameterTypes_ = updaters_[UPDATER_NORMAL]->getParameterTypes(); -} - -std::vector> - ParameterUpdaterCreators::constructors_; - -} // namespace paddle diff --git a/paddle/legacy/trainer/RemoteParameterUpdater.h b/paddle/legacy/trainer/RemoteParameterUpdater.h deleted file mode 100644 index 68468532981a49ef32f5f0da1170815d657d86c1..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/RemoteParameterUpdater.h +++ /dev/null @@ -1,416 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include "ParameterUpdater.h" -#include "paddle/legacy/pserver/ParameterClient2.h" -#include "paddle/legacy/utils/Queue.h" -#include "paddle/legacy/utils/Util.h" - -namespace paddle { - -// TODO(yanfei): -// I think that the biggest feature of rdma is packet lossless control -// feature instead of high bandwiths, zero copy and gpu-direct rdma in -// theroy. -// But zero-copy and gpu-direct rdma features can help to reduce latency -// caused by os system. -// So, for some specified cluster, such as high density gpu cluster, -// gpu-direct and zero copy could help to improve cluster communication -// performance. -// - -/** - * Normal remote parameter updater for dense parameters. - * - * It first packs all parameters for all pservers using ParameterClient - * module, then wait for merged parameters data from all pservers. - * The synchronization pattern specified by sync-sgd or async-sgd is - * achieved by all pservers with the help of the controller within this - * remote parameter updater. - * This module indeedly bridges the gradient machines and parameter servers. - * It helps to transfer the parameters from acceleration device to cpu end - * for network. It contains additional parameters copy buffers for - * acceleration devices at cpu end, such as gpu, otherwise it will - * directly use original parameters data to update pservers. - * - * This remote parameter updater does not use pipeline mechanism to hide - * copy latency from gpu to cpu buffer. In addition the overlapped between - * backward and communication is not supported. - */ -class RemoteParameterUpdater : public ParameterUpdater { - public: - RemoteParameterUpdater( - const OptimizationConfig& config, - int expectedPassCount, - std::unique_ptr&& localUpdater = nullptr); - ~RemoteParameterUpdater() { - if (controllerThread_) { - controllerThread_->join(); - } - } - - /** - * initialize the internal parameter client and itself. 
- */ - virtual void init(const std::vector& parameters); - /** - * @brief start batch - * - * @note one batch training exhibits stateful feature to help - * to do performance tuning, sgd optimization if necessary. - */ - virtual PassType startBatch(int64_t batchSize) { - if (localUpdater_) { - localUpdater_->startBatch(batchSize); - } - batchSize_ = batchSize; - batchStatus_ = BATCH_START; - return PASS_TRAIN; - } - - /** - * send parameters to pservers and get returned parameters - * from all pservers if necessary. it will implictly - * cooperate with controller thread for sync-sgd. - */ - virtual void finishBatch(real cost); - virtual void startPass(); - virtual bool finishPass(); - -#ifndef PADDLE_DISABLE_TIMER - virtual void setForwardbackwardTime(uint64_t delta) { - parameterClient_->setForwardbackwardTime(delta); - } -#endif - - virtual void apply(); - virtual void restore(); - - protected: - /** - * control all pservers with all trainers for sync-sgd - */ - virtual void controller(); - - /** - * work need to do after finishBatch - */ - virtual void updateImpl(Parameter* para); - - void startController(); - - /** - * @brief copy parameters from cpu host to device, such as gpu. - * - * @note return if all data are transfered. - */ - void copyParametersToDevice(ParameterType parameterType); - - /** - * @brief copy parameters from device to cpu host - * - * @note return if all data are transfered - */ - void copyParametersFromDevice(ParameterType parameterType); - - protected: - /// Optimization config used to guide initialization and finishBatch - OptimizationConfig config_; - /// internal parameter client object for exchanging data with pserver - std::unique_ptr parameterClient_; - /// internal shadow buffer at cpu host end, use original parameters_ - /// if no acceleration devices are used. 
- std::vector cpuParameters_; - /// local updater for aggregating multi-batches local delta - std::unique_ptr localUpdater_; - /// the size of mini-batch - int64_t batchSize_; - /// batches passed - int64_t numBatches_; - /// for stateful control - BatchStatus batchStatus_; - /// controller thread for sync-sgd - std::unique_ptr controllerThread_; - /// passed already finished - int64_t passCount_; - /// expected passes to finished - int64_t expectedPassCount_; - /// use normal synchronization communication if True - bool separateSendAndRecv_; - /// true if it's first pass - bool isFirstPass_; - bool useApplyInPserver_; - - static const std::string kAverage; - static const std::string kElasticAverage; -}; - -// TODO(yanfei): -// do parameters level synchronization Optimization at pserver end with -// ConcurrentRemoteParameterUpdater to get more parallelization, at last -// to really hide pserver latency in backward computation. -// -/** - * This updater add additional optimization for overlapping synchronization - * from pservers with backward computation. - * - * Parameter can be sent to pservers when related backward stage is finished. - * This concurrent udpater does data copy from acceleration device to host - * memory aynchronously. In addition internal parameter client reads data in - * host memory and send them to all pservers in next stage. So this class - * help to pipeline device-to-host copy and host-to-network to hide network - * latency in backward stage. - * It contains separate send and recv thread for pipeline usage. - */ -class ConcurrentRemoteParameterUpdater : public RemoteParameterUpdater { - public: - ConcurrentRemoteParameterUpdater( - OptimizationConfig config, - int expectedPassCount, - std::unique_ptr&& localUpdater); - ~ConcurrentRemoteParameterUpdater(); - - /** - * @brief send paraemeters to all pservers - * - * @note it just signal the end signal to internal parameter client - * to finished the aynchronous send action. 
In addition it also - * do synchronization for all asynchronous host-to-device copy. - */ - virtual void finishBatch(real cost); - - protected: - virtual void updateImpl(Parameter* para); - /// internal thread called in send thread - void send(Parameter* para); // para == NULL indicate end of a minibatch - /// internal function called in recv thread - void recv(Parameter* para); - /** - * @brief send thread for relaying data from gradient to parameter client - * - * @note just pipe data to internal parameter client for pipeline - */ - void send(); - /** - * @brief recv thread for relaying data from internal parameter client to - * host memory - * - * @note it contains the asynchronous data copy form host to device - */ - void recv(); - /// copy specified parameter from host to device - void copySingleParaToDevice(Parameter* para, ParameterType parameterType); - /// copy specified parameter from device to host - void copySingleParaFromDevice(Parameter* para, ParameterType parameterType); - bool needToUpdateRemotely() { - return (numBatches_ + 1) % config_.num_batches_per_send_parameter() == 0; - } - - private: - /// send thread used for overlapping - std::unique_ptr sendThread_; - /// recv thread used for overlapping - std::unique_ptr recvThread_; - /// buffer queue for overlapping - Queue sendQueue_; - /// buffer queue for overlapping - Queue recvQueue_; - /// flags indicating to stop - bool stopping_; - /// conditional variable for threads synchronization between the - /// thread calling finishBatch and internal recv thread - LockedCondition finishBatchCond_; - bool oneBatchFinished_; -}; - -// TODO(yanfei): -// merge sparse updater with dense updater, and could help to reduce -// the synchronization between sparse and dense udpater. it could also -// reduce the threads for managing all connections. -/** - * This class is specified for updating sparse parameters. - * - * It allows part of parameter to be exchanged with all pservers. 
- * If sparse input assigned, part gradients of first hidden layer - * could remained zero which can not need to be exchanged within - * all pservers. This is the key optimization point for this updater - * - * For updating sparse parameters, all latest parameters are stored - * in pservers instead of keeping full copy at train end, so need to - * prefetch parameters weight value which can be changed in next-batch - * before doing next forwardbackward. Also, with above fact that the - * parameters can be stored in pserver instead of trainer, we can - * fetch specified parmeters if necessary, and can support huge - * parameters which is larger enough than the RAM size in single - * node. - * - * Internally, this updater will direct internal parameter client - * to encapsulate sparse specified message for all pservers. - */ -class SparseRemoteParameterUpdater : public ParameterUpdater { - public: - SparseRemoteParameterUpdater(const OptimizationConfig& config, - int expectedPassCount, - bool testing); - ~SparseRemoteParameterUpdater() { - if (controllerThread_) { - controllerThread_->join(); - } - } - - /// initialization - virtual void init(const std::vector& parameters); - - /// stateful batch control - virtual PassType startBatch(int64_t batchSize); - /// send all sparse related parameters to all pservers - virtual void finishBatch(real cost); - virtual void startPass(); - virtual bool finishPass(); - - virtual void apply(); - virtual void restore(); - - /// load parameters from pservers - virtual void loadParametersRemote(const std::string& dirName); - /// save parameters to pservers - virtual void saveParametersRemote(const std::string& dirName); - /** - * @brief get latest sparse parameters value from all pservers - * - * @note call it before next mini-batch - */ - virtual void getParametersRemote(bool fullSize, bool apply); - virtual void randParametersRemote(); -#ifndef PADDLE_DISABLE_TIMER - virtual void setForwardbackwardTime(uint64_t delta) { - 
parameterClient_->setForwardbackwardTime(delta); - } -#endif - - protected: - /// update implimentation, not implemented - virtual void updateImpl(Parameter* para) {} - - /// internal controller routine for controller thread - virtual void controller(); - - /// start controller thread - void startController(); - - protected: - /// optimization config - OptimizationConfig config_; - /// internal parameter client - std::unique_ptr parameterClient_; - int64_t batchSize_; - std::unique_ptr controllerThread_; - int64_t passCount_; - int64_t expectedPassCount_; - bool testing_; - bool useApplyInPserver_; -}; - -/** - * Class for supporting normal updater and sparse updater - * - * Not all parts of one model are sparse, so it exists dense updater - * for normal layers while sparse updater is for sparse layers. - * - * it directly call internal dense and sparse udpater individually. - */ -class SparseRemoteParameterUpdaterComposite : public ParameterUpdaterComposite { - public: - enum { - UPDATER_SPARSE_REMOTE = 0, // execute in sync thread pool(tid:0) - UPDATER_NORMAL = 1, // execute in Owner thread(tid:1) - NUMBER_UPDATERS = 2, - }; - /** - * @brief create one dense updater and one sparse updater - * - * @note use syncThreadPool to synchronize these two updaters - */ - SparseRemoteParameterUpdaterComposite( - const OptimizationConfig& config, - int expectedPassCount, - bool testing, - std::unique_ptr&& normalUpdater) { - updaters_.resize(NUMBER_UPDATERS); - updaters_[UPDATER_SPARSE_REMOTE].reset( - new SparseRemoteParameterUpdater(config, expectedPassCount, testing)); - updaters_[UPDATER_NORMAL] = std::move(normalUpdater); - - syncThreadPool_.reset(new SyncThreadPool(NUMBER_UPDATERS - 1)); - } - - /// initialization of dense and sparse updaters - virtual void init(const std::vector& parameters); -}; - -class ParameterUpdaterCreators { - public: - /** - * @brief add a creator to create custom ParameterUpdater while training. 
- * The creator is a function with type (alogrithm, optConfig, isLocal, - * numPasses) -> ParameterUpdater*. Trainer will use this - * ParameterUpdater if creator can create a no nullptr - * ParameterUpdater. Return nullptr will use trainer's default - * updaters. - * - * @param creator method which can create ParameterUpdater. - */ - static void addCreator( - const std::function& creator) { // NOLINT explicit move closing ) in this line - // for readability - constructors_.push_back(creator); - } - - /** - * @brief Try to create an updater by given algo, optConfig, isLocal, - * numPasses. Return nullptr if cannot create anyone. - * @param algo algorithm string. - * @param optConfig optimization config. - * @param isLocal is in local mode or not. - * @param numPasses total passes that trainer will train. - * @return nullptr if fail, not nullptr if we can create an updater. - */ - static ParameterUpdater* tryCreateUpdater(const std::string& algo, - const OptimizationConfig& optConfig, - bool isLocal, - size_t numPasses) { - for (auto& c : constructors_) { - if (auto updater = c(algo, optConfig, isLocal, numPasses)) { - return updater; - } - } - return nullptr; - } - - private: - static std::vector> - constructors_; -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/Tester.cpp b/paddle/legacy/trainer/Tester.cpp deleted file mode 100644 index d977ca9657a7688c101ed060935c644e4876e6d1..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/Tester.cpp +++ /dev/null @@ -1,380 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Tester.h" - -#include -#include - -#include -#include -#include -#include - -#include - -#include "paddle/legacy/utils/GlobalConstants.h" -#include "paddle/legacy/utils/PythonUtil.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -#include "TesterConfig.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachineMode.h" -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/legacy/gserver/layers/ValidationLayer.h" - -namespace paddle { - -Tester::Tester(const std::shared_ptr& config, - std::unique_ptr&& intconfig, - const GradientMachinePtr& gradientMachine, - const std::shared_ptr& parameterUpdater, - std::shared_ptr testDataProvider) - : config_(config), - intconfig_(std::move(intconfig)), - gradientMachine_(gradientMachine), - parameterUpdater_(parameterUpdater), - testDataProvider_(testDataProvider) { - if (config_->getOptConfig().use_sparse_remote_updater()) { - LOG(FATAL) << "It's prohibited to set sparse_remote_update " - << "when doing train and test jobs in the same " - << "process. 
You could run paddle --job=test in " - << "a separate process."; - } - testEvaluator_.reset(gradientMachine_->makeEvaluator()); - if (intconfig_->distributeTest) { - testParameterClient_.reset(new ParameterClient2(true)); - } - - if (testParameterClient_) { - testParameterClient_->init(gradientMachine_->getParameters()); - } - - std::unique_ptr paramConfig( - new ParameterUtilConfig(intconfig_->saveOnlyOne, - intconfig_->savingPeriod, - intconfig_->loadsaveParametersInPserver, - intconfig_->config)); - - paramUtil_.reset(new ParameterUtil( - config_, std::move(paramConfig), gradientMachine_, parameterUpdater_)); -} - -void Tester::startTestPeriod() { - if (testDataProvider_) { - testDataProvider_->reset(); - } - testEvaluator_->start(); - testContext_.cost = 0; - testContext_.numSamples = 0; - - parameterUpdater_->apply(); - if (intconfig_->prevBatchState) { - gradientMachine_->getState(*intconfig_->trainState); - gradientMachine_->setState(*intconfig_->testState); - } -} - -void Tester::testOneDataBatch(const DataBatch& dataBatch, - std::vector* outArgs) { - testContext_.cost += - forwardOneBatch(dataBatch, testEvaluator_.get(), outArgs); - testContext_.numSamples += dataBatch.getSize(); -} - -void Tester::testOnePeriod() { - DataBatch dataBatch; - int64_t batchSize = config_->getOptConfig().batch_size(); - std::vector outArgs; - startTestPeriod(); - while (testDataProvider_->getNextBatch(batchSize, &dataBatch) != 0) { - testOneDataBatch(dataBatch, &outArgs); - } - finishTestPeriod(); -} - -void Tester::finishTestPeriod() { - if (intconfig_->prevBatchState) { - gradientMachine_->resetState(); - } - testEvaluator_->finish(); - CHECK_GT(testContext_.numSamples, 0) - << "There is no samples in your test batch. 
Possibly " - "wrong implementation of DataProvidor.reset()"; - LOG(INFO) << " Test samples=" << testContext_.numSamples - << " cost=" << testContext_.cost / testContext_.numSamples - << " Eval: " << *testEvaluator_; - parameterUpdater_->restore(); - if (intconfig_->prevBatchState) { - gradientMachine_->getState(*intconfig_->testState); - gradientMachine_->setState(*intconfig_->trainState); - } -} - -int64_t Tester::testOneBatchById(int64_t batchId) { - DataBatch dataBatch; - int32_t batchSize = config_->getOptConfig().batch_size(); - - testDataProvider_->getNextBatch(batchSize, &dataBatch); - - int64_t actualBatchSize = dataBatch.getSize(); - if (actualBatchSize == 0) { - return 0; - } - - std::vector outArgs; - - stats_ += std::pair{ - actualBatchSize, - forwardOneBatch(dataBatch, testEvaluator_.get(), &outArgs)}; - - if (((batchId + 1) % intconfig_->logPeriod) == 0) { - LOG(INFO) << " Batch=" << batchId + 1 << " " << stats_.getStats(false); - } - - return actualBatchSize; -} - -real Tester::forwardOneBatch(const DataBatch& dataBatch, - Evaluator* evaluator, - std::vector* pOutArgs) { - auto& outArgs = *pOutArgs; - const std::vector& inArgs = dataBatch.getStreams(); - if (intconfig_->loadsaveParametersInPserver) { - REGISTER_TIMER("prefetch"); - gradientMachine_->prefetch(inArgs); - parameterUpdater_->getParametersRemote(false /*full parameter*/, - true /*after apply*/); - } - - gradientMachine_->forward(inArgs, &outArgs, PASS_TEST); - - // write features if set this flag and outArgs is not empty - std::string featFile = intconfig_->featFile; - if (!featFile.empty() && outArgs.empty()) { - size_t numOutputs = outArgs.size(); - std::vector featMatrices; - featMatrices.resize(numOutputs); - for (size_t i = 0; i < numOutputs; ++i) { - featMatrices[i] = Matrix::create(outArgs[i].value->getHeight(), - outArgs[i].value->getWidth(), - false, - false); // CPU data buffer - featMatrices[i]->copyFrom(*(outArgs[i].value), HPPL_STREAM_DEFAULT); - } - 
hl_stream_synchronize(HPPL_STREAM_DEFAULT); - FILE* fp = fopen(featFile.c_str(), "ab+"); - CHECK(!ferror(fp)) << "Fail to open " << featFile; - - size_t sampleNum = featMatrices[0]->getHeight(); - for (size_t i = 0; i < sampleNum; ++i) { - for (size_t j = 0; j < numOutputs; ++j) { - size_t dim = featMatrices[j]->getWidth(); - fwrite(featMatrices[j]->getData() + i * dim, sizeof(real), dim, fp); - } - } - fclose(fp); - } - if (evaluator) { - gradientMachine_->eval(evaluator); - } - - // Save the output layers if predict_output_dir is not empty - std::string predictOutputDir = intconfig_->predictOutputDir; - if (!predictOutputDir.empty() && !outArgs.empty()) { - CHECK(intconfig_->testing) << "Only valid in test mode"; - if (!os_.is_open()) { - // TODO(yuyang18): Refactor these lines. - constexpr int kBufLen = 100; - char buf[kBufLen]; - snprintf(buf, kBufLen, "rank-%05d", intconfig_->trainerId); - mkDir(predictOutputDir.c_str()); - std::string filename = path::join(predictOutputDir, buf); - os_.open(filename, std::ofstream::trunc); - CHECK(os_.is_open()) << "Failed to open file " << filename; - } - printOutput(outArgs, os_); - return 0.0; // In this case, there is no meaning to calculate cost - } - - return Argument::sum(outArgs); -} - -void Tester::testOnePassBatch(int passId) { - stats_.reset(); - const std::vector inArgs; - gradientMachine_->forward(inArgs, nullptr, PASS_TEST); - int64_t num; - real cost; - gradientMachine_->getStats(cost, num); - stats_ += std::pair{num, cost}; - gradientMachine_->onPassEnd(); - - LOG(INFO) << " Pass=" << passId << " " << stats_.getStats(false); -} - -void Tester::testOnePass(int passId) { - stats_.reset(); - int64_t batchId = 0; - int num = 0; - if (intconfig_->prevBatchState) { - gradientMachine_->resetState(); - } - - testEvaluator_->start(); - - do { - num = testOneBatchById(batchId); - ++batchId; - } while (num > 0); - - gradientMachine_->onPassEnd(); - testEvaluator_->finish(); - - LOG(INFO) << " Pass=" << passId << " " << 
stats_.getStats(false) - << " Eval: " << *testEvaluator_; - - if (intconfig_->distributeTest) { - testEvaluator_->distributeEval(testParameterClient_.get()); - if (0 == intconfig_->trainerId) { - LOG(INFO) << "distribute eval: " << *testEvaluator_; - } - } -} - -void Tester::test() { - CHECK(testDataProvider_) << "TestData is not specified"; - testDataProvider_->setSkipShuffle(); - testDataProvider_->reset(); - gradientMachine_->start(); - - // For evaluation - std::vector modelList; - std::string modelListFromConfig = intconfig_->modelList; - std::string initModelPath = intconfig_->initModelPath; - if (!modelListFromConfig.empty()) { - loadFileList(modelListFromConfig, modelList); - intconfig_->testPass = 0; - intconfig_->numPasses = modelList.size(); - intconfig_->savingPeriod = 1; - CHECK_EQ(intconfig_->testWait, 0) << "--test_wait must be 0 for evaluation"; - } else if (!initModelPath.empty()) { - modelList.push_back(initModelPath); - intconfig_->testPass = 0; - intconfig_->numPasses = 1; - intconfig_->savingPeriod = 1; - CHECK_EQ(intconfig_->testWait, 0) << "--test_wait must be 0 for evaluation"; - } - - for (int i = intconfig_->testPass; i < intconfig_->numPasses; ++i) { - int passId = i; - if (passId % intconfig_->savingPeriod == 0) { - if (intconfig_->testWait) { - while (paramUtil_->loadParameters( - passId, true /*local*/, true /*remote*/) == false) { - LOG(INFO) << "Waiting for parameters of pass " << passId; - sleep(60); // sleep 60s - } - } else { - if (modelList.size() == 0) { - CHECK_EQ(paramUtil_->loadParameters( - passId, true /*local*/, true /*remote*/), - true); - } else { - paramUtil_->loadParametersWithPath( - modelList[i], true /*local*/, true /*remote*/); - } - } - if (IGradientMachineMode::trainWholeDataInOneBatch(intconfig_->mode)) { - testOnePassBatch(passId); - } else { - testOnePass(passId); - } - if (passId + intconfig_->savingPeriod < intconfig_->numPasses) { - // if there is at least 1 more pass to test, then call reset, - // 
otherwise not. - testDataProvider_->reset(); - } - } - } - - gradientMachine_->finish(); -} - -void Tester::printOutput(const std::vector& outArgs, - std::ostream& os) { - size_t numOutputs = outArgs.size(); - size_t numIns = outArgs[0].getBatchSize(); - if (cpuMat_.size() != numOutputs || cpuVec_.size() != numOutputs) { - cpuMat_.resize(numOutputs, nullptr); - cpuVec_.resize(numOutputs, nullptr); - } - - for (size_t i = 0; i < numOutputs; ++i) { - if (outArgs[i].value != nullptr) { - if (outArgs[i].value->useGpu()) { - if (dynamic_cast(outArgs[i].value.get())) { - size_t dim = outArgs[i].value->getWidth(); - Matrix::resizeOrCreate(cpuMat_[i], numIns, dim, false, false); - cpuMat_[i]->copyFrom(*outArgs[i].value); - } else if (dynamic_cast(outArgs[i].value.get())) { - auto sparseMat = - dynamic_cast(outArgs[i].value.get()); - cpuMat_[i] = Matrix::createSparseMatrix(sparseMat->getHeight(), - sparseMat->getWidth(), - sparseMat->getElementCnt(), - sparseMat->getValueType(), - sparseMat->format_, - false, /* trans */ - false); /* useGpu */ - hl_stream_t stream = HPPL_STREAM_DEFAULT; - cpuMat_[i]->copyFrom(*sparseMat, stream); - } else { - LOG(WARNING) << "Not supported gpu matrix type"; - } - } - } else if (outArgs[i].ids != nullptr) { - if (outArgs[i].ids->useGpu()) { - IVector::resizeOrCreate(cpuVec_[i], outArgs[i].ids->getSize(), false); - cpuVec_[i]->copyFrom(*outArgs[i].ids); - } - } else if (outArgs[i].strs != nullptr) { - continue; - } else { - LOG(WARNING) << "outArgs[" << i << "] has no data to print"; - } - } - - for (size_t i = 0; i < numIns; ++i) { - for (size_t j = 0; j < numOutputs; ++j) { - if (outArgs[j].value != nullptr) { - if (outArgs[j].value->useGpu()) { - cpuMat_[j]->printOneRow(os, i); - } else { - outArgs[j].value->printOneRow(os, i); - } - } else if (outArgs[j].ids != nullptr) { - if (outArgs[j].ids->useGpu()) { - cpuVec_[j]->printOneElement(os, i); - } else { - outArgs[j].ids->printOneElement(os, i); - } - } else if (outArgs[j].strs != nullptr) 
{ - os << (*outArgs[j].strs)[i] << ";"; - } - } - os << std::endl; - } -} -} // namespace paddle diff --git a/paddle/legacy/trainer/Tester.h b/paddle/legacy/trainer/Tester.h deleted file mode 100644 index a298602d1d0894af90c098818908862a553cb3e7..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/Tester.h +++ /dev/null @@ -1,149 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/legacy/utils/Util.h" - -#include - -#include "hl_gpu.h" -#include "paddle/legacy/gserver/dataproviders/DataProvider.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" - -#include "TrainerConfig.pb.h" - -#include -#include -#include "ParamUtil.h" -#include "ParameterUpdater.h" -#include "TesterConfig.h" -#include "TrainerInternalConfig.h" - -namespace paddle { - -/** - * Neural Network test logics code. - * It is a private class for Trainer. - */ -class Tester { - public: - /** - * Ctor - * @param config Trainer Config. - * @param intconfig Tester Config. - * @param gradientMachine Gradient machine(neuralnetwork) that will be tested. - * @param parameterUpdater Parameter Updater. Not for updating parameter, just - * for getting parameter from parameter-server. - * @param testDataProvider Test data provider. 
- */ - Tester(const std::shared_ptr& config, - std::unique_ptr&& intconfig, - const GradientMachinePtr& gradientMachine, - const std::shared_ptr& parameterUpdater, - std::shared_ptr testDataProvider); - - /** - * test one period. - * - * One period means 2 things. - * if test_period !=0 and not test_all_data_in_one_period, then - * will test test_period * batch_size data. - * else - * will test whole test data. - * - * It is convenience to test small set of data when test data set is large and - * is training at same time. - */ - void testOnePeriod(); - void startTestPeriod(); - void finishTestPeriod(); - void testOneDataBatch(const DataBatch& dataBatch, - std::vector* outArgs); - - /** - * Test for given data batch. - * @param dataBatch Data batch. - * @param evaluator Evaluator - * @return cost - */ - real forwardOneBatch(const DataBatch& dataBatch, - Evaluator* evaluator, - std::vector* outArgs); - - /** - * performance the full pass of test given test data provider - */ - void test(); - - protected: - std::shared_ptr testParameterClient_; - std::shared_ptr config_; - std::unique_ptr intconfig_; - GradientMachinePtr gradientMachine_; - std::shared_ptr parameterUpdater_; - std::unique_ptr testEvaluator_; - std::unique_ptr paramUtil_; - DataProviderPtr testDataProvider_; - TrainerStats stats_; - - // Used for saving the values of output layers - std::ofstream os_; - std::vector cpuMat_; - std::vector cpuVec_; - struct { - int64_t numSamples; - real cost; - } testContext_; - - private: - /** - * Test one batch by batchId. It is only used for testOnePass. - * - * Durning testOnePass, each log_period will print cost statistics. - * - * @param batchId current batch id (from 0) - * @return num of tested samples. Zero if end of pass. - */ - int64_t testOneBatchById(int64_t batchId); - - /** - * Test whole pass in one batch. - * - * - * @param passId current pass id (from 0) - */ - void testOnePassBatch(int passId); - - /** - * test for one pass in several mini-batches. 
- * - * Used for sgd method. - * - * @param passId current pass id (from 0) - */ - void testOnePass(int passId); - - /** - * print the outArgs to a stream - * - * used for save feature file - * - * @param [in] outArgs output arguments for network. - * @param [in,out] os output stream. - */ - void printOutput(const std::vector& outArgs, std::ostream& os); -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/TesterConfig.h b/paddle/legacy/trainer/TesterConfig.h deleted file mode 100644 index 6c78f7cda347d5808d11e8af98672ef56898d643..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TesterConfig.h +++ /dev/null @@ -1,138 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "paddle/legacy/utils/Util.h" - -#include - -#include "hl_gpu.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" - -#include "TrainerConfig.pb.h" - -#include -#include -#include "ParameterUpdater.h" - -namespace paddle { - -/** - * TesterConfig - * general configs for training - */ -struct TesterConfig { - /** - * indicate test period - */ - int testPeriod; - - /** - * indicate whether to save previous batch state - */ - bool prevBatchState; - - /** - * log period - */ - int logPeriod; - - /** - * loadsave parameters in pserver - */ - bool loadsaveParametersInPserver; - - /** - * feat file - */ - std::string featFile; - - /** - * predict output dir - */ - std::string predictOutputDir; - - /** - * trianer id - */ - int trainerId; - - /** - * distribute test - */ - bool distributeTest; - - /** - * training state - */ - MachineState* trainState; - - /** - * test state - */ - MachineState* testState; - - /** - * model list - */ - std::string modelList; - - /** - * test passes - */ - int testPass; - - /** - * num passes - */ - int numPasses; - - /** - * saving period - */ - int savingPeriod; - - /** - * test wait - */ - int testWait; - - /** - * init model path - */ - std::string initModelPath; - - /** - * save only one - */ - bool saveOnlyOne; - - /** - * testing mode - */ - bool testing; - - /** - * mode - */ - int mode; - - /** - * config loc - */ - std::string config; -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/ThreadParameterUpdater.cpp b/paddle/legacy/trainer/ThreadParameterUpdater.cpp deleted file mode 100644 index 0601bdf24e3150f5d182e2addde3a91609a967e4..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/ThreadParameterUpdater.cpp +++ /dev/null @@ -1,309 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ThreadParameterUpdater.h" - -#include "paddle/legacy/utils/Logging.h" - -#include "paddle/legacy/math/SparseRowMatrix.h" -#include "paddle/legacy/parameter/ThreadLocalBuffer.h" -#include "paddle/legacy/utils/Thread.h" - -DECLARE_int32(trainer_count); - -namespace paddle { - -SgdThreadUpdater::SgdThreadUpdater(const OptimizationConfig& optConfig) - : config_(optConfig), numSamplesProcessed_(0) { - // fill types - auto types = sgdOptimizerGetTypes(optConfig, false /*inPserver*/); - for (auto type : types) { - addParameterType(type); - } -} - -void SgdThreadUpdater::init(const std::vector& parameters) { - ParameterUpdater::init(parameters); - - // calc max parameter id - size_t maxId = 0; - for (auto& para : parameters_) { - maxId = std::max(maxId, para->getID()); - } - - optimizers_.resize(maxId + 1); - for (auto& para : parameters_) { - int pid = para->getID(); - optimizers_[pid].reset(sgdOptimizerCreate(config_, - para->getConfig(), - para->isGradSparseUpdate(), - false /*inPserver*/)); - size_t numRows = para->isGradSparseUpdate() ? para->getConfig().dims(0) : 0; - optimizers_[pid]->init(numRows, ¶->getConfig()); - if (para->isGradSparseUpdate() && FLAGS_trainer_count == 1) { - // For trainer_count=1, the gradient machine is NeuralNetwork, which does - // not create parameter buf for PARAMETER_GRADIENT for sparse update in - // Parameter::enableType(). But gradient parameter buf is still used - // in SgdThreadUpdater. We need to explicitly create it. - // - // The AverageOptimizer::restore/apply method will use PARAMETER_GRADIENT - // as a temp buffer. 
- para->enableBufType(PARAMETER_GRADIENT); - } - } -} - -void SgdThreadUpdater::startPass() { - for (auto& para : parameters_) { - int pid = para->getID(); - optimizers_[pid]->startPass(); - } -} - -bool SgdThreadUpdater::finishPass() { - catchUpWith(); - - for (auto& para : parameters_) { - int pid = para->getID(); - optimizers_[pid]->finishPass(); - } - return true; -} - -void SgdThreadUpdater::updateImpl(Parameter* para) { - if (!para->useGpu()) return; - SetDevice setDevice(para->getDeviceId()); - ParameterOptimizer* optimizer = optimizers_[para->getID()].get(); - optimizer->update(para->getBufs(), para->getConfig()); - if (auto callback = optimizer->needSpecialTraversal(para->getConfig())) { - callback(para->getBufs(), para->getConfig(), -1LU); - } - - para->setValueUpdated(); - para->clearGradient(); -} - -void SgdThreadUpdater::threadTraverse( - const ParameterOptimizer::TraverseCallback& callback, - int tid, - size_t numThreads, - Parameter* para) { - VectorPtr* vecs = parameter::getThreadLocalBuffer(); - if (para->isGradSparseUpdate()) { - size_t height = para->getConfig().dims(0); - size_t width = para->getConfig().dims(1); - for (size_t i = tid; i < height; i += numThreads) { - // setup sub bufs - for (auto type : parameterTypes_) { - vecs[type]->subVecFrom(*para->getBuf(type), i * width, width); - } - callback(vecs, para->getConfig(), i); - } - } else { // dense - // setup sub bufs - auto interval = calcSplitArrayInterval( - para->getSize(), (size_t)tid, numThreads, 8LU /*for avx*/); - for (auto type : parameterTypes_) { - vecs[type]->subVecFrom(*para->getBuf(type), interval); - } - - callback(vecs, para->getConfig(), -1LU); - } -} - -void SgdThreadUpdater::traverse(GetTraverseCallback getTraverseCallback) { - bool hasCpuPara = false; - bool hasGpuPara = false; - for (auto& para : parameters_) { - if (para->useGpu()) { - hasGpuPara = true; - } else { - hasCpuPara = true; - } - } - - auto cpuTraverse = [&](int tid, size_t numThreads) { - for (auto& para 
: parameters_) { - if (auto callback = getTraverseCallback(para.get())) { - threadTraverse(callback, tid, numThreads, para.get()); - } - } - }; - auto gpuTraverse = [&](int tid, size_t numThreads) { - for (auto& para : parameters_) { - if (para->useGpu()) { - if (auto callback = getTraverseCallback(para.get())) { - SetDevice setDevice(para->getDeviceId()); - callback(para->getBufs(), para->getConfig(), -1LU); - } - } - } - }; - - if (hasCpuPara && hasGpuPara) { - getGlobalSyncThreadPool()->exec(cpuTraverse, gpuTraverse); - } else if (hasCpuPara) { - getGlobalSyncThreadPool()->exec(cpuTraverse); - } else if (hasGpuPara) { - gpuTraverse(0, 0); - } -} - -void SgdThreadUpdater::catchUpWith() { - traverse([this](Parameter* para) { - return optimizers_[para->getID()]->startCatchUpWith(); - }); - - for (auto& para : parameters_) { - int pid = para->getID(); - optimizers_[pid]->finishCatchUpWith(); - } -} - -void SgdThreadUpdater::apply() { - catchUpWith(); - - traverse( - [this](Parameter* para) { return optimizers_[para->getID()]->apply(); }); -} - -void SgdThreadUpdater::restore() { - traverse([this](Parameter* para) { - return optimizers_[para->getID()]->restore(); - }); -} - -PassType SgdThreadUpdater::startBatch(int64_t batchSize) { - numSamplesProcessed_ += batchSize; - for (auto& para : parameters_) { - int pid = para->getID(); - optimizers_[pid]->startBatch(numSamplesProcessed_); - } - return PASS_TRAIN; -} - -void SgdThreadUpdater::finishBatch(real cost) { - getGlobalSyncThreadPool()->exec([&](int tid, size_t numThreads) { - for (auto& para : parameters_) { - if (para->isGradSparseUpdate()) { - threadUpdateSparse(tid, numThreads, para.get()); - } else if (!para->useGpu()) { - threadUpdateDense(tid, numThreads, para.get()); - } - } - }); - - for (auto& para : parameters_) { - int pid = para->getID(); - optimizers_[pid]->finishBatch(); - } -} - -void SgdThreadUpdater::threadUpdateSparse(int tid, - size_t numThreads, - Parameter* para) { - int pid = para->getID(); - 
ParameterOptimizer* optimizer = optimizers_[pid].get(); - VectorPtr* vecs = parameter::getThreadLocalBuffer(); - - size_t height = para->getConfig().dims(0); - size_t width = para->getConfig().dims(1); - - if (dynamic_cast( - para->getMat(PARAMETER_GRADIENT).get())) { - // From MultiGradientMachine - SparseRowIdsCpuMatrix* mainMat = dynamic_cast( - para->getMat(PARAMETER_GRADIENT).get()); - std::vector& sparseIds = mainMat->getIds(tid); - - for (auto id : sparseIds) { - // setup sub bufs - for (auto type : parameterTypes_) { - vecs[type]->subVecFrom(*para->getBuf(type), id * width, width); - } - optimizer->update(vecs, para->getConfig(), id); - vecs[PARAMETER_GRADIENT]->zeroMem(); - } - sparseIds.clear(); - } else if (dynamic_cast( - para->getMat(PARAMETER_GRADIENT).get())) { - // From NeuralNetwork - SparseRowCpuMatrix* mainMat = dynamic_cast( - para->getMat(PARAMETER_GRADIENT).get()); - - std::vector& localIndices = - mainMat->getIndexDictHandle()->localIndices; - - auto interval = - calcSplitArrayInterval(localIndices.size(), tid, numThreads); - for (size_t i = interval.first; i < interval.second; ++i) { - auto id = localIndices[i]; - real* row = mainMat->getLocalRow(i); - // setup sub bufs - for (auto type : parameterTypes_) { - if (type == PARAMETER_GRADIENT) { - vecs[type]->subVecFrom(row, 0, width); - } else { - vecs[type]->subVecFrom(*para->getBuf(type), id * width, width); - } - } - optimizer->update(vecs, para->getConfig(), id); - vecs[PARAMETER_GRADIENT]->zeroMem(); - } - // For numThreads > 1, MultiGradientMachine is used, which goes - // to the above branch. 
- CHECK_EQ(numThreads, 1UL); - mainMat->clearIndices(); - } else { - auto& m = *para->getMat(PARAMETER_GRADIENT).get(); - LOG(FATAL) << "Internal error: " << para->getName() << " " - << typeid(m).name(); - } - - if (auto callback = optimizer->needSpecialTraversal(para->getConfig())) { - for (size_t i = tid; i < height; i += numThreads) { - // setup sub bufs - for (auto type : parameterTypes_) { - vecs[type]->subVecFrom(*para->getBuf(type), i * width, width); - } - callback(vecs, para->getConfig(), i); - } - } -} - -void SgdThreadUpdater::threadUpdateDense(int tid, - size_t numThreads, - Parameter* para) { - int pid = para->getID(); - ParameterOptimizer* optimizer = optimizers_[pid].get(); - VectorPtr* vecs = parameter::getThreadLocalBuffer(); - - auto interval = calcSplitArrayInterval( - para->getSize(), (size_t)tid, numThreads, 8LU /*for avx*/); - - // setup sub bufs - for (auto type : parameterTypes_) { - vecs[type]->subVecFrom(*para->getBuf(type), interval); - } - - // update - optimizer->update(vecs, para->getConfig()); - vecs[PARAMETER_GRADIENT]->zeroMem(); - - if (auto callback = optimizer->needSpecialTraversal(para->getConfig())) { - callback(vecs, para->getConfig(), -1LU); - } -} - -} // namespace paddle diff --git a/paddle/legacy/trainer/ThreadParameterUpdater.h b/paddle/legacy/trainer/ThreadParameterUpdater.h deleted file mode 100644 index 172287d4eb56828c83e6670226b4c1f179fac6d8..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/ThreadParameterUpdater.h +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/legacy/parameter/AverageOptimizer.h" -#include "paddle/legacy/parameter/FirstOrderOptimizer.h" -#include "paddle/legacy/parameter/OptimizerFunctions.h" -#include "paddle/legacy/parameter/OptimizerWithRegularizer.h" -#include "paddle/legacy/parameter/Parameter.h" -#include "paddle/legacy/parameter/Regularizer.h" -#include "paddle/legacy/utils/Util.h" - -#include -#include - -namespace paddle { - -/** - * \brief A parameter updater that uses multiple threads to update parameters. - This parameter updater handles GPU and CPU updates differently, - because at the current moment, the merging on CPU is happening on the - main thread, and the its parameter size can be much larger than the one GPU. - Thus, for GPU, the parameter updates happens in updateImpl() function, which - is called by gradient machines as a callback function supplied to backward() - and forwardBackward(). - For CPU, the parameter updates happens in separate threads maintained by this - class. - */ -class SgdThreadUpdater : public ParameterUpdater { - public: - explicit SgdThreadUpdater(const OptimizationConfig& optConfig); - virtual ~SgdThreadUpdater() {} - - // Use the startPass() function of the base optimizer. - virtual void startPass(); - - // Use the finishPass() function of the base optimizer. - virtual bool finishPass(); - - virtual void init(const std::vector& parameters); - virtual PassType startBatch(int64_t batchSize); - // Call finishBatch for each optimizer. 
- virtual void finishBatch(real cost); - virtual void catchUpWith(); - virtual void apply(); - virtual void restore(); - - protected: - // This is the function that will be eventualy called by the GradientMachine. - // used only for GPU update. - virtual void updateImpl(Parameter* para); - OptimizationConfig config_; - int64_t numSamplesProcessed_; - - // One optimizers for each parameter. - std::vector> optimizers_; - - // The update function for CPU sparse parameters. - void threadUpdateSparse(int tid, size_t numThreads, Parameter* para); - - // The update function for CPU dense parameters. - void threadUpdateDense(int tid, size_t numThreads, Parameter* para); - // The update function for after update operations, such as averager. - void threadTraverse(const ParameterOptimizer::TraverseCallback& callback, - int tid, - size_t numThreads, - Parameter* para); - typedef std::function - GetTraverseCallback; - void traverse(GetTraverseCallback getTraverseCallback); -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/Trainer.cpp b/paddle/legacy/trainer/Trainer.cpp deleted file mode 100644 index 2db754793cf19e0c29455f61ada5f1d15b3204af..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/Trainer.cpp +++ /dev/null @@ -1,653 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Trainer.h" - -#include - -#include -#include -#include -#include - -#include - -#include "paddle/legacy/utils/Common.h" -#include "paddle/legacy/utils/GlobalConstants.h" -#include "paddle/legacy/utils/PythonUtil.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -#include "RemoteParameterUpdater.h" -#include "TesterConfig.h" -#include "ThreadParameterUpdater.h" -#include "TrainerConfigHelper.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachineMode.h" -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/legacy/gserver/layers/ValidationLayer.h" - -DEFINE_string(config, "", "Trainer config file"); - -DEFINE_int32(test_period, - 0, - "if equal 0, do test on all test data at the end of " - "each pass. While if equal non-zero, do test on all test " - "data every test_period batches"); -DEFINE_bool(test_all_data_in_one_period, - false, - "This option was deprecated, since we will always do " - "test on all test set "); - -DEFINE_bool(local, true, "Train in local mode or not"); - -DEFINE_int32(average_test_period, - 0, - "Do test on average parameter every so" - " many batches. MUST be devided by FLAGS_log_period." - " Default 0 means do not test average parameter"); - -DEFINE_int32(saving_period, 1, "Save parameteres every so many passes"); -DEFINE_int64(saving_period_by_batches, - 0, - "Save parameters every so many batches in one pass"); -DEFINE_string(save_dir, "", "Directory for saving model parameter"); -DEFINE_int32(start_pass, - 0, - "Start training from this pass. 
" - "Will load parameter from the previous pass"); -DEFINE_int32(test_pass, -1, "Will load parameter start from this pass to test"); -DEFINE_int32(test_wait, 0, "Waiting for pass parameter if not exist"); -DEFINE_bool(with_cost, true, "enable cost layer or not"); -DEFINE_bool(distribute_test, false, "test in distribute mode"); - -DEFINE_int32(num_passes, 100, "train for so many passes"); - -DEFINE_string(config_args, - "", - "arguments passed to config file." - "Format: key1=value1,key2=value2"); - -DEFINE_bool(save_only_one, - false, - "Save only parameters in last pass, remove previous."); - -DEFINE_string(feat_file, "", "File name of extracted feature."); -DEFINE_string(predict_output_dir, - "", - "Directory that saves the predicted results of output layers"); -DEFINE_string(model_list, "", "File that saves the model list when evaluation"); - -namespace paddle { - -void Trainer::init(const std::shared_ptr& config, - bool testing, - const std::shared_ptr& gradientMachine, - const std::shared_ptr& dataProvider, - const std::shared_ptr& testDataProvider) { - this->stats_ = std::make_shared(); - - config_ = config; - - config_->updateConfigFromFlags(); - - testing_ = testing; - - // in testing, mode_ may GradientMachine::kTesting or - // GradientMachine::kSgdSparseCpuTraining - - if (FLAGS_local) { - CHECK(!FLAGS_loadsave_parameters_in_pserver) - << "local and loadsave_parameters_in_pserver can not both true"; - if (config_->getOptConfig().use_sparse_remote_updater()) { - config_->disableRemoteSparseUpdaterForEachParams(); - LOG(INFO) << "ignore sparse_remote_update=true due to --local=true"; - } - } - if (FLAGS_loadsave_parameters_in_pserver) { - CHECK(config_->getOptConfig().use_sparse_remote_updater()) - << "no parameter to load from pserver, please check network config"; - } - if (testing && !FLAGS_loadsave_parameters_in_pserver) { - if (config_->getOptConfig().use_sparse_remote_updater()) { - config_->disableRemoteSparseUpdater(); - LOG(INFO) << "because 
parameter is loaded local," - << "tester ignore sparse_remote_update flag"; - } - } - - CHECK(TrainAlgorithm::isValid(config_->getOptConfig().algorithm())) - << "invalid algorithm configuration: " - << config_->getOptConfig().algorithm(); - - bool useSparseUpdater = false; - for (auto& paraConfig : config_->getModelConfig().parameters()) { - if (paraConfig.sparse_update() || paraConfig.sparse_remote_update()) { - useSparseUpdater = true; - } - } - - if (FLAGS_use_mkldnn) { - CHECK_EQ(FLAGS_trainer_count, 1) << "MKLDNN only need 1 trainer"; - } - - if (testing) { - LOG(INFO) << "trainer: in testing mode"; - if (config_->getOptConfig().use_sparse_remote_updater() || - FLAGS_trainer_count > 1) { - mode_ = GradientMachine::kSgdSparseCpuTraining; - LOG(INFO) << "trainer mode: SgdSparseCpuTraining"; - } else { - mode_ = GradientMachine::kTesting; - LOG(INFO) << "trainer mode: Testing"; - } - } else if (IGradientMachineMode::tryGetMode( - (int*)&mode_, - config_->getOptConfig().algorithm(), - FLAGS_trainer_count, - FLAGS_local, - FLAGS_use_gpu)) { - LOG(INFO) << "Custom trainer mode."; - } else if ((config_->getOptConfig().algorithm() == TrainAlgorithm::SGD || - config_->getOptConfig().algorithm() == - TrainAlgorithm::AsyncSGD) && - useSparseUpdater) { - mode_ = GradientMachine::kSgdSparseCpuTraining; - LOG(INFO) << "trainer mode: SgdSparseCpuTraining"; - } else { - mode_ = GradientMachine::kNormal; - LOG(INFO) << "trainer mode: Normal"; - } - - // initialize trainer internal - trainerInternal_.init(config_, - gradientMachine, - TrainerInternalConfig::createFromMode(mode_), - stats_, - testing); - std::unique_ptr paramConfig( - new ParameterUtilConfig(FLAGS_save_only_one, - FLAGS_saving_period, - FLAGS_loadsave_parameters_in_pserver, - FLAGS_config)); - - paramUtil_.reset( - new paddle::ParameterUtil(config_, - std::move(paramConfig), - trainerInternal_.getGradientMachine(), - trainerInternal_.getParameterUpdater())); - - bool gpuData = - FLAGS_use_gpu && 
(!FLAGS_parallel_nn) && - (!IGradientMachineMode::dataMustInCpu(mode_, FLAGS_trainer_count)); - - dataProvider_ = dataProvider; - if (!dataProvider_ && config_->hasDataConfig() && !testing_) { - dataProvider_.reset(DataProvider::create(*config_, *config_, gpuData)); - } - if (!testDataProvider_) { - // No evaluator_ if there is testDataProvider but no dataProvider. - evaluator_.reset(trainerInternal_.getGradientMachine()->makeEvaluator()); - currentEvaluator_.reset( - trainerInternal_.getGradientMachine()->makeEvaluator()); - if (FLAGS_average_test_period > 0 && FLAGS_trainer_id == 0 && - config_->getOptConfig().average_window() > 0) { - CHECK_EQ(FLAGS_average_test_period % FLAGS_log_period, 0) - << "FLAGS_average_test_period must be divided by FALGS_log_period"; - averageEvaluator_.reset( - trainerInternal_.getGradientMachine()->makeEvaluator()); - } - } - - testDataProvider_ = testDataProvider; - if (!testDataProvider_ && config_->hasTestDataConfig()) { - testDataProvider_.reset( - DataProvider::create(config_->getTestDataConfig(), *config_, gpuData)); - } - if (testDataProvider_) { - createTester(); - } - - if (!testing && - (trainerInternal_.getGradientMachine()->hasStaticParameters())) { - CHECK(!FLAGS_loadsave_parameters_in_pserver) - << "is_static and loadsave_parameters_in_pserver can not both true"; - } - if (testing) { - // will load per pass for tester - } else if (paramUtil_->tryLoadParametersFromConfig()) { - // load from config already. 
- } else { - trainerInternal_.getGradientMachine()->randParameters(); - } - - // Only non static parameters need to be updated - std::vector& parameters = - trainerInternal_.getGradientMachine()->getNonStaticParameters(); - if (trainerInternal_.getParameterUpdater()) { - trainerInternal_.getParameterUpdater()->init(parameters); - - if (FLAGS_loadsave_parameters_in_pserver && FLAGS_trainer_id == 0) { - if (testing) { - // will load per pass for tester - } else if (!config_->getConfig().init_model_path().empty() && - (FLAGS_local || FLAGS_trainer_id == 0)) { - paramUtil_->loadParametersWithPath( - config_->getConfig().init_model_path(), - false /*local*/, - true /*remote*/); - } else if (config_->getConfig().start_pass() > 0 && - (FLAGS_local || FLAGS_trainer_id == 0)) { - CHECK(paramUtil_->loadParameters(config_->getConfig().start_pass() - 1, - false /*local*/, - true /*remote*/)); - } else { - trainerInternal_.getParameterUpdater()->randParametersRemote(); - } - } - } - - // set current evaluator and evalutor - trainerInternal_.setCurrentEvaluator(currentEvaluator_.get()); - trainerInternal_.setEvaluator(evaluator_.get()); -} - -void Trainer::train(size_t numPasses) { - startTrain(); - for (size_t i = 0; i < numPasses; ++i) { - if (IGradientMachineMode::trainWholeDataInOneBatch(mode_)) { - trainOnePassBatch(config_->getConfig().start_pass() + i); - } else { - trainOnePass(); - } - if (i < numPasses - 1) { - dataProvider_->reset(); - } - } - - finishTrain(); -} - -static double genPerturbation(real* d, real* grad, size_t dim) { - auto& reng = ThreadLocalRandomEngine::get(); - std::uniform_real_distribution dist(-1, 1); - double gradNorm = 0, dNorm = 0; - for (size_t i = 0; i < dim; ++i) { - d[i] = dist(reng); - dNorm += d[i] * d[i]; - gradNorm += grad[i] * grad[i]; - } - if (gradNorm > 0) { - real s = 0.5 * sqrt(gradNorm / dNorm); - for (size_t i = 0; i < dim; ++i) { - d[i] = s * d[i] + grad[i]; - } - } - double delta = 0; - for (size_t i = 0; i < dim; ++i) { - 
delta += grad[i] * d[i]; - } - return delta; -} - -real Trainer::checkGradient() { - trainerInternal_.getGradientMachine()->start(); - std::vector& parameters = - trainerInternal_.getGradientMachine()->getNonStaticParameters(); - DataBatch dataBatch; - int32_t batchSize = config_->getOptConfig().batch_size(); - - dataProvider_->getNextBatch(batchSize, &dataBatch); - - CHECK(dataBatch.getSize()) << "No data from data provider"; - std::vector& inArgs = dataBatch.getStreams(); - std::vector outArgs; - - trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); - real cost = Argument::sum(outArgs); - LOG(INFO) << "original cost=" << cost; - trainerInternal_.getGradientMachine()->backward(); - - real maxDiff = 0; - char fill = ' '; - for (auto& parameter : parameters) { - CpuVector oldPara(parameter->getSize()); - CpuVector newPara(parameter->getSize()); - oldPara.copyFrom(*parameter->getBuf(PARAMETER_VALUE)); - real* newp = newPara.getData(); - real* oldp = oldPara.getData(); - CpuVector cpuGrad(*parameter->getBuf(PARAMETER_GRADIENT)); - real* grad = cpuGrad.getData(); - size_t dim = parameter->getSize(); - std::vector d(dim); - - double delta = genPerturbation(d.data(), grad, dim); - - // use a step such that delta / cost is FLAGS_checkgrad_eps - real step = - (delta != 0) ? 
cost / delta * FLAGS_checkgrad_eps : FLAGS_checkgrad_eps; - delta *= step; - for (size_t i = 0; i < dim; ++i) { - newp[i] = oldp[i] + step * d[i]; - } - - parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); - parameter->setValueUpdated(); - trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); - real newCost1 = Argument::sum(outArgs); - - for (size_t i = 0; i < dim; ++i) { - newp[i] = oldp[i] - step * d[i]; - } - - parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); - parameter->setValueUpdated(); - trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); - real newCost2 = Argument::sum(outArgs); - - real trueDelta = 0.5 * (newCost1 - newCost2); - real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1; - LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(fill) - << std::setw(20) << parameter->getName() - << "step=" << std::setw(15) << step << "cost1=" << std::setw(10) - << newCost1 << "cost2=" << std::setw(10) << newCost2 - << "true_delta=" << std::setw(15) << trueDelta - << "analytic_delta=" << std::setw(15) << delta << "diff=" << diff - << (std::abs(diff) > 0.01 ? " ***" : ""); - - maxDiff = std::max(maxDiff, std::abs(diff)); - - // restore parameter - parameter->getBuf(PARAMETER_VALUE)->copyFrom(oldPara); - parameter->setValueUpdated(); - - fill = (fill == ' ') ? '.' 
: ' '; - } - return maxDiff; -} - -void Trainer::startTrain() { - trainPassContext_.passId = config_->getConfig().start_pass(); - srand(config_->getConfig().start_pass() + 1); - if (dataProvider_) { - dataProvider_->reset(); - } - - trainerInternal_.getGradientMachine()->start(); -} - -void Trainer::finishTrain() { trainerInternal_.getGradientMachine()->finish(); } - -void Trainer::startTrainPass() { - stats_->reset(); - trainPassContext_.batchId = 0; - trainPassContext_.avgTestCost = 0; - trainPassContext_.numAvgTests = 0; - trainPassContext_.passInnerId = 1; - - trainerInternal_.getParameterUpdater()->startPass(); - evaluator_->start(); - if (FLAGS_prev_batch_state) { - trainerInternal_.getGradientMachine()->resetState(); - trainerInternal_.getGradientMachine()->getState(testState_); - } -} - -void Trainer::trainOneDataBatch(DataBatch& dataBatch) { - int num = dataBatch.getSize(); - if (averageEvaluator_) { - int64_t mod = trainPassContext_.batchId % FLAGS_average_test_period; - if (mod >= FLAGS_average_test_period - FLAGS_log_period) { - if (mod == FLAGS_average_test_period - FLAGS_log_period) { - averageEvaluator_->start(); - } - trainerInternal_.getParameterUpdater()->apply(); - if (FLAGS_prev_batch_state) { - trainerInternal_.getGradientMachine()->getState(trainState_); - } - trainPassContext_.avgTestCost += tester_->forwardOneBatch( - dataBatch, averageEvaluator_.get(), &forwardOutput_); - if (FLAGS_prev_batch_state) { - trainerInternal_.getGradientMachine()->setState(trainState_); - } - trainPassContext_.numAvgTests += num; - trainerInternal_.getParameterUpdater()->restore(); - } - } - { - REGISTER_TIMER("TrainBatch"); - trainerInternal_.trainOneBatch( - trainPassContext_.batchId, dataBatch, &forwardOutput_); - } - - if (averageEvaluator_ && - trainPassContext_.batchId % FLAGS_average_test_period == - FLAGS_average_test_period - 1) { - averageEvaluator_->finish(); - LOG(INFO) << " Averaged parameter:" - << " cost=" - << trainPassContext_.avgTestCost / 
trainPassContext_.numAvgTests - << " Eval: " << *averageEvaluator_; - trainPassContext_.numAvgTests = 0; - trainPassContext_.avgTestCost = 0; - } - - ++trainPassContext_.batchId; - - if (trainPassContext_.batchId % FLAGS_log_period == 0) { - FOR_TIMING(globalStat.setThreadInfo(true)); - FOR_TIMING(globalStat.printAllStatus()); - FOR_TIMING(globalStat.reset()); - } - - if (testDataProvider_ && FLAGS_test_period > 0 && - trainPassContext_.batchId % FLAGS_test_period == 0) { - tester_->testOnePeriod(); - } - - if (FLAGS_saving_period_by_batches > 0 && - trainPassContext_.batchId > - FLAGS_saving_period_by_batches * trainPassContext_.passInnerId && - 0 == FLAGS_trainer_id) { - trainerInternal_.getParameterUpdater()->catchUpWith(); - if (testDataProvider_) { - tester_->testOnePeriod(); - } - paramUtil_->saveParametersOnePass(trainPassContext_.passId, - trainPassContext_.passInnerId); - ++trainPassContext_.passInnerId; - } -} - -void Trainer::finishTrainPass() { - if (trainPassContext_.batchId == 0) { - // This means no more data from DataProvider - return; - } - - trainerInternal_.finishTrainPass(trainPassContext_.passId, - trainPassContext_.batchId); - - FOR_TIMING(globalStat.setThreadInfo(true)); - FOR_TIMING(globalStat.printAllStatus()); - FOR_TIMING(globalStat.reset()); - - if (testDataProvider_) { - tester_->testOnePeriod(); - } - - if (trainPassContext_.passId % FLAGS_saving_period == 0 && - FLAGS_trainer_id == 0) { - paramUtil_->saveParametersOnePass(trainPassContext_.passId); - } - ++trainPassContext_.passId; -} - -void Trainer::trainOnePass() { - startTrainPass(); - size_t batchSize = config_->getOptConfig().batch_size(); - while (true) { - DataBatch dataBatch; - - int num = 0; - { - REGISTER_TIMER("getTrainBatch"); - num = dataProvider_->getNextBatch(batchSize, &dataBatch); - } - if (num == 0) break; - CHECK_EQ(num, dataBatch.getSize()); - trainOneDataBatch(dataBatch); - } - - finishTrainPass(); -} - -void Trainer::trainOnePassBatch(int passId) { - 
this->stats_->reset(); - - trainerInternal_.getParameterUpdater()->startPass(); - const std::vector inArgs; - { - REGISTER_TIMER("onePass"); - trainerInternal_.getGradientMachine()->forwardBackward( - inArgs, nullptr, PASS_TRAIN, nullptr); - } - - real cost = .0; - int64_t num = 0; - trainerInternal_.getGradientMachine()->getStats(cost, num); - *stats_ += {num, cost}; - - trainerInternal_.getGradientMachine()->onPassEnd(); - - bool accepted = trainerInternal_.getParameterUpdater()->finishPass(); - - globalStat.setThreadInfo(true); - globalStat.printAllStatus(); - globalStat.reset(); - - LOG(INFO) << " Pass=" << passId - << " AcceptedPass=" << (accepted ? acceptedPassId_ : -1) - << stats_->getStats(false /*withCurrentCost*/); - - if (accepted) { - if (acceptedPassId_ % FLAGS_saving_period == 0 && FLAGS_trainer_id == 0) { - paramUtil_->saveParameters(acceptedPassId_); - } - acceptedPassId_++; - if (FLAGS_save_only_one && acceptedPassId_ >= FLAGS_saving_period) { - paramUtil_->deleteParameters(acceptedPassId_ - FLAGS_saving_period); - } - } -} - -real Trainer::calcGradient(const DataBatch& dataBatch, - const Vector& value, - Vector& gradient) { - CHECK_EQ(value.getSize(), gradient.getSize()); - std::vector& parameters = - trainerInternal_.getGradientMachine()->getParameters(); - - clearGradient(); - - size_t offset = 0; - size_t valueSize = value.getSize(); - - for (auto& para : parameters) { - CHECK_LE(offset + para->getSize(), valueSize); - VectorPtr val = - Vector::create(para->getSize(), value.getMemoryHandle(), offset); - para->getBuf(PARAMETER_VALUE)->copyFrom(*val); - para->setValueUpdated(); - offset += para->getSize(); - } - - CHECK_EQ(offset, valueSize); - - std::vector inArgs = dataBatch.getStreams(); - std::vector outArgs; - - trainerInternal_.getGradientMachine()->forwardBackward( - inArgs, &outArgs, PASS_TRAIN); - real cost = Argument::sum(outArgs); - - offset = 0; - for (auto& para : parameters) { - VectorPtr grad = - Vector::create(para->getSize(), 
gradient.getMemoryHandle(), offset); - if (para->getBuf(PARAMETER_GRADIENT)) { - grad->copyFrom(*para->getBuf(PARAMETER_GRADIENT)); - } - offset += para->getSize(); - } - - return cost; -} - -void Trainer::clearGradient() { - std::vector& parameters = - trainerInternal_.getGradientMachine()->getNonStaticParameters(); - for (auto& parameter : parameters) { - parameter->clearGradient(); - } -} - -int Trainer::getBatchSize() { return config_->getOptConfig().batch_size(); } - -void Trainer::createTester() { - tester_.reset(new paddle::Tester(config_, - createTesterConfig(), - trainerInternal_.getGradientMachine(), - trainerInternal_.getParameterUpdater(), - testDataProvider_)); -} - -void Trainer::test() { tester_->test(); } - -std::unique_ptr Trainer::createTesterConfig() { - TesterConfig* conf = new TesterConfig; - if (FLAGS_test_period) { - LOG(WARNING) << "The meaning of --test_period is changed: " - << "if equal 0, do test on all test data at the end of " - << "each pass. While if equal non-zero, do test on all test " - << "data every test_period batches "; - } - if (FLAGS_test_all_data_in_one_period) { - LOG(WARNING) << "--test_all_data_in_one_period was deprecated, since " - << "we will always do test on all test set "; - } - conf->testPeriod = FLAGS_test_period; - conf->prevBatchState = FLAGS_prev_batch_state; - conf->logPeriod = FLAGS_log_period; - conf->loadsaveParametersInPserver = FLAGS_loadsave_parameters_in_pserver; - conf->featFile = FLAGS_feat_file; - conf->predictOutputDir = FLAGS_predict_output_dir; - conf->trainerId = FLAGS_trainer_id; - conf->distributeTest = FLAGS_distribute_test; - conf->config = FLAGS_config; - conf->modelList = FLAGS_model_list; - conf->testPass = FLAGS_test_pass; - conf->numPasses = FLAGS_num_passes; - conf->savingPeriod = FLAGS_saving_period; - conf->testWait = FLAGS_test_wait; - conf->initModelPath = FLAGS_init_model_path; - conf->saveOnlyOne = FLAGS_save_only_one; - conf->testing = testing_; - conf->mode = mode_; - 
conf->trainState = &trainState_; - conf->testState = &testState_; - return std::unique_ptr(conf); -} - -ParameterUtil* Trainer::getParameterUtilPtr() { return paramUtil_.get(); } -} // namespace paddle diff --git a/paddle/legacy/trainer/Trainer.h b/paddle/legacy/trainer/Trainer.h deleted file mode 100644 index b467f9af0cf12a39dd3d119c59e6cafcb05474b4..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/Trainer.h +++ /dev/null @@ -1,204 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/legacy/utils/Util.h" - -#include - -#include "hl_gpu.h" -#include "paddle/legacy/gserver/dataproviders/DataProvider.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" - -#include -#include -#include "ParamUtil.h" -#include "ParameterUpdater.h" -#include "Tester.h" -#include "TrainerConfigHelper.h" -#include "TrainerInternal.h" - -DECLARE_int32(num_passes); - -namespace paddle { - -/** - * Trainer Class - * - * Trainer combines GradientMachine, ParameterUpdater, DataProvider together to - * train/test a NeuralNetwork. - */ -class Trainer { - public: - /** - * Ctor. - * @return - */ - Trainer() : acceptedPassId_(0) {} - - virtual ~Trainer() {} - - /** - * initialize a new trainer using config - * - * @param config TrainerConfig. - * @param testing true if only for testing - * @param gradientMachine GradientMachine that will be trained. 
- * nullptr if create from config. - * @param dataProvider Train Data Provider. null if create from config. - * @param testDataProvider Test Data Provider. null if create from config. - */ - virtual void init( - const std::shared_ptr& config, - bool testing = false, - const std::shared_ptr& gradientMachine = nullptr, - const std::shared_ptr& dataProvider = nullptr, - const std::shared_ptr& testDataProvider = nullptr); - - /** - * Train until num_passes reached. - * One pass means neural network train through all training data. - * - * @param numPasses the number of traning pass. - * @note Durning neural network training, the num passes may set a very large - * value, and kill training process when result is good enough. - */ - void train(size_t numPasses = (size_t)FLAGS_num_passes); - - /** - * compare the gradient from bp with finite difference - * @return the maximal difference - */ - real checkGradient(); - - void startTrain(); - void finishTrain(); - void startTrainPass(); - void finishTrainPass(); - void trainOneDataBatch(DataBatch& dataBatch); - void time(); - - /** - * given a dataBatch and the current parameter value - * calculate its gradient and return the cost. - * - * TODO(yuyang18): I think this method is deprecated and buggy. Should it be - * removed? - */ - real calcGradient(const DataBatch& dataBatch, - const Vector& value, - Vector& gradient); - - /** - * Get Trainer Config. - */ - const TrainerConfig& getConfig() const { return config_->getConfig(); } - - /** - * Get Train Data Provider - */ - const DataProviderPtr& getDataProvider() { return dataProvider_; } - - /** - * Get Gradient Machine. - */ - const GradientMachinePtr& getGradientMachine() { - return trainerInternal_.getGradientMachine(); - } - - /** - * Get batch size in optimization config. - * @note This method didn't return the actual batch size. Just batch size - * set in the optimization config. 
The actual batch size in one trainer may - * less than batch size in config due to there are not enough data. - */ - int getBatchSize(); - - /** - * Do test job - */ - void test(); - - /** - * Get parameter util ptr - * - * TODO(yuyang18): Make it return a smart pointer. - */ - ParameterUtil* getParameterUtilPtr(); - - protected: - /** - * Train one pass of data. - * - * SGD Method. - */ - void trainOnePass(); - - /** - * Train one pass in one batch. - * - */ - void trainOnePassBatch(int passId); - - /** - * set parameter gradient to zero - */ - void clearGradient(); - - void createTester(); - - private: - std::unique_ptr createTesterConfig(); - - protected: - std::shared_ptr config_; - std::shared_ptr stats_; - - DataProviderPtr dataProvider_; - DataProviderPtr testDataProvider_; - MachineState trainState_; - MachineState testState_; - - struct TrainPassContext { - int64_t batchId; - real avgTestCost; - int64_t numAvgTests; - int passId; - int passInnerId; - }; - std::vector forwardOutput_; - - TrainPassContext trainPassContext_; - - std::unique_ptr evaluator_; - std::unique_ptr currentEvaluator_; - std::unique_ptr averageEvaluator_; - // training mode - // used to decide which GradientMachine and ParameterUpdater to create - GradientMachine::CreateMode mode_; - int testing_; - int acceptedPassId_; - - // trainer tester - std::unique_ptr tester_; - - // parameter util - std::unique_ptr paramUtil_; - - // trainer Internal - TrainerInternal trainerInternal_; -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/TrainerBenchmark.cpp b/paddle/legacy/trainer/TrainerBenchmark.cpp deleted file mode 100644 index 7f5bd2335481c417b466ac4ca9ca54798524045f..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TrainerBenchmark.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#undef PADDLE_DISABLE_TIMER - -#include "Trainer.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -DECLARE_int32(test_period); - -DEFINE_bool(feed_data, false, "Wether to read data from DataProvider."); - -namespace paddle { - -void Trainer::time() { - startTrain(); - - trainerInternal_.getParameterUpdater()->startPass(); - evaluator_->start(); - - DataBatch dataBatch; - int32_t batchSize = config_->getOptConfig().batch_size(); - int32_t num = dataProvider_->getNextBatch(batchSize, &dataBatch); - CHECK_EQ(num, batchSize) << "The sample number is less than batch size " - << num << " != " << batchSize; - - CHECK(dataBatch.getSize()) << "No data from data provider"; - - std::vector outputs; - // burning time - LOG(INFO) << "Burning time..."; - for (int n = 0; n < 10; ++n) { - trainerInternal_.trainOneBatch(n, dataBatch, &outputs); - } - LOG(INFO) << "Burning time end."; - - for (int n = 0; n < FLAGS_test_period; n++) { - if (FLAGS_feed_data) { - REGISTER_TIMER("GetData"); - num = dataProvider_->getNextBatch(batchSize, &dataBatch); - } - - if (num != batchSize) { - break; - } - - { - REGISTER_TIMER("FwdBwd"); - trainerInternal_.trainOneBatch(n, dataBatch, &outputs); - } - } - globalStat.setThreadInfo(true); - globalStat.printSegTimerStatus(); - globalStat.reset(); - - finishTrain(); -} - -} // namespace paddle diff --git a/paddle/legacy/trainer/TrainerConfigHelper.cpp 
b/paddle/legacy/trainer/TrainerConfigHelper.cpp deleted file mode 100644 index 4d31ba8d71d52ac51191affc612a79b6734dee74..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TrainerConfigHelper.cpp +++ /dev/null @@ -1,199 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "TrainerConfigHelper.h" -#include "ParamUtil.h" -#include "TrainerConfig.pb.h" -#include "paddle/legacy/utils/Flags.h" -#include "paddle/legacy/utils/PythonUtil.h" - -DECLARE_string(config); -DECLARE_string(init_model_path); -DECLARE_int32(start_pass); -DECLARE_string(save_dir); -DECLARE_int32(trainer_id); -DECLARE_bool(local); -DECLARE_bool(with_cost); -DECLARE_bool(with_gpu); -DECLARE_bool(parallel_nn); -DECLARE_string(config_args); -DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkl_packed); - -const char *kConfigParserModuleName = "paddle.trainer.config_parser"; -const char *kConfigParserFuncName = "parse_config_and_serialize"; - -namespace paddle { - -struct TrainerConfigHelperPrivate { - TrainerConfig conf; -}; - -TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) - : m(new TrainerConfigHelperPrivate()) { - std::ostringstream configArgs; - configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local - << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu - << ",parallel_nn=" << FLAGS_parallel_nn - << ",use_mkldnn=" << FLAGS_use_mkldnn - << ",use_mkl_packed=" << 
FLAGS_use_mkl_packed - << ",cudnn_version=" << hl_get_cudnn_lib_version(); - if (!FLAGS_config_args.empty()) { - configArgs << "," << FLAGS_config_args; - } - - VLOG(3) << "Parsing trainer config " << configFilePath; - std::string configProtoStr = - callPythonFunc(kConfigParserModuleName, - kConfigParserFuncName, - {configFilePath, configArgs.str()}); - CHECK(m->conf.ParseFromString(configProtoStr)); -} - -TrainerConfigHelper::TrainerConfigHelper(const TrainerConfig &config) - : m(new TrainerConfigHelperPrivate()) { - m->conf = config; -} - -TrainerConfigHelper::~TrainerConfigHelper() { delete m; } - -const TrainerConfig &TrainerConfigHelper::getConfig() const { return m->conf; } - -TrainerConfig &TrainerConfigHelper::getMutableConfig() { return m->conf; } - -const OptimizationConfig &TrainerConfigHelper::getOptConfig() const { - return m->conf.opt_config(); -} - -const ModelConfig &TrainerConfigHelper::getModelConfig() const { - return m->conf.model_config(); -} - -const DataConfig *TrainerConfigHelper::getDataConfigPtr() const { - if (m->conf.has_data_config()) { - return &m->conf.data_config(); - } else { - return nullptr; - } -} - -const DataConfig &TrainerConfigHelper::getTestDataConfig() const { - CHECK(m->conf.has_test_data_config()); - return m->conf.test_data_config(); -} - -bool TrainerConfigHelper::hasDataConfig() const { - return m->conf.has_data_config(); -} - -bool TrainerConfigHelper::hasTestDataConfig() const { - return m->conf.has_test_data_config(); -} - -void TrainerConfigHelper::updateConfigFromFlags() { - if (!FLAGS_save_dir.empty()) { - m->conf.set_save_dir(FLAGS_save_dir); - } - if (!FLAGS_init_model_path.empty()) { - m->conf.set_init_model_path(FLAGS_init_model_path); - } - if (FLAGS_start_pass != 0) { - m->conf.set_start_pass(FLAGS_start_pass); - } -} - -void TrainerConfigHelper::disableRemoteSparseUpdater() { - m->conf.mutable_opt_config()->set_use_sparse_remote_updater(false); -} - -void 
TrainerConfigHelper::disableRemoteSparseUpdaterForEachParams() { - this->disableRemoteSparseUpdater(); - for (int i = 0; i < m->conf.model_config().parameters_size(); ++i) { - m->conf.mutable_model_config() - ->mutable_parameters(i) - ->set_sparse_remote_update(false); - } -} - -OptimizationConfig &TrainerConfigHelper::getOptConfig() { - return *m->conf.mutable_opt_config(); -} - -void TrainerConfigHelper::setSaveDir(const std::string &saveDir) { - m->conf.set_save_dir(saveDir); -} - -const std::string &TrainerConfigHelper::getSaveDir() const { - return m->conf.save_dir(); -} - -std::string TrainerConfigHelper::getConfigNameFromPath( - const std::string &modelPath) { - std::ifstream s(path::join(modelPath, "path.txt")); - CHECK(s.is_open()) << " fail to open path.txt"; - std::string ss; - getline(s, ss); - VLOG(3) << "fileName " << path::join(modelPath, ss); - s.close(); - return path::join(modelPath, ss); -} - -std::string TrainerConfigHelper::getConfigNameFromPassId( - int passId, const std::string &modelPath) { - constexpr int kBufLen = 100; - char buf[kBufLen]; - snprintf(buf, kBufLen, "pass-%05d", passId); - return TrainerConfigHelper::getConfigNameFromPath(path::join(modelPath, buf)); -} - -std::string TrainerConfigHelper::getConfigName(bool *ok) const { - std::string retv = ""; - - if (!m->conf.config_file().empty()) { - retv = m->conf.config_file(); - } else if (!m->conf.init_model_path().empty()) { - retv = getConfigNameFromPath(m->conf.init_model_path()); - } else if (m->conf.start_pass() >= 1) { - retv = getConfigNameFromPassId(m->conf.start_pass(), m->conf.save_dir()); - } - - if (ok) { - *ok = !retv.empty(); - } - - return retv; -} - -std::shared_ptr TrainerConfigHelper::createFromFlags() { - std::string configPath; - if (!FLAGS_config.empty()) { - configPath = FLAGS_config; - } else if (!FLAGS_init_model_path.empty()) { - configPath = getConfigNameFromPath(FLAGS_init_model_path); - } else if (FLAGS_start_pass >= 1) { - configPath = - 
getConfigNameFromPassId(FLAGS_start_pass - 1, FLAGS_init_model_path); - } else { - return nullptr; - } - return std::make_shared(configPath); -} - -std::shared_ptr -TrainerConfigHelper::createFromFlagConfig() { - CHECK(!FLAGS_config.empty()); - return std::make_shared(FLAGS_config); -} - -} // namespace paddle diff --git a/paddle/legacy/trainer/TrainerConfigHelper.h b/paddle/legacy/trainer/TrainerConfigHelper.h deleted file mode 100644 index 0e428bea2c4b44bf98772ccca8f8b10d315efbbd..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TrainerConfigHelper.h +++ /dev/null @@ -1,205 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include - -namespace paddle { - -class TrainerConfig; -class OptimizationConfig; -struct TrainerConfigHelperPrivate; -class ModelConfig; -class DataConfig; - -/** - * @brief TrainerConfig Helper. A class wrap protobuf's TrainerConfig Object, - * simplize the usage for TrainerConfig. - * - * The all operation to TrainerConfig object should use this object. It remove - * many copy & paste code in trainer. - * - * @TODO(yuyang18): Make cmake check compiler support keyword 'final' or not. - * Define a macro to unify 'final' keyword - */ -class TrainerConfigHelper /*final*/ { - public: - DISABLE_COPY(TrainerConfigHelper); - - /** - * @brief Ctor, Create a TrainerConfig from config file - * @param configFilePath Config file path. 
- */ - explicit TrainerConfigHelper(const std::string& configFilePath); - explicit TrainerConfigHelper(const TrainerConfig& config); - - /** - * Dtor - * @warning this class is a final class. Should not be inherited. - */ - ~TrainerConfigHelper(); - - /** - * @brief Get Trainer Config itself. - */ - const TrainerConfig& getConfig() const; - - TrainerConfig& getMutableConfig(); - - /** - * @brief Get Optimizer Config. - */ - const OptimizationConfig& getOptConfig() const; - - /** - * @brief Get Model Config. - */ - const ModelConfig& getModelConfig() const; - - /** - * @brief Get Train Data Config Pointer. - * @return nullptr if there is no train data. Else will return pointer - */ - const DataConfig* getDataConfigPtr() const; - - /** - * @brief Get Tain Data Config. - * @warning Core when there is no train data. - */ - const DataConfig& getDataConfig() const { - CHECK(this->hasDataConfig()); - auto conf = this->getDataConfigPtr(); - return *conf; - } - - /** - * @brief Get test data config - * @warning Core when there is no test data. - */ - const DataConfig& getTestDataConfig() const; - - /** - * @brief Has train data config or not. - * @return true if has train data. - */ - bool hasDataConfig() const; - - /** - * @brief Has test data config or not. - * @return true if has test data. - */ - bool hasTestDataConfig() const; - - /** - * @brief Update trainer config from command line flags. - * Override config's (save_dir, init_model_path, start_pass) if command - * flags is existed. - */ - void updateConfigFromFlags(); - - /** - * @brief Disable optimization's sparse remote update. - */ - void disableRemoteSparseUpdater(); - - /** - * @brief Disable optimization and each parameter's sparse remote update. - */ - void disableRemoteSparseUpdaterForEachParams(); - - /** - * @brief implicit conversion. - */ - inline operator const TrainerConfig&() const { return this->getConfig(); } - - /** - * @brief implicit conversion. 
- */ - inline operator const OptimizationConfig&() const { - return this->getOptConfig(); - } - - /** - * @brief implicit conversion. - */ - inline operator const DataConfig&() const { return this->getDataConfig(); } - - /** - * @brief implicit conversion. - */ - inline operator const ModelConfig&() const { return this->getModelConfig(); } - - /** - * @brief Get mutable optimization config. - */ - OptimizationConfig& getOptConfig(); - - /** - * @brief set model save directory. - * @param saveDir Directory path. - */ - void setSaveDir(const std::string& saveDir); - - /** - * @brief get model save directory. - * @return save directory path. - */ - const std::string& getSaveDir() const; - - /** - * @brief Get config file name from model path. - * - * Paddle save model to a directory, and write a file 'path.txt' which save - * config filename. - * - * @param modelPath model saved directory. - * @return config file name. - */ - static std::string getConfigNameFromPath(const std::string& modelPath); - - /** - * @brief Get config file name from this config instance. - * @param[out] ok true if no error. - * @return config file name. - */ - std::string getConfigName(bool* ok = nullptr) const; - - /** - * @brief Try to create TrainerConfigHelper from all command line flags. - * Try to load from --config, --init_model_path, --start_pass one by - * one. Return nullptr if cannot load TrainerConfigHelper from all - * these place. - * @return nullptr if cannot load, otherwise return a TrainerConfigHelper. - */ - static std::shared_ptr createFromFlags(); - - /** - * @brief Try to create TrainerConfigHelper only from '--config' flag. - * @return nullptr if cannot load, otherwise return a TrainerConfigHelper. 
- */ - static std::shared_ptr createFromFlagConfig(); - - private: - static std::string getConfigNameFromPassId(int passId, - const std::string& modelPath); - - TrainerConfigHelperPrivate* m; -}; - -typedef std::shared_ptr TrainerConfigHelperPtr; - -} // namespace paddle diff --git a/paddle/legacy/trainer/TrainerInternal.cpp b/paddle/legacy/trainer/TrainerInternal.cpp deleted file mode 100644 index ee3dea6340167ab16d2bfefe3d757b10f5d90bb5..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TrainerInternal.cpp +++ /dev/null @@ -1,303 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "TrainerInternal.h" - -#include -#include - -#include -#include -#include -#include - -#include - -#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/legacy/gserver/layers/ValidationLayer.h" -#include "paddle/legacy/utils/GlobalConstants.h" -#include "paddle/legacy/utils/PythonUtil.h" -#include "paddle/legacy/utils/Stat.h" -#include "paddle/legacy/utils/Util.h" - -#include "RemoteParameterUpdater.h" -#include "ThreadParameterUpdater.h" - -namespace paddle { - -void TrainerInternal::init(const std::shared_ptr& config, - const GradientMachinePtr& gradientMachine, - std::unique_ptr&& intconfig, - const std::shared_ptr& stats, - bool testing) { - config_ = config; - intconfig_ = std::move(intconfig); - stats_ = stats; - - //! in training will use parameter updater definitly. - //! 
But only use parameter in testing mode when some parameter in pserver. - if (!testing || (config_->getOptConfig().use_sparse_remote_updater() && - intconfig_->loadsave_parameters_in_pserver)) { - createParameterUpdater(testing); - } - - gradientMachine_ = gradientMachine; - if (!gradientMachine) { - CHECK(config_->getConfig().has_model_config()) - << "Missing model_config in trainer_config"; - gradientMachine_.reset( - GradientMachine::create(config_->getConfig().model_config(), - intconfig_->mode, - parameterUpdater_->getParameterTypes())); - } -} - -void TrainerInternal::trainOneBatch(int64_t batchId, - const DataBatch& dataBatch, - std::vector* outArgs) { - // true means updating parameter whenever gradient is ready during backward() - bool doPipelineUpdate = - (intconfig_->mode != GradientMachine::kSgdSparseCpuTraining) && - (intconfig_->local || intconfig_->use_gpu || - intconfig_->trainer_count <= 1); - - int64_t actualBatchSize = dataBatch.getSize(); - if (actualBatchSize == 0) { - return; - } - - bool showStats = intconfig_->show_param_stats_period > 0 && - (batchId + 1) % intconfig_->show_param_stats_period == 0 && - intconfig_->trainer_id == 0; - - std::vector paraStats; - if (showStats) { - paraStats.resize(gradientMachine_->getParameters().size()); - } - - const std::vector& inArgs = dataBatch.getStreams(); - - PassType passType = parameterUpdater_->startBatch(actualBatchSize); - - if (config_->getOptConfig().use_sparse_remote_updater()) { - REGISTER_TIMER("prefetch"); - gradientMachine_->prefetch(inArgs); - parameterUpdater_->getParametersRemote(); - } - - UpdateCallback updateCallback = [this, showStats, ¶Stats]( - Parameter* para) { - if (showStats) { - //! @TODO(yuyang18) Show stats is actually a ParameterHook, refactor - // it - //! to ParameterHook. 
- auto& grad = para->getBuf(PARAMETER_GRADIENT); - SetDevice device(para->getDeviceId()); - paraStats[para->getID()].avgAbsGrad = grad->getAbsSum() / para->getSize(); - paraStats[para->getID()].maxAbsGrad = grad->getAbsMax(); - } - parameterUpdater_->update(para); - }; - - { -#ifndef PADDLE_DISABLE_TIMER - Timer timer; - timer.start(); -#endif - REGISTER_TIMER("forwardBackward"); - forwardBackwardBatch( - inArgs, *outArgs, passType, updateCallback, doPipelineUpdate); -#ifndef PADDLE_DISABLE_TIMER - timer.stop(); - parameterUpdater_->setForwardbackwardTime(timer.get()); -#endif - } - - if (!doPipelineUpdate) { - auto& parameters = gradientMachine_->getNonStaticParameters(); - for (auto& para : parameters) { - updateCallback(para.get()); - } - } - - real cost = 0; - { - REGISTER_TIMER("sumCost"); - cost = Argument::sum(*outArgs); - } - - if (batchId % intconfig_->log_period == 0) { - currentEvaluator_->start(); - stats_->resetCurrentStat(); - } - { - REGISTER_TIMER("eval"); - gradientMachine_->eval(currentEvaluator_); - gradientMachine_->eval(evaluator_); - } - - *stats_ += {actualBatchSize, cost}; - { - REGISTER_TIMER("finishBatch"); - parameterUpdater_->finishBatch(cost); - } - - if (showStats) { - showParameterStats(paraStats); - } - if ((batchId + 1) % intconfig_->log_period == 0) { - currentEvaluator_->finish(); - - if (intconfig_->dot_period > 0) { - std::cerr << std::endl; - } - LOG(INFO) << " Batch=" << batchId + 1 << " " << *stats_ - << " Eval: " << *evaluator_ - << " CurrentEval: " << *currentEvaluator_; - } else if (intconfig_->dot_period > 0 && - (batchId + 1) % intconfig_->dot_period == 0) { - std::cerr << "."; - } -} - -/** - * finish train pass - */ -void TrainerInternal::finishTrainPass(int passId, int batchId) { - gradientMachine_->onPassEnd(); - parameterUpdater_->finishPass(); - evaluator_->finish(); - LOG(INFO) << " Pass=" << passId << " Batch=" << batchId << " " - << stats_->getStats(false /*without current cost*/) - << " Eval: " << *evaluator_; 
-} - -void TrainerInternal::showParameterStats( - const std::vector& paraStats) { - std::vector& parameters = gradientMachine_->getParameters(); - for (auto& parameter : parameters) { - SetDevice device(parameter->getDeviceId()); - real sum = parameter->getBuf(PARAMETER_VALUE)->getAbsSum(); - const auto& lr = parameter->getBuf(PARAMETER_LEARNING_RATE); - std::ostringstream osLrHistogram; - if (lr) { - if (VLOG_IS_ON(2)) { - osLrHistogram << " lr_histogram: "; - lr->histogram(osLrHistogram); - } else { - osLrHistogram << " max_lr=" << std::setw(11) << lr->getMax() - << " min_lr=" << std::setw(11) << lr->getMin() - << " avg_lr=" << std::setw(11) - << lr->getSum() / parameter->getSize(); - } - } - int pid = parameter->getID(); - LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ') - << std::setw(20) << parameter->getName() - << " avg_abs_val=" << std::setw(11) << sum / parameter->getSize() - << " max_val=" << std::setw(11) - << parameter->getBuf(PARAMETER_VALUE)->getAbsMax() - << " avg_abs_grad=" << std::setw(11) << paraStats[pid].avgAbsGrad - << " max_grad=" << std::setw(11) << paraStats[pid].maxAbsGrad - << osLrHistogram.str(); - } -} - -void TrainerInternal::createParameterUpdater(bool testing) { - const std::string& alg = config_->getOptConfig().algorithm(); - parameterUpdater_.reset(ParameterUpdaterCreators::tryCreateUpdater( - alg, config_->getOptConfig(), intconfig_->local, intconfig_->num_passes)); - if (parameterUpdater_) { - return; - } - - if (!intconfig_->local) { - if (testing && config_->getOptConfig().use_sparse_remote_updater()) { - std::unique_ptr localUpdater; - localUpdater.reset( - new SgdLocalUpdater(config_->getOptConfig())); // do nothing - parameterUpdater_.reset( - new SparseRemoteParameterUpdaterComposite(config_->getOptConfig(), - intconfig_->num_passes, - testing, - std::move(localUpdater))); - } else { - if (GradientMachine::kSgdSparseCpuTraining == intconfig_->mode && - !intconfig_->use_old_updater) { - 
intconfig_->use_old_updater = true; - LOG(INFO) << "Sgd sparse training can not work with" - << " ConcurrentRemoteParameterUpdater," - << " automatically reset --use_old_updater=true"; - } - - std::unique_ptr localUpdater; - if (config_->getOptConfig().num_batches_per_send_parameter() > 1) { - CHECK(alg == TrainAlgorithm::SGD || alg == TrainAlgorithm::AsyncSGD) - << "Unsupported algorithm in remote-local mode: " << alg; - if (GradientMachine::kSgdSparseCpuTraining == intconfig_->mode) { - localUpdater.reset(new SgdThreadUpdater(*config_)); - } else { - localUpdater.reset(new SgdLocalUpdater(*config_)); - } - } - - localUpdater.reset( - intconfig_->use_old_updater - ? new RemoteParameterUpdater( - *config_, intconfig_->num_passes, std::move(localUpdater)) - : new ConcurrentRemoteParameterUpdater( - *config_, intconfig_->num_passes, std::move(localUpdater))); - - if (config_->getOptConfig().use_sparse_remote_updater()) { - localUpdater.reset( - new SparseRemoteParameterUpdaterComposite(*config_, - intconfig_->num_passes, - testing, - std::move(localUpdater))); - } - - this->parameterUpdater_ = std::move(localUpdater); - } - } else { - CHECK_EQ(config_->getOptConfig().num_batches_per_send_parameter(), 1) - << "num_batches_per_send_parameter should be one in local mode!"; - - if (GradientMachine::kSgdSparseCpuTraining == intconfig_->mode) { - parameterUpdater_.reset(new SgdThreadUpdater(*config_)); - } else if (alg == TrainAlgorithm::SGD || alg == TrainAlgorithm::AsyncSGD) { - if (config_->getModelConfig().type() == "recursive_nn") { - parameterUpdater_.reset(new SgdCpuUpdater(*config_)); - } else if (intconfig_->use_gpu && - config_->getOptConfig().do_average_in_cpu() && - config_->getOptConfig().average_window() > 0) { - parameterUpdater_.reset(new SgdUpdaterWithCpuAverager(*config_)); - } else { - parameterUpdater_.reset(new SgdLocalUpdater(*config_)); - } - } else { - LOG(FATAL) << "Unsupported algorithm in local mode: " << alg; - } - } -} - -void 
TrainerInternal::forwardBackwardBatch(const std::vector& inArgs, - std::vector& outArgs, - PassType& passType, - UpdateCallback updateCallback, - bool doPipelineUpdate) { - gradientMachine_->forwardBackward( - inArgs, &outArgs, passType, doPipelineUpdate ? updateCallback : nullptr); -} - -} // namespace paddle diff --git a/paddle/legacy/trainer/TrainerInternal.h b/paddle/legacy/trainer/TrainerInternal.h deleted file mode 100644 index 93919a68fca2930cdf106f45d112e2a459fe695a..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TrainerInternal.h +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "paddle/legacy/utils/Util.h" - -#include -#include -#include - -#include "ParameterUpdater.h" -#include "TrainerConfig.pb.h" -#include "TrainerConfigHelper.h" -#include "TrainerInternalConfig.h" -#include "hl_gpu.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" - -namespace paddle { - -/** - * TrainerInteral - * the core training class for driving training logic - */ -class TrainerInternal { - public: - struct ParaStat { - real maxAbsGrad; - real avgAbsGrad; - ParaStat() : maxAbsGrad(.0), avgAbsGrad(.0) {} - }; - - TrainerInternal() {} - - /** - * Intializes trainer internal class - * @param config network config - * @param machine gradient machine - * @param intconfig training config - * @param stats training stats - * @param testing if it is in testing phase - */ - void init(const std::shared_ptr& config, - const GradientMachinePtr& machine, - std::unique_ptr&& intconfig, - const std::shared_ptr& stats, - bool testing); - - virtual ~TrainerInternal() {} - - /** - * CreateParameterUpdater - * @param testing if it is in testing phase - */ - void createParameterUpdater(bool testing); - - /** - * FinishTrainPass - * @param passId current pass id - * @param batchId current batch id, starts from 0 - */ - void finishTrainPass(int passId, int batchId); - - /** - * trainOneBatch - * @param batchId current batch id - * @param dataBatch data for the batch - */ - void trainOneBatch(int64_t batchId, - const DataBatch& dataBatch, - std::vector* outArgs); - - /** - * showParameterStats - * @param paraStats training stats - */ - void showParameterStats(const std::vector& paraStats); - - /** - * getGradientMachine - */ - inline const GradientMachinePtr& getGradientMachine() const { - return gradientMachine_; - } - - /** - * getParameterUpdater - */ - inline const std::shared_ptr& getParameterUpdater() { - return parameterUpdater_; - } - - /** - * setCurrentEvaluator - * @param eval evaluator to set - */ - inline void 
setCurrentEvaluator(Evaluator* eval) { currentEvaluator_ = eval; } - - /** - * setEvaluator - * @param eval evaluator to set - */ - inline void setEvaluator(Evaluator* eval) { evaluator_ = eval; } - - /** - * forwardBackwardBatch - * @param inArgs input argument for data batch - * @param outArgs output argument from neural network - * @param updateCallback layerwise parameter gradient statistics - * @param doPipelineUpdate whether to do pipeline update - */ - virtual void forwardBackwardBatch(const std::vector& inArgs, - std::vector& outArgs, - PassType& passType, - UpdateCallback updateCallback, - bool doPipelineUpdate); - - protected: - std::shared_ptr parameterUpdater_; - GradientMachinePtr gradientMachine_; - std::shared_ptr config_; - std::unique_ptr intconfig_; - std::shared_ptr stats_; - Evaluator* currentEvaluator_; - Evaluator* evaluator_; -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/TrainerInternalConfig.cpp b/paddle/legacy/trainer/TrainerInternalConfig.cpp deleted file mode 100644 index 039fcdb524527d5e8bfa829fc403b6f2fa789991..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TrainerInternalConfig.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "TrainerInternalConfig.h" - -DEFINE_int32(show_parameter_stats_period, - 0, - "Whether to show parameter stats during training"); - -DEFINE_int32(dot_period, 1, "Print '.' 
every so many batches"); - -DEFINE_bool(use_old_updater, false, "Use the old RemoteParameterUpdater"); - -DECLARE_int32(num_passes); - -DECLARE_bool(local); - -namespace paddle { - -std::unique_ptr TrainerInternalConfig::createFromMode( - GradientMachine::CreateMode mode) { - auto config = new TrainerInternalConfig(); - config->mode = mode; - config->local = FLAGS_local; - config->use_gpu = FLAGS_use_gpu; - config->trainer_count = FLAGS_trainer_count; - config->show_param_stats_period = FLAGS_show_parameter_stats_period; - config->trainer_id = FLAGS_trainer_id; - config->log_period = FLAGS_log_period; - config->dot_period = FLAGS_dot_period; - config->num_passes = FLAGS_num_passes; - config->use_old_updater = FLAGS_use_old_updater; - config->loadsave_parameters_in_pserver = FLAGS_loadsave_parameters_in_pserver; - - return std::unique_ptr(config); -} - -} // namespace paddle diff --git a/paddle/legacy/trainer/TrainerInternalConfig.h b/paddle/legacy/trainer/TrainerInternalConfig.h deleted file mode 100644 index b91b53932381a8698b331a2989b5f16829c06a25..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TrainerInternalConfig.h +++ /dev/null @@ -1,233 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include "paddle/legacy/utils/Util.h" - -#include - -#include "hl_gpu.h" -#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" - -#include "TrainerConfig.pb.h" - -#include -#include -#include -#include "ParameterUpdater.h" - -namespace paddle { -/** - * @brief TrainerStats object will statistics sample processed and total cost. - * - * There are two stats in it, the 'AvgCost' and 'CurrentAvgCost'. 'AvgCost' - * means cost through one pass(all mini-batches). 'CurrentAvgCost' means cost - * through one mini-batch. - */ -class TrainerStats { - public: - /** - * @brief reset all stats. - * - * often used before pass start. - */ - inline void reset() { - numProcessed_ = 0; - totalCost_ = .0; - this->resetCurrentStat(); - } - - /** - * @brief reset current stat. - * - * 'current' means the most recent --log_period mini-batches - */ - inline void resetCurrentStat() { - currentCost_ = .0; - currentSamples_ = 0; - } - - /** - * @brief add cost to stat. - * @param numProcessed current mini-batch size - * @param cost current mini-batch cost - */ - inline void addCost(int64_t numProcessed, real cost) { - this->numProcessed_ += numProcessed; - this->totalCost_ += cost; - this->currentSamples_ += numProcessed; - this->currentCost_ += cost; - } - - /** - * @brief get average cost through on pass(all processed mini-batches) - * @return pass average cost - */ - inline real getAvgCost() const { - CHECK_NE(this->numProcessed_, 0); - return this->totalCost_ / this->numProcessed_; - } - - /** - * @brief get current mini-batch's average cost. - * @return mini-batch average cost - */ - inline real getCurrentAvgCost() const { - CHECK_NE(this->currentSamples_, 0); - return this->currentCost_ / this->currentSamples_; - } - - /** - * @brief get all processed samples' number - * @return all processed samples' number - */ - inline int64_t getNumProcessed() const { return this->numProcessed_; } - - /** - * @brief same function as addCost. 
But it is simple to invoke. - * For example: - * - * @code{.cpp} - * TrainerStats stat; - * cost = neuralNetwork.forward(batchSize); - * stat += {batchSize, cost}; - * @endcode - * - * @param p a pair of parameter, first is numProcessed, second is cost. - * @return *this - */ - inline TrainerStats& operator+=(const std::pair& p) { - this->addCost(p.first, p.second); - return *this; - } - - /** - * @brief TrainerStats Constructor. - * - * reset stat when constructed. - */ - inline TrainerStats() { this->reset(); } - - /** - * @brief show stats to ostream. - * - * If there is no need to print current cost, set withCurrentCost to False. - * - * @param os output stream. - * @param withCurrentCost print current cost or not. - */ - void showStats(std::ostream& os, bool withCurrentCost = true) const { - os << "samples=" << this->getNumProcessed() - << " AvgCost=" << this->getAvgCost(); - if (withCurrentCost) { - os << " CurrentCost=" << this->getCurrentAvgCost(); - } - } - - /** - * @brief get stats to std::string - * @param withCurrentCost return current cost or not - * @return stats string - */ - std::string getStats(bool withCurrentCost = true) const { - std::ostringstream os; - this->showStats(os, withCurrentCost); - return os.str(); - } - - private: - int64_t numProcessed_; - real totalCost_; - real currentCost_; - int64_t currentSamples_; -}; - -inline std::ostream& operator<<(std::ostream& os, const TrainerStats& stats) { - stats.showStats(os); - return os; -} - -/** - * TrainerInternalConfig - * general configs for training - */ -struct TrainerInternalConfig { - /** - * @brief Create TrainerInternalConfig from GradientMachine::CreateMode and - * command line arguments. 
- * @param mode - * @return - */ - static std::unique_ptr createFromMode( - GradientMachine::CreateMode mode); - - /** - * indicate whether the training is local - * if local, no parameter server is used - */ - bool local; - - /** - * indicate whether training uses GPU - */ - bool use_gpu; - - /** - * indicate number of trainer - */ - int trainer_count; - - /** - * how frequently to show param stats - */ - int show_param_stats_period; - - /** - * current trainer id - */ - int trainer_id; - - /** - * frequency to dump log - */ - int log_period; - - /** - * dot period - */ - int dot_period; - - /** - * num passes for training - */ - int num_passes; - - /** - * use old updater - */ - bool use_old_updater; - - /** - * whether to load and save parameter in pserver - */ - bool loadsave_parameters_in_pserver; - - /** - * training mode - */ - GradientMachine::CreateMode mode; -}; - -} // namespace paddle diff --git a/paddle/legacy/trainer/TrainerMain.cpp b/paddle/legacy/trainer/TrainerMain.cpp deleted file mode 100644 index 911aeba1928f7208aecb92910dac981f00fc6db5..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/TrainerMain.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include "paddle/legacy/pserver/ParameterServerController.h" -#include "paddle/legacy/utils/PythonUtil.h" - -#include "ParamUtil.h" -#include "Trainer.h" - -DEFINE_bool(start_pserver, false, "Whether to start pserver"); -DECLARE_int32(gpu_id); -DEFINE_string(job, "train", "one of (train, test, checkgrad)"); -DECLARE_int32(start_pass); -DECLARE_string(config); -DECLARE_string(init_model_path); -DECLARE_string(rdma_tcp); - -using namespace paddle; // NOLINT - -int main(int argc, char** argv) { - // write logs instantly (never buffer log messages) - FLAGS_logbuflevel = -1; - - initMain(argc, argv); - initPython(argc, argv); - - std::unique_ptr parameterServerPtr(nullptr); - if (FLAGS_start_pserver) { - parameterServerPtr.reset( - paddle::ParameterServerController::createFromGflags()); - parameterServerPtr->start(); - } - Trainer trainer; - auto config = TrainerConfigHelper::createFromFlags(); - CHECK(config != nullptr) << "no valid config"; - - feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW); - trainer.init(config, FLAGS_job == "test"); - - if (FLAGS_job == "train") { - trainer.train(); - } else if (FLAGS_job == "checkgrad") { - trainer.checkGradient(); - } else if (FLAGS_job == "test") { - trainer.test(); - } else if (FLAGS_job == "time") { - trainer.time(); - } else { - LOG(FATAL) << "Unknown job type: " << FLAGS_job; - } - - return 0; -} diff --git a/paddle/legacy/trainer/tests/.gitignore b/paddle/legacy/trainer/tests/.gitignore deleted file mode 100644 index aedb0ef22e02344af27d18dc3f500fab23f6686f..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -dump_text.test -test_pydata_provider_wrapper.json -*proto.bin diff --git a/paddle/legacy/trainer/tests/CMakeLists.txt b/paddle/legacy/trainer/tests/CMakeLists.txt deleted file mode 100644 index fbefcced5643b65372072856bfeb6c87cd4071a8..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/CMakeLists.txt +++ 
/dev/null @@ -1,41 +0,0 @@ -add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/sample_trainer_config.conf - COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/* ${CMAKE_CURRENT_BINARY_DIR} -) -add_custom_target(copy_trainer_conf ALL DEPENDS sample_trainer_config.conf) - -set(PYTHON_PATH - ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d - ${PADDLE_BINARY_DIR}/python/:${PADDLE_BINARY_DIR}/paddle/legacy/trainer/tests) -function(trainer_test TARGET) - add_unittest_without_exec(${TARGET} ${TARGET}.cpp) - add_test(NAME ${TARGET} - COMMAND ${PYTHON_PATH} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET} - WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle/) -endfunction() - -trainer_test(test_Compare) -trainer_test(test_PyDataProviderWrapper) -trainer_test(test_recurrent_machine_generation) -if(NOT APPLE) - trainer_test(test_Trainer) -else() - message(WARNING "These tests has been disabled in OSX for random fail: \n test_Trainer") -endif() - -############### test_TrainerOnePass ########################## -if(WITH_PYTHON) - # only run test_TrainerOnePass when PYTHON is enabled, because train one pass - # is using PyDataProvider2. 
- add_unittest_without_exec(test_TrainerOnePass - test_TrainerOnePass.cpp) - add_test(NAME test_TrainerOnePass - COMMAND ${PYTHON_PATH} ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port - ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass - WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle/) -endif() - -#################### test_config_parser ######################### -add_test(NAME test_config_parser - COMMAND ${PYTHON_PATH} ${PYTHON_EXECUTABLE} - ${PADDLE_SOURCE_DIR}/paddle/legacy/trainer/tests/config_parser_test.py - WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle/) diff --git a/paddle/legacy/trainer/tests/__init__.py b/paddle/legacy/trainer/tests/__init__.py deleted file mode 100644 index f662d6826321eb840739382558f76327d27b5847..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/paddle/legacy/trainer/tests/config_parser_test.py b/paddle/legacy/trainer/tests/config_parser_test.py deleted file mode 100644 index 0d3d82cbdafcf85d42247e810fe7caa685a86e4d..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/config_parser_test.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import parse_config_and_serialize - -if __name__ == '__main__': - parse_config_and_serialize('legacy/trainer/tests/test_config.conf', '') - parse_config_and_serialize( - 'legacy/trainer/tests/sample_trainer_config.conf', - 'extension_module_name=paddle.trainer.config_parser_extension') - parse_config_and_serialize( - 'legacy/gserver/tests/pyDataProvider/trainer.conf', '') diff --git a/paddle/legacy/trainer/tests/fake_file_list.list b/paddle/legacy/trainer/tests/fake_file_list.list deleted file mode 100644 index f27ceed277f97ab9c8ea1c9b9d8475b13ccf3ddd..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/fake_file_list.list +++ /dev/null @@ -1 +0,0 @@ -do_not_matter.txt diff --git a/paddle/legacy/trainer/tests/picojson.h b/paddle/legacy/trainer/tests/picojson.h deleted file mode 100644 index 75349537b1c7f10d23bae788e8414a753c7ccab0..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/picojson.h +++ /dev/null @@ -1,1103 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -/* - * Copyright 2009-2010 Cybozu Labs, Inc. - * Copyright 2011-2014 Kazuho Oku - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef picojson_h -#define picojson_h - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// for isnan/isinf -#if __cplusplus >= 201103L -#include -#else -extern "C" { -#ifdef _MSC_VER -#include -#elif defined(__INTEL_COMPILER) -#include -#else -#include -#endif -} -#endif - -// experimental support for int64_t (see README.mkdn for detail) -#ifdef PICOJSON_USE_INT64 -#define __STDC_FORMAT_MACROS -#include -#include -#endif - -// to disable the use of localeconv(3), set PICOJSON_USE_LOCALE to 0 -#ifndef PICOJSON_USE_LOCALE -#define PICOJSON_USE_LOCALE 1 -#endif -#if PICOJSON_USE_LOCALE -extern "C" { -#include -} -#endif - -#ifndef PICOJSON_ASSERT -#define PICOJSON_ASSERT(e) \ - do { \ - if (!(e)) throw std::runtime_error(#e); \ - } while (0) -#endif - -#ifdef _MSC_VER -#define SNPRINTF _snprintf_s -#pragma warning(push) -#pragma warning(disable : 4244) // conversion from int to char -#pragma warning(disable : 4127) // conditional expression is constant -#pragma warning(disable : 4702) // unreachable code -#else -#define SNPRINTF snprintf -#endif - -namespace picojson { - -enum { - null_type, - boolean_type, - number_type, - string_type, - array_type, - object_type -#ifdef PICOJSON_USE_INT64 - , - int64_type -#endif -}; - -enum { INDENT_WIDTH = 2 }; - -struct null {}; - -class value { - public: - typedef std::vector array; - typedef std::map object; - union _storage { - bool boolean_; - double number_; -#ifdef PICOJSON_USE_INT64 - int64_t int64_; -#endif - std::string* string_; - array* array_; - object* object_; - }; - - protected: - int type_; - _storage u_; - - public: - value(); - value(int type, bool); - explicit value(bool b); -#ifdef PICOJSON_USE_INT64 - explicit value(int64_t i); -#endif - explicit value(double n); - explicit value(const std::string& s); - explicit value(const array& a); - explicit value(const object& o); - explicit value(const char* s); - value(const char* s, 
size_t len); - ~value(); - value(const value& x); - value& operator=(const value& x); - void swap(value& x); - template - bool is() const; - template - const T& get() const; - template - T& get(); - bool evaluate_as_boolean() const; - const value& get(size_t idx) const; - const value& get(const std::string& key) const; - value& get(size_t idx); - value& get(const std::string& key); - - bool contains(size_t idx) const; - bool contains(const std::string& key) const; - std::string to_str() const; - template - void serialize(Iter os, bool prettify = false) const; - std::string serialize(bool prettify = false) const; - - private: - template - value(const T*); // intentionally defined to block implicit conversion of - // pointer to bool - template - static void _indent(Iter os, int indent); - template - void _serialize(Iter os, int indent) const; - std::string _serialize(int indent) const; -}; - -typedef value::array array; -typedef value::object object; - -inline value::value() : type_(null_type) {} - -inline value::value(int type, bool) : type_(type) { - switch (type) { -#define INIT(p, v) \ - case p##type: \ - u_.p = v; \ - break - INIT(boolean_, false); - INIT(number_, 0.0); -#ifdef PICOJSON_USE_INT64 - INIT(int64_, 0); -#endif - INIT(string_, new std::string()); - INIT(array_, new array()); - INIT(object_, new object()); -#undef INIT - default: - break; - } -} - -inline value::value(bool b) : type_(boolean_type) { u_.boolean_ = b; } - -#ifdef PICOJSON_USE_INT64 -inline value::value(int64_t i) : type_(int64_type) { u_.int64_ = i; } -#endif - -inline value::value(double n) : type_(number_type) { - if ( -#ifdef _MSC_VER - !_finite(n) -#elif __cplusplus >= 201103L || !(defined(isnan) && defined(isinf)) - std::isnan(n) || std::isinf(n) -#else - isnan(n) || isinf(n) -#endif - ) { - throw std::overflow_error(""); - } - u_.number_ = n; -} - -inline value::value(const std::string& s) : type_(string_type) { - u_.string_ = new std::string(s); -} - -inline value::value(const 
array& a) : type_(array_type) { - u_.array_ = new array(a); -} - -inline value::value(const object& o) : type_(object_type) { - u_.object_ = new object(o); -} - -inline value::value(const char* s) : type_(string_type) { - u_.string_ = new std::string(s); -} - -inline value::value(const char* s, size_t len) : type_(string_type) { - u_.string_ = new std::string(s, len); -} - -inline value::~value() { - switch (type_) { -#define DEINIT(p) \ - case p##type: \ - delete u_.p; \ - break - DEINIT(string_); - DEINIT(array_); - DEINIT(object_); -#undef DEINIT - default: - break; - } -} - -inline value::value(const value& x) : type_(x.type_) { - switch (type_) { -#define INIT(p, v) \ - case p##type: \ - u_.p = v; \ - break - INIT(string_, new std::string(*x.u_.string_)); - INIT(array_, new array(*x.u_.array_)); - INIT(object_, new object(*x.u_.object_)); -#undef INIT - default: - u_ = x.u_; - break; - } -} - -inline value& value::operator=(const value& x) { - if (this != &x) { - value t(x); - swap(t); - } - return *this; -} - -inline void value::swap(value& x) { - std::swap(type_, x.type_); - std::swap(u_, x.u_); -} - -#define IS(ctype, jtype) \ - template <> \ - inline bool value::is() const { \ - return type_ == jtype##_type; \ - } -IS(null, null) -IS(bool, boolean) -#ifdef PICOJSON_USE_INT64 -IS(int64_t, int64) -#endif -IS(std::string, string) -IS(array, array) -IS(object, object) -#undef IS -template <> -inline bool value::is() const { - return type_ == number_type -#ifdef PICOJSON_USE_INT64 - || type_ == int64_type -#endif - ; -} - -#define GET(ctype, var) \ - template <> \ - inline const ctype& value::get() const { \ - PICOJSON_ASSERT("type mismatch! call is() before get()" && \ - is()); \ - return var; \ - } \ - template <> \ - inline ctype& value::get() { \ - PICOJSON_ASSERT("type mismatch! 
call is() before get()" && \ - is()); \ - return var; \ - } -GET(bool, u_.boolean_) -GET(std::string, *u_.string_) -GET(array, *u_.array_) -GET(object, *u_.object_) -#ifdef PICOJSON_USE_INT64 -GET(double, - (type_ == int64_type && (const_cast(this)->type_ = number_type, - const_cast(this)->u_.number_ = u_.int64_), - u_.number_)) -GET(int64_t, u_.int64_) -#else -GET(double, u_.number_) -#endif -#undef GET - -inline bool value::evaluate_as_boolean() const { - switch (type_) { - case null_type: - return false; - case boolean_type: - return u_.boolean_; - case number_type: - return u_.number_ != 0; -#ifdef PICOJSON_USE_INT64 - case int64_type: - return u_.int64_ != 0; -#endif - case string_type: - return !u_.string_->empty(); - default: - return true; - } -} - -inline const value& value::get(size_t idx) const { - static value s_null; - PICOJSON_ASSERT(is()); - return idx < u_.array_->size() ? (*u_.array_)[idx] : s_null; -} - -inline value& value::get(size_t idx) { - static value s_null; - PICOJSON_ASSERT(is()); - return idx < u_.array_->size() ? (*u_.array_)[idx] : s_null; -} - -inline const value& value::get(const std::string& key) const { - static value s_null; - PICOJSON_ASSERT(is()); - object::const_iterator i = u_.object_->find(key); - return i != u_.object_->end() ? i->second : s_null; -} - -inline value& value::get(const std::string& key) { - static value s_null; - PICOJSON_ASSERT(is()); - object::iterator i = u_.object_->find(key); - return i != u_.object_->end() ? i->second : s_null; -} - -inline bool value::contains(size_t idx) const { - PICOJSON_ASSERT(is()); - return idx < u_.array_->size(); -} - -inline bool value::contains(const std::string& key) const { - PICOJSON_ASSERT(is()); - object::const_iterator i = u_.object_->find(key); - return i != u_.object_->end(); -} - -inline std::string value::to_str() const { - switch (type_) { - case null_type: - return "null"; - case boolean_type: - return u_.boolean_ ? 
"true" : "false"; -#ifdef PICOJSON_USE_INT64 - case int64_type: { - char buf[sizeof("-9223372036854775808")]; - SNPRINTF(buf, sizeof(buf), "%" PRId64, u_.int64_); - return buf; - } -#endif - case number_type: { - char buf[256]; - double tmp; - SNPRINTF(buf, - sizeof(buf), - fabs(u_.number_) < (1ULL << 53) && modf(u_.number_, &tmp) == 0 - ? "%.f" - : "%.17g", - u_.number_); -#if PICOJSON_USE_LOCALE - char* decimal_point = localeconv()->decimal_point; - if (strcmp(decimal_point, ".") != 0) { - size_t decimal_point_len = strlen(decimal_point); - for (char* p = buf; *p != '\0'; ++p) { - if (strncmp(p, decimal_point, decimal_point_len) == 0) { - return std::string(buf, p) + "." + (p + decimal_point_len); - } - } - } -#endif - return buf; - } - case string_type: - return *u_.string_; - case array_type: - return "array"; - case object_type: - return "object"; - default: - PICOJSON_ASSERT(0); -#ifdef _MSC_VER - __assume(0); -#endif - } - return std::string(); -} - -template -void copy(const std::string& s, Iter oi) { - std::copy(s.begin(), s.end(), oi); -} - -template -void serialize_str(const std::string& s, Iter oi) { - *oi++ = '"'; - for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) { - switch (*i) { -#define MAP(val, sym) \ - case val: \ - copy(sym, oi); \ - break - MAP('"', "\\\""); - MAP('\\', "\\\\"); - MAP('/', "\\/"); - MAP('\b', "\\b"); - MAP('\f', "\\f"); - MAP('\n', "\\n"); - MAP('\r', "\\r"); - MAP('\t', "\\t"); -#undef MAP - default: - if (static_cast(*i) < 0x20 || *i == 0x7f) { - char buf[7]; - SNPRINTF(buf, sizeof(buf), "\\u%04x", *i & 0xff); - copy(buf, buf + 6, oi); - } else { - *oi++ = *i; - } - break; - } - } - *oi++ = '"'; -} - -template -void value::serialize(Iter oi, bool prettify) const { - return _serialize(oi, prettify ? 0 : -1); -} - -inline std::string value::serialize(bool prettify) const { - return _serialize(prettify ? 
0 : -1); -} - -template -void value::_indent(Iter oi, int indent) { - *oi++ = '\n'; - for (int i = 0; i < indent * INDENT_WIDTH; ++i) { - *oi++ = ' '; - } -} - -template -void value::_serialize(Iter oi, int indent) const { - switch (type_) { - case string_type: - serialize_str(*u_.string_, oi); - break; - case array_type: { - *oi++ = '['; - if (indent != -1) { - ++indent; - } - for (array::const_iterator i = u_.array_->begin(); i != u_.array_->end(); - ++i) { - if (i != u_.array_->begin()) { - *oi++ = ','; - } - if (indent != -1) { - _indent(oi, indent); - } - i->_serialize(oi, indent); - } - if (indent != -1) { - --indent; - if (!u_.array_->empty()) { - _indent(oi, indent); - } - } - *oi++ = ']'; - break; - } - case object_type: { - *oi++ = '{'; - if (indent != -1) { - ++indent; - } - for (object::const_iterator i = u_.object_->begin(); - i != u_.object_->end(); - ++i) { - if (i != u_.object_->begin()) { - *oi++ = ','; - } - if (indent != -1) { - _indent(oi, indent); - } - serialize_str(i->first, oi); - *oi++ = ':'; - if (indent != -1) { - *oi++ = ' '; - } - i->second._serialize(oi, indent); - } - if (indent != -1) { - --indent; - if (!u_.object_->empty()) { - _indent(oi, indent); - } - } - *oi++ = '}'; - break; - } - default: - copy(to_str(), oi); - break; - } - if (indent == 0) { - *oi++ = '\n'; - } -} - -inline std::string value::_serialize(int indent) const { - std::string s; - _serialize(std::back_inserter(s), indent); - return s; -} - -template -class input { - protected: - Iter cur_, end_; - int last_ch_; - bool ungot_; - int line_; - - public: - input(const Iter& first, const Iter& last) - : cur_(first), end_(last), last_ch_(-1), ungot_(false), line_(1) {} - int getc() { - if (ungot_) { - ungot_ = false; - return last_ch_; - } - if (cur_ == end_) { - last_ch_ = -1; - return -1; - } - if (last_ch_ == '\n') { - line_++; - } - last_ch_ = *cur_ & 0xff; - ++cur_; - return last_ch_; - } - void ungetc() { - if (last_ch_ != -1) { - PICOJSON_ASSERT(!ungot_); - 
ungot_ = true; - } - } - Iter cur() const { return cur_; } - int line() const { return line_; } - void skip_ws() { - while (1) { - int ch = getc(); - if (!(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')) { - ungetc(); - break; - } - } - } - bool expect(int expect) { - skip_ws(); - if (getc() != expect) { - ungetc(); - return false; - } - return true; - } - bool match(const std::string& pattern) { - for (std::string::const_iterator pi(pattern.begin()); pi != pattern.end(); - ++pi) { - if (getc() != *pi) { - ungetc(); - return false; - } - } - return true; - } -}; - -template -inline int _parse_quadhex(input& in) { - int uni_ch = 0, hex; - for (int i = 0; i < 4; i++) { - if ((hex = in.getc()) == -1) { - return -1; - } - if ('0' <= hex && hex <= '9') { - hex -= '0'; - } else if ('A' <= hex && hex <= 'F') { - hex -= 'A' - 0xa; - } else if ('a' <= hex && hex <= 'f') { - hex -= 'a' - 0xa; - } else { - in.ungetc(); - return -1; - } - uni_ch = uni_ch * 16 + hex; - } - return uni_ch; -} - -template -inline bool _parse_codepoint(String& out, input& in) { - int uni_ch; - if ((uni_ch = _parse_quadhex(in)) == -1) { - return false; - } - if (0xd800 <= uni_ch && uni_ch <= 0xdfff) { - if (0xdc00 <= uni_ch) { - // a second 16-bit of a surrogate pair appeared - return false; - } - // first 16-bit of surrogate pair, get the next one - if (in.getc() != '\\' || in.getc() != 'u') { - in.ungetc(); - return false; - } - int second = _parse_quadhex(in); - if (!(0xdc00 <= second && second <= 0xdfff)) { - return false; - } - uni_ch = ((uni_ch - 0xd800) << 10) | ((second - 0xdc00) & 0x3ff); - uni_ch += 0x10000; - } - if (uni_ch < 0x80) { - out.push_back(uni_ch); - } else { - if (uni_ch < 0x800) { - out.push_back(0xc0 | (uni_ch >> 6)); - } else { - if (uni_ch < 0x10000) { - out.push_back(0xe0 | (uni_ch >> 12)); - } else { - out.push_back(0xf0 | (uni_ch >> 18)); - out.push_back(0x80 | ((uni_ch >> 12) & 0x3f)); - } - out.push_back(0x80 | ((uni_ch >> 6) & 0x3f)); - } - out.push_back(0x80 | 
(uni_ch & 0x3f)); - } - return true; -} - -template -inline bool _parse_string(String& out, input& in) { - while (1) { - int ch = in.getc(); - if (ch < ' ') { - in.ungetc(); - return false; - } else if (ch == '"') { - return true; - } else if (ch == '\\') { - if ((ch = in.getc()) == -1) { - return false; - } - switch (ch) { -#define MAP(sym, val) \ - case sym: \ - out.push_back(val); \ - break - MAP('"', '\"'); - MAP('\\', '\\'); - MAP('/', '/'); - MAP('b', '\b'); - MAP('f', '\f'); - MAP('n', '\n'); - MAP('r', '\r'); - MAP('t', '\t'); -#undef MAP - case 'u': - if (!_parse_codepoint(out, in)) { - return false; - } - break; - default: - return false; - } - } else { - out.push_back(ch); - } - } - return false; -} - -template -inline bool _parse_array(Context& ctx, input& in) { - if (!ctx.parse_array_start()) { - return false; - } - size_t idx = 0; - if (in.expect(']')) { - return ctx.parse_array_stop(idx); - } - do { - if (!ctx.parse_array_item(in, idx)) { - return false; - } - idx++; - } while (in.expect(',')); - return in.expect(']') && ctx.parse_array_stop(idx); -} - -template -inline bool _parse_object(Context& ctx, input& in) { - if (!ctx.parse_object_start()) { - return false; - } - if (in.expect('}')) { - return true; - } - do { - std::string key; - if (!in.expect('"') || !_parse_string(key, in) || !in.expect(':')) { - return false; - } - if (!ctx.parse_object_item(in, key)) { - return false; - } - } while (in.expect(',')); - return in.expect('}'); -} - -template -inline std::string _parse_number(input& in) { - std::string num_str; - while (1) { - int ch = in.getc(); - if (('0' <= ch && ch <= '9') || ch == '+' || ch == '-' || ch == 'e' || - ch == 'E') { - num_str.push_back(ch); - } else if (ch == '.') { -#if PICOJSON_USE_LOCALE - num_str += localeconv()->decimal_point; -#else - num_str.push_back('.'); -#endif - } else { - in.ungetc(); - break; - } - } - return num_str; -} - -template -inline bool _parse(Context& ctx, input& in) { - in.skip_ws(); - int ch = 
in.getc(); - switch (ch) { -#define IS(ch, text, op) \ - case ch: \ - if (in.match(text) && op) { \ - return true; \ - } else { \ - return false; \ - } - IS('n', "ull", ctx.set_null()); - IS('f', "alse", ctx.set_bool(false)); - IS('t', "rue", ctx.set_bool(true)); -#undef IS - case '"': - return ctx.parse_string(in); - case '[': - return _parse_array(ctx, in); - case '{': - return _parse_object(ctx, in); - default: - if (('0' <= ch && ch <= '9') || ch == '-') { - double f; - char* endp; - in.ungetc(); - std::string num_str = _parse_number(in); - if (num_str.empty()) { - return false; - } -#ifdef PICOJSON_USE_INT64 - { - errno = 0; - intmax_t ival = strtoimax(num_str.c_str(), &endp, 10); - if (errno == 0 && std::numeric_limits::min() <= ival && - ival <= std::numeric_limits::max() && - endp == num_str.c_str() + num_str.size()) { - ctx.set_int64(ival); - return true; - } - } -#endif - f = strtod(num_str.c_str(), &endp); - if (endp == num_str.c_str() + num_str.size()) { - ctx.set_number(f); - return true; - } - return false; - } - break; - } - in.ungetc(); - return false; -} - -class deny_parse_context { - public: - bool set_null() { return false; } - bool set_bool(bool) { return false; } -#ifdef PICOJSON_USE_INT64 - bool set_int64(int64_t) { return false; } -#endif - bool set_number(double) { return false; } - template - bool parse_string(input&) { - return false; - } - bool parse_array_start() { return false; } - template - bool parse_array_item(input&, size_t) { - return false; - } - bool parse_array_stop(size_t) { return false; } - bool parse_object_start() { return false; } - template - bool parse_object_item(input&, const std::string&) { - return false; - } -}; - -class default_parse_context { - protected: - value* out_; - - public: - default_parse_context(value* out) : out_(out) {} - bool set_null() { - *out_ = value(); - return true; - } - bool set_bool(bool b) { - *out_ = value(b); - return true; - } -#ifdef PICOJSON_USE_INT64 - bool set_int64(int64_t i) { - 
*out_ = value(i); - return true; - } -#endif - bool set_number(double f) { - *out_ = value(f); - return true; - } - template - bool parse_string(input& in) { - *out_ = value(string_type, false); - return _parse_string(out_->get(), in); - } - bool parse_array_start() { - *out_ = value(array_type, false); - return true; - } - template - bool parse_array_item(input& in, size_t) { - array& a = out_->get(); - a.push_back(value()); - default_parse_context ctx(&a.back()); - return _parse(ctx, in); - } - bool parse_array_stop(size_t) { return true; } - bool parse_object_start() { - *out_ = value(object_type, false); - return true; - } - template - bool parse_object_item(input& in, const std::string& key) { - object& o = out_->get(); - default_parse_context ctx(&o[key]); - return _parse(ctx, in); - } - - private: - default_parse_context(const default_parse_context&); - default_parse_context& operator=(const default_parse_context&); -}; - -class null_parse_context { - public: - struct dummy_str { - void push_back(int) {} - }; - - public: - null_parse_context() {} - bool set_null() { return true; } - bool set_bool(bool) { return true; } -#ifdef PICOJSON_USE_INT64 - bool set_int64(int64_t) { return true; } -#endif - bool set_number(double) { return true; } - template - bool parse_string(input& in) { - dummy_str s; - return _parse_string(s, in); - } - bool parse_array_start() { return true; } - template - bool parse_array_item(input& in, size_t) { - return _parse(*this, in); - } - bool parse_array_stop(size_t) { return true; } - bool parse_object_start() { return true; } - template - bool parse_object_item(input& in, const std::string&) { - return _parse(*this, in); - } - - private: - null_parse_context(const null_parse_context&); - null_parse_context& operator=(const null_parse_context&); -}; - -// obsolete, use the version below -template -inline std::string parse(value& out, Iter& pos, const Iter& last) { - std::string err; - pos = parse(out, pos, last, &err); - return err; 
-} - -template -inline Iter _parse(Context& ctx, - const Iter& first, - const Iter& last, - std::string* err) { - input in(first, last); - if (!_parse(ctx, in) && err != NULL) { - char buf[64]; - SNPRINTF(buf, sizeof(buf), "syntax error at line %d near: ", in.line()); - *err = buf; - while (1) { - int ch = in.getc(); - if (ch == -1 || ch == '\n') { - break; - } else if (ch >= ' ') { - err->push_back(ch); - } - } - } - return in.cur(); -} - -template -inline Iter parse(value& out, - const Iter& first, - const Iter& last, - std::string* err) { - default_parse_context ctx(&out); - return _parse(ctx, first, last, err); -} - -inline std::string parse(value& out, const std::string& s) { - std::string err; - parse(out, s.begin(), s.end(), &err); - return err; -} - -inline std::string parse(value& out, std::istream& is) { - std::string err; - parse(out, - std::istreambuf_iterator(is.rdbuf()), - std::istreambuf_iterator(), - &err); - return err; -} - -template -struct last_error_t { - static std::string s; -}; -template -std::string last_error_t::s; - -inline void set_last_error(const std::string& s) { last_error_t::s = s; } - -inline const std::string& get_last_error() { return last_error_t::s; } - -inline bool operator==(const value& x, const value& y) { - if (x.is()) return y.is(); -#define PICOJSON_CMP(type) \ - if (x.is()) return y.is() && x.get() == y.get() - PICOJSON_CMP(bool); - PICOJSON_CMP(double); - PICOJSON_CMP(std::string); - PICOJSON_CMP(array); - PICOJSON_CMP(object); -#undef PICOJSON_CMP - PICOJSON_ASSERT(0); -#ifdef _MSC_VER - __assume(0); -#endif - return false; -} - -inline bool operator!=(const value& x, const value& y) { return !(x == y); } -} // namespace picojson - -namespace std { -template <> -inline void swap(picojson::value& x, picojson::value& y) { - x.swap(y); -} -} // namespace std - -inline std::istream& operator>>(std::istream& is, picojson::value& x) { - picojson::set_last_error(std::string()); - std::string err = picojson::parse(x, is); - 
if (!err.empty()) { - picojson::set_last_error(err); - is.setstate(std::ios::failbit); - } - return is; -} - -inline std::ostream& operator<<(std::ostream& os, const picojson::value& x) { - x.serialize(std::ostream_iterator(os)); - return os; -} -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#endif diff --git a/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data b/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data deleted file mode 100644 index ed83e6ae84bcf75d2e6238da2becdc7011df6d40..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data +++ /dev/null @@ -1,2 +0,0 @@ -0;0 1 3 5;1 3.42 2.25;2 4:4.2 6:2.8;3 aa -2;0 7 3 8;1 2.25 1.24;2 1:2.3 5:8.24;3 bb diff --git a/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list b/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list deleted file mode 100644 index 11c1b1b38b9edacc4953fdf526906d28bcc2d720..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list +++ /dev/null @@ -1 +0,0 @@ -legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data diff --git a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.beam b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.beam deleted file mode 100644 index 47401c949eff1d24ccd530750dd8af621d604860..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.beam +++ /dev/null @@ -1,60 +0,0 @@ -0 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -1 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -2 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -3 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -4 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -5 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -6 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -7 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -8 -0 0 1 2 3 
4 -1 -0.2 0 1 2 3 4 - -9 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -10 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -11 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -12 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -13 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - -14 -0 0 1 2 3 4 -1 -0.2 0 1 2 3 4 - diff --git a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nest b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nest deleted file mode 100644 index 02c7f142a34d833acf9d5e245e1eaa6f4293a0bc..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nest +++ /dev/null @@ -1,16 +0,0 @@ -0 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - 1 2 3 4 - diff --git a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nobeam b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nobeam deleted file mode 100644 index 23bf1179ebb2f10a99345722d1e7bf32b21ba550..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nobeam +++ /dev/null @@ -1,16 +0,0 @@ -0 1 2 3 4 -1 1 2 3 4 -2 1 2 3 4 -3 1 2 3 4 -4 1 2 3 4 -5 1 2 3 4 -6 1 2 3 4 -7 1 2 3 4 -8 1 2 3 4 -9 1 2 3 4 -10 1 2 3 4 -11 1 2 3 4 -12 1 2 3 4 -13 1 2 3 4 -14 1 2 3 4 - diff --git a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/transtable b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/transtable deleted file mode 100644 index 161624fbf795ac6188795a6350ab0887b53e6bba..0000000000000000000000000000000000000000 Binary files a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/transtable and /dev/null differ diff --git a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/wordvec b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/wordvec deleted file mode 100644 index 30ccf33d2e308ae12f1c719986d2a317344cf39b..0000000000000000000000000000000000000000 Binary files a/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/wordvec and /dev/null 
differ diff --git a/paddle/legacy/trainer/tests/sample_data.txt b/paddle/legacy/trainer/tests/sample_data.txt deleted file mode 100644 index 3398a38bdfcc1b96b1227f0f950ec7dfdb3e5500..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/sample_data.txt +++ /dev/null @@ -1,10 +0,0 @@ -0 1 2 -1 -2 3 -1 2 -1 2 2 1 -0 2 1 2 -1 3 1 2 -1 1 2 1 -0 3 -1 2 -1 -2 2 1 -2 2 1 2 -1 3 1 2 diff --git a/paddle/legacy/trainer/tests/sample_filelist.txt b/paddle/legacy/trainer/tests/sample_filelist.txt deleted file mode 100644 index 8573f9e1795edd37cfa0d21f0effc08a80d38e29..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/sample_filelist.txt +++ /dev/null @@ -1 +0,0 @@ -legacy/trainer/tests/sample_data.txt diff --git a/paddle/legacy/trainer/tests/sample_trainer_config.conf b/paddle/legacy/trainer/tests/sample_trainer_config.conf deleted file mode 100644 index 5800b3625661efac80b84b19c2a5cedc34718488..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/sample_trainer_config.conf +++ /dev/null @@ -1,87 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -TrainData(SimpleData( - files = "legacy/trainer/tests/sample_filelist.txt", - feat_dim = 3, - context_len = 0, - buffer_capacity = 1000000)) - -TestData(SimpleData( - files = "legacy/trainer/tests/sample_filelist.txt", - feat_dim = 3, - context_len = 0, - buffer_capacity = 1000000)) - -settings(batch_size = 100) - -data = data_layer(name='input', size=3) - -fc1 = fc_layer(input=data, size=5, - bias_attr=False, - act=SigmoidActivation()) - -fc2 = fc_layer(input=data, size=9, - bias_attr=False, - act=LinearActivation()) - -fc3 = fc_layer(input=data, size=3, - bias_attr=False, - act=TanhActivation()) - -fc4 = fc_layer(input=data, size=5, - bias_attr=False, - act=LinearActivation(), - param_attr=ParamAttr(name='sharew')) - -fc5 = fc_layer(input=data, size=5, - bias_attr=False, - act=BReluActivation()) - -fc6 = fc_layer(input=data, size=5, - bias_attr=False, - act=SoftReluActivation()) - -fc7 = fc_layer(input=data, size=3, - bias_attr=False, - act=SquareActivation()) - -fc8 = fc_layer(input=data, size=5, - bias_attr=True, - act=SquareActivation()) - -with mixed_layer(size=3, act=SoftmaxActivation()) as layer9: - layer9 += full_matrix_projection(input=fc1) - layer9 += full_matrix_projection(input=fc2) - layer9 += full_matrix_projection(input=fc3) - layer9 += trans_full_matrix_projection(input=fc4, - param_attr=ParamAttr(name='sharew')) - layer9 += full_matrix_projection(input=fc5) - layer9 += full_matrix_projection(input=fc6) - layer9 += full_matrix_projection(input=fc7) - layer9 += full_matrix_projection(input=fc8) - -if get_config_arg('with_cost', bool, True): - # This is for training the neural network. 
- # We need to have another data layer for label - # and a layer for calculating cost - lbl = data_layer(name='label', size=1) - outputs(classification_cost(input=layer9, label=lbl)) -else: - # This is for prediction where we don't have label - # and don't need to calculate cost - outputs(layer9) diff --git a/paddle/legacy/trainer/tests/sample_trainer_config_hsigmoid.conf b/paddle/legacy/trainer/tests/sample_trainer_config_hsigmoid.conf deleted file mode 100644 index 155c40b31f30c40e1ddeb65500f55162beb9a0ee..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/sample_trainer_config_hsigmoid.conf +++ /dev/null @@ -1,53 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -TrainData(SimpleData( - files = "legacy/trainer/tests/sample_filelist.txt", - feat_dim = 3, - context_len = 0, - buffer_capacity = 1000000, -)) - -settings(batch_size = 100) - -data = data_layer(name='input', size=3) - -fc1 = fc_layer(input=data, size=12, - bias_attr=False, - act=SigmoidActivation()) - -fc2 = fc_layer(input=data, size=19, - bias_attr=False, - act=LinearActivation()) - -fc3 = fc_layer(input=data, size=5, - bias_attr=False, - act=TanhActivation()) - -fc4 = fc_layer(input=data, size=5, - bias_attr=False, - act=LinearActivation()) - -# This is for training the neural network. 
-# We need to have another data layer for label -# and a layer for calculating cost -lbl = data_layer(name='label', size=1) - -outputs(hsigmoid(input=[fc1, fc2, fc3, fc4], - label=lbl, - num_classes=3)) diff --git a/paddle/legacy/trainer/tests/sample_trainer_config_parallel.conf b/paddle/legacy/trainer/tests/sample_trainer_config_parallel.conf deleted file mode 100644 index 49cdde7fa2c55e6536a49633f959af6a888ec463..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/sample_trainer_config_parallel.conf +++ /dev/null @@ -1,86 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -TrainData(SimpleData( - files = "legacy/trainer/tests/sample_filelist.txt", - feat_dim = 3, - context_len = 0, - buffer_capacity = 1000000)) - -TestData(SimpleData( - files = "legacy/trainer/tests/sample_filelist.txt", - feat_dim = 3, - context_len = 0, - buffer_capacity = 1000000)) - -settings(batch_size = 100) - -# Output layer, label layer, cost layer, preferably set to the same environment. -output_device = 0 - -# Input Layer does not need to specify the device number. -data = data_layer(name='input', size=3) - -# Calculate in the CPU. -fc1 = fc_layer(input=data, size=5, - bias_attr=True, - layer_attr=ExtraAttr(device=-1), - act=SigmoidActivation()) - -# Calculate in the GPU 0. 
-fc2 = fc_layer(input=fc1, size=10, - bias_attr=True, - layer_attr=ExtraAttr(device=0), - act=SigmoidActivation()) - -# Calculate in the GPU 1. -fc3 = fc_layer(input=fc1, size=10, - bias_attr=True, - layer_attr=ExtraAttr(device=1), - act=SigmoidActivation()) - -# Calculate in the GPU 0. -fc4 = fc_layer(input=[fc2,fc3], size=10, - bias_attr=True, - layer_attr=ExtraAttr(device=0), - act=SigmoidActivation()) - -# Calculate in the GPU 1. -fc5 = fc_layer(input=[fc2,fc3], size=10, - bias_attr=True, - layer_attr=ExtraAttr(device=1), - act=SigmoidActivation()) - -output = fc_layer(input=[fc4,fc5], size=10, - bias_attr=True, - layer_attr=ExtraAttr(device=output_device), - act=SoftmaxActivation()) - -if get_config_arg('with_cost', bool, True): - # This is for training the neural network. - # We need to have another data layer for label - # and a layer for calculating cost - lbl = data_layer(name='label', size=1, - layer_attr=ExtraAttr(device=output_device)) - - outputs(classification_cost(input=output, - label=lbl, - layer_attr=ExtraAttr(device=output_device))) -else: - # This is for prediction where we don't have label - # and don't need to calculate cost - outputs(output) diff --git a/paddle/legacy/trainer/tests/sample_trainer_nest_rnn_gen.conf b/paddle/legacy/trainer/tests/sample_trainer_nest_rnn_gen.conf deleted file mode 100644 index 51ef905a5a182464f69a1629e51bf8180eadb3fb..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/sample_trainer_nest_rnn_gen.conf +++ /dev/null @@ -1,73 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -settings(batch_size=15, learning_rate=0) - -num_words = 5 -beam_flag = get_config_arg('beam_search', bool, False) - -sent_id = data_layer(name="sent_id", size=1) - -# This layer has no actual use, but only to decide batch_size in generation. -# When generating, at least one Memory in RecurrentLayer MUST have a boot layer. -dummy_data = data_layer(name="dummy_data_input", size=2) - -def outer_step(dummy_data): - - gen_inputs = [StaticInput(input=dummy_data, size=2, is_seq=True), - GeneratedInput(size=num_words, - embedding_name="wordvec", - embedding_size=num_words)] - - def inner_step(dummy_memory, predict_word): - - # simplified RNN for testing - with mixed_layer(size=num_words) as layer: - layer += full_matrix_projection(input=predict_word, - param_attr=ParamAttr(name="transtable")) - - with mixed_layer(size=num_words, act=ExpActivation()) as out: - out += trans_full_matrix_projection(input=layer, - param_attr=ParamAttr(name="wordvec")) - - return out - - beam_gen = beam_search(name="rnn_gen", - step=inner_step, - input=gen_inputs, - bos_id=0, - eos_id=num_words-1, - beam_size=2 if beam_flag else 1, - num_results_per_sample=1, - max_length=10) - return beam_gen - -beam_gen_concat = recurrent_group(name="rnn_gen_concat", - step=outer_step, - input=[SubsequenceInput(dummy_data)]) - -seqtext_printer_evaluator(input=beam_gen_concat, - id_input=sent_id, - dict_file="./legacy/trainer/tests/test_gen_dict.txt", - result_file="./legacy/trainer/tests/dump_text.test") -#outputs(beam_gen_concat) -# In this config, 
as dummy_data_input doesn't work on beam_gen (we can find dummy_memory -# is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs -# as follows. Note that "__beam_search_predict__" is the default output name of beam_search. -Inputs("sent_id","dummy_data_input") -Outputs("__beam_search_predict__") diff --git a/paddle/legacy/trainer/tests/sample_trainer_rnn_gen.conf b/paddle/legacy/trainer/tests/sample_trainer_rnn_gen.conf deleted file mode 100644 index 35c7f0fcd91f9b534a4f535387af720659d7f9b8..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/sample_trainer_rnn_gen.conf +++ /dev/null @@ -1,66 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from paddle.trainer_config_helpers import * - -settings(batch_size=15, learning_rate=0) - -num_words = 5 -beam_flag = get_config_arg('beam_search', bool, False) - -sent_id = data_layer(name="sent_id", size=1) - -# This layer has no actual use, but only to decide batch_size in generation. -# When generating, at least one Memory in RecurrentLayer MUST have a boot layer. 
-dummy_data = data_layer(name="dummy_data_input", size=2) - -gen_inputs = [StaticInput(input=dummy_data, size=2), - GeneratedInput(size=num_words, - embedding_name="wordvec", - embedding_size=num_words)] - -def step(dummy_memory, predict_word): - - # simplified RNN for testing - with mixed_layer(size=num_words) as layer: - layer += full_matrix_projection(input=predict_word, - param_attr=ParamAttr(name="transtable")) - - with mixed_layer(size=num_words, act=ExpActivation()) as out: - out += trans_full_matrix_projection(input=layer, - param_attr=ParamAttr(name="wordvec")) - - return out - -beam_gen = beam_search(name="rnn_gen", - step=step, - input=gen_inputs, - bos_id=0, - eos_id=num_words-1, - beam_size=2 if beam_flag else 1, - num_results_per_sample=2 if beam_flag else 1, - max_length=10) - -seqtext_printer_evaluator(input=beam_gen, - id_input=sent_id, - dict_file="./legacy/trainer/tests/test_gen_dict.txt", - result_file="./legacy/trainer/tests/dump_text.test") -#outputs(beam_gen) -# In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory -# is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs -# as follows. Note that "__beam_search_predict__" is the default output name of beam_search. -Inputs("sent_id","dummy_data_input") -Outputs("__beam_search_predict__") diff --git a/paddle/legacy/trainer/tests/simple_sparse_neural_network.py b/paddle/legacy/trainer/tests/simple_sparse_neural_network.py deleted file mode 100644 index 9419f4d903b1de205a6c549c7dcd9bb85ed7396b..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/simple_sparse_neural_network.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -settings(batch_size=17, learning_method=AdaGradOptimizer(), learning_rate=1e-4) - -file_list = 'legacy/trainer/tests/fake_file_list.list' - -define_py_data_sources2( - train_list=file_list, - test_list=file_list, - module="simple_sparse_neural_network_dp", - obj="process") - -embedding = embedding_layer( - input=data_layer( - name="word_ids", size=8191), - size=128, - param_attr=ParamAttr(sparse_update=True)) -prediction = fc_layer(input=embedding, size=10, act=SoftmaxActivation()) - -outputs( - classification_cost( - input=prediction, label=data_layer( - name='label', size=10))) diff --git a/paddle/legacy/trainer/tests/simple_sparse_neural_network_dp.py b/paddle/legacy/trainer/tests/simple_sparse_neural_network_dp.py deleted file mode 100644 index 49043c91758b7199d063670616826656f7e8b485..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/simple_sparse_neural_network_dp.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import provider, integer_sequence, integer_value -import random - - -def init_hook(settings, is_train, **kwargs): - settings.is_train = is_train - - -@provider( - input_types={'word_ids': integer_value(8191), - 'label': integer_value(10)}, - min_pool_size=0, - init_hook=init_hook) -def process(settings, filename): - if settings.is_train: - data_size = 2**10 - else: - data_size = 2**5 - - for _ in xrange(data_size): - yield random.randint(0, 8190), random.randint(0, 9) diff --git a/paddle/legacy/trainer/tests/testPyDataWrapper.py b/paddle/legacy/trainer/tests/testPyDataWrapper.py deleted file mode 100644 index a76eeeacb91cdba305d2f71c6292f79e4b98dd73..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/testPyDataWrapper.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -sys.path.append("../") - -from paddle.trainer.PyDataProviderWrapper import * -import random -import json -import string - -SPARSE_ID_LIMIT = 1000 -SPARSE_ID_COUNT = 100 -SEQUENCE_LIMIT = 50 -STRING_LIMIT = 10 - -sparse_id_randomer = lambda: random.randrange(0, SPARSE_ID_LIMIT - 1) -sparse_count_randomer = lambda: random.randrange(1, SPARSE_ID_COUNT) -val_randomer = lambda: random.uniform(-1.0, 1.0) -seq_count_randomer = lambda: random.randrange(1, SEQUENCE_LIMIT) -str_count_randomer = lambda: random.randrange(1, STRING_LIMIT) - - -class IDRandomer(): # A random generator, return unique id - def __init__(self): - self.id_set = set() - - def __call__(self): - idx = sparse_id_randomer() - if idx not in self.id_set: - self.id_set.add(idx) - return idx - else: - return self.__call__() - - -# SparseValueSlot -def sparse_value_creator(_): - rand = IDRandomer() - return [(rand(), val_randomer()) for _ in xrange(sparse_count_randomer())] - - -sparse_value = map(sparse_value_creator, range(seq_count_randomer())) - - -# DenseSlot -def dense_creator(_): - return [val_randomer() for _ in xrange(SPARSE_ID_LIMIT)] - - -dense = map(dense_creator, range(seq_count_randomer())) - - -# SparseNonValueSlot -def sparse_creator(_): - rand = IDRandomer() - return [rand() for _ in xrange(sparse_count_randomer())] - - -sparse_nonvalue = map(sparse_creator, range(seq_count_randomer())) - -# IndexSlot -ids = [sparse_id_randomer() for _ in range(seq_count_randomer())] - - -# StringSlot -def random_str(size=8, chars=string.ascii_letters + string.digits): - return ''.join(random.choice(chars) for _ in range(size)) - - -strs = [random_str(str_count_randomer()) for _ in range(seq_count_randomer())] - - -def processSeqAndGenerateDataInit(obj, *args, **kwargs): - obj.json_filename = kwargs.get("load_data_args", "test_data.json") - - -@provider( - slots=[ - SparseValueSlot(SPARSE_ID_LIMIT), DenseSlot(SPARSE_ID_LIMIT), - SparseNonValueSlot(SPARSE_ID_LIMIT), IndexSlot(SPARSE_ID_LIMIT), - 
StringSlot(SPARSE_ID_LIMIT) - ], - use_seq=True, - init_hook=processSeqAndGenerateDataInit) -def processSeqAndGenerateData(obj, name): - retv = [sparse_value, dense, sparse_nonvalue, ids, strs] - # Write to protoseq. - with open(obj.json_filename, "w") as f: - json.dump(retv, f) - yield retv - - -def processSubSeqAndGenerateDataInit(obj, *args, **kwargs): - obj.json_filename = kwargs.get("load_data_args", "test_data.json") - - -@provider( - slots=[ - SparseValueSlot(SPARSE_ID_LIMIT), DenseSlot(SPARSE_ID_LIMIT), - SparseNonValueSlot(SPARSE_ID_LIMIT), IndexSlot(SPARSE_ID_LIMIT), - StringSlot(SPARSE_ID_LIMIT) - ], - use_seq=True, - init_hook=processSubSeqAndGenerateDataInit) -def processSubSeqAndGenerateData(obj, name): - retv_json = [sparse_value, dense, sparse_nonvalue, ids, strs] - retv_wrapper = [[sparse_value], [dense], [sparse_nonvalue], [ids], [strs]] - # Write to protoseq. - with open(obj.json_filename, "w") as f: - json.dump(retv_json, f) - yield retv_wrapper - - -if __name__ == "__main__": - pvd = processSeqAndGenerateData("_") - print pvd.getNextBatch(100) - pvd = processSubSeqAndGenerateData("_") - print pvd.getNextBatch(1) diff --git a/paddle/legacy/trainer/tests/test_Compare.cpp b/paddle/legacy/trainer/tests/test_Compare.cpp deleted file mode 100644 index e37e546be8513b1cc7438810a01641859a4bad18..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/test_Compare.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/legacy/trainer/Trainer.h" - -#include -#include - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -static const string& configFile = - "legacy/trainer/tests/sample_trainer_config.conf"; - -DECLARE_int32(gpu_id); -DECLARE_bool(use_gpu); -DECLARE_string(config); -DECLARE_string(config_args); - -struct comData { - vector outArgs; - vector parameters; -}; - -void calcGradient(bool useGpu, comData& Data) { - FLAGS_use_gpu = useGpu; - FLAGS_config = configFile; - - *ThreadLocalRand::getSeed() = 0; - srand(0); - Trainer trainer; - trainer.init(TrainerConfigHelper::createFromFlagConfig()); - - Data.parameters = trainer.getGradientMachine()->getParameters(); - DataBatch dataBatch; - int32_t batchSize = trainer.getConfig().opt_config().batch_size(); - trainer.getDataProvider()->setSkipShuffle(); - trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch); - CHECK(dataBatch.getSize()) << "No data from data provider"; - vector& inArgs = dataBatch.getStreams(); - trainer.getGradientMachine()->start(); - for (int i = 0; i < 2; ++i) { - trainer.getGradientMachine()->forwardBackward( - inArgs, &Data.outArgs, PASS_TRAIN); - } - trainer.getGradientMachine()->finish(); -} - -void compareGradient(comData& comDataCpu, comData& comDataGpu); - -TEST(Trainer, create) { - int devCount = 0; - devCount = hl_get_device_count(); - FLAGS_config_args = "drop_rate=0"; - - comData comDataCpu; - calcGradient(false, comDataCpu); - LOG(INFO) << "Cpu is completed"; - - { - LOG(INFO) << "Test GPU"; - comData comData; - calcGradient(true, comData); - compareGradient(comDataCpu, comData); - LOG(INFO) << "Gpu is completed"; - } - - { - LOG(INFO) << "Test test multi gpu"; - comData comData; - FLAGS_trainer_count = devCount; - calcGradient(true, comData); - compareGradient(comDataCpu, comData); - LOG(INFO) << "Gpu4 is completed"; - } - - { - 
LOG(INFO) << "Test use_sparse_update=true"; - comData comData; - calcGradient(false, comData); - compareGradient(comDataCpu, comData); - LOG(INFO) << "Cpu4 is completed"; - } -} - -double checkBuffer(real* A, real* B, size_t len) { -#ifdef PADDLE_TYPE_DOUBLE - double precision = 1e-7; -#else - double precision = 2e-3; -#endif - int nNum = 0; - double maxE = 0; - for (size_t i = 0; i < len; ++i) { - double e = fabs(A[i] - B[i]); - maxE = std::max(e, maxE); - nNum += e > precision * fabs(A[i]); - } - EXPECT_EQ(0, nNum); - return maxE; -} - -void compareGradient(comData& comDataCpu, comData& comDataGpu) { - /*compare outArgs*/ - vector outArgs1 = comDataCpu.outArgs; - vector outArgs2 = comDataGpu.outArgs; - CpuMatrix out1(outArgs1[0].value->getHeight(), outArgs1[0].value->getWidth()); - CpuMatrix out2(outArgs2[0].value->getHeight(), outArgs2[0].value->getWidth()); - out1.copyFrom(*outArgs1[0].value); - out2.copyFrom(*outArgs2[0].value); - checkBuffer(out1.getData(), out2.getData(), out1.getElementCnt()); - - /*compare parameters*/ - vector& parameters1 = comDataCpu.parameters; - vector& parameters2 = comDataGpu.parameters; - for (size_t i = 0; i < parameters1.size(); ++i) { - ParameterPtr parameter1, parameter2; - parameter1 = parameters1[i]; - parameter2 = parameters2[i]; - /*compare parameters value*/ - CpuVector para1(parameter1->getSize()); - CpuVector para2(parameter2->getSize()); - para1.copyFrom(*parameter1->getBuf(PARAMETER_VALUE)); - para2.copyFrom(*parameter2->getBuf(PARAMETER_VALUE)); - checkBuffer(para1.getData(), para2.getData(), para1.getSize()); - - /*compare parameters grad*/ - CpuVector cpuGrad1(*parameter1->getBuf(PARAMETER_GRADIENT)); - CpuVector cpuGrad2(*parameter2->getBuf(PARAMETER_GRADIENT)); - double e = - checkBuffer(cpuGrad1.getData(), cpuGrad2.getData(), cpuGrad1.getSize()); - LOG(INFO) << parameter1->getName() << " max error=" << e; - } -} - -int main(int argc, char** argv) { -#ifndef PADDLE_WITH_CUDA - exit(0); -#endif - 
paddle::initMain(argc, argv); - testing::InitGoogleTest(&argc, argv); - initPython(argc, argv); - int ret = RUN_ALL_TESTS(); - exit(ret); -} diff --git a/paddle/legacy/trainer/tests/test_PyDataProviderWrapper.cpp b/paddle/legacy/trainer/tests/test_PyDataProviderWrapper.cpp deleted file mode 100644 index 847adcfabada18e11203d3f18fb6dc355c670afb..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/test_PyDataProviderWrapper.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef PADDLE_NO_PYTHON -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "picojson.h" - -void checkValue(std::vector& arguments, picojson::array& arr); -const std::string kDir = "./legacy/trainer/tests/pydata_provider_wrapper_dir/"; - -TEST(PyDataProviderWrapper, SequenceData) { - paddle::DataConfig conf; - conf.set_type("py"); - conf.set_load_data_module("testPyDataWrapper"); - conf.set_load_data_object("processSeqAndGenerateData"); - conf.set_load_data_args(kDir + "test_pydata_provider_wrapper.json"); - conf.clear_files(); - conf.set_files(kDir + "test_pydata_provider_wrapper.list"); - paddle::DataProviderPtr provider(paddle::DataProvider::create(conf, false)); - provider->setSkipShuffle(); - provider->reset(); - paddle::DataBatch batchFromPy; - provider->getNextBatch(100, &batchFromPy); - - picojson::value val; - std::fstream fin; - fin.open(kDir + "test_pydata_provider_wrapper.json", std::ios_base::in); - EXPECT_TRUE(fin.is_open()); - if (fin.is_open()) { - std::string err = picojson::parse(val, fin); - EXPECT_TRUE(err.empty()); - EXPECT_TRUE(val.is()); - picojson::array& arr = val.get(); - std::vector& arguments = batchFromPy.getStreams(); - // CHECK Value - checkValue(arguments, arr); - // CHECK sequenceStartPositions - for (size_t i = 0; i < arr.size(); i++) { - int row_id = arr[i].get().size(); - EXPECT_EQ(0, arguments[i].sequenceStartPositions->getData(false)[0]); - EXPECT_EQ((int)row_id, - arguments[i].sequenceStartPositions->getData(false)[1]); - } - fin.close(); - } -} - -TEST(PyDataProviderWrapper, HasSubSequenceData) { - paddle::DataConfig conf; - conf.set_type("py"); - conf.set_load_data_module("testPyDataWrapper"); - conf.set_load_data_object("processSubSeqAndGenerateData"); - conf.set_load_data_args(kDir + "test_pydata_provider_wrapper.json"); - conf.clear_files(); - conf.set_files(kDir + "test_pydata_provider_wrapper.list"); - paddle::DataProviderPtr 
provider(paddle::DataProvider::create(conf, false)); - provider->setSkipShuffle(); - provider->reset(); - paddle::DataBatch batchFromPy; - provider->getNextBatch(1, &batchFromPy); - - picojson::value val; - std::fstream fin; - fin.open(kDir + "test_pydata_provider_wrapper.json", std::ios_base::in); - EXPECT_TRUE(fin.is_open()); - if (fin.is_open()) { - std::string err = picojson::parse(val, fin); - EXPECT_TRUE(err.empty()); - EXPECT_TRUE(val.is()); - picojson::array& arr = val.get(); - std::vector& arguments = batchFromPy.getStreams(); - // CHECK Value - checkValue(arguments, arr); - // CHECK sequenceStartPositions and subSequenceStartPositions - for (size_t i = 0; i < arr.size(); i++) { - int row_id = arr[i].get().size(); - EXPECT_EQ(0, arguments[i].sequenceStartPositions->getData(false)[0]); - EXPECT_EQ((int)row_id, - arguments[i].sequenceStartPositions->getData(false)[1]); - EXPECT_EQ(0, arguments[i].subSequenceStartPositions->getData(false)[0]); - EXPECT_EQ((int)row_id, - arguments[i].subSequenceStartPositions->getData(false)[1]); - } - fin.close(); - } -} - -int main(int argc, char** argv) { - paddle::initMain(argc, argv); - paddle::initPython(argc, argv); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -void checkValue(std::vector& arguments, - picojson::array& arr) { - // CHECK SLOT 0, Sparse Value. 
- paddle::Argument& sparse_values_seq = arguments[0]; - paddle::MatrixPtr& sparse_values_seq_rawmatrix = sparse_values_seq.value; - EXPECT_TRUE(sparse_values_seq_rawmatrix != nullptr); - paddle::CpuSparseMatrix* sparse_val_seq_sparse_mat = - dynamic_cast(sparse_values_seq_rawmatrix.get()); - EXPECT_TRUE(sparse_val_seq_sparse_mat != nullptr); - EXPECT_EQ(arr.size(), arguments.size()); - EXPECT_TRUE(arr[0].is()); - size_t row_id = 0; - for (picojson::value& sparse_val_seq : arr[0].get()) { - std::unordered_map cols; - for (picojson::value& kv : sparse_val_seq.get()) { - EXPECT_TRUE(kv.get(0).is()); - EXPECT_TRUE(kv.get(1).is()); - int col = (int)(kv.get(0).get()); - real val = (real)(kv.get(1).get()); - cols.insert({col, val}); - } - size_t colNum = sparse_val_seq_sparse_mat->getColNum(row_id); - EXPECT_EQ(cols.size(), colNum); - int* rowIds = sparse_val_seq_sparse_mat->getRowCols(row_id); - real* rowBuf = sparse_val_seq_sparse_mat->getRowValues(row_id); - for (size_t i = 0; i < colNum; ++i) { - int id = rowIds[i]; - auto it = cols.find(id); - EXPECT_NE(cols.end(), it); - real expect = it->second; - EXPECT_NEAR(expect, *rowBuf, 1e-5); - ++rowBuf; - } - ++row_id; - } - - // CHECK SLOT 1, Dense Value. - paddle::Argument& dense_arg = arguments[1]; - paddle::MatrixPtr& dense_mat = dense_arg.value; - EXPECT_NE(nullptr, dense_mat); - EXPECT_TRUE(arr[1].is()); - row_id = 0; - for (picojson::value& dense_seq : arr[1].get()) { - EXPECT_TRUE(dense_seq.is()); - picojson::array& row = dense_seq.get(); - EXPECT_EQ(row.size(), dense_mat->getWidth()); - real* rowBuf = dense_mat->getRowBuf(row_id++); - - for (picojson::value& val : row) { - EXPECT_TRUE(val.is()); - real expect = val.get(); - EXPECT_NEAR(expect, *rowBuf++, 1e-5); - } - } - - // CHECK SLOT 2, Sparse Non Value. 
- paddle::Argument& sparse_non_val_arg = arguments[2]; - paddle::MatrixPtr& sparse_non_val_rawm = sparse_non_val_arg.value; - EXPECT_NE(nullptr, sparse_non_val_rawm); - paddle::CpuSparseMatrix* sparse_non_val_m = - dynamic_cast(sparse_non_val_rawm.get()); - EXPECT_NE(nullptr, sparse_non_val_m); - row_id = 0; - for (picojson::value& row : arr[2].get()) { - EXPECT_TRUE(row.is()); - std::unordered_set ids; - for (picojson::value& id : row.get()) { - EXPECT_TRUE(id.is()); - ids.insert((int)(id.get())); - } - size_t colNum = sparse_non_val_m->getColNum(row_id); - EXPECT_EQ(ids.size(), colNum); - for (size_t i = 0; i < colNum; ++i) { - int col = sparse_non_val_m->getRowCols(row_id)[i]; - EXPECT_TRUE(ids.find(col) != ids.end()); - } - ++row_id; - } - - // CHECK SLOT 3, Index. - paddle::Argument& index_arg = arguments[3]; - paddle::IVectorPtr indices = index_arg.ids; - EXPECT_NE(nullptr, indices); - int* idPtr = indices->getData(); - for (picojson::value& id : arr[3].get()) { - EXPECT_TRUE(id.is()); - int _id = (int)(id.get()); - EXPECT_EQ(_id, *idPtr++); - } - - // CHECK SLOT 4, String. - paddle::Argument& strArg = arguments[4]; - std::vector* strPtr = strArg.strs.get(); - EXPECT_NE(nullptr, strPtr); - size_t vecIndex = 0; - for (picojson::value& str : arr[4].get()) { - EXPECT_TRUE(str.is()); - std::string _str = str.get(); - EXPECT_EQ(_str, (*strPtr)[vecIndex++]); - } -} - -#else -int main() { return 0; } - -#endif diff --git a/paddle/legacy/trainer/tests/test_Trainer.cpp b/paddle/legacy/trainer/tests/test_Trainer.cpp deleted file mode 100644 index 14ad0a265281a8df20a70b0da2873ea451338ddb..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/test_Trainer.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include "paddle/legacy/trainer/Trainer.h" - -#include - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -static const string& configFile1 = - "legacy/trainer/tests/sample_trainer_config.conf"; -static const string& configFile2 = - "legacy/trainer/tests/sample_trainer_config_hsigmoid.conf"; -static const string& configFile4 = - "legacy/trainer/tests/sample_trainer_config_parallel.conf"; - -DECLARE_bool(use_gpu); -DECLARE_string(config); -DECLARE_int32(gpu_id); -DECLARE_bool(allow_only_one_model_on_one_gpu); - -void checkGradientTest(const string& configFile, - bool useGpu, - bool parallel, - int trainerCount = 1) { - FLAGS_use_gpu = useGpu; - FLAGS_parallel_nn = parallel; - FLAGS_config = configFile; - FLAGS_trainer_count = trainerCount; - LOG(INFO) << " useGpu=" << useGpu << " trainerCount=" << trainerCount - << " configFile=" << configFile; - - Trainer trainer; - trainer.init(TrainerConfigHelper::createFromFlagConfig()); - EXPECT_LE(fabs(trainer.checkGradient()), 0.02); -} - -TEST(checkGradient, cpu) { checkGradientTest(configFile1, false, false); } - -#ifdef PADDLE_WITH_CUDA -TEST(checkGradient, gpu) { checkGradientTest(configFile1, true, false); } - -TEST(checkGradient, multiGpu) { - int numGpu; - numGpu = hl_get_device_count(); - for (auto count : {2, 4}) { - if (count <= numGpu) { - checkGradientTest(configFile1, true, false, count); - } - } -} - -TEST(checkGradient, parallel) { - if (hl_get_device_count() >= 2) { - checkGradientTest(configFile4, true, true); - } -} - -TEST(checkGradient, multiParallel) { - 
FLAGS_allow_only_one_model_on_one_gpu = false; - checkGradientTest(configFile4, true, true, 2); - FLAGS_allow_only_one_model_on_one_gpu = true; -} - -#endif - -TEST(checkGradient, multi) { - int numGpu; - if (version::isWithGpu()) { - numGpu = hl_get_device_count(); - } else { - numGpu = 0; - } - for (bool useGpu : {false, true}) { - for (auto count : {2, 4}) { - if (useGpu && count > numGpu) continue; - checkGradientTest(configFile1, useGpu, false, count); - } - } -} - -TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); } - -TEST(checkGradient, non_parallel) { - checkGradientTest(configFile4, false, false); -} - -int main(int argc, char** argv) { - initMain(argc, argv); - initPython(argc, argv); - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/trainer/tests/test_TrainerOnePass.cpp b/paddle/legacy/trainer/tests/test_TrainerOnePass.cpp deleted file mode 100644 index 3e5c5ea723f3fd80316ee826fe9c6566e7049b7b..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/test_TrainerOnePass.cpp +++ /dev/null @@ -1,318 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include "paddle/legacy/trainer/Trainer.h" -#include "paddle/legacy/trainer/TrainerInternal.h" - -#include -#include - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -static const string& configFile1 = - "legacy/trainer/tests/sample_trainer_config.conf"; -static const string& configFile2 = - "legacy/trainer/tests/sample_trainer_config_parallel.conf"; - -static const string& configFileSimpleSparse = - "legacy/trainer/tests/simple_sparse_neural_network.py"; - -DECLARE_bool(use_gpu); -DECLARE_string(config); -DECLARE_int32(gpu_id); -DECLARE_int32(seed); -DECLARE_int32(num_passes); -DECLARE_int32(saving_period); - -class TrainerForTest : public paddle::Trainer { - public: - inline const std::shared_ptr& getParameterUpdaterForTest() { - return this->trainerInternal_.getParameterUpdater(); - } -}; - -int gNumDevices = 0; - -void trainerOnePassTest(const string& configFile, - bool useGpu, - bool parallel, - int trainerCount = 1, - double averageWindow = 0.0f, - bool doAverageInCpu = false) { - FLAGS_use_gpu = useGpu; - FLAGS_parallel_nn = parallel; - FLAGS_config = configFile; - FLAGS_trainer_count = trainerCount; - LOG(INFO) << " useGpu=" << useGpu << " trainerCount=" << trainerCount - << " configFile=" << configFile; - srand(FLAGS_seed); - - if (useGpu) { - if (gNumDevices < trainerCount) { - return; - } - } - - Trainer trainer; - auto config = TrainerConfigHelper::createFromFlagConfig(); - if (averageWindow > 0) { - config->getOptConfig().set_average_window(averageWindow); - config->getOptConfig().set_do_average_in_cpu(doAverageInCpu); - } - trainer.init(config); - trainer.train(); -} - -// 1. test trainer (cpu, gpu). 
-TEST(trainerOnePass, cpu) { trainerOnePassTest(configFile1, false, false); } - -#ifdef PADDLE_WITH_CUDA -TEST(trainerOnePass, gpu) { trainerOnePassTest(configFile1, true, false); } - -TEST(trainerOnePass, gpu2) { trainerOnePassTest(configFile1, true, false, 2); } - -TEST(trainerOnePass, gpu4) { trainerOnePassTest(configFile1, true, false, 4); } - -TEST(trainerOnePass, parallel) { - if (hl_get_device_count() >= 2) { - trainerOnePassTest(configFile2, true, true); - } -} -#endif - -// 2. test average_window. -#ifdef PADDLE_WITH_CUDA -TEST(average_window, gpu) { - trainerOnePassTest(configFile1, true, false, 4, 0.01); -} - -TEST(average_window, gpu2) { - FLAGS_num_passes = 20; - trainerOnePassTest(configFile1, true, false, 2, 0.01); - FLAGS_num_passes = 1; -} - -TEST(average_window, gpu4) { - FLAGS_num_passes = 20; - trainerOnePassTest(configFile1, true, false, 4, 0.01); - FLAGS_num_passes = 1; -} - -TEST(average_window_cpu, gpu2) { - FLAGS_num_passes = 20; - trainerOnePassTest(configFile1, true, false, 2, 0.01, true); - FLAGS_num_passes = 1; -} - -TEST(average_window_cpu, gpu4) { - FLAGS_num_passes = 20; - trainerOnePassTest(configFile1, true, false, 4, 0.01, true); - FLAGS_num_passes = 1; -} -#endif - -// 3. test trainer + pserver. 
-DECLARE_int32(num_gradient_servers); -DECLARE_int32(port); -DECLARE_bool(local); -DECLARE_bool(use_old_updater); - -double checkRemoteParameterUpdater(TrainerForTest& trainer) { - auto gradientMachine = trainer.getGradientMachine(); - auto parameterUpdater = trainer.getParameterUpdaterForTest(); - auto dataProvider = trainer.getDataProvider(); - auto& parameters = gradientMachine->getParameters(); - const TrainerConfig& config = trainer.getConfig(); - const string& alg = config.opt_config().algorithm(); - - vector parameterCheck; - for (auto& parameter : parameters) { - parameterCheck.emplace_back( - new Parameter(parameter->getConfig(), /* useGpu= */ false)); - parameterCheck.back() - ->getBuf(PARAMETER_VALUE) - ->copyFrom(*parameter->getBuf(PARAMETER_VALUE)); - parameterCheck.back() - ->getBuf(PARAMETER_GRADIENT) - ->copyFrom(*parameter->getBuf(PARAMETER_GRADIENT)); - } - - std::unique_ptr parameterUpdaterCheck; - if (alg == TrainAlgorithm::SGD) { - parameterUpdaterCheck.reset(new SgdLocalUpdater(config.opt_config())); - } else { - LOG(INFO) << "unsupported algorithm in remote parameter check: " << alg; - return -1.0; - } - parameterUpdaterCheck->init(parameterCheck); - - // gradientMachine->start(config, *dataProvider); - DataBatch dataBatch; - int32_t batchSize = config.opt_config().batch_size(); - dataProvider->getNextBatch(batchSize, &dataBatch); - CHECK(dataBatch.getSize()) << "No data from data provider"; - int64_t actualBatchSize = dataBatch.getSize(); - const vector& inArgs = dataBatch.getStreams(); - vector outArgs; - - UpdateCallback updateCallback = [parameterUpdater, - parameterCheck](Parameter* para) { - parameterCheck[para->getID()] - ->getBuf(PARAMETER_GRADIENT) - ->copyFrom(*para->getBuf(PARAMETER_GRADIENT)); - parameterUpdater->update(para); - }; - - parameterUpdater->startPass(); - parameterUpdaterCheck->startPass(); - - for (int i = 0; i < config.opt_config().num_batches_per_get_parameter() * 2; - ++i) { - PassType passType = 
parameterUpdater->startBatch(actualBatchSize); - gradientMachine->forwardBackward( - inArgs, &outArgs, passType, updateCallback); - parameterUpdater->finishBatch(0); - - parameterUpdaterCheck->startBatch(actualBatchSize); - for (auto& para : parameterCheck) { - parameterUpdaterCheck->update(para.get()); - } - parameterUpdaterCheck->finishBatch(0); - } - - double sum = 0.0f; - for (size_t i = 0; i != parameters.size(); ++i) { - real *v1, *v2; - CpuVector trainerPara(parameters[i]->getSize()); - trainerPara.copyFrom(*parameters[i]->getBuf(PARAMETER_VALUE)); - if (!FLAGS_use_gpu) { - v1 = parameters[i]->getBuf(PARAMETER_VALUE)->getData(); - } else { - v1 = trainerPara.getData(); - } - v2 = parameterCheck[i]->getBuf(PARAMETER_VALUE)->getData(); - - size_t size = parameters[i]->getSize(); - double diff = 0; - for (size_t j = 0; j < size; ++j) { - diff += fabs(v1[j] - v2[j]); - } - sum += diff; - LOG(INFO) << setiosflags(ios::left) << setfill(' ') << setw(20) - << parameters[i]->getName() << "diff=" << setw(15) << diff; - } - - parameterUpdater->finishPass(); - parameterUpdaterCheck->finishPass(); - gradientMachine->finish(); - return sum; -} - -void checkRemoteParameterUpdaterTest(const string& configFile, - bool useGpu, - bool parallel, - int trainerCount = 1, - bool useOldUpdater = false, - int num_batches_per_get_parameter = 1) { - FLAGS_use_gpu = useGpu; - FLAGS_parallel_nn = parallel; - FLAGS_config = configFile; - FLAGS_trainer_count = trainerCount; - FLAGS_use_old_updater = useOldUpdater; - LOG(INFO) << " useGpu=" << useGpu << " trainerCount=" << trainerCount - << " configFile=" << configFile; - srand(FLAGS_seed); - - if (useGpu) { - if (gNumDevices < trainerCount) { - return; - } - } - - FLAGS_local = 0; - std::shared_ptr pserver; - pserver.reset(new ParameterServer2(std::string(), FLAGS_port)); - pserver->init(); - pserver->start(); - - TrainerForTest trainer; - auto config = TrainerConfigHelper::createFromFlagConfig(); - 
config->getOptConfig().set_num_batches_per_get_parameter( - num_batches_per_get_parameter); - trainer.init(config); - EXPECT_EQ(checkRemoteParameterUpdater(trainer), 0); - - FLAGS_local = 1; -} - -TEST(checkRemoteUpdater, cpuTrainer) { - checkRemoteParameterUpdaterTest(configFile1, false, false); -} - -TEST(checkRemoteUpdater, cpuTrainerOldUpdater) { - checkRemoteParameterUpdaterTest(configFile1, false, false, 1, true); -} - -#ifdef PADDLE_WITH_CUDA -TEST(checkRemoteUpdater, gpuTrainer) { - checkRemoteParameterUpdaterTest(configFile1, true, false); -} - -TEST(checkRemoteUpdater, gpu2Trainer) { - checkRemoteParameterUpdaterTest(configFile1, true, false, 2); -} - -TEST(checkRemoteUpdater, gpu4Trainer) { - checkRemoteParameterUpdaterTest(configFile1, true, false, 4); -} - -TEST(checkRemoteUpdater, gpuTrainerOldUpdater) { - checkRemoteParameterUpdaterTest(configFile1, true, false, 1, true); -} - -TEST(checkRemoteUpdater, gpu2TrainerOldUpdater) { - checkRemoteParameterUpdaterTest(configFile1, true, false, 2, true); -} - -TEST(checkRemoteUpdater, gpu4TrainerOldUpdater) { - checkRemoteParameterUpdaterTest(configFile1, true, false, 4, true); -} - -#endif - -TEST(checkRemoteUpdater, cpuDeltaTrainer) { - checkRemoteParameterUpdaterTest(configFile1, false, false, 1, false, 10); -} - -TEST(checkRemoteUpdater, cpuDeltaTrainerOldUpdater) { - checkRemoteParameterUpdaterTest(configFile1, false, false, 1, true, 10); -} - -TEST(SgdThreadUpdater, simpleSparseNN) { - trainerOnePassTest(configFileSimpleSparse, false, false, 1, 0.5, true); -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - initPython(argc, argv); - gNumDevices = hl_get_device_count(); - - FLAGS_num_passes = 1; // train one pass - FLAGS_saving_period = 100000; // do not save parameteres - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/trainer/tests/test_config.conf b/paddle/legacy/trainer/tests/test_config.conf deleted file mode 100644 index 
bce687ad83686d465987d72defd37db2b50953a1..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/test_config.conf +++ /dev/null @@ -1,77 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -TrainData(SimpleData( - files = "legacy/trainer/tests/sample_filelist.txt", - feat_dim = 3, - context_len = 0, - buffer_capacity = 1000000, - async_load_data = False)) - -settings(batch_size = 100) - -data = data_layer(name='input', size=3) - -wt = data_layer(name='weight', size=1) - -fc1 = fc_layer(input=data, size=5, - bias_attr=True, - act=SigmoidActivation()) - -fc2 = fc_layer(input=data, size=12, - bias_attr=True, - param_attr=ParamAttr(name='sharew'), - act=LinearActivation()) - -fc3 = fc_layer(input=data, size=3, - bias_attr=True, - act=TanhActivation()) - -fc4 = fc_layer(input=data, size=5, - bias_attr=True, - layer_attr=ExtraAttr(drop_rate=0.5), - act=SquareActivation()) - -pool = img_pool_layer(input=fc2, - pool_size=2, - pool_size_y=3, - num_channels=1, - padding=1, - padding_y=2, - stride=2, - stride_y=3, - pool_type=CudnnAvgPooling()) - -concat = concat_layer(input=[fc3, fc4]) - -with mixed_layer(size=3, act=SoftmaxActivation()) as output: - output += full_matrix_projection(input=fc1) - output += trans_full_matrix_projection(input=fc2, - param_attr=ParamAttr(name='sharew')) - output += 
full_matrix_projection(input=concat) - output += identity_projection(input=fc3) - -lbl = data_layer(name='label', size=1) - -cost = classification_cost(input=output, label=lbl, weight=wt, - layer_attr=ExtraAttr(device=-1)) - -nce = nce_layer(input=fc2, label=lbl, weight=wt, - num_classes=3, - neg_distribution=[0.1, 0.3, 0.6]) - -outputs(cost, nce) diff --git a/paddle/legacy/trainer/tests/test_gen_dict.txt b/paddle/legacy/trainer/tests/test_gen_dict.txt deleted file mode 100644 index 1000f90057824bf665b32fe47a7f78e7a0077e7b..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/test_gen_dict.txt +++ /dev/null @@ -1,9 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 diff --git a/paddle/legacy/trainer/tests/test_recurrent_machine_generation.cpp b/paddle/legacy/trainer/tests/test_recurrent_machine_generation.cpp deleted file mode 100644 index 47b4e82cd32917fcf32dbb5ffabb47330dab93d9..0000000000000000000000000000000000000000 --- a/paddle/legacy/trainer/tests/test_recurrent_machine_generation.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include -#include - -#include - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -static const string& CONFIG_FILE = - "legacy/trainer/tests/sample_trainer_rnn_gen.conf"; -static const string& NEST_CONFIG_FILE = - "legacy/trainer/tests/sample_trainer_nest_rnn_gen.conf"; -static const string& OUTPUT_DIR = "legacy/trainer/tests/dump_text.test"; -static string modelDir = - "legacy/trainer/tests/rnn_gen_test_model_dir/t1"; // NOLINT -static string expectFile = // NOLINT - "legacy/trainer/tests/rnn_gen_test_model_dir/r1.test"; // NOLINT - -DECLARE_string(config_args); - -vector readRetFile(const string& fname) { - ifstream inFile(fname); - float ret; - vector nums; - while (inFile >> ret) { - nums.push_back(ret); - } - return nums; -} - -void checkOutput(const string& expRetFile) { - vector rets = readRetFile(OUTPUT_DIR); - vector expRets = readRetFile(expRetFile); - EXPECT_EQ(rets.size(), expRets.size()); - for (size_t i = 0; i < rets.size(); i++) { - EXPECT_FLOAT_EQ(rets[i], expRets[i]); - } -} - -void prepareInArgs(vector& inArgs, - const size_t batchSize, - bool useGpu, - bool hasSubseq) { - inArgs.clear(); - // sentence id - Argument sentId; - sentId.value = nullptr; - if (hasSubseq) { - // as there is only one sequence, there is only one label. - IVector::resizeOrCreate(sentId.ids, 1, useGpu); - sentId.ids->setElement(0, 0); - } else { - // as there is batchSize word, there is batchSize label. - IVector::resizeOrCreate(sentId.ids, batchSize, useGpu); - for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i); - } - inArgs.emplace_back(sentId); - - // a dummy layer to decide batch size - Argument dummyInput; - dummyInput.value = Matrix::create(batchSize, 2, false, useGpu); - dummyInput.value->randomizeUniform(); - if (hasSubseq) { - // generate one sequence with batchSize subsequence, - // and each subsequence has only one word. 
- dummyInput.sequenceStartPositions = ICpuGpuVector::create(2, false); - int* buf = dummyInput.sequenceStartPositions->getMutableData(false); - dummyInput.subSequenceStartPositions = - ICpuGpuVector::create(batchSize + 1, false); - int* subBuf = dummyInput.subSequenceStartPositions->getMutableData(false); - buf[0] = 0; - buf[1] = batchSize; - for (size_t i = 0; i < batchSize + 1; i++) subBuf[i] = i; - } - inArgs.emplace_back(dummyInput); -} - -void testGeneration(const string& configFile, - bool useGpu, - bool hasSubseq, - const string& expRetFile) { - FLAGS_use_gpu = useGpu; - auto config = std::make_shared(configFile); - unique_ptr gradientMachine(GradientMachine::create(*config)); - gradientMachine->loadParameters(modelDir); - vector inArgs(2); - - const size_t batchSize = 15; - prepareInArgs(inArgs, batchSize, useGpu, hasSubseq); - vector outArgs; - unique_ptr testEvaluator(gradientMachine->makeEvaluator()); - testEvaluator->start(); - gradientMachine->forward(inArgs, &outArgs, PASS_TEST); - gradientMachine->eval(testEvaluator.get()); - testEvaluator->finish(); - checkOutput(expRetFile); -} - -#ifndef PADDLE_TYPE_DOUBLE - -TEST(RecurrentGradientMachine, test_generation) { -#ifndef PADDLE_WITH_CUDA - const auto useGpuConfs = {false}; -#else - const auto useGpuConfs = {true, false}; -#endif - auto testGen = [&](const string& configFile, - bool hasSubseq, - const string& expRetFile, - bool beam_search) { - FLAGS_config_args = beam_search ? 
"beam_search=1" : "beam_search=0"; - for (auto useGpu : useGpuConfs) { - LOG(INFO) << configFile << " useGpu=" << useGpu - << " beam_search=" << beam_search; - testGeneration(configFile, useGpu, hasSubseq, expRetFile); - } - }; - testGen(CONFIG_FILE, false, expectFile + ".nobeam", false); // no beam search - testGen(CONFIG_FILE, false, expectFile + ".beam", true); // beam search - // In hierarchical RNN, beam search and one way search are only in inner-RNN, - // outer-RNN will concat the generated inner-results (first for beam search) - // from inner-RNN. Thus, they have the same outer-results. - testGen(NEST_CONFIG_FILE, - true, - expectFile + ".nest", - false); // no beam search - testGen(NEST_CONFIG_FILE, true, expectFile + ".nest", true); // beam search -} -#endif - -int main(int argc, char** argv) { - initMain(argc, argv); - initPython(argc, argv); - CHECK(argc == 1 || argc == 3); - if (argc == 3) { - modelDir = argv[1]; - expectFile = argv[2]; - } - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/legacy/utils/.gitignore b/paddle/legacy/utils/.gitignore deleted file mode 100644 index f2cfd7409412de68f4183daebcb48e7a3ae37672..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/.gitignore +++ /dev/null @@ -1 +0,0 @@ -enable_virtualenv.c diff --git a/paddle/legacy/utils/Any.h b/paddle/legacy/utils/Any.h deleted file mode 100644 index 99a0139accc4988f1e4cce45eeb688a6603c2c31..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Any.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#if __cplusplus > 201402L -#include - -namespace paddle { -// using std::any for C++ 17 -using std::any; -using std::any_cast; -using std::bad_any_cast; -} // namespace paddle - -#else -#include - -namespace paddle { -// use linb::any for C++ 11 -using linb::any; -using linb::any_cast; -using linb::bad_any_cast; -} // namespace paddle -#endif diff --git a/paddle/legacy/utils/CMakeLists.txt b/paddle/legacy/utils/CMakeLists.txt deleted file mode 100644 index b42b2bae968a10c581c594054f853347eb5d5445..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -# The utilities for paddle -file(GLOB UTIL_HEADERS . *.h) -file(GLOB UTIL_SOURCES . *.cpp) -create_resources(${CMAKE_CURRENT_SOURCE_DIR}/enable_virtualenv.py - ${CMAKE_CURRENT_BINARY_DIR}/enable_virtualenv.c) -set(UTIL_RES ${CMAKE_CURRENT_BINARY_DIR}/enable_virtualenv.c) - -if(APPLE) - file(GLOB UTIL_ARCH_SOURCES . arch/osx/*.cpp) -else() - file(GLOB UTIL_ARCH_SOURCES . arch/linux/*.cpp) -endif() -add_library(paddle_utils STATIC - ${UTIL_SOURCES} - ${UTIL_ARCH_SOURCES} - ${UTIL_RES}) -add_dependencies(paddle_utils paddle_proto ${external_project_dependencies}) -if(WITH_TESTING) - add_subdirectory(tests) -endif() diff --git a/paddle/legacy/utils/ClassRegistrar.h b/paddle/legacy/utils/ClassRegistrar.h deleted file mode 100644 index 5f40a0b25e92c7adcfe3f8c4be96016be801da3b..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/ClassRegistrar.h +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include - -#include "Util.h" - -namespace paddle { - -/** - * This class is used to keep a set of class types. It can register a - * class by a type name and create an instance of a class by type. - * Example: - * // Declare the registrar - * ClassRegistrar registar_; - * - * // Register a class using its constructor - * registrar_.registerClass("conv"); - * - * // Register a class using a creation function - * registrar_.registerClass("pool", [](LayerConfig& config){ - * return PoolLayer::create(config); - * }); - * - * // create a class instance by type name - * Layer* layer = registrar_.createByType("conv", config); - */ -template -class ClassRegistrar { - public: - typedef std::function ClassCreator; - - // Register a class using a creation function. - // The creation function's arguments are CreateArgs - void registerClass(const std::string& type, ClassCreator creator) { - CHECK(creatorMap_.count(type) == 0) << "Duplicated class type: " << type; - creatorMap_[type] = creator; - } - - // Register a class using its constructor - // The constructor's arguments are CreateArgs - template - void registerClass(const std::string& type) { - registerClass(type, - [](CreateArgs... args) { return new ClassType(args...); }); - } - - // Create a class instance of type @type using args - BaseClass* createByType(const std::string& type, CreateArgs... 
args) { - ClassCreator creator; - CHECK(mapGet(type, creatorMap_, &creator)) << "Unknown class type: " - << type; - return creator(args...); - } - - template - inline void forEachType(T callback) { - for (auto it = creatorMap_.begin(); it != creatorMap_.end(); ++it) { - callback(it->first); - } - } - - protected: - std::map creatorMap_; -}; - -} // namespace paddle diff --git a/paddle/legacy/utils/Common.h b/paddle/legacy/utils/Common.h deleted file mode 100644 index 1f1d0255a5eaef824171ddeaf9480167f232007e..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Common.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Excepts.h" - -/** - * Disable copy macro. - */ -#define DISABLE_COPY(class_name) \ - class_name(class_name &&) = delete; \ - class_name(const class_name &other) = delete; \ - class_name &operator=(const class_name &other) = delete - -namespace paddle { - -#ifdef PADDLE_TYPE_DOUBLE -using real = double; -#else -using real = float; -#endif - -} // namespace paddle diff --git a/paddle/legacy/utils/CpuId.cpp b/paddle/legacy/utils/CpuId.cpp deleted file mode 100644 index 66e7c6606f070aef4fd954b8f4ada994b2f4fb96..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/CpuId.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
-Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/utils/CpuId.h" -#include "paddle/legacy/utils/Util.h" - -#ifdef _WIN32 - -#include - -/// for MSVC -#define CPUID(info, x) __cpuidex(info, x, 0) - -#else - -#if !defined(__arm__) && !defined(__aarch64__) -#include -/// for GCC/Clang -#define CPUID(info, x) __cpuid_count(x, 0, info[0], info[1], info[2], info[3]) -#endif - -#endif - -namespace paddle { - -SIMDFlags::SIMDFlags() { -#if defined(__arm__) || defined(__aarch64__) - simd_flags_ = SIMD_NEON; -#else - unsigned int cpuInfo[4]; - // CPUID: https://en.wikipedia.org/wiki/CPUID - // clang-format off - CPUID(cpuInfo, 0x00000001); - simd_flags_ |= cpuInfo[3] & (1 << 25) ? SIMD_SSE : SIMD_NONE; - simd_flags_ |= cpuInfo[3] & (1 << 26) ? SIMD_SSE2 : SIMD_NONE; - simd_flags_ |= cpuInfo[2] & (1 << 0) ? SIMD_SSE3 : SIMD_NONE; - simd_flags_ |= cpuInfo[2] & (1 << 9) ? SIMD_SSSE3 : SIMD_NONE; - simd_flags_ |= cpuInfo[2] & (1 << 19) ? SIMD_SSE41 : SIMD_NONE; - simd_flags_ |= cpuInfo[2] & (1 << 20) ? SIMD_SSE42 : SIMD_NONE; - simd_flags_ |= cpuInfo[2] & (1 << 12) ? SIMD_FMA3 : SIMD_NONE; - simd_flags_ |= cpuInfo[2] & (1 << 28) ? SIMD_AVX : SIMD_NONE; - - CPUID(cpuInfo, 0x00000007); - simd_flags_ |= cpuInfo[1] & (1 << 5) ? SIMD_AVX2 : SIMD_NONE; - simd_flags_ |= cpuInfo[1] & (1 << 16) ? SIMD_AVX512: SIMD_NONE; - - CPUID(cpuInfo, 0x80000001); - simd_flags_ |= cpuInfo[2] & (1 << 16) ? 
SIMD_FMA4 : SIMD_NONE; - // clang-fotmat on -#endif -} - -SIMDFlags const* SIMDFlags::instance() { - static SIMDFlags instance; - return &instance; -} - -} // namespace paddle diff --git a/paddle/legacy/utils/CpuId.h b/paddle/legacy/utils/CpuId.h deleted file mode 100644 index ed58211d13ac1e0f80d6728950f0b88dc0ae625f..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/CpuId.h +++ /dev/null @@ -1,136 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "Common.h" -#include "Error.h" - -namespace paddle { - -// clang-format off -enum simd_t { - SIMD_NONE = 0, ///< None - SIMD_SSE = 1 << 0, ///< SSE - SIMD_SSE2 = 1 << 1, ///< SSE 2 - SIMD_SSE3 = 1 << 2, ///< SSE 3 - SIMD_SSSE3 = 1 << 3, ///< SSSE 3 - SIMD_SSE41 = 1 << 4, ///< SSE 4.1 - SIMD_SSE42 = 1 << 5, ///< SSE 4.2 - SIMD_FMA3 = 1 << 6, ///< FMA 3 - SIMD_FMA4 = 1 << 7, ///< FMA 4 - SIMD_AVX = 1 << 8, ///< AVX - SIMD_AVX2 = 1 << 9, ///< AVX 2 - SIMD_AVX512 = 1 << 10, ///< AVX 512 - SIMD_NEON = 1 << 11, /// NEON -}; -// clang-format on - -class SIMDFlags final { - public: - DISABLE_COPY(SIMDFlags); - - SIMDFlags(); - - static SIMDFlags const* instance(); - - inline bool check(int flags) const { - return !((simd_flags_ & flags) ^ flags); - } - - private: - int simd_flags_ = SIMD_NONE; -}; - -/** - * @brief Check SIMD flags at runtime. - * - * For example. 
- * @code{.cpp} - * - * if (HAS_SIMD(SIMD_AVX2 | SIMD_FMA4)) { - * avx2_fm4_stub(); - * } else if (HAS_SIMD(SIMD_AVX)) { - * avx_stub(); - * } - * - * @endcode - */ -#define HAS_SIMD(__flags) SIMDFlags::instance()->check(__flags) - -/** - * @brief Check SIMD flags at runtime. - * - * 1. Check all SIMD flags at runtime: - * - * @code{.cpp} - * if (HAS_AVX && HAS_AVX2) { - * avx2_stub(); - * } - * @endcod - * - * 2. Check one SIMD flag at runtime: - * - * @code{.cpp} - * if (HAS_SSE41 || HAS_SSE42) { - * sse4_stub(); - * } - * @endcode - */ -// clang-format off -#define HAS_SSE HAS_SIMD(SIMD_SSE) -#define HAS_SSE2 HAS_SIMD(SIMD_SSE2) -#define HAS_SSE3 HAS_SIMD(SIMD_SSE3) -#define HAS_SSSE3 HAS_SIMD(SIMD_SSSE3) -#define HAS_SSE41 HAS_SIMD(SIMD_SSE41) -#define HAS_SSE42 HAS_SIMD(SIMD_SSE42) -#define HAS_FMA3 HAS_SIMD(SIMD_FMA3) -#define HAS_FMA4 HAS_SIMD(SIMD_FMA4) -#define HAS_AVX HAS_SIMD(SIMD_AVX) -#define HAS_AVX2 HAS_SIMD(SIMD_AVX2) -#define HAS_AVX512 HAS_SIMD(SIMD_AVX512) -#define HAS_NEON HAS_SIMD(SIMD_NEON) -// clang-format on - -/** - * Invoke checkCPUFeature() before Paddle initialization to - * check target machine whether support compiled instructions. - * If not, simply throw out an error. - */ -inline Error __must_check checkCPUFeature() { - Error err; -#ifndef __AVX__ - if (HAS_AVX) { - LOG(WARNING) << "PaddlePaddle wasn't compiled to use avx instructions, " - << "but these are available on your machine and could " - << "speed up CPU computations via CMAKE .. -DWITH_AVX=ON"; - } -#else - if (!HAS_AVX) { - err = Error( - "PaddlePaddle was compiled to use avx instructions, " - "but these aren't available on your machine, please " - "disable it via CMAKE .. -DWITH_AVX=OFF"); - } -#endif // __AVX__ -#ifdef __SSE3__ - if (!HAS_SSE3) { - err = Error( - "PaddlePaddle was compiled to use sse3 instructions, " - "which is the minimum requirement of PaddlePaddle. 
" - "But these aren't available on your current machine."); - } -#endif // __SSE3__ - - return err; -} - -} // namespace paddle diff --git a/paddle/legacy/utils/CustomStackTrace.cpp b/paddle/legacy/utils/CustomStackTrace.cpp deleted file mode 100644 index 9723d7df9744989d9dd6035e51eae35764656065..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/CustomStackTrace.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "CustomStackTrace.h" -#include -#include - -DEFINE_bool( - layer_stack_error_only_current_thread, - true, - "Dump current thread or whole process layer stack when signal error " - "occurred. true means only dump current thread layer stack"); - -namespace paddle { - -CustomStackTrace gLayerStackTrace; - -static std::mutex gLayerStackTraceMtx; -void installLayerStackTracer() { - logging::installFailureWriter([](const char* data, int sz) { - std::lock_guard guard(gLayerStackTraceMtx); - if (!gLayerStackTrace.empty()) { - size_t curTid = -1UL; - std::hash hasher; - gLayerStackTrace.dump( - [&curTid, &hasher](std::thread::id tid, - bool* isForwarding, - const std::string& layerName) { - if (curTid != hasher(tid)) { - if (curTid != -1UL) { - std::cerr << std::endl; - } - curTid = hasher(tid); - std::cerr << "Thread [" << tid << "] "; - if (isForwarding) { - std::cerr << (*isForwarding ? 
"Forwarding " : "Backwarding "); - } - } - std::cerr << layerName << ", "; - }, - FLAGS_layer_stack_error_only_current_thread); - std::cerr << std::endl; - } - std::cerr.write(data, sz); - }); -} - -} // namespace paddle diff --git a/paddle/legacy/utils/CustomStackTrace.h b/paddle/legacy/utils/CustomStackTrace.h deleted file mode 100644 index b60077ea2d946366910780eeb773635972211e04..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/CustomStackTrace.h +++ /dev/null @@ -1,193 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include - -#include "ThreadLocal.h" - -namespace paddle { - -/** - * A ThreadLocal stack for tracing train/test process. - * (More details of ThreadLocal can be find - * in the comments of ThreadLocal class.) - * - * For example. - * @code{.cpp} - * - * paddle::CustomStackTrace stack; - * for (auto& layer : layers){ - * stack.push(layer->getName()); - * layer->forward(); - * } - * - * stack.pop(""); // mark under pop stage. - * - * for (auto it = layers.rbegin(); it != layers.rend(); ++it){ - * auto& layer = *it; - * layer->backward(passType); - * stack.pop(layer->getName()); - * } - * - * @endcode - */ -template -class CustomStackTrace { - public: - /** - * @brief Pop out an item from the top of the stack if item == top. - * Else, just set status to popping. 
- */ - void pop(const T& item) { - auto& s = this->stack(); - if (item == s.top()) { - s.pop(); - } - } - - /** - * @brief Indicate whether we are at forward or backward stage of computation - */ - void set_stage(bool isForward) { pushing() = isForward; } - - /** - * @brief clear current thread stack. - */ - void clear() { - auto& s = stack(); - while (!s.empty()) { - s.pop(); - } - } - - /** - * @brief return true if all thread's stack is empty. - * @return true if empty - */ - bool empty() const { - std::lock_guard g(this->mtx_); - for (auto p : this->stackBuffers_) { - std::stack& s = *p.second; - if (!s.empty()) { - return false; - } - } - return true; - } - - /** - * @brief DumpCallback Type. It will be invoked many times by dump method. - * - * The first parameter is stack thread id. - * The second parameter is the last action of stack is push or not. - * The third parameter is the item in stack. - */ - typedef std::function - DumpCallback; - - /** - * Dump all thread stack, and all stack will be cleared. - */ - void dump(const DumpCallback& callback, bool onlyCurrentThread = false) { - std::lock_guard g(this->mtx_); - for (auto p : this->stackBuffers_) { - std::thread::id tid = p.first; - if (onlyCurrentThread && tid != std::this_thread::get_id()) { - continue; - } - std::stack& s = *p.second; - bool* isPush = nullptr; - auto it = this->pushingBuffers_.find(tid); - if (it != this->pushingBuffers_.end()) { - isPush = it->second; - } - - while (!s.empty()) { - callback(tid, isPush, s.top()); - s.pop(); - } - } - } - - /** - * @brief Push item to current thread stack. - */ - void push(const T& item) { - pushing() = true; - auto& p = this->stack(); - p.push(item); - } - - private: - /** - * Get thread local attribute, and save them into a map (threadId => TYPE*) - * - * @tparam TYPE thread local attribute type. - * @param threadLocal Thread Local object. 
- * @param buffers a map from threadId to TYPE* - */ - template - inline TYPE& getThreadLocal( - ThreadLocal& threadLocal, - std::unordered_map& buffers) { - TYPE* retv = threadLocal.get(false); - if (retv) { - return *retv; - } else { - std::lock_guard guard(this->mtx_); - retv = threadLocal.get(); - auto id = std::this_thread::get_id(); - buffers.insert({id, retv}); - return *retv; - } - } - - /** - * @brief Get thread local stack reference. - */ - std::stack& stack() { - return this->getThreadLocal(this->logStack_, this->stackBuffers_); - } - - /** - * @brief Get thread local pushing flag. - */ - bool& pushing() { - return this->getThreadLocal(this->isPushing_, this->pushingBuffers_); - } - - private: - mutable std::mutex mtx_; - - std::unordered_map*> stackBuffers_; - std::unordered_map pushingBuffers_; - ThreadLocal isPushing_; - ThreadLocal> logStack_; -}; - -extern CustomStackTrace gLayerStackTrace; - -/** - * @brief Install a failure handler to print layer stack when error. - */ -extern void installLayerStackTracer(); - -} // namespace paddle diff --git a/paddle/legacy/utils/DynamicLoader.cpp b/paddle/legacy/utils/DynamicLoader.cpp deleted file mode 100644 index 9ac4a56c6e300d299467630b39a32567af72cf40..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/DynamicLoader.cpp +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "DynamicLoader.h" -#include -#include "Logging.h" - -DEFINE_string(cudnn_dir, - "", - "Specify path for loading libcudnn.so. For instance, " - "/usr/local/cudnn/lib. If empty [default], dlopen " - "will search cudnn from LD_LIBRARY_PATH"); - -DEFINE_string(cuda_dir, - "", - "Specify path for loading cuda library, such as libcublas, " - "libcurand. For instance, /usr/local/cuda/lib64. If default, " - "dlopen will search cuda from LD_LIBRARY_PATH"); - -DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); - -DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); - -DEFINE_string(tensorrt_dir, "", "Specify path for loading libnvinfer.so."); - -static inline std::string join(const std::string& part1, - const std::string& part2) { - // directory separator - const char sep = '/'; - if (!part2.empty() && part2.front() == sep) { - return part2; - } - std::string ret; - ret.reserve(part1.size() + part2.size() + 1); - ret = part1; - if (!ret.empty() && ret.back() != sep) { - ret += sep; - } - ret += part2; - return ret; -} - -static inline void GetDsoHandleFromDefaultPath(std::string& dso_path, - void** dso_handle, - int dynload_flags) { - VLOG(3) << "Try to find library: " << dso_path - << " from default system path."; - // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH - *dso_handle = dlopen(dso_path.c_str(), dynload_flags); - -// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to -// bring System Integrity Projection (SIP), if dso_handle -// is null, search from default package path in Mac OS. 
-#if defined(__APPLE__) || defined(__OSX__) - if (nullptr == *dso_handle) { - dso_path = join("/usr/local/cuda/lib/", dso_path); - *dso_handle = dlopen(dso_path.c_str(), dynload_flags); - if (nullptr == *dso_handle) { - if (dso_path == "libcudnn.dylib") { - LOG(FATAL) - << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n" // NOLINT - << "For instance, sudo tar -xzf " - "cudnn-7.5-osx-x64-v5.0-ga.tgz -C " // NOLINT - << "/usr/local \n sudo chmod a+r " - "/usr/local/cuda/include/cudnn.h " // NOLINT - << "/usr/local/cuda/lib/libcudnn*"; - } - } - } -#endif -} - -static inline void GetDsoHandleFromSearchPath(const std::string& search_root, - const std::string& dso_name, - void** dso_handle) { - int dynload_flags = RTLD_LAZY | RTLD_LOCAL; - *dso_handle = nullptr; - - std::string dlPath = dso_name; - if (search_root.empty()) { - GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); - } else { - // search xxx.so from custom path - dlPath = join(search_root, dso_name); - *dso_handle = dlopen(dlPath.c_str(), dynload_flags); - // if not found, search from default path - if (nullptr == *dso_handle) { - LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " (" - << dlerror() << ")"; - dlPath = dso_name; - GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); - } - } - - CHECK(nullptr != *dso_handle) << "Failed to find dynamic library: " << dlPath - << " (" << dlerror() << ") \n" - << "Please specify its path correctly using " - "following ways: \n" - - << "Method. set environment variable " - "LD_LIBRARY_PATH on Linux or " - << "DYLD_LIBRARY_PATH on Mac OS. \n" - << "For instance, issue command: export " - "LD_LIBRARY_PATH=... 
\n" - - << "Note: After Mac OS 10.11, using the " - "DYLD_LIBRARY_PATH is impossible " - << "unless System Integrity Protection (SIP) " - "is disabled."; -} - -void GetCublasDsoHandle(void** dso_handle) { -#if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib", dso_handle); -#else - GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so", dso_handle); -#endif -} - -void GetCudnnDsoHandle(void** dso_handle) { -#if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle); -#else - GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", dso_handle); -#endif -} - -void GetCurandDsoHandle(void** dso_handle) { -#if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib", dso_handle); -#else - GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so", dso_handle); -#endif -} - -void GetWarpCTCDsoHandle(void** dso_handle) { -#if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.dylib", dso_handle); -#else - GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle); -#endif -} - -void GetLapackDsoHandle(void** dso_handle) { -#if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.dylib", dso_handle); -#else - GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.so", dso_handle); -#endif -} - -void GetTensorRtDsoHandle(void** dso_handle) { -#if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath( - FLAGS_tensorrt_dir, "libnvinfer.dylib", dso_handle); -#else - GetDsoHandleFromSearchPath(FLAGS_tensorrt_dir, "libnvinfer.so", dso_handle); -#endif -} diff --git a/paddle/legacy/utils/DynamicLoader.h b/paddle/legacy/utils/DynamicLoader.h deleted file mode 100644 index 02f519de4b3988fb6aca323aaa1751ee2c4bd738..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/DynamicLoader.h +++ 
/dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include - -/** - * @brief load the DSO of CUBLAS - * - * @param **dso_handle dso handler - * - */ -void GetCublasDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of CUDNN - * - * @param **dso_handle dso handler - * - */ -void GetCudnnDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of CURAND - * - * @param **dso_handle dso handler - * - */ -void GetCurandDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of warp-ctc - * - * @param **dso_handle dso handler - * - */ -void GetWarpCTCDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of lapack - * - * @param **dso_handle dso handler - * - */ -void GetLapackDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of tensorrt - * - * @param **dso_handle dso handler - * - */ -void GetTensorRtDsoHandle(void** dso_handle); diff --git a/paddle/legacy/utils/Error.h b/paddle/legacy/utils/Error.h deleted file mode 100644 index 1fc8482e3a1bef869d4df147bbd3cab6e62ccf49..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Error.h +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include - -/** - * __must_check macro. It make the function's return value must be used, - * otherwise it will raise a compile warning. And also Paddle treat all compile - * warnings as errors. - */ -#ifdef __GNUC__ -#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 30400 -#define __must_check __attribute__((warn_unused_result)) -#else -#define __must_check -#endif -#else -#define __must_check -#endif - -namespace paddle { - -/** - * Error is Paddle error code. It only contain a std::string as error message. - * - * - * There are two styles to return error in Paddle. - * - * 1. Return Error - * When method return a status, the return must use `__must_check` attribute. - * Example as below. - * @code{cpp} - * Error __must_check foo(); - * - * Error __must_check bar() { - * // do something. - * Error err = foo(); // invoke other method return status. - * if (err) return err; - * // do something else. - * return Error(); - * } - * @endcode{cpp} - * - * 2. Return by parameter. - * It is another way to return an error, by using a pointer parameter. - * Example as below. - * - * @code{cpp} - * Error bar(); - * - * int foo(Error* error) { - * // Do something. - * Error err = bar(); - * if (err) { - * *error = s; - * return 0; - * } - * // Do something else. - * if (someInternalErrorHappend) { - * *error = Error("Some dimension is too large, %d", dimension); - * return 0; - * } - * // End of method. - * return someValue; - * } - * - * Error foobar() { - * Error err; - * // do something. 
- * foo(&err); - * if (err) return err; - * } - * @endcode{cpp} - * - * - * Currently there is a helper method 'check' in status, because Paddle always - * use log(FATAL) or CHECK to make program exit before. When we clean all - * log(FATAL) and CHECK in Paddle, 'check' method will be removed. - */ -class Error { - public: - /** - * Construct a no-error value. - */ - Error() {} - - /** - * @brief Create an Error use printf syntax. - */ - explicit Error(const char* fmt, ...) { - va_list ap; - va_start(ap, fmt); - constexpr size_t kBufferSize = 1024; - char buffer[kBufferSize]; - vsnprintf(buffer, kBufferSize, fmt, ap); - this->msg_.reset(new std::string(buffer)); - va_end(ap); - } - - /** - * @brief msg will return the error message. If no error, return nullptr. - */ - const char* msg() const { - if (msg_) { - return msg_->c_str(); - } else { - return nullptr; - } - } - - /** - * @brief check this status by glog. - * @note It is a temp method used during cleaning Paddle code. It will be - * removed later. - */ - void check() const { CHECK(this->isOK()) << msg(); } - - /** - * @brief isOK return True if there is no error. - * @return True if no error. - */ - bool isOK() const { return msg_ == nullptr; } - - private: - std::shared_ptr msg_; -}; - -} // namespace paddle diff --git a/paddle/legacy/utils/Excepts.h b/paddle/legacy/utils/Excepts.h deleted file mode 100644 index 5c2c504f53a586f2991ccfae891991465fdb39b6..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Excepts.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef EXCEPTS_H_ -#define EXCEPTS_H_ - -#include - -#if defined(__APPLE__) || defined(__OSX__) - -int fegetexcept(void); -int feenableexcept(unsigned int excepts); -int fedisableexcept(unsigned int excepts); - -#endif - -#endif // EXCEPTS_H_ diff --git a/paddle/legacy/utils/Flags.cpp b/paddle/legacy/utils/Flags.cpp deleted file mode 100644 index ea47cf23eb6e56082eeb92f3c6dff8d03be0d679..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Flags.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Flags.h" - -#ifndef PADDLE_WITH_CUDA -DEFINE_bool(use_gpu, false, "Only support CPU training"); -#else -DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); -#endif - -#ifdef PADDLE_WITH_MKLDNN -// TODO(TJ): change to true when MKLDNN layers support multi-inputs -DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training"); -#else -DEFINE_bool(use_mkldnn, false, "Only support CPU training"); -#endif - -#ifdef PADDLE_WITH_MKLML -// TODO(TJ): change to true when fully confirmed -DEFINE_bool(use_mkl_packed, false, "Whether to use MKL Packed Optimization"); -#else -DEFINE_bool(use_mkl_packed, false, "Not to use MKL Packed Optimization"); -#endif - -DEFINE_bool(parallel_nn, - false, - "Whether to use multi-threads to calculate one neural network." - "If it was set false, use gpu_id specify which gpu core to use" - "(the device property in the trainer config file will be ingored)." - "If it was set true, the gpu core is specified by the trainer" - " config file(gpu_id will be ignored)."); -DEFINE_int32(trainer_count, 1, "Defined how many trainers to train"); -DEFINE_int32(gpu_id, 0, "Which gpu core to use"); -DEFINE_int32(port, 20134, "Listening port for pserver"); -DEFINE_int32(ports_num, - 1, - "Number of ports for sending dense parameter," - " following ports on parameter server will be visited" - " for sending dense parameter: [port, port+ports_num-1]"); -DEFINE_int32(ports_num_for_sparse, - 0, - "Number of ports for sending sparse parameter," - " following ports on parameter server will be visited" - " for sending sparse parameter:" - " [port+ports_num, port+ports_num+ports_num_for_sparse-1]"); -DEFINE_string(nics, "xgbe0,xgbe1", "network device name for pservers"); -DEFINE_string(rdma_tcp, "tcp", "use rdma or tcp rdma transport protocol"); -DEFINE_int32(trainer_id, - 0, - "For distributed training, each trainer must be given an unique id" - " ranging from 0 to num_trainers-1. 
Trainer 0 is the master" - " trainer"); -DEFINE_int32(num_gradient_servers, 1, "number of gradient servers"); -DEFINE_string(comment, "", "A string for commenting this training task"); -DEFINE_string(load_missing_parameter_strategy, - "fail", - "which operation to take on load model fails. support " - "fail/rand/zero only."); -DEFINE_int32(log_period, 100, "Log progress every so many batches"); -DEFINE_int32(log_period_server, - 500, - "Log progress every so many batches at pserver end"); -DEFINE_double(checkgrad_eps, 1e-5, "parameter change size for checkgrad"); -DEFINE_int32(enable_parallel_vector, 0, "threshold for enable parallel vector"); -DEFINE_bool(loadsave_parameters_in_pserver, - false, - "load and save parameters in pserver. " - "only work while parameter set sparse_remote_update."); -DEFINE_int32(beam_size, - 1, - "Beam size used in generating most probable output sequences."); - -DEFINE_bool(show_layer_stat, false, "show the statistics of each layer"); -DEFINE_string(predict_file, "", "File name for saving predict result"); -DEFINE_bool(prev_batch_state, false, "batch is continue with next batch"); -DEFINE_string(init_model_path, - "", - "Path of the initial model parameters." - "If it was set, start_pass will be ignored."); diff --git a/paddle/legacy/utils/Flags.h b/paddle/legacy/utils/Flags.h deleted file mode 100644 index b64295bca09a199f24605a158d1d9db7e7d91660..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Flags.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -DECLARE_bool(parallel_nn); -DECLARE_int32(async_count); -DECLARE_int32(port); -DECLARE_bool(use_gpu); -DECLARE_int32(gpu_id); -DECLARE_int32(trainer_count); -DECLARE_int32(ports_num); -DECLARE_int32(ports_num_for_sparse); -DECLARE_string(nics); -DECLARE_string(rdma_tcp); -DECLARE_int32(trainer_id); -DECLARE_int32(num_gradient_servers); -DECLARE_string(comment); -DECLARE_string(load_missing_parameter_strategy); -DECLARE_int32(log_period); -DECLARE_int32(log_period_server); -DECLARE_double(checkgrad_eps); -DECLARE_int32(enable_parallel_vector); -DECLARE_bool(loadsave_parameters_in_pserver); -DECLARE_int32(beam_size); -DECLARE_bool(show_layer_stat); -DECLARE_string(predict_file); -DECLARE_bool(prev_batch_state); -DECLARE_string(init_model_path); -DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkl_packed); diff --git a/paddle/legacy/utils/GlobalConstants.cpp b/paddle/legacy/utils/GlobalConstants.cpp deleted file mode 100644 index 9e8dade0b228eb642a965eaa5bfe0653fe2749de..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/GlobalConstants.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "GlobalConstants.h" - -namespace paddle { - -const std::string TrainAlgorithm::SGD = "sgd"; -const std::string TrainAlgorithm::AsyncSGD = "async_sgd"; -const std::string TrainAlgorithm::OWLQN = "owlqn"; - -} // namespace paddle diff --git a/paddle/legacy/utils/GlobalConstants.h b/paddle/legacy/utils/GlobalConstants.h deleted file mode 100644 index 3f45e82268435e4c22d1879e909b0c90838d6693..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/GlobalConstants.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include - -namespace paddle { - -namespace enumeration_wrapper { -enum PassType { - PASS_TRAIN, // Train pass - PASS_TEST, // Test pass - PASS_GC, // Gradient Check pass - PASS_METRIC, // pass for generate template output with no drop rate. 
-}; - -enum ParameterType { - PARAMETER_VALUE = 0, - PARAMETER_GRADIENT, - PARAMETER_MOMENTUM, - - // Used by ParameterAverager - PARAMETER_SUM1, - PARAMETER_SUM2, - PARAMETER_SUM3, - - // also used by AdagradParameterUpdater/AdadeltaParameterUpdater - PARAMETER_LEARNING_RATE, - - // Used by Sparse SGD update - PARAMETER_UPDATE_TIME, - - // Used by async_sgd - // Change of the parameter since last remote update - PARAMETER_DELTA, - - // Used by BatchRemoteParameterUpdater - PARAMETER_GRADIENT_SUM, - - // Used by AdagradParameterUpdater/AdadeltaParameterUpdater - PARAMETER_GRADIENT_SQURESUM, - PARAMETER_GRADIENT_SQURESUM1, - - // Used by SparseConnected layer - PARAMETER_ROWS, - PARAMETER_COLS, - - // Used by Adam Optimizer. - PARAMETER_SECOND_MOMENTUM, - - // Used By AdaMax Optimizer. - PARAMETER_WEIGHTED_INFINITY_NORM, - - // Used by remote parameter average - PARAMETER_APPLY, - - // Used by sparse momentum - PARAMETER_MOMENTUM_UT, - PARAMETER_MOMENTUM_VT, - - NUM_PARAMETER_TYPES, -}; - -} // namespace enumeration_wrapper - -//! explicit import enum into paddle namespace. -using namespace enumeration_wrapper; // NOLINT - -class TrainAlgorithm { - public: - static const std::string SGD; - static const std::string AsyncSGD; - static const std::string OWLQN; - - static inline bool isValid(const std::string& algo) { - return algo == SGD || algo == AsyncSGD || algo == OWLQN; - } -}; - -#ifdef __AVX__ -const int ALIGN_HINT = 32; -#else -const int ALIGN_HINT = 16; -#endif - -} // namespace paddle diff --git a/paddle/legacy/utils/Locks.h b/paddle/legacy/utils/Locks.h deleted file mode 100644 index 65f983685f5e178345a6a875a79a6573ce1ccca1..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Locks.h +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include - -#include "Common.h" - -namespace paddle { - -/** - * A simple read-write lock. - * The RWlock allows a number of readers or at most one writer - * at any point in time. - * The RWlock disable copy. - * - * Lock: - * - * Use lock() to lock on write mode, no other thread can get it - * until unlock. - * - * Use lock_shared() to lock on read mode, other thread can get - * it by using the same method lock_shared(). - * - * Unlock: - * - * Use unlock() to unlock the lock. - */ -class RWLock { - public: - RWLock() { pthread_rwlock_init(&rwlock_, NULL); } - ~RWLock() { pthread_rwlock_destroy(&rwlock_); } - RWLock(const RWLock&) = delete; - RWLock& operator=(const RWLock&) = delete; - - /** - * @brief lock on write mode. - * @note the method will block the thread, if failed to get the lock. - */ - // std::mutex interface - void lock() { pthread_rwlock_wrlock(&rwlock_); } - /** - * @brief lock on read mode. - * @note if another thread is writing, it can't get the lock, - * and will block the thread. - */ - void lock_shared() { pthread_rwlock_rdlock(&rwlock_); } - void unlock() { pthread_rwlock_unlock(&rwlock_); } - - protected: - pthread_rwlock_t rwlock_; -}; - -/** - * The ReadLockGuard is a read mode RWLock - * using RAII management mechanism. - */ -class ReadLockGuard { - public: - /** - * @brief Construct Function. Lock on rwlock in read mode. - */ - explicit ReadLockGuard(RWLock& rwlock) : rwlock_(&rwlock) { - rwlock_->lock_shared(); - } - - /** - * @brief Destruct Function. 
- * @note This method just unlock the read mode rwlock, - * won't destroy the lock. - */ - ~ReadLockGuard() { rwlock_->unlock(); } - - protected: - RWLock* rwlock_; -}; - -/** - * A simple wrapper for spin lock. - * The lock() method of SpinLock is busy-waiting - * which means it will keep trying to lock until lock on successfully. - * The SpinLock disable copy. - */ -class SpinLockPrivate; -class SpinLock { - public: - DISABLE_COPY(SpinLock); - SpinLock(); - ~SpinLock(); - - // std::mutext interface - void lock(); - void unlock(); - - private: - SpinLockPrivate* m; -}; - -/** - * A simple wapper of semaphore which can only be shared in the same process. - */ -class SemaphorePrivate; -class Semaphore { - public: - //! Disable copy & assign - Semaphore(const Semaphore& other) = delete; - Semaphore& operator=(const Semaphore&& other) = delete; - - //! Enable move. - Semaphore(Semaphore&& other) : m(std::move(other.m)) {} - - public: - /** - * @brief Construct Function. - * @param[in] initValue the initial value of the - * semaphore, default 0. - */ - explicit Semaphore(int initValue = 0); - - ~Semaphore(); - - /** - * @brief The same as wait(), except if the decrement can not - * be performed until ts return false install of blocking. - * @param[in] ts an absolute timeout in seconds and nanoseconds - * since the Epoch 1970-01-01 00:00:00 +0000(UTC). - * @return ture if the decrement proceeds before ts, - * else return false. - */ - bool timeWait(struct timespec* ts); - - /** - * @brief decrement the semaphore. If the semaphore's value is 0, then call - * blocks. - */ - void wait(); - - /** - * @brief increment the semaphore. If the semaphore's value - * greater than 0, wake up a thread blocked in wait(). - */ - void post(); - - private: - SemaphorePrivate* m; -}; - -/** - * A simple wrapper of thread barrier. - * The ThreadBarrier disable copy. 
- */ -class ThreadBarrierPrivate; -class ThreadBarrier { - public: - DISABLE_COPY(ThreadBarrier); - - /** - * @brief Construct Function. Initialize the barrier should - * wait for count threads in wait(). - */ - explicit ThreadBarrier(int count); - ~ThreadBarrier(); - - /** - * @brief . - * If there were count - 1 threads waiting before, - * then wake up all the count - 1 threads and continue run together. - * Else block the thread until waked by other thread . - */ - void wait(); - - private: - ThreadBarrierPrivate* m; -}; - -/** - * A wrapper for condition variable with mutex. - */ -class LockedCondition : public std::condition_variable { - public: - /** - * @brief execute op and notify one thread which was blocked. - * @param[in] op a thread can do something in op before notify. - */ - template - void notify_one(Op op) { - std::lock_guard guard(mutex_); - op(); - std::condition_variable::notify_one(); - } - - /** - * @brief execute op and notify all the threads which were blocked. - * @param[in] op a thread can do something in op before notify. - */ - template - void notify_all(Op op) { - std::lock_guard guard(mutex_); - op(); - std::condition_variable::notify_all(); - } - - /** - * @brief wait until pred return ture. - * @tparam Predicate c++ concepts, describes a function object - * that takes a single iterator argument - * that is dereferenced and used to - * return a value testable as a bool. - * @note pred shall not apply any non-constant function - * through the dereferenced iterator. - */ - template - void wait(Predicate pred) { - std::unique_lock lock(mutex_); - std::condition_variable::wait(lock, pred); - } - - /** - * @brief get mutex. 
- */ - std::mutex* mutex() { return &mutex_; } - - protected: - std::mutex mutex_; -}; - -} // namespace paddle diff --git a/paddle/legacy/utils/Logging.cpp b/paddle/legacy/utils/Logging.cpp deleted file mode 100644 index ea96bad240ad81c4c29b7dab35b015549052e2bb..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Logging.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * Basically from tensorflow/core/platform/default/logging.cc - * Used in embedded system where there is no glogs. 
- */ - -#include "Logging.h" -#include - -namespace paddle { - -void initializeLogging(int argc, char** argv) { - (void)(argc); - if (!getenv("GLOG_logtostderr")) { - google::LogToStderr(); - } - google::InstallFailureSignalHandler(); - google::InitGoogleLogging(argv[0]); -} - -namespace logging { - -void setMinLogLevel(int level) { FLAGS_minloglevel = level; } - -void installFailureFunction(void (*callback)()) { - google::InstallFailureFunction(callback); -} - -void installFailureWriter(void (*callback)(const char*, int)) { - google::InstallFailureWriter(callback); -} - -} // namespace logging -} // namespace paddle diff --git a/paddle/legacy/utils/Logging.h b/paddle/legacy/utils/Logging.h deleted file mode 100644 index d9e551f0891fa0808b8699aea94a0d2ab4f81cb3..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Logging.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * Basically from tensorflow/core/platform/default/logging.h - * Used in embedded system where there is no glogs. 
- */ - -#pragma once -#include -#include -#include - -#include -namespace paddle { - -void initializeLogging(int argc, char** argv); - -namespace logging { - -void setMinLogLevel(int level); - -void installFailureFunction(void (*callback)()); - -void installFailureWriter(void (*callback)(const char*, int)); - -} // namespace logging -} // namespace paddle - -#ifndef NDEBUG -#define DEBUG_LEVEL 5 -#define DBG VLOG(DEBUG_LEVEL) -#else -#define DBG DLOG(INFO) -#endif diff --git a/paddle/legacy/utils/PythonUtil.cpp b/paddle/legacy/utils/PythonUtil.cpp deleted file mode 100644 index 21ed049c4d2743d1fa914d6948d6c8c2862f0bfc..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/PythonUtil.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "PythonUtil.h" -#include -#include - -namespace paddle { - -#ifdef PADDLE_NO_PYTHON - -DEFINE_string(python_path, "", "python path"); -DEFINE_string(python_bin, "python2.7", "python bin"); - -constexpr int kExecuteCMDBufLength = 204800; - -int executeCMD(const char* cmd, char* result) { - char bufPs[kExecuteCMDBufLength]; - char ps[kExecuteCMDBufLength] = {0}; - FILE* ptr; - strncpy(ps, cmd, kExecuteCMDBufLength); - if ((ptr = popen(ps, "r")) != NULL) { - size_t count = fread(bufPs, 1, kExecuteCMDBufLength, ptr); - memcpy(result, - bufPs, - count - 1); // why count-1: remove the '\n' at the end - result[count] = 0; - pclose(ptr); - ptr = NULL; - return count - 1; - } else { - LOG(FATAL) << "popen failed"; - return -1; - } -} - -std::string callPythonFunc(const std::string& moduleName, - const std::string& funcName, - const std::vector& args) { - std::string pythonLibPath = ""; - std::string pythonBinPath = ""; - if (!FLAGS_python_path.empty()) { - pythonLibPath = FLAGS_python_path + "/lib:"; - pythonBinPath = FLAGS_python_path + "/bin/"; - } - std::string s = "LD_LIBRARY_PATH=" + pythonLibPath + "$LD_LIBRARY_PATH " + - pythonBinPath + std::string(FLAGS_python_bin) + - " -c 'import " + moduleName + "\n" + "print " + moduleName + - "." 
+ funcName + "("; - for (auto& arg : args) { - s = s + "\"" + arg + "\", "; - } - s += ")'"; - char result[kExecuteCMDBufLength] = {0}; - LOG(INFO) << " cmd string: " << s; - int length = executeCMD(s.c_str(), result); - CHECK_NE(-1, length); - return std::string(result, length); -} - -#else - -static std::recursive_mutex g_pyMutex; - -PyGuard::PyGuard() : guard_(g_pyMutex) {} - -static void printPyErrorStack(std::ostream& os, - bool withEndl = false, - bool withPyPath = true) { - PyObject *ptype, *pvalue, *ptraceback; - PyErr_Fetch(&ptype, &pvalue, &ptraceback); - PyErr_NormalizeException(&ptype, &pvalue, &ptraceback); - PyErr_Clear(); - if (withPyPath) { - os << "Current PYTHONPATH: " << py::repr(PySys_GetObject(strdup("path"))); - if (withEndl) { - os << std::endl; - } - } - PyTracebackObject* obj = (PyTracebackObject*)ptraceback; - - os << "Python Error: " << PyString_AsString(PyObject_Str(ptype)) << " : " - << (pvalue == NULL ? "" : PyString_AsString(PyObject_Str(pvalue))); - if (withEndl) { - os << std::endl; - } - os << "Python Callstack: "; - if (withEndl) { - os << std::endl; - } - while (obj != NULL) { - int line = obj->tb_lineno; - const char* filename = - PyString_AsString(obj->tb_frame->f_code->co_filename); - os << " " << filename << " : " << line; - if (withEndl) { - os << std::endl; - } - obj = obj->tb_next; - } - - Py_XDECREF(ptype); - Py_XDECREF(pvalue); - Py_XDECREF(ptraceback); -} -PyObjectPtr callPythonFuncRetPyObj(const std::string& moduleName, - const std::string& funcName, - const std::vector& args) { - PyGuard guard; - PyObjectPtr pyModule = py::import(moduleName); - PyObjectPtr pyFunc(PyObject_GetAttrString(pyModule.get(), funcName.c_str())); - CHECK_PY(pyFunc) << "GetAttrString failed."; - PyObjectPtr pyArgs(PyTuple_New(args.size())); - for (size_t i = 0; i < args.size(); ++i) { - PyObjectPtr pyArg(PyString_FromString(args[i].c_str())); - CHECK_PY(pyArg) << "Import pyArg failed."; - PyTuple_SetItem(pyArgs.get(), i, pyArg.release()); // 
Maybe a problem - } - PyObjectPtr ret(PyObject_CallObject(pyFunc.get(), pyArgs.get())); - CHECK_PY(ret) << "Call Object failed."; - return ret; -} - -std::string callPythonFunc(const std::string& moduleName, - const std::string& funcName, - const std::vector& args) { - PyObjectPtr obj = callPythonFuncRetPyObj(moduleName, funcName, args); -#if PY_MAJOR_VERSION >= 3 - Py_ssize_t str_size = 0u; - const char* str = PyUnicode_AsUTF8AndSize(obj.get(), &str_size); - return std::string(str, (size_t)str_size); -#else - return std::string(PyString_AsString(obj.get()), PyString_Size(obj.get())); -#endif // PY_MAJOR_VERSION >= 3 -} - -PyObjectPtr createPythonClass( - const std::string& moduleName, - const std::string& className, - const std::vector& args, - const std::map& kwargs) { - PyGuard guard; - PyObjectPtr pyModule = py::import(moduleName); - LOG(INFO) << "createPythonClass moduleName.c_str:" << moduleName.c_str(); - CHECK_PY(pyModule) << "Import module " << moduleName << " failed."; - PyObjectPtr pyDict(PyModule_GetDict(pyModule.get())); - CHECK_PY(pyDict) << "Get Dict failed."; - PyObjectPtr pyClass(PyDict_GetItemString(pyDict.get(), className.c_str())); - LOG(INFO) << "createPythonClass className.c_str():" << className.c_str(); - CHECK_PY(pyClass) << "Import class " << className << " failed."; - PyObjectPtr argsObjectList(PyTuple_New(args.size())); - for (size_t i = 0; i < args.size(); ++i) { - PyObjectPtr pyArg(Py_BuildValue("s#", args[i].c_str(), args[i].length())); - PyTuple_SetItem(argsObjectList.get(), i, pyArg.release()); - } - - PyObjectPtr kwargsObjectList(PyDict_New()); - for (auto& x : kwargs) { - PyObjectPtr pyArg(Py_BuildValue("s#", x.second.c_str(), x.second.length())); - PyDict_SetItemString( - kwargsObjectList.get(), x.first.c_str(), pyArg.release()); - } - - PyObjectPtr pyInstance(PyInstance_New( - pyClass.get(), argsObjectList.release(), kwargsObjectList.release())); - CHECK_PY(pyInstance) << "Create class " << className << " failed."; - return 
pyInstance; -} - -namespace py { -char* repr(PyObject* obj) { return PyString_AsString(PyObject_Repr(obj)); } - -std::string getPyCallStack() { - std::ostringstream os; - printPyErrorStack(os, true); - return os.str(); -} - -PyObjectPtr import(const std::string& moduleName) { - auto module = PyImport_ImportModule(moduleName.c_str()); - CHECK_PY(module) << "Import " << moduleName << "Error"; - return PyObjectPtr(module); -} - -} // namespace py - -#endif -extern "C" { -extern const char enable_virtualenv_py[]; -} -void initPython(int argc, char** argv) { -#ifndef PADDLE_NO_PYTHON - Py_SetProgramName(argv[0]); - Py_Initialize(); - PySys_SetArgv(argc, argv); - // python blocks SIGINT. Need to enable it. - signal(SIGINT, SIG_DFL); - - // Manually activate virtualenv when user is using virtualenv - PyRun_SimpleString(enable_virtualenv_py); -#endif -} - -} // namespace paddle diff --git a/paddle/legacy/utils/PythonUtil.h b/paddle/legacy/utils/PythonUtil.h deleted file mode 100644 index d5b2dbddde21f5c2a0696aadeda2b057175fc5e9..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/PythonUtil.h +++ /dev/null @@ -1,381 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -// clang-format off -#include "paddle/legacy/utils/Util.h" - -#ifndef PADDLE_NO_PYTHON -// must include the following two blocks, otherwise, -// gcc compiler may produce warning -#ifdef __APPLE__ -#define _POSIX_SOURCE -#define _POSIX_C_SOURCE 200809L -#define _XOPEN_SOURCE 700 -#endif - -#ifdef _POSIX_C_SOURCE -#define __TEMP_POSIX_C_SOURCE _POSIX_C_SOURCE -#undef _POSIX_C_SOURCE -#endif -#ifdef _XOPEN_SOURCE -#define __TEMP_XOPEN_SOURCE _XOPEN_SOURCE -#undef _XOPEN_SOURCE -#endif -#include -#include -#endif - -#include -#include -#include -// clang-format on - -namespace paddle { - -std::string callPythonFunc(const std::string& moduleName, - const std::string& funcName, - const std::vector& args); - -#ifndef PADDLE_NO_PYTHON - -/** - * Global lock guard of python C-api invokes. - * NOTE: the lock of this guard is reentrant or recursive. - */ -class PyGuard { - public: - PyGuard(); - PyGuard(const PyGuard& other) = delete; - PyGuard& operator=(const PyGuard& other) = delete; - - private: - std::lock_guard guard_; -}; - -struct PyObjectDeleter { - void operator()(PyObject* obj) { - if (obj) { - Py_DECREF(obj); - } - } -}; - -typedef std::unique_ptr PyObjectPtr; - -PyObjectPtr callPythonFuncRetPyObj(const std::string& moduleName, - const std::string& funcName, - const std::vector& args); - -PyObjectPtr createPythonClass(const std::string& moduleName, - const std::string& className, - const std::vector& args, - const std::map& kwargs); - -#define CHECK_PY(x) CHECK((x) != nullptr) << ::paddle::py::getPyCallStack() - -namespace py { -PyObjectPtr import(const std::string& moduleName); - -#if PY_MAJOR_VERSION >= 3 -/** - * Cast a PyLong to int type T. - * @tparam T return type. - * @param [in] obj PyLong object. - * @param [out] ok status for casting. False if error occured. nullptr if user - * don't care is ok or not. - * @return The value of python object, or 0 if not ok. 
- */ -template -T castInt(PyObject* obj, bool* ok = nullptr) { - // Refer to https://www.python.org/dev/peps/pep-0237/, the int and long object - // were unified to long since python3 - if (PyLong_Check(obj)) { - if (ok) *ok = true; - return (T)PyLong_AsUnsignedLong(obj); - } else { - if (ok) *ok = false; - return (T)0; - } -} - -// Convert PyAPI from 2.x to 3.x -#define PyString_FromString PyUnicode_FromString -#define PyString_AsString PyUnicode_AsUTF8 - -#else -/** - * Cast a PyLong or PyInt to int type T. - * @tparam T return type. - * @param [in] obj PyLong or PyInt object. - * @param [out] ok status for casting. False if error occured. nullptr if user - * don't care is ok or not. - * @return The value of python object, or 0 if not ok. - */ -template -T castInt(PyObject* obj, bool* ok = nullptr) { - if (PyLong_Check(obj)) { - if (ok) *ok = true; - return (T)PyLong_AsUnsignedLong(obj); - } else if (PyInt_Check(obj)) { - if (ok) *ok = true; - return (T)PyInt_AsLong(obj); - } else { - if (ok) *ok = false; - return (T)0; - } -} -#endif // PY_MAJOR_VERSION >= 3 - -/** - * Invoke repr of python object. - * - * Just like toString method in java. - */ -char* repr(PyObject* obj); - -/** - * Invoke repr of python object. - */ -inline char* repr(const PyObjectPtr& obj) { return repr(obj.get()); } - -/** - * Get Python Error Stack String. - */ -std::string getPyCallStack(); - -/** - * Object Helper for PyObjectPtr. - * - * Implements getAttr method for object. - */ -class ObjectHelper { - public: - explicit ObjectHelper(const PyObjectPtr& obj) : obj_(obj) {} - - /** - * get attribute - */ - inline PyObject* getAttr(const std::string& field) const { - auto obj = PyObject_GetAttrString(obj_.get(), field.c_str()); - CHECK_PY(obj) << "Cannot get attribute on python object " << obj_.get(); - return obj; - } - - /** - * Get Int attribute - * @param [in] field attribute name. - * @param [out] ok true if this attribute is int. - * @tparam T int type. - * @return int value. 
- */ - template - T getIntAttr(const std::string& field, bool* ok = nullptr) const { - PyObjectPtr tmp(getAttr(field)); - return castInt(tmp.get(), ok); - } - - /** - * Get int attribute. Log(Fatal) when not ok - * @param field attribute name. - * @return int value. - */ - template - T getIntAttrWithError(const std::string& field) const { - bool ok; - T tmp = getIntAttr(field, &ok); - CHECK(ok) << "Cannot get integer attribute on object " << obj_.get(); - return tmp; - } - - /** - * Get bool attribute. - * @param field - * @param [out] isBoolType return true if attribute is bool type. If the - * attribute is not bool type, then an implicit - * conversion will happens, and will return the - * conversion result. - * - * Such as, if the attribute is 1, then the return - * value of function will be true, but the isBoolType - * will return false. - * @return - */ - bool getBoolAttr(const std::string& field, bool* isBoolType = nullptr) const { - PyObjectPtr tmp(getAttr(field)); - if (isBoolType) { - *isBoolType = PyBool_Check(tmp.get()); - } - return PyObject_IsTrue(tmp.get()); - } - - private: - const PyObjectPtr& obj_; -}; - -/** - * Python Sequence Helper - * - * The python sequence means list or tuple. - */ -class SequenceHelper { - public: - explicit SequenceHelper(const PyObjectPtr& seq) : seq_(seq.get()) { - CHECK(PySequence_Check(seq_)); - } - - explicit SequenceHelper(PyObject* seq) : seq_(seq) { - CHECK(PySequence_Check(seq_)); - } - - inline size_t size() const { return (size_t)PySequence_Size(seq_); } - - inline PyObject* operator[](size_t i) const { - return PySequence_Fast_GET_ITEM(seq_, i); - } - - inline double getDouble(size_t i) const { - auto* ptr = (*this)[i]; - return PyFloat_AsDouble(ptr); - } - - /** - * Set a sequence item o[i] = obj; - * @param i index - * @param obj setted item. - * @param steal if steal = true, sequence will move object in iteself, - * just like std::move. Otherwise, it will increase reference - * count. Default is false. 
- */ - inline void set(size_t i, const PyObjectPtr& obj, bool steal = false) { - this->set(i, obj.get(), steal); - } - - /** - * Set a sequence item o[i] = obj; - */ - inline void set(size_t i, PyObject* obj, bool steal = false) { - if (!steal) { - Py_XINCREF(obj); - } - if (PyTuple_Check(seq_)) { - CHECK_NE(PyTuple_SetItem(seq_, i, obj), -1) << getPyCallStack(); - } else { - CHECK_NE(PySequence_SetItem(seq_, i, obj), -1) << getPyCallStack(); - } - } - - private: - PyObject* seq_; -}; - -class DictHelper { - public: - explicit DictHelper(PyObject* d) : dict_(d) {} - - explicit DictHelper(const PyObjectPtr& d) : dict_(d.get()) {} - - void set(const std::string& key, PyObject* item) { - PyDict_SetItemString(dict_, key.c_str(), item); - } - - void setBool(const std::string& key, bool b) { - this->set(key, PyBool_FromLong(b)); - } - - void setStringList(const std::string& key, - const std::vector& items) { - auto* list = PyList_New(items.size()); - for (size_t i = 0; i < items.size(); ++i) { - PyList_SetItem(list, i, PyString_FromString(items[i].c_str())); - } - this->set(key, list); - } - - private: - inline void checkDict() { CHECK(PyDict_Check(this->dict_)); } - - PyObject* dict_; -}; - -inline static bool isCallable(const PyObjectPtr& obj) { - return PyCallable_Check(obj.get()); -} - -/** - * Wrap a callable object. - */ -class CallableHelper { - public: - explicit CallableHelper(const PyObjectPtr& obj) : obj_(obj) { - CHECK(py::isCallable(obj_)); - } - - ~CallableHelper() {} - - /** - * reset args, and create new tuple. - * @param sz args size. - */ - void setArgsSize(size_t sz) { args.reset(PyTuple_New(sz)); } - - /** - * Get args sequence. User can set/get by SequenceHelper. - */ - SequenceHelper getArgs() { return SequenceHelper(args); } - - /** - * Call python method, return an object. 
- */ - PyObject* operator()() { - PyGuard guard; - return PyObject_Call(obj_.get(), args.get(), kwargs.get()); - } - - private: - const PyObjectPtr& obj_; - PyObjectPtr args; - PyObjectPtr kwargs; -}; - -inline static PyObject* iterNext(const PyObjectPtr& context, bool* atEnd) { - PyGuard g; - PyObject* data = PyIter_Next(context.get()); - if (data == nullptr) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) { - PyErr_Clear(); - *atEnd = true; - return nullptr; - } else if (PyErr_Occurred()) { - CHECK_PY(data) << "Calling iterator next error"; - return nullptr; - } else { - *atEnd = false; - return data; // just return none in iterator. - } - } else { - *atEnd = false; - return data; - } -} -} // namespace py - -#endif - -/** - * Initialize python. - */ -void initPython(int argc, char** argv); - -} // namespace paddle diff --git a/paddle/legacy/utils/Queue.h b/paddle/legacy/utils/Queue.h deleted file mode 100644 index 189e1a14f7b2d133408a50418d96431164248f0e..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Queue.h +++ /dev/null @@ -1,255 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include - -#include "Locks.h" - -namespace paddle { - -/** - * A thread-safe queue that automatically grows but never shrinks. - * Dequeue a empty queue will block current thread. Enqueue an element - * will wake up another thread that blocked by dequeue method. 
- * - * For example. - * @code{.cpp} - * - * paddle::Queue q; - * END_OF_JOB=-1 - * void thread1() { - * while (true) { - * auto job = q.dequeue(); - * if (job == END_OF_JOB) { - * break; - * } - * processJob(job); - * } - * } - * - * void thread2() { - * while (true) { - * auto job = getJob(); - * q.enqueue(job); - * if (job == END_OF_JOB) { - * break; - * } - * } - * } - * - * @endcode - */ -template -class Queue { - public: - /** - * @brief Construct Function. Default capacity of Queue is zero. - */ - Queue() : numElements_(0) {} - - ~Queue() {} - - /** - * @brief enqueue an element into Queue. - * @param[in] el The enqueue element. - * @note This method is thread-safe, and will wake up another blocked thread. - */ - void enqueue(const T& el) { - std::unique_lock lock(queueLock_); - elements_.emplace_back(el); - numElements_++; - - queueCV_.notify_all(); - } - - /** - * @brief enqueue an element into Queue. - * @param[in] el The enqueue element. rvalue reference . - * @note This method is thread-safe, and will wake up another blocked thread. - */ - void enqueue(T&& el) { - std::unique_lock lock(queueLock_); - elements_.emplace_back(std::move(el)); - numElements_++; - - queueCV_.notify_all(); - } - - /** - * Dequeue from a queue and return a element. - * @note this method will be blocked until not empty. - */ - T dequeue() { - std::unique_lock lock(queueLock_); - queueCV_.wait(lock, [this]() { return numElements_ != 0; }); - T el; - - using std::swap; - // Becuase of the previous statement, the right swap() can be found - // via argument-dependent lookup (ADL). - swap(elements_.front(), el); - - elements_.pop_front(); - numElements_--; - if (numElements_ == 0) { - queueCV_.notify_all(); - } - return el; - } - - /** - * Return size of queue. - * - * @note This method is not thread safe. Obviously this number - * can change by the time you actually look at it. - */ - inline int size() const { return numElements_; } - - /** - * @brief is empty or not. 
- * @return true if empty. - * @note This method is not thread safe. - */ - inline bool empty() const { return numElements_ == 0; } - - /** - * @brief wait util queue is empty - */ - void waitEmpty() { - std::unique_lock lock(queueLock_); - queueCV_.wait(lock, [this]() { return numElements_ == 0; }); - } - - /** - * @brief wait queue is not empty at most for some seconds. - * @param seconds wait time limit. - * @return true if queue is not empty. false if timeout. - */ - bool waitNotEmptyFor(int seconds) { - std::unique_lock lock(queueLock_); - return queueCV_.wait_for(lock, std::chrono::seconds(seconds), [this] { - return numElements_ != 0; - }); - } - - private: - std::deque elements_; - int numElements_; - std::mutex queueLock_; - std::condition_variable queueCV_; -}; - -/* - * A thread-safe circular queue that - * automatically blocking calling thread if capacity reached. - * - * For example. - * @code{.cpp} - * - * paddle::BlockingQueue q(capacity); - * END_OF_JOB=-1 - * void thread1() { - * while (true) { - * auto job = q.dequeue(); - * if (job == END_OF_JOB) { - * break; - * } - * processJob(job); - * } - * } - * - * void thread2() { - * while (true) { - * auto job = getJob(); - * q.enqueue(job); //Block until q.size() < capacity . - * if (job == END_OF_JOB) { - * break; - * } - * } - * } - */ -template -class BlockingQueue { - public: - /** - * @brief Construct Function. - * @param[in] capacity the max numer of elements the queue can have. - */ - explicit BlockingQueue(size_t capacity) : capacity_(capacity) {} - - /** - * @brief enqueue an element into Queue. - * @param[in] x The enqueue element, pass by reference . - * @note This method is thread-safe, and will wake up another thread - * who was blocked because of the queue is empty. - * @note If it's size() >= capacity before enqueue, - * this method will block and wait until size() < capacity. 
- */ - void enqueue(const T& x) { - std::unique_lock lock(mutex_); - notFull_.wait(lock, [&] { return queue_.size() < capacity_; }); - queue_.push_back(x); - notEmpty_.notify_one(); - } - - /** - * Dequeue from a queue and return a element. - * @note this method will be blocked until not empty. - * @note this method will wake up another thread who was blocked because - * of the queue is full. - */ - T dequeue() { - std::unique_lock lock(mutex_); - notEmpty_.wait(lock, [&] { return !queue_.empty(); }); - - T front(queue_.front()); - queue_.pop_front(); - notFull_.notify_one(); - return front; - } - - /** - * Return size of queue. - * - * @note This method is thread safe. - * The size of the queue won't change until the method return. - */ - size_t size() { - std::lock_guard guard(mutex_); - return queue_.size(); - } - - /** - * @brief is empty or not. - * @return true if empty. - * @note This method is thread safe. - */ - size_t empty() { - std::lock_guard guard(mutex_); - return queue_.empty(); - } - - private: - std::mutex mutex_; - std::condition_variable notEmpty_; - std::condition_variable notFull_; - std::deque queue_; - size_t capacity_; -}; - -} // namespace paddle diff --git a/paddle/legacy/utils/Stat.cpp b/paddle/legacy/utils/Stat.cpp deleted file mode 100644 index ff1b1bf888f3915f14752cb89115f7c9ed98d67f..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Stat.cpp +++ /dev/null @@ -1,165 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Stat.h" -#include -#include -#include "Util.h" - -namespace paddle { - -StatSet globalStat("GlobalStatInfo"); - -void Stat::addSample(uint64_t value) { - StatInfo* statInfo = statInfo_.get(false); - if (!statInfo) { - statInfo = new StatInfo(this); - statInfo_.set(statInfo); - std::lock_guard guard(lock_); - threadLocalBuf_.push_back({statInfo, getTID()}); - } - if (value > statInfo->max_) { - statInfo->max_ = value; - } - if (value < statInfo->min_) { - statInfo->min_ = value; - } - statInfo->total_ += value; - statInfo->count_++; -} - -void Stat::mergeThreadStat(StatInfo& allThreadStat) { - allThreadStat = destructStat_; - for (auto& buf : threadLocalBuf_) { - if (buf.first->max_ > allThreadStat.max_) { - allThreadStat.max_ = buf.first->max_; - } - if (buf.first->min_ < allThreadStat.min_) { - allThreadStat.min_ = buf.first->min_; - } - allThreadStat.total_ += buf.first->total_; - allThreadStat.count_ += buf.first->count_; - } -} - -void Stat::reset() { - std::lock_guard guard(lock_); - for (auto& buf : threadLocalBuf_) { - buf.first->reset(); - } -} - -std::ostream& operator<<(std::ostream& outPut, const Stat& stat) { - std::lock_guard guard(const_cast(stat).lock_); - auto showStat = [&](const StatInfo* info, pid_t tid, bool isFirst = true) { - uint64_t average = 0; - if (info->count_ > 0) { - outPut << std::setfill(' ') << std::left; - if (!isFirst) { - outPut << std::setw(42) << " "; - } - average = info->total_ / info->count_; - outPut << "Stat=" << std::setw(30) << stat.getName(); - if (tid) { - outPut << " TID=" << std::setw(6) << tid; - } - outPut << " total=" << std::setw(10) << info->total_ * 0.001 - << " avg=" << std::setw(10) << average * 0.001 - << " max=" << std::setw(10) << info->max_ * 0.001 - << " min=" << std::setw(10) << info->min_ * 0.001 - << " count=" << std::setw(10) << info->count_ << std::endl; - } - }; - if 
(!stat.getThreadInfo()) { - StatInfo infoVarTmp; - const_cast(stat).mergeThreadStat(infoVarTmp); - showStat(&infoVarTmp, 0); - } else { - bool isFirst = true; - for (auto& buf : stat.threadLocalBuf_) { - showStat(buf.first, buf.second, isFirst); - if (isFirst) isFirst = false; - } - showStat(&stat.destructStat_, 0); - } - - return outPut; -} - -void StatSet::printSegTimerStatus() { - ReadLockGuard guard(lock_); - LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ') - << "======= StatSet: [" << name_ << "] status ======" << std::endl; - for (auto& stat : statSet_) { - LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ') - << *(stat.second); - } -} - -void StatSet::printAllStatus() { -#ifndef PADDLE_DISABLE_TIMER - printSegTimerStatus(); -#endif - LOG(INFO) << std::setiosflags(std::ios::left) - << "--------------------------------------------------" - << std::endl; -} - -void StatSet::reset(bool clearRawData) { - ReadLockGuard guard(lock_); - for (auto& stat : statSet_) { - stat.second->reset(); - } -} - -void StatSet::setThreadInfo(const std::string& name, bool flag) { - ReadLockGuard guard(lock_); - auto iter = statSet_.find(name); - CHECK(iter != statSet_.end()) << name << " is not registed in " << name_; - iter->second->setThreadInfo(flag); -} - -StatInfo::~StatInfo() { - if (stat_) { - std::lock_guard guard(stat_->lock_); - if (stat_->destructStat_.max_ < this->max_) { - stat_->destructStat_.max_ = this->max_; - } - if (stat_->destructStat_.min_ > this->min_) { - stat_->destructStat_.min_ = this->min_; - } - stat_->destructStat_.total_ += this->total_; - stat_->destructStat_.count_ += this->count_; - stat_->threadLocalBuf_.remove({this, getTID()}); - } -} - -static unsigned g_profileCount = 0; -static std::recursive_mutex g_profileMutex; - -GpuProfiler::GpuProfiler(std::string statName, std::string info) - : guard_(g_profileMutex) { - if (++g_profileCount == 1) { - LOG(INFO) << "Enable GPU Profiler Stat: [" << statName << "] " << 
info; - hl_profiler_start(); - } -} - -GpuProfiler::~GpuProfiler() { - if (--g_profileCount == 0) { - hl_profiler_end(); - } -} - -} // namespace paddle diff --git a/paddle/legacy/utils/Stat.h b/paddle/legacy/utils/Stat.h deleted file mode 100644 index 100e9eba909466fcca57f755405ab63b638a8ebd..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Stat.h +++ /dev/null @@ -1,302 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Locks.h" -#include "Logging.h" -#include "ThreadLocal.h" -#include "hl_gpu.h" - -namespace paddle { - -class Stat; - -class StatInfo { - public: - explicit StatInfo(Stat* stat = nullptr) : stat_(stat) { - total_ = 0; - max_ = 0; - count_ = 0; - min_ = UINT64_MAX; - } - - void reset() { - total_ = 0; - count_ = 0; - max_ = 0; - min_ = UINT64_MAX; - } - - ~StatInfo(); - - Stat* stat_; - uint64_t total_; - uint64_t max_; - uint64_t count_; - uint64_t min_; -}; - -class Stat; -typedef std::shared_ptr StatPtr; - -class StatSet { - public: - explicit StatSet(const std::string& name) : name_(name) {} - ~StatSet() {} - - // print to LOG(INFO) - void printSegTimerStatus(); - void printAllStatus(); - - StatPtr getStat(const std::string& name) { - { - ReadLockGuard guard(lock_); - auto it = statSet_.find(name); - if (it != statSet_.end()) { - return it->second; - } - } - StatPtr stat 
= std::make_shared(name); - std::lock_guard guard(lock_); - auto ret = statSet_.insert(std::make_pair(name, stat)); - return ret.first->second; - } - - // true for showing stats for each thread - // false for showing stats aggragated over threads - void setThreadInfo(const std::string& name, bool flag); - - // true for showing stats for each thread - // false for showing stats aggragated over threads - void setThreadInfo(bool flag) { - for (auto& iter : statSet_) { - setThreadInfo(iter.first, flag); - } - } - - // reset the counters for all stats - // clearRawData means also clearing raw tuning data, because at pserver end, - // barrier rawData(timeVector_) is stateful, clearing it will cause rubbish - // data, while rawData should be cleared at the new pass (so complicated - // pserver code logic, -_- ). - void reset(bool clearRawData = true); - - private: - std::unordered_map statSet_; - const std::string name_; - RWLock lock_; -}; - -extern StatSet globalStat; - -/*@brief : a simple stat*/ -class Stat { - public: - explicit Stat(const std::string& statName) - : destructStat_(nullptr), name_(statName), openThreadInfo_(false) {} - ~Stat() {} - - typedef std::list> ThreadLocalBuf; - - const std::string& getName() const { return name_; } - - void addSample(uint64_t value); - - // clear all stats - void reset(); - - friend std::ostream& operator<<(std::ostream& outPut, const Stat& stat); - - /* Set operator << whether to print thread info. - * If openThreadInfo_ == true, then print, else print merge thread info. 
- */ - void setThreadInfo(bool flag) { openThreadInfo_ = flag; } - - bool getThreadInfo() const { return openThreadInfo_; } - - friend class StatInfo; - - private: - void mergeThreadStat(StatInfo& allThreadStat); - - std::mutex lock_; - ThreadLocalBuf threadLocalBuf_; - StatInfo destructStat_; - ThreadLocal statInfo_; - const std::string name_; - bool openThreadInfo_; -}; - -extern StatSet globalStat; - -inline StatPtr getStat(const std::string& name) { - return globalStat.getStat(name); -} - -inline uint64_t nowInMicroSec() { - timeval tvTime; - (void)gettimeofday(&tvTime, NULL); - return tvTime.tv_sec * 1000000LU + tvTime.tv_usec; -} - -/** - * A simple help class to measure time interval - */ -class Timer { - public: - explicit Timer(bool autoStart = true) : total_(0), startStamp_(0) { - if (autoStart) { - start(); - } - } - void start() { startStamp_ = nowInMicroSec(); } - void setStartStamp(uint64_t startStamp) { startStamp_ = startStamp; } - uint64_t stop() { - total_ += nowInMicroSec() - startStamp_; - return total_; - } - - uint64_t get() const { return total_; } - - void reset() { total_ = 0; } - - protected: - uint64_t total_; - uint64_t startStamp_; -}; - -class TimerOnce { - public: - TimerOnce(Stat* stat, - const char* info = "", - uint64_t threshold = -1, - bool autoStart = true, - uint64_t startStamp = 0) - : stat_(stat), info_(info), timer_(autoStart), threshold_(threshold) { - if (!autoStart) { - timer_.setStartStamp(startStamp); - } - } - ~TimerOnce() { - uint64_t span = timer_.stop(); - if (span >= threshold_) { - LOG(INFO) << "Stat: [" << stat_->getName() << "] " << info_ - << " [Span:" << span / 1000 << "ms" << span % 1000 << "us" - << "] "; - } - stat_->addSample(span); - } - - private: - Stat* stat_; - const char* info_; - Timer timer_; - uint64_t threshold_; -}; - -inline uint64_t registerTimerArg1(uint64_t threshold = -1, - StatSet& statSet = globalStat) { - return threshold; -} - -inline StatSet& registerTimerArg2(uint64_t threshold = -1, 
- StatSet& statSet = globalStat) { - return statSet; -} - -#ifdef PADDLE_DISABLE_TIMER - -#define REGISTER_TIMER(statName, ...) -#define REGISTER_TIMER_SET(statName, start, ...) -#define REGISTER_TIMER_DYNAMIC(statName, ...) -#define REGISTER_TIMER_DYNAMIC_SET(statName, start, ...) -#define REGISTER_TIMER_INFO(statName, info) -#define FOR_TIMING(statement) - -#else - -#define FOR_TIMING(statement) statement - -// The default arguments are shown in the following line: -// REGISTER_TIMER(statName, threshold = -1, statSet = globalStat) -// TODO(yuyang18,wangyanfei01): if UNIQUE_NAME is needed -#define REGISTER_TIMER(statName, ...) \ - static ::paddle::StatPtr __stat = \ - ::paddle::registerTimerArg2(__VA_ARGS__).getStat(statName); \ - ::paddle::TimerOnce __timerOnce( \ - __stat.get(), "", ::paddle::registerTimerArg1(__VA_ARGS__)); - -#define REGISTER_TIMER_SET(statName, start, ...) \ - static ::paddle::StatPtr __stat = \ - ::paddle::registerTimerArg2(__VA_ARGS__).getStat(statName); \ - ::paddle::TimerOnce __timerOnce(__stat.get(), \ - "", \ - ::paddle::registerTimerArg1(__VA_ARGS__), \ - false, \ - start); - -// dynmaic timer, support to discriminate runtime entity, used in pserver -#define REGISTER_TIMER_DYNAMIC(statName, ...) \ - ::paddle::StatPtr __stat = \ - ::paddle::registerTimerArg2(__VA_ARGS__).getStat(statName); \ - ::paddle::TimerOnce __timerOnce( \ - __stat.get(), "", ::paddle::registerTimerArg1(__VA_ARGS__)); - -#define REGISTER_TIMER_DYNAMIC_SET(statName, start, ...) 
\ - ::paddle::StatPtr __stat = \ - ::paddle::registerTimerArg2(__VA_ARGS__).getStat(statName); \ - ::paddle::TimerOnce __timerOnce(__stat.get(), \ - "", \ - ::paddle::registerTimerArg1(__VA_ARGS__), \ - false, \ - start); - -#define REGISTER_TIMER_INFO(statName, info) \ - static ::paddle::StatPtr __stat = ::paddle::globalStat.getStat(statName); \ - ::paddle::TimerOnce __timerOnce( \ - __stat.get(), info, 10 * 1000000LU /*threshold*/); - -#endif // DISABLE_TIMER - -class GpuProfiler final { - public: - GpuProfiler(std::string statName, std::string info); - ~GpuProfiler(); - - private: - std::lock_guard guard_; -}; - -#ifdef PADDLE_DISABLE_PROFILER - -#define REGISTER_GPU_PROFILER(statName, ...) - -#else - -#define REGISTER_GPU_PROFILER(statName, ...) \ - GpuProfiler __gpuProfiler(statName, #__VA_ARGS__); - -#endif // DISABLE_PROFILER - -} // namespace paddle diff --git a/paddle/legacy/utils/StringUtil.cpp b/paddle/legacy/utils/StringUtil.cpp deleted file mode 100644 index 0c98e6db34530ae40a7245768051b8ce8aa69202..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/StringUtil.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "StringUtil.h" - -namespace paddle { -namespace str { - -bool endsWith(const std::string& str, const std::string& ext) { - if (str.size() >= ext.size() && ext == str.substr(str.size() - ext.size())) { - return true; - } else { - return false; - } -} - -void split(const std::string& str, char sep, std::vector* pieces) { - pieces->clear(); - if (str.empty()) { - return; - } - size_t pos = 0; - size_t next = str.find(sep, pos); - while (next != std::string::npos) { - pieces->push_back(str.substr(pos, next - pos)); - pos = next + 1; - next = str.find(sep, pos); - } - if (!str.substr(pos).empty()) { - pieces->push_back(str.substr(pos)); - } -} - -bool startsWith(const std::string& str, const std::string& prefix) { - if (prefix.size() <= str.size()) { - for (size_t i = 0; i < prefix.size(); ++i) { - if (str[i] != prefix[i]) return false; - } - return true; - } else { - return false; - } -} - -} // namespace str -} // namespace paddle diff --git a/paddle/legacy/utils/StringUtil.h b/paddle/legacy/utils/StringUtil.h deleted file mode 100644 index 95f071cb7de87d87f6988c136d7993c66fa9dde1..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/StringUtil.h +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include -#include "Logging.h" - -namespace paddle { - -namespace str { -/// test whether a string ends with another string -bool endsWith(const std::string& str, const std::string& ext); - -bool startsWith(const std::string& str, const std::string& prefix); - -/** - * Use sep to split str into pieces. - * If str is empty, *pieces will be empty. - * If str ends with sep, the last piece will be an empty string. - */ -void split(const std::string& str, char sep, std::vector* pieces); - -/** - * Cast string to type T with status. - * - * @param [in] s input string. - * @param [out] ok status, return true if there is no error in casting. Set - * nullptr if user don't care error at all. - * @return result of casting. If error occurred, a default value of T() will - * return. - */ -template -inline T toWithStatus(const std::string& s, bool* ok = nullptr) { - std::istringstream sin(s); - T v; - sin >> v; - if (ok) { - *ok = sin.eof() && !sin.fail(); - } - return v; -} - -/** - * Cast type T to string with status. - * - * @param [in] v input value of type T. - * @param [out] ok status, return true if there is no error in casting. Set - * nullptr if user don't care error at all. - * @return result of casting. If error occurred, a empty string will be - * returned. - */ -template -inline std::string toWithStatus(const T v, bool* ok = nullptr) { - std::ostringstream sout; - sout << v; - if (ok) { - *ok = !sout.fail(); - } - return sout.str(); -} - -/// Convert string to type T. It makes sure all the characters in s are used. -/// Otherwise it will abort. -/// -/// @tparam T type of return -/// @param s string input. -template -inline T to(const std::string& s) { - bool ok; - T v = toWithStatus(s, &ok); - CHECK(ok) << "Cannot convert s(" << s << ") to type " << typeid(T).name(); - return v; -} - -/// Convert type T to string. 
-/// -/// @tparam T type of input value -/// @param v input value of type T -template -std::string to_string(T v) { - bool ok; - std::string s = toWithStatus(v, &ok); - CHECK(ok) << "Cannot convert v(" << v << ") to type std::string"; - return s; -} - -} // namespace str - -#undef DEFINE_STRING_CONVERSION - -} // namespace paddle diff --git a/paddle/legacy/utils/Thread.h b/paddle/legacy/utils/Thread.h deleted file mode 100644 index 2ee6eba1a68202282537788160a77f7689a2ffdb..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Thread.h +++ /dev/null @@ -1,615 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include "Logging.h" -#include "Util.h" - -#include "Queue.h" -#include "ThreadLocal.h" - -#include - -namespace paddle { - -/** - * A simple wrapper for std::thread - */ - -class Thread { - public: - /** - * @brief Construct Function. Default thread pointer is null. - */ - Thread() { thread_ = nullptr; } - - virtual ~Thread() {} - - /** - * @brief Creat a new thread and call *run()* function. - */ - void start() { - thread_.reset(new std::thread([this]() { this->run(); })); - } - - /** - * @brief Detach the thread. - * It don't need to be waited until it finish. - */ - void detach() { thread_->detach(); } - - /** - * @brief Join the thread. - * It should be waited until it finish. 
- */ - void join() { thread_->join(); } - - /** - * @brief Define what to be done on this thread through override this - * function. - */ - virtual void run() = 0; - - protected: - std::unique_ptr thread_; -}; - -/** - * ThreadWorker maintains a job queue. It executes the jobs in the job queue - * sequentianlly in a separate thread. - * - * Use addJob() to add a new job to the job queue. - */ -class ThreadWorker : protected Thread { - public: - typedef std::function JobFunc; - - /** - * @brief Construct Function. Default size of job queue is 0 and not stopping. - */ - ThreadWorker() : stopping_(false), empty_(true) { start(); } - - /** - * @brief Destruct Function. - * If it's running, wait until all job finish and then stop it. - */ - ~ThreadWorker() { - if (!stopping_) { - wait(); - stop(); - } - } - - /** - * @brief Finish current running job and quit the thread. - */ - void stop() { - stopping_ = true; - jobs_.enqueue([]() {}); - join(); - } - - /** - * @brief Add a new job to the job queue. - */ - void addJob(JobFunc func) { - empty_ = false; - jobs_.enqueue(func); - } - - /** - * @brief Wait until all jobs was done (the job queue was empty). - */ - void wait() { - finishCV_.wait([this] { return empty_; }); - } - - protected: - /** - * @brief Execute jobs in the job queue sequentianlly, - * @note If finish all the jobs in the job queue, - * notifies all the waiting threads the job queue was empty. - */ - virtual void run() { - while (true) { - JobFunc func = jobs_.dequeue(); - if (stopping_) break; - func(); - if (jobs_.empty()) { - finishCV_.notify_all([this] { empty_ = true; }); - } - } - } - - Queue jobs_; - bool stopping_; - LockedCondition finishCV_; - bool empty_; -}; - -/** - * SyncThreadPool maintains a pool of threads. - * It executes the job use all workers in the pool. - * - * Use exec() to run a new job, job complete when exec returned. - * Only one job can exec simultaneously. - * - * Each worker has an tid whose range is [0, getNumThreads()). 
- * JobFunc can use tid to divide input data. - */ -class SyncThreadPool { - public: - typedef std::function JobFunc; - - /** - * @brief Construct Function. No thread will be created. - */ - SyncThreadPool() : jobStartBarrier_(0), jobFinishBarrier_(0) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief Construct Fucntion. Create numWorkers of threads in the pool. - * @param[in] numWorkers Number of the workers in the pool. - * @param[in] checkOwner Default true. If checkOwner is true, - * this sync thread pool should be used by it's owner thread. - */ - explicit SyncThreadPool(size_t numWorkers, bool checkOwner = true) - : stopping_(false), - jobStartBarrier_(numWorkers + 1), - jobFinishBarrier_(numWorkers + 1), - jobFunc_(nullptr), - checkOwner_(checkOwner) { - ownerThreadId_ = getTID(); - workers_.resize(numWorkers); - start(); - } - - ~SyncThreadPool() { - if (!stopping_) { - stop(); - } - } - - /** - * @brief Return num of threads in the pool. - */ - size_t getNumThreads() { return workers_.size(); } - - /** - * @brief Execute a job using all the theads in the pool. - * @param[in] jobFunc The function to be executed. - * @param[in] ownerFunc Owner thread can do something in owerFunc when job - * executing. - * @note For the ownerFunc, tid=getNumThreads(). - */ - void exec(JobFunc jobFunc, JobFunc ownerFunc = nullptr) { - if (checkOwner_) { - CHECK_EQ(ownerThreadId_, getTID()) - << "this sync thread pool should be used in one thread"; - } - - CHECK(jobFunc_ == nullptr); - jobFunc_ = jobFunc; - jobStartBarrier_.wait(); // notify worker thread start job - - if (ownerFunc) { - ownerFunc(workers_.size(), workers_.size()); - } - - jobFinishBarrier_.wait(); // wait all worker thread complete - jobFunc_ = nullptr; - } - - /** - * @brief Execute a job using all the threads in the pool. - * And the owner thread will do the same job. - * @param jobFunc The job to be executed. 
- * @note Assume that JobFunc will execute numThread + 1 times, - * with tid ranging [0,numThread]. The thread whose tid is numThread - * is the owner thread. - */ - void execPlusOwner(JobFunc jobFunc) { exec(jobFunc, jobFunc); } - - /** - * @brief Execute a job if has pool, else use caller thread as a worker. - * @param[in] pool The pool to execute the job. - * @param[in] jobFunc The job to be excuted. - */ - static void execHelper(SyncThreadPool* pool, JobFunc jobFunc) { - if (pool) { - pool->exec(jobFunc); - } else { - jobFunc(0, 1); - } - } - - protected: - /** - * @brief Start all the workers in the pool, call their run() function. - */ - void start() { - for (size_t i = 0; i < workers_.size(); ++i) { - workers_[i].reset( - new std::thread([this](int tid) { this->run(tid); }, i)); - } - } - - /** - * @brief Stop all the workers in the pool. - */ - void stop() { - stopping_ = true; - // notify worker thread to stop - jobStartBarrier_.wait(); - - // stop workers - for (auto& thread : workers_) { - if (thread) { - thread->join(); - thread.reset(nullptr); - } - } - } - - /** - * @brief Execute the jobFunc_ using the worker thread tid, if not stopping. - */ - void run(int tid) { - VLOG(1) << "SyncThreadPool worker thread " << tid; - // init seed deterministic, but differs from global srand() - ThreadLocalRand::initThreadSeed(tid + workers_.size()); - - while (true) { - jobStartBarrier_.wait(); // wait job - - if (stopping_) { - break; - } - - jobFunc_(tid, workers_.size()); - - jobFinishBarrier_.wait(); // notify job complete - } - } - - protected: - pid_t ownerThreadId_; - bool stopping_; - ThreadBarrier jobStartBarrier_; - ThreadBarrier jobFinishBarrier_; - - JobFunc jobFunc_; - bool checkOwner_; - std::vector> workers_; -}; - -/** - * MultiThreadWorker maintains a job queue and a result queue. - * It executes the jobs in the job queue and puts the results into the - * result queue sequentially in multi separate threads. 
- * - * Add jobs: - * - * Use addJob() to add a new job to the job queue - * (the user added jobs should not return nullptr). - * - * Use stopAddJob() to stop adding new jobs to the job queue - * (addJob() can not be called after stopAddJob()). - * - * Normal stop: - * - * Use waitResult() to get the results until nullptr is returned. - * Use stop() to exit normally - * (stopAddJob() should be called first). - * - * Force stop: - * - * Use forceStop() to exit forcibly even though there are remaining jobs in - * the - * job queue. - */ -template -class MultiThreadWorker { - public: - typedef T ResultType; - typedef std::shared_ptr ResultPtrType; - typedef std::function JobFunc; - /** - * @brief Construct Function. Initialize the multithread worker. - * @param[in] workerNum Number of the workers. - * @param[in] queueCapacity Capapcity of the result queue. - */ - MultiThreadWorker(size_t workerNum, size_t queueCapacity) - : stopping_(false), - jobAdding_(true), - nullResultNum_(0), - results_(queueCapacity) { - workers_.resize(workerNum); - for (auto& worker : workers_) { - worker.reset(new std::thread([this]() { this->run(); })); - } - } - - /** - * @brief Destruct Function. Force stop the workers - * even though there are remaining jobs in the job queue. - */ - virtual ~MultiThreadWorker() { forceStop(); } - - /** - * @brief Stop all the workers normally. - * @note stopAddJob() should be called before it. - */ - void stop() { - CHECK(!jobAdding_) << "stopAddJob() should be called before stop()"; - for (auto& worker : workers_) { - if (worker) { - worker->join(); - worker = nullptr; - } - } - stopping_ = true; - } - - /** - * @brief Stop all the workers forcibly. - * @note This function will call stopAddJob() first - * and empty the result queue. - */ - void forceStop() { - if (!stopping_) { - stopping_ = true; - stopAddJob(); - while (nullptr != waitResult()) { - } - stop(); - } - } - - /** - * @brief Add a job to the job queue. 
- * @note Job can not be added after calling stopAddJob(). - */ - void addJob(JobFunc func) { - CHECK(jobAdding_) << "addJob() can not be called after stopAddJob()"; - jobs_.enqueue(func); - } - - /** - * @brief Stop adding new jobs to the job queue. - * @note This fuction enqueue a return nullptr function to the job queue. - */ - void stopAddJob() { - for (size_t i = 0; i < workers_.size(); ++i) { - jobs_.enqueue([]() { return nullptr; }); - } - jobAdding_ = false; - } - - /** - * @brief Dequeue the first result in the result queue and return it. - * @note If the result queue is empty, wait until it's not empty - * or return nullptr if all the results have been returned. - */ - ResultPtrType waitResult() { - while (true) { - ResultPtrType result = results_.dequeue(); - if (result) { - return result; - } - - ++nullResultNum_; - if (nullResultNum_ == workers_.size()) { - return nullptr; - } - } - } - - /** - * @brief The result queue is empty or not. - * @return true if empty. - */ - bool testResult() { return results_.empty(); } - - protected: - /** - * @brief Do the jobs in the job queue sequentianlly - * and enqueue the result into the result queue. - * @note A nullptr will be enqueued into the resulte queue, when a worker - * finished. - */ - virtual void run() { - while (true) { - JobFunc func = jobs_.dequeue(); - ResultPtrType result = func(); - if (result == nullptr || stopping_) { - // When a worker finished, a nullptr would be enqueued into results_ - results_.enqueue(nullptr); - break; - } - results_.enqueue(result); - } - } - - bool stopping_; - bool jobAdding_; - size_t nullResultNum_; - Queue jobs_; - BlockingQueue results_; - std::vector> workers_; -}; - -/** - * AsyncThreadPool maintains a job queue and threads pool. - * It executes the jobs from queue asynchronously. - * - * Add jobs: - * - * Use addJob() to add a new job to the job queue and get a std::future - * result. The caller's thread continues running. 
Call std::future::get() - * when the result's value is needed, and the caller's thread may be - * blocked until thread-pool finished the job. - * - * Use addBatchJobs() to add a batch of jobs. - * Unlike addJob()'s asynchronization, addBatchJobs will block caller's - * thread until all jobs in the batch are finished. - * - * Stop: - * Use stop() to stop the thread pool. Job can be added once stopped. - * - * Process-wide Singleton: - * Use AsyncThreadPool::ProcessChannel(N) first to create N threads. - * Then call AsyncThreadPool::ProcessChannel() to get the process-wide global - * thread pool. - */ -class AsyncThreadPool { - public: - typedef std::function JobFunc; - - AsyncThreadPool() { LOG(FATAL) << "Not implemented"; } - - /** - * @brief Construct Function. Install all the workers. - * @param[in] threadNum Number of the threads, must greater than 1. - */ - explicit AsyncThreadPool(size_t threadNum) { - CHECK_GT(threadNum, 1U); - stopping_ = false; - workers_.resize(threadNum); - for (auto& worker : workers_) { - worker.reset(new std::thread([this]() { this->run(); })); - } - } - - ~AsyncThreadPool() { - if (!stopping_) { - stop(); - } - } - - /** - * @brief Stop all the workers normally. - */ - void stop() { - stopping_ = true; - for (size_t i = 0; i < workers_.size(); i++) { - jobs_.enqueue([] {}); - } - for (auto& worker : workers_) { - worker->join(); - } - } - - /** - * @brief A process-wide singleton. Used as a global thread pool - * It should be initialized by calling - * AsyncThreadPool::ProcessChannel(N) first to create N threads, - * then call AsyncThreadPool::ProcessChannel() will get the thread pool. - */ - static AsyncThreadPool& ProcessChannel(size_t initThreadNum = 0) { - static std::shared_ptr channel( - new AsyncThreadPool(initThreadNum)); - return *channel; - } - - /** - * @brief Add a job to queue and return a std::future. - * @note The job will be executed - * asynchronously. 
- * Call std::future::get() when the execturation result is needed; - */ - template - auto addJob(F&& f, Args&&... args) - -> std::future::type> { - CHECK(!stopping_) << "AsyncThreadPool is closed"; - typedef typename std::result_of::type T; - - auto task = std::make_shared>( - std::bind(std::forward(f), std::forward(args)...)); - auto res = task->get_future(); - jobs_.enqueue([task] { (*task)(); }); - return res; - } - - /** - * @brief Add a batch of jobs to the queue. The main thread will be blocked - * until these jobs are finished. - * The results will be stored in `results` according to `jobs` order. - * - * @tparam F should have a return value. - * - * @param[in] jobs a vector of executable objection. - * @param[in] results a vector to store the results. - * - * @note *results* may need to be carefully cleared before *addBatchJobs()*. - */ - template - void addBatchJobs(const std::vector& jobs, - std::vector::type>& results) { - typedef typename std::result_of::type T; - static_assert(!std::is_same::value, - "should pass a non-void function as job"); - - std::vector> resFuts; - for (const auto& job : jobs) { - resFuts.emplace_back(addJob(job)); - } - for (auto& fut : resFuts) { - results.emplace_back(fut.get()); - } - } - - /** - * @brief Add a batch of jobs reguardless of its result. - * @tparam F don't need to have a return value. - * @param[in] jobs a vector of executable objection. - */ - template - void addBatchJobs(const std::vector& jobs) { - CHECK(!stopping_) << "AsyncThreadPool is closed"; - std::vector> tmpRes; - - for (const auto& job : jobs) { - tmpRes.emplace_back(addJob([&job] { - job(); - return true; - })); - } - - for (auto& res : tmpRes) { - res.get(); - } - } - - protected: - /** - * @brief Execute the jobs in the job queue. 
- */ - void run() { - while (true) { - JobFunc func = jobs_.dequeue(); - func(); - if (stopping_) break; - } - } - - private: - std::vector> workers_; - Queue jobs_; - bool stopping_; -}; // class AsyncThreadPool - -} // namespace paddle diff --git a/paddle/legacy/utils/ThreadLocal.cpp b/paddle/legacy/utils/ThreadLocal.cpp deleted file mode 100644 index 58fe51bd40c36088fdc6ee51e22d120b63486bf4..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/ThreadLocal.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ThreadLocal.h" - -#include - -#include "Util.h" - -DEFINE_bool(thread_local_rand_use_global_seed, - false, - "Whether to use global seed in thread local rand."); - -namespace paddle { - -unsigned int ThreadLocalRand::defaultSeed_ = 1; -ThreadLocal ThreadLocalRand::seed_; - -unsigned int* ThreadLocalRand::getSeed() { - unsigned int* p = seed_.get(false /*createLocal*/); - if (!p) { // init seed - if (FLAGS_thread_local_rand_use_global_seed) { - p = new unsigned int(defaultSeed_); - } else if (getpid() == getTID()) { // main thread - // deterministic, but differs from global srand() - p = new unsigned int(defaultSeed_ - 1); - } else { - p = new unsigned int(defaultSeed_ + getTID()); - VLOG(3) << "thread use undeterministic rand seed:" << *p; - } - seed_.set(p); - } - return p; -} - -ThreadLocal ThreadLocalRandomEngine::engine_; -std::default_random_engine& ThreadLocalRandomEngine::get() { - auto engine = engine_.get(false); - if (!engine) { - engine = new std::default_random_engine; - int defaultSeed = ThreadLocalRand::getDefaultSeed(); - engine->seed(FLAGS_thread_local_rand_use_global_seed - ? defaultSeed - : defaultSeed + getTID()); - engine_.set(engine); - } - return *engine; -} - -} // namespace paddle diff --git a/paddle/legacy/utils/ThreadLocal.h b/paddle/legacy/utils/ThreadLocal.h deleted file mode 100644 index 6268b73a85540c25d93b07f2c3aad74c1802aa72..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/ThreadLocal.h +++ /dev/null @@ -1,231 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#ifndef _WIN32 -#include -#include -#include -#endif -#include -#include -#include -#include -#include "Logging.h" -#include "Util.h" - -namespace paddle { - -/** - * Thread local storage for object. - * Example: - * - * Declarartion: - * ThreadLocal> vec_; - * - * Use in thread: - * vector& vec = *vec; // obtain the thread specific object - * vec.resize(100); - * - * Note that this ThreadLocal will desconstruct all internal data when thread - * exits - * This class is suitable for cases when frequently creating and deleting - * threads. - * - * Consider implementing a new ThreadLocal if one needs to frequently create - * both instances and threads. - * - * see also ThreadLocalD - */ -template -class ThreadLocal { - public: - ThreadLocal() { - CHECK_EQ(pthread_key_create(&threadSpecificKey_, dataDestructor), 0); - } - ~ThreadLocal() { pthread_key_delete(threadSpecificKey_); } - - /** - * @brief get thread local object. - * @param if createLocal is true and thread local object is never created, - * return a new object. Otherwise, return nullptr. - */ - T* get(bool createLocal = true) { - T* p = (T*)pthread_getspecific(threadSpecificKey_); - if (!p && createLocal) { - p = new T(); - int ret = pthread_setspecific(threadSpecificKey_, p); - CHECK_EQ(ret, 0); - } - return p; - } - - /** - * @brief set (overwrite) thread local object. If there is a thread local - * object before, the previous object will be destructed before. 
- * - */ - void set(T* p) { - if (T* q = get(false)) { - dataDestructor(q); - } - CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0); - } - - /** - * return reference. - */ - T& operator*() { return *get(); } - - /** - * Implicit conversion to T* - */ - operator T*() { return get(); } - - private: - static void dataDestructor(void* p) { delete (T*)p; } - - pthread_key_t threadSpecificKey_; -}; - -/** - * Almost the same as ThreadLocal, but note that this ThreadLocalD will - * destruct all internal data when ThreadLocalD instance destructs. - * - * This class is suitable for cases when frequently creating and deleting - * objects. - * - * see also ThreadLocal - * - * @note The type T must implemented default constructor. - */ -template -class ThreadLocalD { - public: - ThreadLocalD() { CHECK_EQ(pthread_key_create(&threadSpecificKey_, NULL), 0); } - ~ThreadLocalD() { - pthread_key_delete(threadSpecificKey_); - for (auto t : threadMap_) { - dataDestructor(t.second); - } - } - - /** - * @brief Get thread local object. If not exists, create new one. - */ - T* get() { - T* p = (T*)pthread_getspecific(threadSpecificKey_); - if (!p) { - p = new T(); - CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0); - updateMap(p); - } - return p; - } - - /** - * @brief Set thread local object. If there is an object create before, the - * old object will be destructed. - */ - void set(T* p) { - if (T* q = (T*)pthread_getspecific(threadSpecificKey_)) { - dataDestructor(q); - } - CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0); - updateMap(p); - } - - /** - * @brief Get reference of the thread local object. 
- */ - T& operator*() { return *get(); } - - private: - static void dataDestructor(void* p) { delete (T*)p; } - - void updateMap(T* p) { - pid_t tid = getTID(); - CHECK_NE(tid, -1); - std::lock_guard guard(mutex_); - auto ret = threadMap_.insert(std::make_pair(tid, p)); - if (!ret.second) { - ret.first->second = p; - } - } - - pthread_key_t threadSpecificKey_; - std::mutex mutex_; - std::map threadMap_; -}; - -/** - * @brief Thread-safe C-style random API. - */ -class ThreadLocalRand { - public: - /** - * initSeed just like srand, - * called by main thread, - * init defaultSeed for all thread - */ - static void initSeed(unsigned int seed) { defaultSeed_ = seed; } - - /** - * initThreadSeed called by each thread, - * init seed to defaultSeed + *tid* - * It should be called after main initSeed and before using rand() - * It's optional, getSeed will init seed if it's not initialized. - */ - static void initThreadSeed(int tid) { - seed_.set(new unsigned int(defaultSeed_ + tid)); - } - - /// thread get seed, then can call rand_r many times. - /// Caller thread can modify the seed value if it's necessary. - /// - /// if flag thread_local_rand_use_global_seed set, - /// the seed will be set to defaultSeed in thread's first call. - static unsigned int* getSeed(); - - /// like ::rand - static int rand() { return rand_r(getSeed()); } - - /** - * Get defaultSeed for all thread. - */ - static int getDefaultSeed() { return defaultSeed_; } - - protected: - static unsigned int defaultSeed_; - static ThreadLocal seed_; -}; - -/** - * @brief Thread-safe C++ style random engine. - */ -class ThreadLocalRandomEngine { - public: - /** - * get random_engine for each thread. - * - * Engine's seed will be initialized by ThreadLocalRand. 
- */ - static std::default_random_engine& get(); - - protected: - static ThreadLocal engine_; -}; - -} // namespace paddle diff --git a/paddle/legacy/utils/Util.cpp b/paddle/legacy/utils/Util.cpp deleted file mode 100644 index 2755fdd9cd1c2509cad996557c6fb24363d42d8a..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Util.cpp +++ /dev/null @@ -1,409 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "Util.h" - -#include -#include -#include -#include - -#ifdef __SSE__ -#include -#endif -#ifdef __SSE3__ -#include -#endif - -#include -#include - -#include - -#include "CpuId.h" -#include "CustomStackTrace.h" -#include "Logging.h" -#include "StringUtil.h" -#include "Thread.h" -#include "ThreadLocal.h" -#include "Version.h" - -DEFINE_int32(seed, 1, "random number seed. 0 for srand(time)"); - -#ifdef WITH_GOOGLE_PERFTOOLS -/* - In order to use google profiler, you need to install gperftools, - which can be obtained at: - https://gperftools.googlecode.com/files/gperftools-2.0.tar.gz - - gperftools should be configured with --enable-frame-pointers - - Then link the executable with -lprofiler. - - After you start the application, you can use kill -s signal PID to - start/stop profiling. The profile data will be stored in file - FLAGS_profile_data_file, which can be analyzed by pprof. 
-*/ - -#include - -DEFINE_int32(profile_signal, 12, "signal for switch google profiler"); -DEFINE_string(profile_data_file, "gperf.prof", "file for storing profile data"); - -static void profilerSwitch(int signalNumber) { - bool static started = false; - - if (!started) { - if (ProfilerStart(FLAGS_profile_data_file.c_str())) { - LOG(INFO) << "Profiler started"; - } else { - LOG(WARNING) << "Can't turn on cpu profiling for " - << FLAGS_profile_data_file; - } - } else { - ProfilerStop(); - LOG(INFO) << "Profiler stopped"; - } - started = !started; -} - -static void installProfilerSwitch() { - sighandler_t oldHandler = signal(FLAGS_profile_signal, profilerSwitch); - - if (!oldHandler) { - LOG(INFO) << "Using signal " << FLAGS_profile_signal - << " to turn on/off profiler"; - } else { - LOG(WARNING) << "Signal " << FLAGS_profile_signal << " is already in use\n"; - } -} - -#else - -static void installProfilerSwitch() {} - -#endif // WITH_GOOGLE_PERFTOOLS - -namespace paddle { - -pid_t getTID() { -#if defined(__APPLE__) || defined(__OSX__) - // syscall is deprecated: first deprecated in macOS 10.12. 
- // syscall is unsupported; - // syscall pid_t tid = syscall(SYS_thread_selfid); - uint64_t tid; - pthread_threadid_np(NULL, &tid); -#else -#ifndef __NR_gettid -#define __NR_gettid 224 -#endif - pid_t tid = syscall(__NR_gettid); -#endif - CHECK_NE((int)tid, -1); - return tid; -} - -static bool g_initialized = false; -typedef std::pair> PriorityFuncPair; -typedef std::vector InitFuncList; -static InitFuncList* g_initFuncs = nullptr; -static std::once_flag g_onceFlag; -void registerInitFunction(std::function func, int priority) { - if (g_initialized) { - LOG(FATAL) << "registerInitFunction() should only called before initMain()"; - } - if (!g_initFuncs) { - g_initFuncs = new InitFuncList(); - } - g_initFuncs->push_back(std::make_pair(priority, func)); -} - -void runInitFunctions() { - std::call_once(g_onceFlag, []() { - VLOG(3) << "Calling runInitFunctions"; - if (g_initFuncs) { - std::sort(g_initFuncs->begin(), - g_initFuncs->end(), - [](const PriorityFuncPair& x, const PriorityFuncPair& y) { - return x.first > y.first; - }); - for (auto& f : *g_initFuncs) { - f.second(); - } - delete g_initFuncs; - g_initFuncs = nullptr; - } - g_initialized = true; - VLOG(3) << "Call runInitFunctions done."; - }); -} - -void initMain(int argc, char** argv) { - installLayerStackTracer(); - std::string line; - for (int i = 0; i < argc; ++i) { - line += argv[i]; - line += ' '; - } - -#ifndef GFLAGS_GFLAGS_H_ - namespace gflags = google; -#endif - - gflags::ParseCommandLineFlags(&argc, &argv, true); - initializeLogging(argc, argv); - LOG(INFO) << "commandline: " << line; - CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1]; - - installProfilerSwitch(); - -#ifdef __SSE__ - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); -#endif -#ifdef __SSE3__ - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); -#endif - - if (FLAGS_seed == 0) { - unsigned int t = time(NULL); - srand(t); - ThreadLocalRand::initSeed(t); - LOG(INFO) << "random number seed=" << t; - } else { - 
srand(FLAGS_seed); - ThreadLocalRand::initSeed(FLAGS_seed); - } - - if (FLAGS_use_gpu) { - // This is the initialization of the CUDA environment, - // need before runInitFunctions. - // TODO(hedaoyuan) Can be considered in the runInitFunctions, - // but to ensure that it is the first to initialize. - hl_start(); - hl_init(FLAGS_gpu_id); - } - - version::printVersion(); - checkCPUFeature().check(); - runInitFunctions(); -} - -std::string readFile(const std::string& fileName) { - std::ifstream is(fileName); - - // get length of file: - is.seekg(0, is.end); - size_t length = is.tellg(); - is.seekg(0, is.beg); - std::string str(length, (char)0); - CHECK(is.read(&str[0], length)) << "Fail to read file: " << fileName; - return str; -} - -namespace path { - -std::string basename(const std::string& path) { - size_t pos = path.rfind(sep); - ++pos; - return path.substr(pos, std::string::npos); -} - -std::string dirname(const std::string& path) { - size_t pos = path.rfind(sep); - if (pos == std::string::npos) return std::string(); - return path.substr(0, pos); -} - -std::string join(const std::string& part1, const std::string& part2) { - if (!part2.empty() && part2.front() == sep) { - return part2; - } - std::string ret; - ret.reserve(part1.size() + part2.size() + 1); - ret = part1; - if (!ret.empty() && ret.back() != sep) { - ret += sep; - } - ret += part2; - return ret; -} - -} // namespace path - -void copyFileToPath(const std::string& file, const std::string& dir) { - VLOG(3) << "copy " << file << " to " << dir; - std::string fileName = path::basename(file); - std::string dst = path::join(dir, fileName); - std::ifstream source(file, std::ios_base::binary); - std::ofstream dest(dst, std::ios_base::binary); - CHECK(source) << "Fail to open " << file; - CHECK(dest) << "Fail to open " << dst; - dest << source.rdbuf(); - source.close(); - dest.close(); -} - -bool fileExist(const char* filename) { return (access(filename, 0) == 0); } - -void touchFile(const char* filename) { - 
if (!fileExist(filename)) { - std::ofstream os(filename); - } -} - -int isDir(const char* path) { - struct stat s_buf; - if (stat(path, &s_buf)) { - return 0; - } - return S_ISDIR(s_buf.st_mode); -} - -void rmDir(const char* folderName) { - if (isDir(folderName)) { - DIR* dp; - struct dirent* ep; - std::string buf; - dp = opendir(folderName); - while ((ep = readdir(dp)) != NULL) { - if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, "..")) { - buf = std::string(folderName) + "/" + std::string(ep->d_name); - if (isDir(buf.c_str())) { - rmDir(buf.c_str()); - } else { - remove(buf.c_str()); - } - } - } - closedir(dp); - rmdir(folderName); - } -} - -void mkDir(const char* filename) { - if (mkdir(filename, 0755)) { - CHECK(errno == EEXIST) << filename << "mkdir failed!"; - } -} - -void mkDirRecursively(const char* dir) { - struct stat sb; - - if (*dir == 0) return; // empty string - if (!stat(dir, &sb)) return; - - mkDirRecursively(path::dirname(dir).c_str()); - - mkDir(dir); -} - -void loadFileList(const std::string& fileListFileName, - std::vector& fileList) { - std::ifstream is(fileListFileName); - CHECK(is) << "Fail to open " << fileListFileName; - std::string line; - while (is) { - if (!getline(is, line)) break; - fileList.push_back(line); - } -} - -double getMemoryUsage() { -#if defined(__ANDROID__) - return 0.0; -#else - FILE* fp = fopen("/proc/meminfo", "r"); - CHECK(fp) << "failed to fopen /proc/meminfo"; - size_t bufsize = 256 * sizeof(char); - char* buf = new (std::nothrow) char[bufsize]; - CHECK(buf); - int totalMem = -1; - int freeMem = -1; - int bufMem = -1; - int cacheMem = -1; - while (getline(&buf, &bufsize, fp) >= 0) { - if (0 == strncmp(buf, "MemTotal", 8)) { - if (1 != sscanf(buf, "%*s%d", &totalMem)) { - LOG(FATAL) << "failed to get MemTotal from string: [" << buf << "]"; - } - } else if (0 == strncmp(buf, "MemFree", 7)) { - if (1 != sscanf(buf, "%*s%d", &freeMem)) { - LOG(FATAL) << "failed to get MemFree from string: [" << buf << "]"; - } - } else 
if (0 == strncmp(buf, "Buffers", 7)) { - if (1 != sscanf(buf, "%*s%d", &bufMem)) { - LOG(FATAL) << "failed to get Buffers from string: [" << buf << "]"; - } - } else if (0 == strncmp(buf, "Cached", 6)) { - if (1 != sscanf(buf, "%*s%d", &cacheMem)) { - LOG(FATAL) << "failed to get Cached from string: [" << buf << "]"; - } - } - if (totalMem != -1 && freeMem != -1 && bufMem != -1 && cacheMem != -1) { - break; - } - } - CHECK(totalMem != -1 && freeMem != -1 && bufMem != -1 && cacheMem != -1) - << "failed to get all information"; - fclose(fp); - delete[] buf; - double usedMem = 1.0 - 1.0 * (freeMem + bufMem + cacheMem) / totalMem; - return usedMem; -#endif -} - -SyncThreadPool* getGlobalSyncThreadPool() { - static std::unique_ptr syncThreadPool; - if (syncThreadPool && - syncThreadPool->getNumThreads() != (size_t)FLAGS_trainer_count) { - LOG(WARNING) << "trainer_count changed in training process!"; - syncThreadPool.reset(nullptr); - } - if (!syncThreadPool) { - syncThreadPool.reset(new SyncThreadPool(FLAGS_trainer_count)); - } - return syncThreadPool.get(); -} - -size_t calculateServiceNum(const std::string& pservers, int ports_num) { - std::vector hosts; - str::split(pservers, ',', &hosts); - return hosts.size() * ports_num; -} - -void memcpyWithCheck(void* dest, - const void* src, - size_t num, - const void* srcEnd) { - int minus = (char*)srcEnd - (char*)src - num; - CHECK_LE(0, minus) << "memcpyWithCheck: copy " << num - << " bytes data out of range."; - memcpy(dest, src, num); -} - -hl_activation_mode_t hlActiveType(const std::string& type) { - if (type == "sigmoid") { - return HL_ACTIVATION_SIGMOID; - } else if (type == "relu") { - return HL_ACTIVATION_RELU; - } else if (type == "tanh") { - return HL_ACTIVATION_TANH; - } else if (type == "linear" || type == "") { - return HL_ACTIVATION_LINEAR; - } else { - LOG(FATAL) << "Do not support activation type " << type; - } -} - -} // namespace paddle diff --git a/paddle/legacy/utils/Util.h b/paddle/legacy/utils/Util.h 
deleted file mode 100644 index 3a878b2b30127f0ff4c785a1720f27849eca6fda..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Util.h +++ /dev/null @@ -1,597 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#ifndef _WIN32 -#include // for syscall() -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Common.h" -#include "Logging.h" -#include "TrainerConfig.pb.h" - -#include "Flags.h" -#include "hl_gpu.h" - -#if defined(__ANDROID__) && (__ANDROID_API__ < 21) -inline int rand_r(unsigned int* seedp) { - (void)seedp; - return rand(); -} -#endif - -#ifdef _WIN32 -#define NOMINMAX // msvc max/min macro conflict with std::min/max -#include - -template -inline int __builtin_clz(const T& value) { - DWORD leadning_zero = 0; - if (_BitScanReverse(&leadning_zero, value)) { - return static_cast(sizeof(T) * 8 - leadning_zero); - } else { - return static_cast(0); - } -} - -inline int __builtin_clzl(const unsigned long& value) { - return __builtin_clz(value); -} - -inline int __builtin_clzll(const unsigned long long& value) { - return __builtin_clz(value); -} - -#define pid_t int -#endif - -/** - * Loop over the elements in a container - * TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach, - * or make it a inline method? 
- * Example: - * FOR_EACH(it, array) { - * sum += *it; - * } - */ -#define FOR_EACH(iterator_name, container) \ - for (auto iterator_name = (container).begin(), e = (container).end(); \ - iterator_name != e; \ - ++iterator_name) - -/** - * Loop over the elements in a container in reverse order - * TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach, - * or make it a inline method? - * Example: - * FOR_EACH_R(it, array) { - * sum += *it; - * } - */ -#define FOR_EACH_R(iterator_name, container) \ - for (auto iterator_name = (container).rbegin(), e = (container).rend(); \ - iterator_name != e; \ - ++iterator_name) - -namespace paddle { - -// return the thread id used by glog -pid_t getTID(); - -/** - * return the 1-based index of the highest bit set - * - * for x > 0: - * \f[ - * findLastSet(x) = 1 + \floor*{\log_{2}x} - * \f] - */ -inline constexpr size_t findLastSet(size_t x) { - return std::is_same::value - ? (x ? 8 * sizeof(x) - __builtin_clz(x) : 0) - : (std::is_same::value // NOLINT - ? (x ? 8 * sizeof(x) - __builtin_clzl(x) : 0) - : (x ? 8 * sizeof(x) - __builtin_clzll(x) : 0)); -} - -/** - * calculate the non-negative remainder of a/b - * @param[in] a - * @param[in] b, should be positive - * @return the non-negative remainder of a / b - */ -inline int mod(int a, int b) { - int r = a % b; - return r >= 0 ? r : r + b; -} - -/** - * find the value given a key k from container c. - * If the key can be found, the value is stored in *value - * return true if the key can be found. false otherwise. 
- */ -template -bool mapGet(const K& k, const C& c, V* value) { - auto it = c.find(k); - if (it != c.end()) { - *value = it->second; - return true; - } else { - return false; - } -} - -template -static bool contains(const Container& container, const T& val) { - return std::find(container.begin(), container.end(), val) != container.end(); -} - -/** - * pop and get the front element of a container - */ -template -typename Container::value_type pop_get_front(Container& c) { - typename Container::value_type v; - swap(v, c.front()); - c.pop_front(); - return v; -} - -#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) - -/** - * Initialize some creators or initFunctions for layers and data - * providers. - * Client codes should call this function before they refer any other - * codes that use the layer class and data provider class. - * - * Codes inside 'core' directory can call initMain which calls - * runInitFunctions directly, while codes outside core can simply - * call runInitFunctions if they don't need the commandline flags - * designed for PADDLE main procedure. - */ -void runInitFunctions(); - -/** - * Initialize logging and parse commandline - */ -void initMain(int argc, char** argv); - -// read the whole file into a string -std::string readFile(const std::string& fileName); - -// copy file to path -void copyFileToPath(const std::string& file, const std::string& path); - -// test file exist or not -bool fileExist(const char* filename); -// touch file if not exist -void touchFile(const char* filename); -// make dir if not exist -void mkDir(const char* filename); -void mkDirRecursively(const char* filename); - -void rmDir(const char* folderName); - -// load a file list file into a vector(fileList) -void loadFileList(const std::string& fileListFileName, - std::vector& fileList); - -/** - * Register a function, the function will be called in initMain(). Functions - * with higher priority will be called first. 
The execution order of functions - * with same priority is not defined. - */ -void registerInitFunction(std::function func, int priority = 0); -class InitFunction { - public: - explicit InitFunction(std::function func, int priority = 0) { - registerInitFunction(func, priority); - } -}; - -/** - * Class SetDevice provides a mechanism for set device enviroment. - * When a SetDevice object is created, it attempts to change device enviroment. - * When the SetDevice object is destructed, it will restore device environment. - */ -class SetDevice { - public: - explicit SetDevice(int deviceId) { - isSet_ = deviceId >= 0; - devId_ = 0; - if (isSet_) { - devId_ = hl_get_device(); - hl_set_device(deviceId); - } - } - ~SetDevice() { - if (isSet_) { - hl_set_device(devId_); - } - } - - protected: - bool isSet_; - int devId_; -}; - -/** - * Enables direct access to memory allocations on a peer device(d2). - * input: - * *d1* is device can direct access device d2. - * *d2* is peer device to enable direct access to by the d1 device. - */ -inline void enablePeerAccess(int d1, int d2) { -#ifdef PADDLE_WITH_CUDA - if (hl_device_can_access_peer(d1, d2)) { - SetDevice dev(d1); - hl_device_enable_peer_access(d2); - } -#else - LOG(FATAL) << "Paddle should be compiled in GPU mode to use this method."; -#endif -} - -/** - * Change the gpu computation mode to asynchronized mode for the rest of the - * compilation block. This is useful if the computation consists of multiple - * small steps. Async mode can overlap the cuda-kernel launch overhead with the - * actual computation. 
- * Example: - * { - * AsycnGpuBlock asyncBlock; - * do_some_gpu_computation - * } - */ -class AsyncGpuBlock { - public: - AsyncGpuBlock() : syncFlag_(hl_get_sync_flag()) { hl_set_sync_flag(false); } - ~AsyncGpuBlock() { - if (syncFlag_) { - hl_stream_synchronize(HPPL_STREAM_DEFAULT); - hl_set_sync_flag(syncFlag_); - } - } - - private: - bool syncFlag_; -}; - -inline bool useGpu(int deviceId) { - return FLAGS_parallel_nn ? (deviceId >= 0 ? true : false) : FLAGS_use_gpu; -} - -/* - * hppl activation mode - */ -hl_activation_mode_t hlActiveType(const std::string& type); - -/** - * Return value: memory usage ratio (from 0-1) - */ -double getMemoryUsage(); - -/** - * split array by index. - * used by sync multi thread task, - * each thread call calcSplitArrayInterval with thread id, - * get a interval as return. - * input: - * *totalSize* is array size, - * *tId* is thread id, *tSize* is total worker thread num - * output: - * start and end index as a std::pair - */ -inline std::pair calcSplitArrayInterval(size_t totalSize, - size_t tId, - size_t tSize) { - size_t start = totalSize * tId / tSize; - size_t end = totalSize * (tId + 1) / tSize; - return std::make_pair(start, end); -} - -/** - * same as above, but split at boundary of block. - */ -inline std::pair calcSplitArrayInterval(size_t totalSize, - size_t tId, - size_t tSize, - size_t blockSize) { - size_t numBlocks = totalSize / blockSize; - if (numBlocks * blockSize < totalSize) { - numBlocks++; - } - - auto interval = calcSplitArrayInterval(numBlocks, tId, tSize); - size_t start = std::min(interval.first * blockSize, totalSize); - size_t end = std::min(interval.second * blockSize, totalSize); - - return std::make_pair(start, end); -} - -// Calculate the number of pservers/dservers based -// on the host list and port_num. -size_t calculateServiceNum(const std::string& pservers, int ports_num); - -/** - * sort and unique ids vector. 
- */ -inline void uniqueIds(std::vector& ids) { - std::sort(ids.begin(), ids.end()); - auto endpos = std::unique(ids.begin(), ids.end()); - ids.erase(endpos, ids.end()); -} - -/** - * Read Type value - */ -template -T readT(char*& p, const char* pEnd) { - int minus = pEnd - p - sizeof(T); - CHECK_LE(0, minus) << "readT: Out of range."; - T v = *reinterpret_cast(p); - p += sizeof(T); - return v; -} - -void memcpyWithCheck(void* dest, - const void* src, - size_t num, - const void* srcEnd); - -/** - * A global sync thread pool, has #FLAGS_trainer_count of threads. - * can be used in main thread. - */ -class SyncThreadPool; -SyncThreadPool* getGlobalSyncThreadPool(); - -namespace path { - -// directory separator -const char sep = '/'; - -// Return the base name of pathname path. -std::string basename(const std::string& path); - -// Return the directory name of path. If the path does not contains any -// directory, it returns an empty string. -std::string dirname(const std::string& path); - -/* - Join two path components intelligently. - The return value is the concatenation of part1 and part2 with exactly one - directory separator (path.sep) following each non-empty part except the last, - meaning that the result will only end in a separator if the last part is - empty. - If a component is an absolute path, all previous components are thrown away - and joining continues from the absolute path component. -*/ -std::string join(const std::string& part1, const std::string& part2); - -template -std::string join(const std::string& part1, - const std::string& part2, - Args... args) { - return join(join(part1, part2), args...); -} - -} // namespace path - -/** - * A Checker for each invoke of method in same thread. 
- */ -class SameThreadChecker { - public: - SameThreadChecker() {} - - /** - * Disable copy - */ - SameThreadChecker(const SameThreadChecker& other) = delete; - SameThreadChecker& operator=(const SameThreadChecker& other) = delete; - - /** - * Each invoke of check method should be in same thread, otherwise, it will - * failed and core dump. - */ - void check() { - std::thread::id curThreadId = std::this_thread::get_id(); - std::call_once(onceFlag_, [&] { invokeThreadId_ = curThreadId; }); - CHECK_EQ(invokeThreadId_, curThreadId) - << "This method should invoke in " - "same thread, but first invoked in " - << invokeThreadId_ << " current invoked in " << curThreadId; - } - - private: - std::once_flag onceFlag_; - std::thread::id invokeThreadId_; -}; - -/** - * Key-Value Cache Helper. - * - * It store a object instance global. User can invoke get method by key and a - * object creator callback. If there is a instance stored in cache, then it will - * return a shared_ptr of it, otherwise, it will invoke creator callback, create - * a new instance store global, and return it. - * - * The cache instance will release when nobody hold a reference to it. - * - * The KType is the key type. - * The VType is the value type. - * The Hash is the key hasher object. - */ -template -class WeakKVCache { - public: - WeakKVCache() {} - - std::shared_ptr get(const KType& key, - const std::function& creator) { - std::lock_guard guard(this->lock_); - auto it = this->storage_.find(key); - if (it != this->storage_.end()) { - auto& val = it->second; - auto retVal = val.lock(); - if (retVal != nullptr) { - return retVal; - } // else fall trough. Because it is WeakPtr Cache. 
- } - auto rawPtr = creator(); - CHECK(rawPtr != nullptr); - std::shared_ptr retVal(rawPtr); - this->storage_[key] = retVal; - return retVal; - } - - private: - std::mutex lock_; - std::unordered_map, Hash> storage_; -}; - -/** - * @brief The ScopedCallbacks class is a callback invoker when object is - * created and destroyed. - */ -template -class ScopedCallbacks { - public: - ScopedCallbacks(CallbackType enter, CallbackType exit, Args&... args) - : exit_(std::bind(exit, args...)) { - enter(args...); - } - - ScopedCallbacks(const ScopedCallbacks& other) = delete; - ScopedCallbacks& operator=(const ScopedCallbacks& other) = delete; - - ~ScopedCallbacks() { exit_(); } - - private: - std::function exit_; -}; - -/** - * std compatible allocator with memory alignment. - * @tparam T type of allocator elements. - * @tparam Alignment the alignment in bytes. - */ -template -class AlignedAllocator { - public: - /// std campatible typedefs. - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef T value_type; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - - T* address(T& r) const { return &r; } - - const T* address(const T& r) const { return &r; } - - size_t max_size() const { - return std::numeric_limits::max() / sizeof(T); - } - - template - struct rebind { - typedef AlignedAllocator other; - }; - - bool operator==(const AlignedAllocator& other) const { return true; } - - bool operator!=(const AlignedAllocator& other) const { - return !(*this == &other); - } - - void construct(const T* p, const T& t) const { - void* pv = const_cast(p); - new (pv) T(t); - } - - void deallocate(const T* p, const size_type n) const { - (void)(n); // UNUSED n - free(const_cast(p)); - } - - void destroy(const T* p) const { p->~T(); } - - AlignedAllocator() {} - ~AlignedAllocator() {} - - AlignedAllocator(const AlignedAllocator&) {} - template - AlignedAllocator(const AlignedAllocator&) {} - - /** - * 
@brief allocate n elements of type T, the first address is aligned by - * Alignment bytes. - * @param n element count. - * @return begin address of allocated buffer - * @throw std::length_error for n * sizeof(T) is overflowed. - * @throw std::bad_alloc - */ - T* allocate(const size_type n) const { - if (n == 0) { - return nullptr; - } - if (n > max_size()) { - throw std::length_error("AlignAllocator::allocate() - Int Overflow."); - } - void* r = nullptr; - CHECK_EQ(posix_memalign(&r, Alignment * 8, sizeof(T) * n), 0); - if (r == nullptr) { - throw std::bad_alloc(); - } else { - return static_cast(r); - } - } - - template - T* allocate(const std::size_t n, const U* /* const hint */) const { - return this->allocate(n); - } - - private: - AlignedAllocator& operator=(const AlignedAllocator&); // disable -}; - -class Deprecated { - public: - explicit Deprecated(const std::string& msg = "") { - if (msg.empty()) { - LOG(WARNING) << "This class is deprecated, please do not use this class."; - } else { - LOG(WARNING) << msg; - } - } -}; - -} // namespace paddle diff --git a/paddle/legacy/utils/Version.cpp b/paddle/legacy/utils/Version.cpp deleted file mode 100644 index 731c30842118bce59ce45297d9c8f47fa0a69d69..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Version.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "Version.h" - -#include -#include -#include "Flags.h" -#include "Util.h" - -DECLARE_bool(version); - -namespace paddle { -namespace version { - -void printVersion(std::ostream& os) { -#ifndef PADDLE_VERSION -#define PADDLE_VERSION "unknown" -#endif -// converts macro to string -// https://gcc.gnu.org/onlinedocs/cpp/Stringification.html -#define xstr(s) str(s) -#define str(s) #s - - os << "paddle version: " << xstr(PADDLE_VERSION) << std::endl - << std::boolalpha << "\t" - << "withGpu: " << version::isWithGpu() << std::endl - << "\t" - << "withAvx: " << version::isWithAvx() << std::endl - << "\t" - << "withPyDataProvider: " << version::isWithPyDataProvider() << std::endl - << "\t" - << "withTimer: " << version::isWithTimer() << std::endl - << "\t" - << "withFpga: " << version::isWithFpga() << std::endl - << "\t" - << "real byte size: " << version::sizeofReal() << std::endl - << std::endl; -} - -void printVersion() { - if (FLAGS_version) { - printVersion(std::cout); - exit(0); - } -} - -} // namespace version -} // namespace paddle diff --git a/paddle/legacy/utils/Version.h b/paddle/legacy/utils/Version.h deleted file mode 100644 index 004d62451cddfee8fbd687938086e04ecb2332a9..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/Version.h +++ /dev/null @@ -1,131 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include -#include -#include "Common.h" - -namespace paddle { - -/** - * namespace paddle::version - * Some constexpr to detect paddle version. - * use paddle_trainer --version to print version information. - * - * Possible output as follow: - * paddle version: - * withGpu: false - * withAvx: false - * withPyDataProvider: true - * withTimer: false - * withFpga: false - * real byte size: 4 - */ - -namespace version { - -/** - * @brief print paddle version and exit when --version flag setted. Otherwise, - * do nothing. - */ -void printVersion(); - -void printVersion(std::ostream& os); -/** - * @brief isWithGpu - * @return return true if paddle compiled with GPU - */ -constexpr bool isWithGpu() { -#ifndef PADDLE_WITH_CUDA - return false; -#else - return true; -#endif -} - -/** - * @brief isWithPyDataProvider - * @return return true if paddle compiled with PyDataProvider - * - * @note: A complete python interpreter is embeded into paddle binary if paddle - * is compiled with PyDataProvider. Then the config parser just invoke python - * method. Otherwise, ConfigParser just serializes config into protobuf, and - * pass to C++ by using stdio. - */ -constexpr bool isWithPyDataProvider() { -#ifdef PADDLE_NO_PYTHON - return false; -#else - return true; -#endif -} - -/** - * @brief isWithTimer - * @return true if paddle compiled with timer. - */ -constexpr bool isWithTimer() { -#ifdef PADDLE_DISABLE_TIMER - return false; -#else - return true; -#endif -} - -/** - * @brief isWithAvx - * @return true if paddle compiled with AVX instructs. - */ -constexpr bool isWithAvx() { -#ifdef __AVX__ - return true; -#else - return false; -#endif -} - -/** - * @brief isWithFpga - * @return true if paddle compiled with FPGA for prediction. 
- */ -constexpr bool isWithFpga() { -#ifdef PADDLE_USE_FPGA - return true; -#else - return false; -#endif -} - -/** - * @brief sizeofReal - * @return return the byte size of real - */ -constexpr size_t sizeofReal() { return sizeof(real); } - -/** - * @brief isPaddleUseDouble - * @return true if paddle compiled with double precision. - */ -constexpr bool isPaddleUseDouble() { return sizeofReal() == sizeof(double); } - -/** - * @brief isPaddleUseFloat - * @return true if paddle compiled with float precision - */ -constexpr bool isPaddleUseFloat() { return sizeofReal() == sizeof(float); } - -} // namespace version - -} // namespace paddle diff --git a/paddle/legacy/utils/arch/linux/Locks.cpp b/paddle/legacy/utils/arch/linux/Locks.cpp deleted file mode 100644 index 32d351e3328afd79007aea7a51e59cbfc941eeeb..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/arch/linux/Locks.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/Locks.h" -#include -#include -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { -class SemaphorePrivate { - public: - sem_t sem; -}; - -Semaphore::Semaphore(int initValue) : m(new SemaphorePrivate()) { - sem_init(&m->sem, 0, initValue); -} - -Semaphore::~Semaphore() { - sem_destroy(&m->sem); - delete m; -} - -bool Semaphore::timeWait(struct timespec* ts) { - return (0 == sem_timedwait(&m->sem, ts)); -} - -void Semaphore::wait() { sem_wait(&m->sem); } - -void Semaphore::post() { sem_post(&m->sem); } - -/// SpinLockPrivate - -#ifdef PADDLE_USE_PTHREAD_SPINLOCK - -class SpinLockPrivate { - public: - inline SpinLockPrivate() { pthread_spin_init(&lock_, 0); } - inline ~SpinLockPrivate() { pthread_spin_destroy(&lock_); } - - inline void lock() { pthread_spin_lock(&lock_); } - inline void unlock() { pthread_spin_unlock(&lock_); } - - pthread_spinlock_t lock_; - char padding_[64 - sizeof(pthread_spinlock_t)]; -}; - -#else -// clang-format off -#include -#include -// clang-format on - -class SpinLockPrivate { - public: - inline void lock() { - while (lock_.test_and_set(std::memory_order_acquire)) { - } - } - inline void unlock() { lock_.clear(std::memory_order_release); } - - std::atomic_flag lock_ = ATOMIC_FLAG_INIT; - char padding_[64 - sizeof(lock_)]; // Padding to cache line size -}; - -#endif - -SpinLock::SpinLock() : m(new SpinLockPrivate()) {} -SpinLock::~SpinLock() { delete m; } -void SpinLock::lock() { m->lock(); } -void SpinLock::unlock() { m->unlock(); } - -/// ThreadBarrierPrivate - -#ifdef PADDLE_USE_PTHREAD_BARRIER - -class ThreadBarrierPrivate { - public: - pthread_barrier_t barrier_; - - inline explicit ThreadBarrierPrivate(int count) { - pthread_barrier_init(&barrier_, nullptr, count); - } - - inline ~ThreadBarrierPrivate() { pthread_barrier_destroy(&barrier_); } - - inline void wait() { pthread_barrier_wait(&barrier_); } -}; - -#else - -class ThreadBarrierPrivate { - public: - pthread_mutex_t mutex_; - 
pthread_cond_t cond_; - int count_; - int tripCount_; - - inline explicit ThreadBarrierPrivate(int cnt) : count_(0), tripCount_(cnt) { - CHECK_NE(cnt, 0); - CHECK_GE(pthread_mutex_init(&mutex_, 0), 0); - CHECK_GE(pthread_cond_init(&cond_, 0), 0); - } - - inline ~ThreadBarrierPrivate() { - pthread_cond_destroy(&cond_); - pthread_mutex_destroy(&mutex_); - } - - /** - * @brief wait - * @return true if the last wait - */ - inline bool wait() { - pthread_mutex_lock(&mutex_); - ++count_; - if (count_ >= tripCount_) { - count_ = 0; - pthread_cond_broadcast(&cond_); - pthread_mutex_unlock(&mutex_); - return true; - } else { - pthread_cond_wait(&cond_, &mutex_); - pthread_mutex_unlock(&mutex_); - return false; - } - } -}; - -#endif - -/// ThreadBarrier - -ThreadBarrier::ThreadBarrier(int count) : m(new ThreadBarrierPrivate(count)) {} -ThreadBarrier::~ThreadBarrier() { delete m; } -void ThreadBarrier::wait() { m->wait(); } - -} // namespace paddle diff --git a/paddle/legacy/utils/arch/osx/Excepts.cpp b/paddle/legacy/utils/arch/osx/Excepts.cpp deleted file mode 100644 index 2b7d6dca8454417fd78f6da7f906785d24a6219b..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/arch/osx/Excepts.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/Excepts.h" - -#if defined(__APPLE__) || defined(__OSX__) -#if defined(__arm__) || defined(__arm64__) -// TODO(liuyiqun): implement the arm version -int fegetexcept(void) { return -1; } -int feenableexcept(unsigned int excepts) { return -1; } -int fedisableexcept(unsigned int excepts) { return -1; } -#else -int fegetexcept(void) { - static fenv_t fenv; - return fegetenv(&fenv) ? -1 : (fenv.__control & FE_ALL_EXCEPT); -} - -int feenableexcept(unsigned int excepts) { - static fenv_t fenv; - unsigned int new_excepts = excepts & FE_ALL_EXCEPT, old_excepts; - - if (fegetenv(&fenv)) return -1; - old_excepts = fenv.__control & FE_ALL_EXCEPT; - - // unmask - fenv.__control &= ~new_excepts; - fenv.__mxcsr &= ~(new_excepts << 7); - - return (fesetenv(&fenv) ? -1 : old_excepts); -} - -int fedisableexcept(unsigned int excepts) { - static fenv_t fenv; - unsigned int new_excepts = excepts & FE_ALL_EXCEPT, old_excepts; - - if (fegetenv(&fenv)) return -1; - old_excepts = fenv.__control & FE_ALL_EXCEPT; - - // mask - fenv.__control |= new_excepts; - fenv.__mxcsr |= new_excepts << 7; - - return (fesetenv(&fenv) ? -1 : old_excepts); -} -#endif -#endif diff --git a/paddle/legacy/utils/arch/osx/Locks.cpp b/paddle/legacy/utils/arch/osx/Locks.cpp deleted file mode 100644 index b68c48f0c31aa928a634e0369295ec084b9ccd8e..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/arch/osx/Locks.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/utils/Locks.h" -#include -#include -#include -#include "paddle/legacy/utils/Logging.h" - -namespace paddle { - -class SemaphorePrivate { - public: - ~SemaphorePrivate() { dispatch_release(sem); } - - dispatch_semaphore_t sem; -}; - -Semaphore::Semaphore(int initValue) : m(new SemaphorePrivate()) { - m->sem = dispatch_semaphore_create(initValue); -} - -Semaphore::~Semaphore() { delete m; } - -bool Semaphore::timeWait(timespec *ts) { - dispatch_time_t tm = dispatch_walltime(ts, 0); - return (0 == dispatch_semaphore_wait(m->sem, tm)); -} - -void Semaphore::wait() { - dispatch_semaphore_wait(m->sem, DISPATCH_TIME_FOREVER); -} - -void Semaphore::post() { dispatch_semaphore_signal(m->sem); } - -class SpinLockPrivate { - public: - std::atomic_flag lock_ = ATOMIC_FLAG_INIT; - char padding_[64 - sizeof(lock_)]; // Padding to cache line size -}; - -SpinLock::SpinLock() : m(new SpinLockPrivate()) {} -SpinLock::~SpinLock() { delete m; } - -void SpinLock::lock() { - while (m->lock_.test_and_set(std::memory_order_acquire)) { - } -} - -void SpinLock::unlock() { m->lock_.clear(std::memory_order_release); } - -class ThreadBarrierPrivate { - public: - pthread_mutex_t mutex_; - pthread_cond_t cond_; - int count_; - int tripCount_; - - inline explicit ThreadBarrierPrivate(int cnt) : count_(0), tripCount_(cnt) { - CHECK_NE(cnt, 0); - CHECK_GE(pthread_mutex_init(&mutex_, 0), 0); - CHECK_GE(pthread_cond_init(&cond_, 0), 0); - } - - inline ~ThreadBarrierPrivate() { - pthread_cond_destroy(&cond_); - pthread_mutex_destroy(&mutex_); - } - - /** - * @brief wait - * @return true if the last wait - */ - inline bool wait() { - pthread_mutex_lock(&mutex_); - ++count_; - if (count_ >= tripCount_) { - count_ = 0; - pthread_cond_broadcast(&cond_); - pthread_mutex_unlock(&mutex_); - return true; - } else { - pthread_cond_wait(&cond_, &mutex_); - 
pthread_mutex_unlock(&mutex_); - return false; - } - } -}; - -ThreadBarrier::ThreadBarrier(int count) : m(new ThreadBarrierPrivate(count)) {} -ThreadBarrier::~ThreadBarrier() { delete m; } -void ThreadBarrier::wait() { m->wait(); } - -} // namespace paddle diff --git a/paddle/legacy/utils/enable_virtualenv.py b/paddle/legacy/utils/enable_virtualenv.py deleted file mode 100644 index 4e998381e9e2a9254c642e969abb9f976d0e3938..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/enable_virtualenv.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - - -def __activate_virtual_env__(): - __path__ = os.getenv('VIRTUAL_ENV') - if __path__ is None: - return - __script__ = os.path.join(__path__, 'bin', 'activate_this.py') - execfile(__script__, {'__file__': __script__}) - - -__activate_virtual_env__() diff --git a/paddle/legacy/utils/tests/CMakeLists.txt b/paddle/legacy/utils/tests/CMakeLists.txt deleted file mode 100644 index 4af01db5c84cb497b756027cbb6ad06c081a8899..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -add_simple_unittest(test_Thread) -add_simple_unittest(test_StringUtils) -add_simple_unittest(test_CustomStackTrace) -add_simple_unittest(test_ThreadBarrier) -add_simple_unittest(test_SpinLock) -add_simple_unittest(test_SIMDFlags) -add_simple_unittest(test_Error) - -add_executable( - test_CustomStackTracePrint - test_CustomStackTracePrint.cpp -) -link_paddle_exe(test_CustomStackTracePrint) -if(NOT APPLE) - add_test(NAME test_CustomStackTracePrint - COMMAND ${PADDLE_SOURCE_DIR}/paddle/legacy/utils/tests/test_CustomStackTracePrint.sh - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -endif() diff --git a/paddle/legacy/utils/tests/test_CustomStackTrace.cpp b/paddle/legacy/utils/tests/test_CustomStackTrace.cpp deleted file mode 100644 index 2a418e3ae2277fc5dc6856d131dafa9daf0bad47..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_CustomStackTrace.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include // NOLINT -#include // NOLINT - -#include "paddle/legacy/utils/CustomStackTrace.h" -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/StringUtil.h" -#include "paddle/legacy/utils/Util.h" - -DEFINE_int32(test_thread_num, 10, "testing thread number"); - -void testNormalImpl( - const std::function&, - size_t, - size_t, - paddle::ThreadBarrier&, - paddle::ThreadBarrier&)>& callback) { - paddle::CustomStackTrace tracer; - paddle::ThreadBarrier doneBarrier(FLAGS_test_thread_num + 1); - paddle::ThreadBarrier startBarrier(FLAGS_test_thread_num + 1); - constexpr size_t countDown = 10; - constexpr size_t layerSize = 1000; - std::vector> threads; - threads.reserve(FLAGS_test_thread_num); - - for (int32_t i = 0; i < FLAGS_test_thread_num; ++i) { - threads.emplace_back( - new std::thread([&tracer, &startBarrier, &doneBarrier, &callback] { - callback(tracer, countDown, layerSize, startBarrier, doneBarrier); - })); - } - size_t cntDown = countDown; - while (cntDown-- > 0) { - startBarrier.wait(); - sleep(1); - doneBarrier.wait(); - ASSERT_TRUE(tracer.empty()); - } - - for (auto& thread : threads) { - thread->join(); - } -} - -TEST(CustomStackTrace, normalTrain) { - testNormalImpl([](paddle::CustomStackTrace& tracer, - size_t countDown, - size_t layerSize, - paddle::ThreadBarrier& start, - paddle::ThreadBarrier& finish) { - while (countDown-- > 0) { - start.wait(); - for (size_t i = 0; i < layerSize; ++i) { - tracer.push("layer_" + paddle::str::to_string(i)); - } - for (size_t i = 0; i < layerSize; ++i) { - tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i)); - } - finish.wait(); - } - }); -} - -TEST(CustomStackTrace, normalTest) { - testNormalImpl([](paddle::CustomStackTrace& tracer, - size_t countDown, - size_t layerSize, - paddle::ThreadBarrier& start, - paddle::ThreadBarrier& finish) { - while (countDown-- > 0) { - start.wait(); - 
for (size_t i = 0; i < layerSize; ++i) { - tracer.push("layer_" + paddle::str::to_string(i)); - } - tracer.clear(); // in forward test, tracer will clear after forward. - finish.wait(); - } - }); -} diff --git a/paddle/legacy/utils/tests/test_CustomStackTracePrint.cpp b/paddle/legacy/utils/tests/test_CustomStackTracePrint.cpp deleted file mode 100644 index 78886a3ed9f237a39079bbf604a376f98bd86b59..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_CustomStackTracePrint.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/CustomStackTrace.h" -#include "paddle/legacy/utils/StringUtil.h" -#include "paddle/legacy/utils/Util.h" - -int main(int argc, char** argv) { - paddle::initMain(argc, argv); - - for (size_t i = 0; i < 1000; ++i) { - paddle::gLayerStackTrace.push("layer_" + paddle::str::to_string(i)); - if (i == 998) { - throw "Unhandle exception"; - } - } - - return 0; -} diff --git a/paddle/legacy/utils/tests/test_CustomStackTracePrint.sh b/paddle/legacy/utils/tests/test_CustomStackTracePrint.sh deleted file mode 100755 index b5543485f365adee49629578d470a14e0c742547..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_CustomStackTracePrint.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -echo "Test Custom Stack Trace print correct result when fail" -./test_CustomStackTracePrint >customStackTraceLog 2>&1 -if [ $? -eq 0 ]; then - exit 1 -else - set -e - TEXT="" - for ((i=0; i<=998; i++)) - do - TEXT="layer_$i, "$TEXT - done - TEXT="Forwarding "$TEXT - grep -q "$TEXT" customStackTraceLog -fi diff --git a/paddle/legacy/utils/tests/test_Error.cpp b/paddle/legacy/utils/tests/test_Error.cpp deleted file mode 100644 index 250c4d58a64a0d284a15418e40264f1857d30050..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_Error.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/legacy/utils/Error.h" - -#include - -TEST(Error, testAll) { - paddle::Error error; - ASSERT_TRUE(error.isOK()); - error = paddle::Error("I'm the error"); - ASSERT_FALSE(error.isOK()); - ASSERT_STREQ("I'm the error", error.msg()); - - error = paddle::Error("error2"); - ASSERT_FALSE(error.isOK()); - ASSERT_STREQ("error2", error.msg()); - - int i = 3; - auto error3 = paddle::Error("error%d", i); - ASSERT_FALSE(error3.isOK()); - ASSERT_STREQ("error3", error3.msg()); -} diff --git a/paddle/legacy/utils/tests/test_SIMDFlags.cpp b/paddle/legacy/utils/tests/test_SIMDFlags.cpp deleted file mode 100644 index 6362210acdaf26a26a2548ddaf8ed455b9c76618..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_SIMDFlags.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "paddle/legacy/utils/CpuId.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Util.h" - -using namespace paddle; // NOLINT - -TEST(SIMDFlags, gccTest) { -#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)) && \ - !defined(__arm__) && !defined(__aarch64__) - // clang-format off - CHECK(!__builtin_cpu_supports("sse") != HAS_SSE); - CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2); - CHECK(!__builtin_cpu_supports("sse3") != HAS_SSE3); - CHECK(!__builtin_cpu_supports("ssse3") != HAS_SSSE3); - CHECK(!__builtin_cpu_supports("sse4.1") != HAS_SSE41); - CHECK(!__builtin_cpu_supports("sse4.2") != HAS_SSE42); - CHECK(!__builtin_cpu_supports("avx") != HAS_AVX); - CHECK(!__builtin_cpu_supports("avx2") != HAS_AVX2); -// clang-format on -#endif -} - -TEST(SIMDFlags, normalPrint) { - LOG(INFO) << "Has SSE: " << std::boolalpha << HAS_SSE; - LOG(INFO) << "Has SSE2: " << std::boolalpha << HAS_SSE2; - LOG(INFO) << "Has SSE3: " << std::boolalpha << HAS_SSE3; - LOG(INFO) << "Has SSSE3: " << std::boolalpha << HAS_SSSE3; - LOG(INFO) << "Has SSE4: " << std::boolalpha << HAS_SSE41 || HAS_SSE42; - LOG(INFO) << "Has FMA3: " << std::boolalpha << HAS_FMA3; - LOG(INFO) << "Has FMA4: " << std::boolalpha << HAS_FMA4; - LOG(INFO) << "Has AVX: " << std::boolalpha << HAS_AVX; - LOG(INFO) << "Has AVX2: " << std::boolalpha << HAS_AVX2; - LOG(INFO) << "Has AVX512: " << std::boolalpha << HAS_AVX512; - LOG(INFO) << "Has NEON: " << std::boolalpha << HAS_NEON; -} diff --git a/paddle/legacy/utils/tests/test_SpinLock.cpp b/paddle/legacy/utils/tests/test_SpinLock.cpp deleted file mode 100644 index 4cd7836d6af251b48925de95c2811361313d7b41..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_SpinLock.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include -#include - -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Util.h" - -DEFINE_int32(test_thread_num, 100, "testing thread number"); - -void testNormalImpl( - size_t thread_num, - const std::function& callback) { - paddle::SpinLock mutex; - std::vector threads; - threads.reserve(thread_num); - - size_t count = 0; - for (size_t i = 0; i < thread_num; ++i) { - threads.emplace_back([&thread_num, &count, &mutex, &callback] { - callback(thread_num, count, mutex); - }); - } - for (auto& thread : threads) { - thread.join(); - } - // Check whether all threads reach this point or not - CHECK_EQ(count, thread_num); -} - -TEST(ThreadSpinLock, normalTest) { - for (auto& thread_num : {10, 30, 50, 100, 300, 1000}) { - testNormalImpl( - thread_num, - [](size_t thread_num, size_t& count, paddle::SpinLock& mutex) { - std::lock_guard lock(mutex); - ++count; - }); - } -} diff --git a/paddle/legacy/utils/tests/test_StringUtils.cpp b/paddle/legacy/utils/tests/test_StringUtils.cpp deleted file mode 100644 index 61d2815f097af7125bfefdc4909509564300d6aa..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_StringUtils.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/legacy/utils/StringUtil.h" - -#include - -TEST(StringUtil, to) { - ASSERT_NEAR(paddle::str::to("12.45"), 12.45, 1e-5); - ASSERT_DEATH_IF_SUPPORTED(paddle::str::to("12.45x23"), ".*"); - ASSERT_DEATH_IF_SUPPORTED(paddle::str::to(""), ".*"); -} diff --git a/paddle/legacy/utils/tests/test_Thread.cpp b/paddle/legacy/utils/tests/test_Thread.cpp deleted file mode 100644 index 5e07da3236862c5f585671d9bb8e3fbbd1c5b5fc..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_Thread.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include - -using paddle::AsyncThreadPool; // NOLINT - -TEST(AsyncThreadPool, addJob) { - AsyncThreadPool pool(8); - auto a = pool.addJob([] { return 1; }); - auto b = pool.addJob([] { return true; }); - auto c = pool.addJob([] { return false; }); - - ASSERT_EQ(a.get(), 1); - ASSERT_TRUE(b.get()); - ASSERT_FALSE(c.get()); -} - -TEST(AsyncThreadPool, addBatchJob) { - AsyncThreadPool pool(8); - std::atomic counter{0}; - - std::vector jobs; - - for (int i = 0; i < 10000; i++) { - jobs.emplace_back([&] { counter++; }); - } - - pool.addBatchJobs(jobs); - - ASSERT_EQ(counter, 10000); -} - -TEST(AsyncThreadPool, multiThreadAddBatchJob) { - AsyncThreadPool levelOnePool(200); - AsyncThreadPool levelTwoPool(200); - - std::shared_ptr mut = std::make_shared(); - int counter = 0; - const int numMonitors = 300; - const int numSlaves = 300; - std::vector moniterJobs(numMonitors, [&] { - std::vector slaveJobs(numSlaves, [mut, &counter] { - std::lock_guard lk(*mut); - counter++; - }); - levelTwoPool.addBatchJobs(slaveJobs); - }); - levelOnePool.addBatchJobs(moniterJobs); - ASSERT_EQ(counter, numMonitors * numSlaves); -} - -TEST(AsyncThreadPool, addBatchJobWithResults) { - AsyncThreadPool pool(100); - - std::vector> jobs; - const int numJobs = 100; - for (int i = 0; i < numJobs; i++) { - jobs.emplace_back([i] { return i; }); - } - - std::vector res; - pool.addBatchJobs(jobs, res); - - for (int i = 0; i < numJobs; i++) { - ASSERT_EQ(res[i], i); - } -} diff --git a/paddle/legacy/utils/tests/test_ThreadBarrier.cpp b/paddle/legacy/utils/tests/test_ThreadBarrier.cpp deleted file mode 100644 index 9c8851ae2112320c89aa3e7ed6e850d00cc14006..0000000000000000000000000000000000000000 --- a/paddle/legacy/utils/tests/test_ThreadBarrier.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include -#include - -#include "paddle/legacy/utils/Locks.h" -#include "paddle/legacy/utils/Logging.h" -#include "paddle/legacy/utils/Util.h" - -DEFINE_int32(test_thread_num, 100, "testing thread number"); - -void testNormalImpl( - size_t thread_num, - const std::function&, - paddle::ThreadBarrier&)>& callback) { - std::mutex mutex; - std::set tids; - paddle::ThreadBarrier barrier(thread_num); - - std::vector threads; - threads.reserve(thread_num); - for (size_t i = 0; i < thread_num; ++i) { - threads.emplace_back([&thread_num, &mutex, &tids, &barrier, &callback] { - callback(thread_num, mutex, tids, barrier); - }); - } - - for (auto& thread : threads) { - thread.join(); - } -} - -TEST(ThreadBarrier, normalTest) { - for (auto& thread_num : {10, 30, 50, 100, 300, 1000}) { - testNormalImpl(thread_num, - [](size_t thread_num, - std::mutex& mutex, - std::set& tids, - paddle::ThreadBarrier& barrier) { - { - std::lock_guard guard(mutex); - tids.insert(std::this_thread::get_id()); - } - barrier.wait(); - // Check whether all threads reach this point or not - CHECK_EQ(tids.size(), thread_num); - }); - } -}