diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst index 1583fce981fed64141acdccc0d89b46b63d13cc0..f902d1c995bc5045d62d0b2e279ee612f9dc7c93 100644 --- a/doc/ui/api/trainer_config_helpers/layers.rst +++ b/doc/ui/api/trainer_config_helpers/layers.rst @@ -245,10 +245,10 @@ addto_layer :members: addto_layer :noindex: -convex_comb_layer +linear_comb_layer ----------------- .. automodule:: paddle.trainer_config_helpers.layers - :members: convex_comb_layer + :members: linear_comb_layer :noindex: interpolation_layer @@ -280,7 +280,13 @@ tensor_layer .. automodule:: paddle.trainer_config_helpers.layers :members: tensor_layer :noindex: - + +cos_sim +------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: cos_sim + :noindex: + trans_layer ------------ .. automodule:: paddle.trainer_config_helpers.layers @@ -341,12 +347,6 @@ rank_cost :members: rank_cost :noindex: -cos_sim -------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: cos_sim - :noindex: - crf_layer ----------------- .. automodule:: paddle.trainer_config_helpers.layers diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 19c94b2453981301bcb632ecbe5d322369009973..c2dce1977bdf5daefb6c5b8032bb6b12563e9425 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -150,7 +150,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP) // APIs available after R4: -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 #define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \ __macro(cudnnBatchNormalizationForwardTraining) \ __macro(cudnnBatchNormalizationForwardInference) \ @@ -999,7 +999,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, double epsilon, real *savedMean, real *savedVar) { -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 if ((NULL != runningMean && NULL == runningInvVar) || (NULL == runningMean && NULL != runningInvVar)) { LOG(FATAL) << "runningMean and runningInvVar can be NULL " @@ -1024,7 +1024,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc, CHECK_SYNC("hl_batch_norm_forward_training failed"); #else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " + LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. " << "But cudnn lib version is " << g_cudnn_lib_version; #endif } @@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, real *estimatedMean, real *estimatedInvVar, double epsilon) { -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc); cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc); cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc); @@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, CHECK_SYNC("hl_batch_norm_forward_inference failed"); #else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " + LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. " << "But cudnn lib version is " << g_cudnn_lib_version; #endif } @@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, double epsilon, real *savedMean, real *savedInvVar) { -#if CUDNN_VERSION >= 4000 +#if CUDNN_VERSION >= 4007 if ((NULL != savedMean && NULL == savedInvVar) || (NULL == savedMean && NULL != savedInvVar)) { LOG(FATAL) << "savedMean and savedVar can be NULL " @@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc, cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward( t_resource.cudnn_handle, mode, &alpha, &beta, -#if CUDNN_VERSION >= 5000 &alpha, &beta, -#endif xDesc, input, dyDesc, outGrad, dxDesc, inGrad, bnDesc, scale, scaleGrad, biasGrad, epsilon, savedMean, savedInvVar)); CHECK_SYNC("hl_batch_norm_backward failed"); #else - LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. " + LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. " << "But cudnn lib version is " << g_cudnn_lib_version; #endif } diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index fca52828957a2da42238c9f945f5126beea95008..eb1522a178d48c1d71b5b4a63ce73f65e1167288 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -277,6 +277,7 @@ void NeuralNetwork::getState(MachineState& machineState) { } void NeuralNetwork::backward(const UpdateCallback& callback) { + gLayerStackTrace.pop(""); // tell layer trace is during backward. FOR_EACH_R(layer, layers_) { REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); if ((*layer)->needGradient()) { diff --git a/paddle/gserver/layers/ConvexCombinationLayer.cpp b/paddle/gserver/layers/ConvexCombinationLayer.cpp index e092b2e390f37cd322db8bed8273f561fa979791..a81cf939af671f3fb34fb52ae33035a7bb524aed 100644 --- a/paddle/gserver/layers/ConvexCombinationLayer.cpp +++ b/paddle/gserver/layers/ConvexCombinationLayer.cpp @@ -21,18 +21,20 @@ limitations under the License. */ namespace paddle { /** - * @brief A layer for convex weighted average of vectors, + * @brief A layer for weighted sum of vectors, * which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND * TRANSLATE - * - Input: the first input contains the convex weights (batchSize x weightDim), - * and the shape of second input is (batchSize x (weightdim*dataDim)). - * - Output: the shape of output is (batchSize x dataDim). + * - Input: the the size of the first input is weightDim, + * and the size of the second input is weightdim * dataDim. + * - Output: the sizeof the output is dataDim * \f[ - * out[i][j] = \sum_{j}(in0(i, j) * in1(i,j + i * dataDim)), - * i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1) + * out(j) = \sum_{i}(in0(i) * in1(i,j + i * dataDim)), + * i = 0,1,...,(weightDim-1); j = 0, 1,...,(dataDim-1) * \f] + * Note that the above computation is for one sample. Multiple samples are + * processed in one batch. * - * The config file api is convex_comb_layer. + * The config file api is linear_comb_layer. */ class ConvexCombinationLayer : public Layer { protected: diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp index b10bd1d886ecf42170914c619b7b4040d984501d..05a70aeff5e8ff3789bca966d351bffc8efb1cb3 100644 --- a/paddle/gserver/layers/CosSimLayer.cpp +++ b/paddle/gserver/layers/CosSimLayer.cpp @@ -48,7 +48,7 @@ void CosSimLayer::forward(PassType passType) { REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str()); MatrixPtr prevOut1 = getInputValue(0); MatrixPtr prevOut2 = getInputValue(1); - outV->cosSim(*prevOut1, *prevOut2, kCosSimScale_); + outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale()); } } @@ -59,7 +59,7 @@ void CosSimLayer::backward(const UpdateCallback& callback) { outG->cosSimDerivative(*this->getOutputValue(), *getInputValue(0), *getInputValue(1), *getInputGrad(0), - *getInputGrad(1), kCosSimScale_); + *getInputGrad(1), config_.cos_scale()); } } diff --git a/paddle/gserver/layers/CosSimLayer.h b/paddle/gserver/layers/CosSimLayer.h index 9b0e53335b2503513ce11a4ab19f2199acfee499..65eb807ab2e6f16aab5ef2a9b08d697868c743a3 100644 --- a/paddle/gserver/layers/CosSimLayer.h +++ b/paddle/gserver/layers/CosSimLayer.h @@ -36,7 +36,7 @@ namespace paddle { class CosSimLayer : public Layer { public: explicit CosSimLayer(const LayerConfig& config) - : Layer(config), kCosSimScale_(5.0f) {} + : Layer(config) {} ~CosSimLayer() {} @@ -44,8 +44,6 @@ public: void forward(PassType passType); void backward(const UpdateCallback& callback = nullptr); - - const real kCosSimScale_; }; } // namespace paddle diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index cef8772fc254f98d676e6fb89042487315280c61..3c6d13b0bf92ea98eb5c3331a1fdff6b177529b6 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -115,29 +115,11 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { create(tmpBiasGrad_, 1, channels_, &betaGrad); } - // because of the different api of cudnn v4 and v5. - if (hl_get_cudnn_lib_version() < 5000) { - if (weight_->getWGrad()) { - create(tmpWGrad_, 1, channels_, &gammaGrad); - } - if (biases_ && biases_->getWGrad()) { - create(tmpBiasGrad_, 1, channels_, &betaGrad); - } - } - hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad, ioDesc_, inGrad, bnParamDesc_, gamma, gammaGrad, betaGrad, EPS, savedMean, savedInvVar); - // because of the different api of cudnn v4 and v5. - if (hl_get_cudnn_lib_version() < 5000) { - if (weight_->getWGrad() && biases_->getWGrad()) { - weight_->getWGrad()->add(*tmpWGrad_); - biases_->getWGrad()->add(*tmpBiasGrad_); - } - } - { REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); biases_->getParameterPtr()->incUpdate(callback); diff --git a/paddle/utils/CustomStackTrace.cpp b/paddle/utils/CustomStackTrace.cpp index 50d7f5402f586771194fa5b1578293b7614ea1f2..232a478ecd93a7dcb7da7b02a5a1af37a1d1bc43 100644 --- a/paddle/utils/CustomStackTrace.cpp +++ b/paddle/utils/CustomStackTrace.cpp @@ -14,9 +14,44 @@ limitations under the License. */ #include "CustomStackTrace.h" +#include "CommandLineParser.h" +#include + +P_DEFINE_bool(layer_stack_error_only_current_thread, + true, + "Dump current thread or whole process layer stack when signal error " + "occurred. true means only dump current thread layer stack"); namespace paddle { CustomStackTrace gLayerStackTrace; +static std::mutex gLayerStackTraceMtx; +void installLayerStackTracer() { + logging::installFailureWriter([](const char* data, int sz) { + std::lock_guard guard(gLayerStackTraceMtx); + if (!gLayerStackTrace.empty()) { + size_t curTid = -1UL; + std::hash hasher; + gLayerStackTrace.dump([&curTid, &hasher](std::thread::id tid, + bool* isForwarding, + const std::string& layerName) { + if (curTid != hasher(tid)) { + if (curTid != -1UL) { + std::cerr << std::endl; + } + curTid = hasher(tid); + std::cerr << "Thread [" << tid << "] "; + if (isForwarding) { + std::cerr << (*isForwarding ? "Forwarding ": "Backwarding "); + } + } + std::cerr << layerName << ", "; + }, FLAGS_layer_stack_error_only_current_thread); + std::cerr << std::endl; + } + std::cerr.write(data, sz); + }); +} + } // namespace paddle diff --git a/paddle/utils/CustomStackTrace.h b/paddle/utils/CustomStackTrace.h index e1b2d2d8e5ee6ce572b10b94a42fb285078dddc1..774c4db2b9be40c38286ef1248bf77746949fd6b 100644 --- a/paddle/utils/CustomStackTrace.h +++ b/paddle/utils/CustomStackTrace.h @@ -15,6 +15,9 @@ limitations under the License. */ #pragma once #include +#include +#include +#include #include "ThreadLocal.h" @@ -29,25 +32,18 @@ namespace paddle { * @code{.cpp} * * paddle::CustomStackTrace stack; - * PASS_TEST=0; * for (auto& layer : layers){ * stack.push(layer->getName()); - * layer->forward(passType); + * layer->forward(); * } - * for (auto& layer : layers){ + * + * stack.pop(""); // mark under pop stage. + * + * for (auto it = layers.rbegin(); it != layers.rend(); ++it){ + * auto& layer = *it; * layer->backward(passType); * stack.pop(layer->getName()); * } - * - * if(passType == PASS_TEST) { - * stack.clear(); - * } - * else { - * stack.dump([](const std::string& layername){ - * LOG(INFO) << "LayerName: " << layername; - * }) - * } - * * * @endcode */ @@ -55,45 +51,141 @@ template class CustomStackTrace{ public: /** - * @brief Pop out an item from the top of the stack. For safety the item - * will be poped should equal to ip. + * @brief Pop out an item from the top of the stack if item == top. + * Else, just set status to popping. */ - void pop(const T& ip) { - auto& p = *logstack_; - CHECK_EQ(ip, p.top()); - p.pop(); + void pop(const T& item) { + pushing() = false; + auto& s = this->stack(); + if (item == s.top()) { + s.pop(); + } } + /** - * @brief Empty the stack by sequence from top to button. - * @param[in] callback A function deal with each item while dumping. - * It must have and only have a in parameter which is the stack item. + * @brief clear current thread stack. */ - template - void dump(Callback callback) { - auto& p = *logstack_; - while (!p.empty()) { - callback(p.top()); - p.pop(); + void clear() { + auto& s = stack(); + while (!s.empty()) { + s.pop(); } } + /** - * @brief Only empty the stack. + * @brief return true if all thread's stack is empty. + * @return true if empty */ - void clear() { - dump([](const T& ip){}); + bool empty() const { + std::lock_guard g(this->mtx_); + for (auto p : this->stackBuffers_) { + std::stack& s = *p.second; + if (!s.empty()) { + return false; + } + } + return true; + } + + + /** + * @brief DumpCallback Type. It will be invoked many times by dump method. + * + * The first parameter is stack thread id. + * The second parameter is the last action of stack is push or not. + * The third parameter is the item in stack. + */ + typedef std::function DumpCallback; + + /** + * Dump all thread stack, and all stack will be cleared. + */ + void dump(const DumpCallback& callback, bool onlyCurrentThread = false) { + std::lock_guard g(this->mtx_); + for (auto p : this->stackBuffers_) { + std::thread::id tid = p.first; + if (onlyCurrentThread && tid != std::this_thread::get_id()) { + continue; + } + std::stack& s = *p.second; + bool* isPush = nullptr; + auto it = this->pushingBuffers_.find(tid); + if (it != this->pushingBuffers_.end()) { + isPush = it->second; + } + + while (!s.empty()) { + callback(tid, isPush, s.top()); + s.pop(); + } + } } + /** - * @brief Push item ip to the top of the stack. + * @brief Push item to current thread stack. */ - void push(const T& ip) { - auto& p = *logstack_; - p.push(ip); + void push(const T& item) { + pushing() = true; + auto& p = this->stack(); + p.push(item); } private: - ThreadLocalD > logstack_; + /** + * Get thread local attribute, and save them into a map (threadId => TYPE*) + * + * @tparam TYPE thread local attribute type. + * @param threadLocal Thread Local object. + * @param buffers a map from threadId to TYPE* + */ + template + inline TYPE& getThreadLocal( + ThreadLocal& threadLocal, + std::unordered_map& buffers) { + TYPE* retv = threadLocal.get(false); + if (retv) { + return *retv; + } else { + std::lock_guard guard(this->mtx_); + retv = threadLocal.get(); + auto id = std::this_thread::get_id(); + buffers.insert({id, retv}); + return *retv; + } + } + + /** + * @brief Get thread local stack reference. + */ + std::stack& stack() { + return this->getThreadLocal(this->logStack_, + this->stackBuffers_); + } + + /** + * @brief Get thread local pushing flag. + */ + bool& pushing() { + return this->getThreadLocal(this->isPushing_, + this->pushingBuffers_); + } + +private: + mutable std::mutex mtx_; + + std::unordered_map* > stackBuffers_; + std::unordered_map pushingBuffers_; + ThreadLocal isPushing_; + ThreadLocal > logStack_; }; extern CustomStackTrace gLayerStackTrace; +/** + * @brief Install a failure handler to print layer stack when error. + */ +extern void installLayerStackTracer(); + } // namespace paddle diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 1c1d75dc5bed98848fcb03366b383201ee6f5024..d8c3376fb18c48185abdcb7a6d65fa56f0eaa290 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -129,13 +129,7 @@ void runInitFunctions() { void initMain(int argc, char** argv) { initializeLogging(argc, argv); - logging::installFailureWriter([](const char* data, int sz) { - std::cerr << "Current Layer forward/backward stack is " << std::endl; - gLayerStackTrace.dump([](const std::string& layername){ - std::cerr << "LayerName: " << layername << std::endl; - }); - std::cerr.write(data, sz); - }); + installLayerStackTracer(); std::string line; for (int i = 0; i < argc; ++i) { line += argv[i]; diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt index 147ee3f6d6d86775f2f8c7839c79180f1daffa76..be59a785ecf366dc38a01ac53642eb137abec798 100644 --- a/paddle/utils/tests/CMakeLists.txt +++ b/paddle/utils/tests/CMakeLists.txt @@ -2,3 +2,13 @@ add_simple_unittest(test_CommandLineParser) add_simple_unittest(test_Logging) add_simple_unittest(test_Thread) add_simple_unittest(test_StringUtils) +add_simple_unittest(test_CustomStackTrace) + +add_executable( + test_CustomStackTracePrint + test_CustomStackTracePrint.cpp +) +link_paddle_exe(test_CustomStackTracePrint) +add_test(NAME test_CustomStackTracePrint + COMMAND ${PROJ_ROOT}/paddle/utils/tests/test_CustomStackTracePrint.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/paddle/utils/tests/test_CustomStackTrace.cpp b/paddle/utils/tests/test_CustomStackTrace.cpp new file mode 100644 index 0000000000000000000000000000000000000000..26ca4c678a650df50d372b0fbb4c3e03d52f91df --- /dev/null +++ b/paddle/utils/tests/test_CustomStackTrace.cpp @@ -0,0 +1,95 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "paddle/utils/CustomStackTrace.h" +#include "paddle/utils/CommandLineParser.h" +#include "paddle/utils/Util.h" +#include "paddle/utils/Locks.h" + +P_DEFINE_int32(test_thread_num, 10, "testing thread number"); + +void testNormalImpl(const std::function&, + size_t, size_t, + paddle::ThreadBarrier&, + paddle::ThreadBarrier&)>& callback) { + paddle::CustomStackTrace tracer; + paddle::ThreadBarrier doneBarrier(FLAGS_test_thread_num + 1); + paddle::ThreadBarrier startBarrier(FLAGS_test_thread_num + 1); + constexpr size_t countDown = 10; + constexpr size_t layerSize = 1000; + std::vector> threads; + threads.reserve(FLAGS_test_thread_num); + + for (int32_t i=0; i < FLAGS_test_thread_num; ++i) { + threads.emplace_back(new std::thread([&tracer, &countDown, &layerSize, + &startBarrier, &doneBarrier, + &callback]{ + callback(tracer, countDown, layerSize, startBarrier, doneBarrier); + })); + } + size_t cntDown = countDown; + while (cntDown-- > 0) { + startBarrier.wait(); + doneBarrier.wait(); + ASSERT_TRUE(tracer.empty()); + } + + for (auto& thread : threads) { + thread->join(); + } +} + + +TEST(CustomStackTrace, normalTrain) { + testNormalImpl([](paddle::CustomStackTrace& tracer, + size_t countDown, size_t layerSize, + paddle::ThreadBarrier& start, paddle::ThreadBarrier& finish){ + while (countDown-- > 0) { + start.wait(); + for (size_t i=0; i < layerSize; ++i) { + tracer.push("layer_" + std::to_string(i)); + } + tracer.pop(""); + for (size_t i=0; i < layerSize; ++i) { + tracer.pop("layer_" + std::to_string(layerSize - 1 - i)); + } + finish.wait(); + } + }); +} + +TEST(CustomStackTrace, normalTest) { + testNormalImpl([] (paddle::CustomStackTrace& tracer, + size_t countDown, size_t layerSize, + paddle::ThreadBarrier& start, paddle::ThreadBarrier& finish){ + while (countDown-- > 0) { + start.wait(); + for (size_t i=0; i < layerSize; ++i) { + tracer.push("layer_" + std::to_string(i)); + } + tracer.clear(); // in forward test, tracer will clear after forward. + finish.wait(); + } + }); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + paddle::initMain(argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/utils/tests/test_CustomStackTracePrint.cpp b/paddle/utils/tests/test_CustomStackTracePrint.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c19c98614e6a7d6285990aa19849131579f7307b --- /dev/null +++ b/paddle/utils/tests/test_CustomStackTracePrint.cpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/utils/Util.h" +#include "paddle/utils/CustomStackTrace.h" + +int main(int argc, char** argv) { + paddle::initMain(argc, argv); + + for (size_t i=0; i < 1000; ++i) { + paddle::gLayerStackTrace.push("layer_" + std::to_string(i)); + if (i == 998) { + throw "Unhandle exception"; + } + } + + return 0; +} diff --git a/paddle/utils/tests/test_CustomStackTracePrint.sh b/paddle/utils/tests/test_CustomStackTracePrint.sh new file mode 100755 index 0000000000000000000000000000000000000000..b5543485f365adee49629578d470a14e0c742547 --- /dev/null +++ b/paddle/utils/tests/test_CustomStackTracePrint.sh @@ -0,0 +1,15 @@ +#!/bin/bash +echo "Test Custom Stack Trace print correct result when fail" +./test_CustomStackTracePrint >customStackTraceLog 2>&1 +if [ $? -eq 0 ]; then + exit 1 +else + set -e + TEXT="" + for ((i=0; i<=998; i++)) + do + TEXT="layer_$i, "$TEXT + done + TEXT="Forwarding "$TEXT + grep -q "$TEXT" customStackTraceLog +fi diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 68cc40247041093d3eec6dc93b22d358f4cbbaa1..fd9a003bb018c87fb8e8e2992390f27edfd72f4b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -22,6 +22,8 @@ find_python_module(pip REQUIRED) find_python_module(wheel REQUIRED) find_python_module(google.protobuf REQUIRED) +add_subdirectory(paddle/trainer_config_helpers/tests) + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/ DESTINATION opt/paddle/share/wheels ) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index aed317df67bd0880e23d8fb98cb3073d2be7030c..7a6053940178db472999ed7a0d4c49cce40680e4 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1623,7 +1623,7 @@ class BatchNormLayer(LayerBase): # Also based on cudnn version. use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \ ((not parallel_nn) or self.config.device > -1) and \ - cudnn_version >= 4000 + cudnn_version >= 4007 self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm" super(BatchNormLayer, self).__init__(name, self.layer_type, 0, active_type=active_type, @@ -2273,6 +2273,9 @@ class ConvexCombinationLayer(LayerBase): name, 'convex_comb', size, inputs=inputs, device=device) config_assert(len(self.inputs) == 2, 'ConvexCombinationLayer must have 2 inputs') + config_assert( + size * self.get_input_layer(0).size == self.get_input_layer(1).size, + 'Wrong input size for ConvexCombinationLayer') self.set_layer_size(size) @config_layer('interpolation') @@ -2322,6 +2325,9 @@ class CosSimVecMatLayer(LayerBase): self.config.cos_scale = cos_scale config_assert(len(self.inputs) == 2, 'CosSimVecMatLayer must have 2 inputs') + config_assert( + size * self.get_input_layer(0).size == self.get_input_layer(1).size, + 'Wrong input size for CosSimVecMatLayer') @config_layer('sampling_id') class SamplingIdLayer(LayerBase): @@ -2370,6 +2376,7 @@ class CosSimLayer(LayerBase): self, name, inputs, + cos_scale=5, device=None): super(CosSimLayer, self).__init__( name, 'cos', 1, inputs=inputs, device=device) @@ -2377,6 +2384,7 @@ class CosSimLayer(LayerBase): config_assert( self.get_input_layer(0).size == self.get_input_layer(1).size, 'inputs of CosSimLayer must have same dim') + self.config.cos_scale = cos_scale @config_layer('tensor') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b7e5f566bb8c39fa6ea9ed491f28fa046bba71ee..f3f0077f9798f7e2097ae8cd4f39ce270a49b28f 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -47,6 +47,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", 'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer', 'tensor_layer', 'selective_fc_layer', 'sampling_id_layer', 'slope_intercept_layer', 'trans_full_matrix_projection', + 'linear_comb_layer', 'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', 'multi_binary_label_cross_entropy', @@ -70,7 +71,8 @@ class LayerType(object): POOLING_AVG = 'average' FC_LAYER = "fc" COST = 'cost' - COSINE_SIM = 'cos_vm' + COSINE_SIM_VEC = 'cos_vm' + COSINE_SIM = 'cos' HSIGMOID = 'hsigmoid' CONV_LAYER = "conv" POOL_LAYER = "pool" @@ -102,7 +104,7 @@ class LayerType(object): SEL_FC_LAYER = "selective_fc" SAMPLING_ID_LAYER = "sampling_id" SLOPE_INTERCEPT_LAYER = "slope_intercept" - CONVEX_COMBINATION_LAYER = "convex_comb" + LINEAR_COMBINATION_LAYER = "convex_comb" BLOCK_EXPAND = "blockexpand" CTC_LAYER = "ctc" @@ -171,6 +173,8 @@ class LayerOutput(object): assert LayerType.is_layer_type(layer_type) self.name = name self.layer_type = layer_type + if parents is not None and type(parents) != list: + parents = [parents] self.parents = [] if parents is None else parents self.activation = activation self.num_filters = num_filters @@ -512,7 +516,7 @@ class MixedLayerType(LayerOutput): :rtype: MixedLayerType """ if not self.finalized: - assert isinstance(other, Projection) + assert isinstance(other, Projection) or isinstance(other, Operator) self.inputs.append(other) self.parents.append(other.origin) return self @@ -1169,13 +1173,16 @@ def power_layer(input, weight, name=None, layer_attr=None): @layer_support() def scaling_layer(input, weight, name=None, layer_attr=None): """ - A layer for each row of a matrix, multiplying with a element of a vector. + A layer for multiplying input vector by weight scalar. .. math:: - y.row[i] = w[i] * x.row[i] + y = w x - where :math:`x` is (batchSize x dataDim) input, :math:`w` is - (batchSize x 1) weight vector, and :math:`y` is (batchSize x dataDim) output. + where :math:`x` is size=dataDim input, :math:`w` is size=1 weight, + and :math:`y` is size=dataDim output. + + Note that the above computation is for one sample. Multiple samples are + processed in one batch. The example usage is: @@ -1249,11 +1256,14 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): .. math:: similarity = cos(\\theta) = {\\mathbf{a} \\cdot \\mathbf{b} - \\over \\|\\mathbf{b}\\| \\|\\mathbf{b}\\|} + \\over \\|\\mathbf{a}\\| \\|\\mathbf{b}\\|} + + The size of a is M, size of b is M*N, + Similarity will be calculated N times by step M. The output size is + N. The scale will be multiplied to similarity. - And the input dimension is :math:`a \in R^M`, :math:`b \in R^{MN}`. The - similarity will be calculated N times by step M. The output dimension is - :math:`R^N`. The scale will be multiplied to similarity. + Note that the above computation is for one sample. Multiple samples are + processed in one batch. :param name: layer name :type name: basestring @@ -1270,14 +1280,23 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): :return: LayerOutput object. :rtype: LayerOutput """ - Layer( - name=name, - type=LayerType.COSINE_SIM, - size=size, - cos_scale=scale, - inputs=[a.name, b.name], - **ExtraLayerAttribute.to_kwargs(layer_attr) - ) + if size == 1: + Layer( + name=name, + type=LayerType.COSINE_SIM, + cos_scale=scale, + inputs=[a.name, b.name], + **ExtraLayerAttribute.to_kwargs(layer_attr) + ) + else: + Layer( + name=name, + type=LayerType.COSINE_SIM_VEC, + size=size, + cos_scale=scale, + inputs=[a.name, b.name], + **ExtraLayerAttribute.to_kwargs(layer_attr) + ) return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b]) @wrap_name_default() @@ -2909,29 +2928,37 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0): @wrap_name_default() -def convex_comb_layer(input, size, name=None): +def linear_comb_layer(weights, vectors, size, name=None): """ - A layer for convex weighted average of vectors takes two inputs. - - Input: a vector containing the convex weights (batchSize x weightdim), - and a matrix in a vector form (batchSize x (weightdim * datadim)). - - Output: a vector (batchSize * datadim). + A layer for weighted sum of vectors takes two inputs. + - Input: size of weights is M + size of vectors is M*N + - Output: a vector of size=N .. math:: - y[i][j] = \sum_{j}(x_{1}(i, j) * x_{2}(i,j + i * dataDim)), + z(i) = \sum_{j=0}^{M-1} x(j) y(i+Nj) + where :math:`0 \le i \le N-1` + + Or in the matrix notation: + + .. math:: - i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1) + z = x^T Y In this formular: - - :math:`x_{1}`: the first input. - - :math:`x_{2}`: the second input. - - :math:`y`: the output. + - :math:`x`: weights + - :math:`y`: vectors. + - :math:`z`: the output. + + Note that the above computation is for one sample. Multiple samples are + processed in one batch. The simple usage is: .. code-block:: python - convex_comb = convex_comb_layer(input=inputs, + linear_comb = linear_comb_layer(weighs=weight, vectors=vectors, size=elem_dim) :param input: The input layers. @@ -2944,15 +2971,16 @@ def convex_comb_layer(input, size, name=None): :rtype: LayerOutput """ - assert isinstance(input, list) or isinstance(input, tuple) - assert len(input) == 2 Layer( name=name, - type=LayerType.CONVEX_COMBINATION_LAYER, + type=LayerType.LINEAR_COMBINATION_LAYER, size=size, - inputs=[Input(input[0].name), Input(input[1].name)], + inputs=[Input(weights.name), Input(vectors.name)], ) - return LayerOutput(name, LayerType.CONVEX_COMBINATION_LAYER, input, size=size) + return LayerOutput(name, LayerType.LINEAR_COMBINATION_LAYER, + [weights, vectors], size=size) + +convex_comb_layer = linear_comb_layer @wrap_name_default() def block_expand_layer(input, diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..611fb855a8c9ad6679167105dd737c995b23c209 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt @@ -0,0 +1,5 @@ +#################### test_config_parser ######################### +add_test(NAME layers_test + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + python ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py + WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) diff --git a/python/paddle/trainer_config_helpers/tests/layers_test.py b/python/paddle/trainer_config_helpers/tests/layers_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3b55667354750066a7d3ab3a0af59eb9e7d47d86 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/layers_test.py @@ -0,0 +1,19 @@ +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer.config_parser import parse_config_and_serialize + +if __name__ == '__main__': + parse_config_and_serialize( + 'trainer_config_helpers/tests/layers_test_config.py', '') diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py new file mode 100644 index 0000000000000000000000000000000000000000..ec171fc6013f454da78570c96e64240017e849b9 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py @@ -0,0 +1,43 @@ +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +num_classes = 5 + +x = data_layer(name="input1", size=3) +y = data_layer(name="input2", size=5) + +x1 = fc_layer(input=x, size=5) +y1 = fc_layer(input=y, size=5) +y2 = fc_layer(input=y, size=15) + +cos1 = cos_sim(a=x1, b=y1) +cos3 = cos_sim(a=x1, b=y2, size=3) + +linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3) + +out = fc_layer(input=[cos1, cos3, linear_comb], + size=num_classes, + act=SoftmaxActivation()) + +outputs(classification_cost(out, data_layer(name="label", size=num_classes))) + +settings( + batch_size=10, + learning_rate=2e-3, + learning_method=AdamOptimizer(), + regularization=L2Regularization(8e-4), + gradient_clipping_threshold=25 +)