diff --git a/Dockerfile b/Dockerfile
index b6f99ca539d077164c71d797a5ccda7b1b5c44ba..39af60966b6cab7d8b9e644f4ea658613f8ba518 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,7 +30,8 @@ RUN apt-get update && \
     python-numpy python-matplotlib gcc g++ \
     automake locales clang-format-3.8 swig doxygen cmake \
     liblapack-dev liblapacke-dev libboost-dev \
-    clang-3.8 llvm-3.8 libclang-3.8-dev && \
+    clang-3.8 llvm-3.8 libclang-3.8-dev \
+    net-tools && \
     apt-get clean -y
 
 # Install Go
diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index 1efa74ecda4170332d96603ca2253c68468474f9..59bd8b91255944a8ef702edd389214c17d0cb35d 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -130,7 +130,7 @@ recurrent_group
 ---------------
 .. autoclass:: paddle.v2.layer.recurrent_group
   :noindex:
- 
+
 lstm_step
 ---------
 .. autoclass:: paddle.v2.layer.lstm_step
@@ -145,12 +145,12 @@ beam_search
 ------------
 .. autoclass:: paddle.v2.layer.beam_search
   :noindex:
- 
+
 get_output
 ----------
 .. autoclass:: paddle.v2.layer.get_output
   :noindex:
- 
+
 Mixed Layer
 ===========
 
@@ -203,7 +203,7 @@ trans_full_matrix_projection
 ----------------------------
 .. autoclass:: paddle.v2.layer.trans_full_matrix_projection
   :noindex:
- 
+
 Aggregate Layers
 ================
 
@@ -434,10 +434,26 @@ smooth_l1_cost
 .. autoclass:: paddle.v2.layer.smooth_l1_cost
   :noindex:
 
-Check Layer 
+Check Layer
 ============
 
 eos
 ---
 .. autoclass:: paddle.v2.layer.eos
   :noindex:
+
+Miscs
+=====
+
+dropout
+--------------
+.. autoclass:: paddle.v2.layer.dropout
+  :noindex:
+
+Activation with learnable parameter
+===================================
+
+prelu
+--------
+.. autoclass:: paddle.v2.layer.prelu
+  :noindex:
diff --git a/doc/api/v2/config/networks.rst b/doc/api/v2/config/networks.rst
index b2a617fff134035c04eeabbbaf6d9cbe2a525f1c..6e813ab1a820d068ea3e54cad6178f1cf928eadc 100644
--- a/doc/api/v2/config/networks.rst
+++ b/doc/api/v2/config/networks.rst
@@ -125,11 +125,3 @@ simple_attention
   :members: simple_attention
   :noindex:
 
-Miscs
-=====
-
-dropout_layer
---------------
-.. automodule:: paddle.v2.networks
-    :members: dropout_layer
-    :noindex:
diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index 9898dc083ebb1783a0e2ddd12afaa9c3d5a79e98..47ca1833967ee705d6558b1dad06a6335b30f03a 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -8,6 +8,7 @@ add_subdirectory(gserver)
 add_subdirectory(pserver)
 add_subdirectory(trainer)
 add_subdirectory(scripts)
+add_subdirectory(strings)
 
 # Do not build go directory until go cmake is working smoothly.
 # if(CMAKE_Go_COMPILER)
diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt
index e147659566dba6cfbfd677e3b616bdaa4a73485c..071bc36c2ded51ba977350aeae15f6d244cea5be 100644
--- a/paddle/api/CMakeLists.txt
+++ b/paddle/api/CMakeLists.txt
@@ -41,6 +41,7 @@ SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
   paddle_network
   paddle_proto
   ${external_project_dependencies}
+  ${RDMA_LIBS}
 )
 
 IF(APPLE)
@@ -73,6 +74,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
     ${CMAKE_DL_LIBS}
     ${EXTERNAL_LIBS}
     ${CMAKE_THREAD_LIBS_INIT}
+    ${RDMA_LD_FLAGS}
     ${START_END}
 )
 
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 6d9365af2d14673146d9e427138bf6dd5f5b41b6..5beced3bb5a1050078f88dfd4350a2df71d27f35 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -632,7 +632,7 @@ void Argument::printValueString(std::ostream& stream,
                                 const std::string& prefix) const {
   std::unordered_map<std::string, std::string> out;
   getValueString(&out);
-  for (auto field : {"value", "id", "sequence pos", "sub-sequence pos"}) {
+  for (auto field : {"value", "ids", "sequence pos", "sub-sequence pos"}) {
     auto it = out.find(field);
     if (it != out.end()) {
       stream << prefix << field << ":\n" << it->second;
diff --git a/paddle/pserver/LightNetwork.cpp b/paddle/pserver/LightNetwork.cpp
index 8c8ba0a2e51b85bde0544c6780b07130336a6bdd..922f25734dee0a6db7fbcfcef3d29d2bad5b7858 100644
--- a/paddle/pserver/LightNetwork.cpp
+++ b/paddle/pserver/LightNetwork.cpp
@@ -383,20 +383,23 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
   setOption(sockfd);
 
   /// Now connect to the server
-  int retry_second = 0;
-  int error = 0;
+  int retry_count = 0;
   do {
-    error = connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr));
-    if (error == ECONNREFUSED) {
+    if (connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) == 0) {
+      break;
+    }
+
+    if (errno == ECONNREFUSED) {
       LOG(WARNING) << "connection refused by pserver, try again!";
-      if (retry_second++ >= 7) {
+      if (retry_count++ >= 7) {
         LOG(FATAL) << "connection refused by pserver, maybe pserver failed!";
       }
       std::this_thread::sleep_for(std::chrono::seconds(1));
     } else {
-      PCHECK(error >= 0) << "ERROR connecting to " << serverAddr;
+      // connect() failed for a reason other than ECONNREFUSED; errno is
+      // non-zero here, so this check always fires and aborts with context.
+      PCHECK(errno == 0) << "ERROR connecting to " << serverAddr << ":"
+                         << serverPort << " errno: " << errno;
     }
-  } while (error == ECONNREFUSED);
+  } while (errno == ECONNREFUSED);
 
   channel_.reset(new SocketChannel(sockfd, serverAddr));
   tcpRdma_ = F_TCP;
diff --git a/paddle/strings/CMakeLists.txt b/paddle/strings/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4e55eecd484c0e218ecd51bbd19b3eb4f6f92a25
--- /dev/null
+++ b/paddle/strings/CMakeLists.txt
@@ -0,0 +1,2 @@
+cc_library(stringpiece SRCS stringpiece.cc)
+cc_test(stringpiece_test SRCS stringpiece_test.cc DEPS stringpiece glog gflags)
diff --git a/paddle/strings/stringpiece.cc b/paddle/strings/stringpiece.cc
new file mode 100644
index 0000000000000000000000000000000000000000..415b3558d5dfffde26275bcb16ea3922424ca9f3
--- /dev/null
+++ b/paddle/strings/stringpiece.cc
@@ -0,0 +1,141 @@
+/*
+ Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "paddle/strings/stringpiece.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <iosfwd>
+#include <stdexcept>
+
+namespace paddle {
+
+StringPiece::StringPiece() : data_(NULL), size_(0) {}
+
+StringPiece::StringPiece(const char* d, size_t n) : data_(d), size_(n) {
+  if (d == NULL && n != 0)
+    throw std::invalid_argument(
+        "StringPiece requires len to be 0 for NULL data");
+}
+
+StringPiece::StringPiece(const char* s) : data_(s) {
+  size_ = (s == NULL) ? 0 : strlen(s);
+}
+
+StringPiece::StringPiece(const std::string& s)
+    : data_(s.data()), size_(s.size()) {}
+
+char StringPiece::operator[](size_t n) const {
+  if (n >= len())
+    throw std::invalid_argument("index out of StringPiece length");
+  return data_[n];
+}
+
+int Compare(StringPiece a, StringPiece b) {
+  const size_t min_len = (a.len() < b.len()) ? a.len() : b.len();
+  int r = memcmp(a.data(), b.data(), min_len);
+  if (r == 0) {
+    if (a.len() < b.len())
+      return -1;
+    else if (a.len() > b.len())
+      return 1;
+  }
+  return r;
+}
+
+bool operator==(StringPiece x, StringPiece y) {
+  return ((x.len() == y.len()) &&
+          (x.data() == y.data() || memcmp(x.data(), y.data(), x.len()) == 0));
+}
+
+bool operator!=(StringPiece x, StringPiece y) { return !(x == y); }
+
+bool operator<(StringPiece x, StringPiece y) { return Compare(x, y) < 0; }
+bool operator>(StringPiece x, StringPiece y) { return Compare(x, y) > 0; }
+
+bool operator<=(StringPiece x, StringPiece y) { return Compare(x, y) <= 0; }
+bool operator>=(StringPiece x, StringPiece y) { return Compare(x, y) >= 0; }
+
+bool HasPrefix(StringPiece s, StringPiece x) {
+  return ((s.len() >= x.len()) && (memcmp(s.data(), x.data(), x.len()) == 0));
+}
+
+bool HasSuffix(StringPiece s, StringPiece x) {
+  return ((s.len() >= x.len()) &&
+          (memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0));
+}
+
+StringPiece SkipPrefix(StringPiece s, size_t n) {
+  if (n > s.len())
+    throw std::invalid_argument("Skip distance larger than StringPiece length");
+  return StringPiece(s.data() + n, s.len() - n);
+}
+
+StringPiece SkipSuffix(StringPiece s, size_t n) {
+  if (n > s.len())
+    throw std::invalid_argument("Skip distance larger than StringPiece length");
+  return StringPiece(s.data(), s.len() - n);
+}
+
+StringPiece TrimPrefix(StringPiece s, StringPiece x) {
+  return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s;
+}
+
+StringPiece TrimSuffix(StringPiece s, StringPiece x) {
+  return HasSuffix(s, x) ? SkipSuffix(s, x.len()) : s;
+}
+
+bool Contains(StringPiece s, StringPiece sub) {
+  return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end();
+}
+
+size_t Index(StringPiece s, StringPiece sub) {
+  auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end());
+  return e != s.end() ? e - s.data() : StringPiece::npos;
+}
+
+size_t Find(StringPiece s, char c, size_t pos) {
+  if (pos >= s.len()) {
+    return StringPiece::npos;
+  }
+  const char* result =
+      reinterpret_cast<const char*>(memchr(s.data() + pos, c, s.len() - pos));
+  return result != nullptr ? result - s.data() : StringPiece::npos;
+}
+
+size_t RFind(StringPiece s, char c, size_t pos) {
+  if (s.len() == 0) return StringPiece::npos;
+  for (const char* p = s.data() + std::min(pos, s.len() - 1); p >= s.data();
+       p--) {
+    if (*p == c) {
+      return p - s.data();
+    }
+  }
+  return StringPiece::npos;
+}
+
+StringPiece SubStr(StringPiece s, size_t pos, size_t n) {
+  if (pos > s.len()) pos = s.len();
+  if (n > s.len() - pos) n = s.len() - pos;
+  return StringPiece(s.data() + pos, n);
+}
+
+std::ostream& operator<<(std::ostream& o, StringPiece piece) {
+  return o << piece.ToString();
+}
+
+}  // namespace paddle
diff --git a/paddle/strings/stringpiece.h b/paddle/strings/stringpiece.h
new file mode 100644
index 0000000000000000000000000000000000000000..adff713e86f49349b8f189c1d24584bfc1bb8aa7
--- /dev/null
+++ b/paddle/strings/stringpiece.h
@@ -0,0 +1,105 @@
+/*
+ Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#pragma once
+
+#include <ostream>
+#include <string>
+
+namespace paddle {
+
+// StringPiece points into a std::string object but doesn't own the
+// string.  It is for efficient access to strings.  Like Go's string
+// type.  Note that StringPiece doesn't mutate the underlying string,
+// so it is thread-safe given that the underlying string doesn't
+// change.  Because a StringPiece contains only a pointer and a
+// length, and it doesn't own/manage the string, it is cheap to
+// construct StringPieces and pass them around.
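+//
+// A minimal usage sketch (an illustration of the API declared in this
+// header, not part of the API contract):
+//
+//   std::string str = "hello world";
+//   paddle::StringPiece p(str);        // views str's bytes; copies nothing
+//   p = paddle::SkipPrefix(p, 6);      // p now views "world"
+//   std::string owned = p.ToString();  // copies only when asked to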
+class StringPiece {
+public:
+  static const size_t npos = static_cast<size_t>(-1);
+
+  // We provide non-explicit singleton constructors so users can
+  // pass in a "const char*" or a "string" wherever a "StringPiece"
+  // is expected.  These constructors ensure that if data_ is NULL,
+  // size_ is 0.
+  StringPiece();
+  StringPiece(const char* d, size_t n);
+  StringPiece(const char* d);
+  StringPiece(const std::string& s);
+
+  const char* data() const { return data_; }
+  size_t len() const { return size_; }
+
+  char operator[](size_t n) const;
+
+  // StringPiece doesn't own the string, so both iterator and const
+  // iterator are in fact const char*.
+  typedef const char* const_iterator;
+  typedef const char* iterator;
+  iterator begin() const { return data_; }
+  iterator end() const { return data_ + size_; }
+
+  // Return a string that contains a copy of the referenced data.
+  std::string ToString() const { return std::string(data_, size_); }
+
+private:
+  const char* data_;
+  size_t size_;
+
+  // Intentionally copyable
+};
+
+int Compare(StringPiece a, StringPiece b);
+
+bool operator==(StringPiece x, StringPiece y);
+bool operator!=(StringPiece x, StringPiece y);
+bool operator<(StringPiece x, StringPiece y);
+bool operator>(StringPiece x, StringPiece y);
+bool operator<=(StringPiece x, StringPiece y);
+bool operator>=(StringPiece x, StringPiece y);
+
+bool HasPrefix(StringPiece s, StringPiece prefix);
+bool HasSuffix(StringPiece s, StringPiece suffix);
+
+StringPiece SkipPrefix(StringPiece s, size_t n);
+StringPiece SkipSuffix(StringPiece s, size_t n);
+
+// Skip the prefix (or suffix) if it matches with the string.
+StringPiece TrimPrefix(StringPiece s, StringPiece prefix);
+StringPiece TrimSuffix(StringPiece s, StringPiece suffix);
+
+// Returns whether s contains sub.  Any non-empty s contains an
+// empty sub.
+bool Contains(StringPiece s, StringPiece sub);
+
+// Returns the first occurrence of sub in s, or npos.  If both s and
+// sub are empty, it returns npos; otherwise, if only sub is empty, it
+// returns 0.
+size_t Index(StringPiece s, StringPiece sub);
+
+// Returns the first occurrence of c in s[pos:end], or npos.
+size_t Find(StringPiece s, char c, size_t pos);
+
+// Search range is [0..pos] inclusive.  If pos == npos, search everything.
+size_t RFind(StringPiece s, char c, size_t pos);
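+// For instance: Index("app", "pp") == 1, Find("app", 'p', 2) == 2, and
+// RFind("app", 'p', 1) == 1 -- illustrative values implied by the comments
+// above (see also stringpiece_test.cc below).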
+
+StringPiece SubStr(StringPiece s, size_t pos, size_t n);
+
+// Allow StringPiece to be logged to std::ostream.
+std::ostream& operator<<(std::ostream& o, StringPiece piece);
+
+}  // namespace paddle
diff --git a/paddle/strings/stringpiece_test.cc b/paddle/strings/stringpiece_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2ba66a04f641c3457efa713383484491a213668f
--- /dev/null
+++ b/paddle/strings/stringpiece_test.cc
@@ -0,0 +1,293 @@
+/*
+ Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "paddle/strings/stringpiece.h"
+
+#include <sstream>
+
+#include "gtest/gtest.h"
+
+TEST(StringPiece, Construct) {
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ(NULL, s.data());
+    EXPECT_EQ(0U, s.len());
+  }
+  { EXPECT_THROW(paddle::StringPiece s(NULL, 10000U), std::invalid_argument); }
+  {
+    paddle::StringPiece s(NULL);
+    EXPECT_EQ(0U, s.len());
+  }
+  {
+    std::string a;
+    EXPECT_EQ(0U, a.size());
+    paddle::StringPiece s(a);
+    EXPECT_EQ(0U, s.len());
+  }
+}
+
+TEST(StringPiece, CopyAndAssign) {
+  paddle::StringPiece empty;
+  EXPECT_EQ(0U, empty.len());
+
+  paddle::StringPiece a("hello");
+  paddle::StringPiece b = a;
+  EXPECT_EQ(b.len(), strlen("hello"));
+  EXPECT_EQ(a, b);
+
+  std::string storage("hello");
+  paddle::StringPiece c(storage);
+  EXPECT_EQ(a, c);
+  EXPECT_NE(a.data(), c.data());
+}
+
+TEST(StringPiece, Compare) {
+  {
+    paddle::StringPiece a("hello");
+    paddle::StringPiece b("world");
+    EXPECT_TRUE(a != b);
+    EXPECT_FALSE(a == b);
+    EXPECT_TRUE(a < b);
+    EXPECT_TRUE(a <= b);
+    EXPECT_FALSE(a > b);
+    EXPECT_FALSE(a >= b);
+    EXPECT_LT(Compare(a, b), 0);
+    EXPECT_GT(Compare(b, a), 0);
+  }
+  {
+    paddle::StringPiece a, b;
+    EXPECT_TRUE(a == b);
+    EXPECT_FALSE(a != b);
+    EXPECT_FALSE(a < b);
+    EXPECT_FALSE(a > b);
+    EXPECT_TRUE(a <= b);
+    EXPECT_TRUE(a >= b);
+    EXPECT_EQ(0, Compare(a, b));
+    EXPECT_EQ(0, Compare(b, a));
+  }
+}
+
+TEST(StringPiece, ToString) {
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ(std::string(""), s.ToString());
+  }
+  {
+    paddle::StringPiece s(NULL);
+    EXPECT_EQ(std::string(""), s.ToString());
+  }
+  {
+    paddle::StringPiece s("hello");
+    EXPECT_EQ(std::string("hello"), s.ToString());
+  }
+}
+
+TEST(StringPiece, HasPrefixSuffix) {
+  using paddle::HasPrefix;
+  using paddle::HasSuffix;
+  {
+    paddle::StringPiece s;
+    EXPECT_FALSE(HasPrefix(s, "something"));
+    EXPECT_TRUE(HasPrefix(s, ""));
+    EXPECT_FALSE(HasSuffix(s, "something"));
+    EXPECT_TRUE(HasSuffix(s, ""));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_TRUE(HasPrefix(s, ""));
+    EXPECT_TRUE(HasPrefix(s, "a"));
+    EXPECT_TRUE(HasPrefix(s, "ap"));
+    EXPECT_TRUE(HasPrefix(s, "app"));
+
+    EXPECT_TRUE(HasSuffix(s, ""));
+    EXPECT_TRUE(HasSuffix(s, "p"));
+    EXPECT_TRUE(HasSuffix(s, "pp"));
+    EXPECT_TRUE(HasSuffix(s, "app"));
+  }
+}
+
+TEST(StringPiece, SkipPrefixSuffix) {
+  using paddle::SkipPrefix;
+  using paddle::SkipSuffix;
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ("", SkipPrefix(s, 0));
+    EXPECT_THROW(SkipPrefix(s, 1), std::invalid_argument);
+
+    EXPECT_EQ("", SkipSuffix(s, 0));
+    EXPECT_THROW(SkipSuffix(s, 1), std::invalid_argument);
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_EQ("app", SkipPrefix(s, 0));
+    EXPECT_EQ("pp", SkipPrefix(s, 1));
+    EXPECT_EQ("p", SkipPrefix(s, 2));
+    EXPECT_EQ("", SkipPrefix(s, 3));
+    EXPECT_THROW(SkipPrefix(s, 4), std::invalid_argument);
+
+    EXPECT_EQ("app", SkipSuffix(s, 0));
+    EXPECT_EQ("ap", SkipSuffix(s, 1));
+    EXPECT_EQ("a", SkipSuffix(s, 2));
+    EXPECT_EQ("", SkipSuffix(s, 3));
+    EXPECT_THROW(SkipSuffix(s, 4), std::invalid_argument);
+  }
+}
+
+TEST(StringPiece, TrimPrefixSuffix) {
+  using paddle::TrimPrefix;
+  using paddle::TrimSuffix;
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ("", TrimPrefix(s, ""));
+    EXPECT_EQ("", TrimPrefix(s, "something"));
+
+    EXPECT_EQ("", TrimSuffix(s, ""));
+    EXPECT_EQ("", TrimSuffix(s, "something"));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_EQ("app", TrimPrefix(s, ""));
+    EXPECT_EQ("pp", TrimPrefix(s, "a"));
+    EXPECT_EQ("p", TrimPrefix(s, "ap"));
+    EXPECT_EQ("", TrimPrefix(s, "app"));
+    EXPECT_EQ("app", TrimPrefix(s, "something"));
+
+    EXPECT_EQ("app", TrimSuffix(s, ""));
+    EXPECT_EQ("ap", TrimSuffix(s, "p"));
+    EXPECT_EQ("a", TrimSuffix(s, "pp"));
+    EXPECT_EQ("", TrimSuffix(s, "app"));
+    EXPECT_EQ("app", TrimSuffix(s, "something"));
+  }
+}
+
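+// The two tests below pin down a subtlety of the implementation: an empty
+// StringPiece contains no sub (not even ""), while any non-empty
+// StringPiece contains the empty sub.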
EXPECT_EQ("app", TrimSuffix(s, "")); + EXPECT_EQ("ap", TrimSuffix(s, "p")); + EXPECT_EQ("a", TrimSuffix(s, "pp")); + EXPECT_EQ("", TrimSuffix(s, "app")); + EXPECT_EQ("app", TrimSuffix(s, "something")); + } +} + +TEST(StringPiece, Contains) { + using paddle::Contains; + { + paddle::StringPiece s; + EXPECT_FALSE(Contains(s, "")); + EXPECT_FALSE(Contains(s, "something")); + } + { + paddle::StringPiece s("app"); + EXPECT_TRUE(Contains(s, "")); + EXPECT_TRUE(Contains(s, "a")); + EXPECT_TRUE(Contains(s, "p")); + EXPECT_TRUE(Contains(s, "ap")); + EXPECT_TRUE(Contains(s, "pp")); + EXPECT_TRUE(Contains(s, "app")); + EXPECT_FALSE(Contains(s, "something")); + } +} + +TEST(StringPiece, Index) { + using paddle::Index; + auto npos = paddle::StringPiece::npos; + { + paddle::StringPiece s; + EXPECT_EQ(npos, Index(s, "")); + EXPECT_EQ(npos, Index(s, "something")); + } + { + paddle::StringPiece s("app"); + EXPECT_EQ(0U, Index(s, "")); + EXPECT_EQ(0U, Index(s, "a")); + EXPECT_EQ(1U, Index(s, "p")); + EXPECT_EQ(0U, Index(s, "ap")); + EXPECT_EQ(1U, Index(s, "pp")); + EXPECT_EQ(0U, Index(s, "app")); + EXPECT_EQ(npos, Index(s, "something")); + } +} + +TEST(StringPiece, Find) { + using paddle::Find; + auto npos = paddle::StringPiece::npos; + { + paddle::StringPiece s; + EXPECT_EQ(npos, Find(s, 'a', 0U)); + } + { + paddle::StringPiece s("app"); + EXPECT_EQ(0U, Find(s, 'a', 0U)); + EXPECT_EQ(1U, Find(s, 'p', 0U)); + EXPECT_EQ(1U, Find(s, 'p', 1U)); + EXPECT_EQ(2U, Find(s, 'p', 2U)); + EXPECT_EQ(npos, Find(s, 'z', 2U)); + } +} + +TEST(StringPiece, RFind) { + using paddle::RFind; + auto npos = paddle::StringPiece::npos; + { + paddle::StringPiece s; + EXPECT_EQ(npos, RFind(s, 'a', 0U)); + } + { + paddle::StringPiece s("app"); + EXPECT_EQ(2U, RFind(s, 'p', 2U)); + EXPECT_EQ(0U, RFind(s, 'a', 2U)); + EXPECT_EQ(1U, RFind(s, 'p', 1U)); + EXPECT_EQ(0U, RFind(s, 'a', 0)); + EXPECT_EQ(npos, RFind(s, 'z', 2U)); + } +} + +TEST(StringPiece, SubStr) { + using paddle::SubStr; + { + paddle::StringPiece s; + EXPECT_EQ("", SubStr(s, 0, 0)); + EXPECT_EQ("", SubStr(s, 0, 1)); + EXPECT_EQ("", SubStr(s, 1, 0)); + } + { + paddle::StringPiece s("app"); + EXPECT_EQ("", SubStr(s, 0, 0)); + EXPECT_EQ("", SubStr(s, 1, 0)); + EXPECT_EQ("", SubStr(s, 2, 0)); + EXPECT_EQ("", SubStr(s, 3, 0)); + + EXPECT_EQ("a", SubStr(s, 0, 1)); + EXPECT_EQ("p", SubStr(s, 1, 1)); + EXPECT_EQ("p", SubStr(s, 2, 1)); + EXPECT_EQ("", SubStr(s, 3, 1)); + + EXPECT_EQ("ap", SubStr(s, 0, 2)); + EXPECT_EQ("pp", SubStr(s, 1, 2)); + EXPECT_EQ("p", SubStr(s, 2, 2)); + EXPECT_EQ("", SubStr(s, 3, 2)); + + EXPECT_EQ("app", SubStr(s, 0, 3)); + EXPECT_EQ("pp", SubStr(s, 1, 3)); + EXPECT_EQ("p", SubStr(s, 2, 3)); + EXPECT_EQ("", SubStr(s, 3, 3)); + } +} + +TEST(StringPiece, StreamOutput) { + using paddle::StringPiece; + + std::stringstream o; + o << StringPiece(); + EXPECT_EQ("", o.str()); + + o << StringPiece("hello"); + EXPECT_EQ("hello", o.str()); + + o << StringPiece(); + EXPECT_EQ("hello", o.str()); +} diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index bebb76d9847173740eacf3614f9ba1d1fdf6e60f..e0147b1b37c6574c65ce53e58eccaf6cede91a67 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -73,7 +73,6 @@ To use this from paddle_trainer, paddle_trainer should be called with --config_args=extension_module_name=[MODULE_NAME] ''' - import copy import logging import os @@ -1731,9 +1730,10 @@ class ParameterReluLayer(LayerBase): def __init__(self, name, inputs, partial_sum=1, **args): 
         super(ParameterReluLayer, self).__init__(
             name, self.layer_type, 0, inputs=inputs, **args)
-        config_assert(len(self.inputs) == 1)
-        config_assert(self.input_layer.size % partial_sum == 0)
         input_layer = self.get_input_layer(0)
+        config_assert(len(self.inputs) == 1, "prelu layer has only one input.")
+        config_assert(input_layer.size % partial_sum == 0,
+                      "partial_sum must exactly divide the input size.")
         self.set_layer_size(input_layer.size)
         self.create_input_parameter(0, input_layer.size / partial_sum)
 
@@ -3546,11 +3546,7 @@ def update_g_config():
     return g_config
 
 
-def begin_parse(config_arg_str=''):
-    '''
-    @param config_arg_str: a string of the form var1=val1,var2=val2. It will be
-    passed to config script as a dictionary CONFIG_ARGS
-    '''
+def begin_parse():
     init_config_environment()
     for hook in _parse_config_hooks:
         hook()
@@ -3568,8 +3564,12 @@
 
 
 def parse_config(trainer_config, config_arg_str):
-    begin_parse(config_arg_str)
+    '''
+    @param config_arg_str: a string of the form var1=val1,var2=val2. It will be
+    passed to config script as a dictionary CONFIG_ARGS
+    '''
+
+    begin_parse()
     config_args = {}
 
     if config_arg_str:
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 5667e5ff2bccd38f2da00a3b17ea8bc8e3a6fb8e..5320f5c32ce00f4780cea16abaee718c95707467 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -31,31 +31,31 @@ except ImportError:
 
 import copy
 
 __all__ = [
-    "full_matrix_projection",
-    "AggregateLevel",
-    "ExpandLevel",
-    "identity_projection",
-    "dotmul_projection",
-    "dotmul_operator",
-    "repeat_layer",
-    "seq_reshape_layer",
-    "table_projection",
-    "mixed_layer",
-    "data_layer",
-    "embedding_layer",
-    "fc_layer",
-    "grumemory",
-    "pooling_layer",
-    "lstmemory",
-    "last_seq",
-    "first_seq",
-    "cos_sim",
-    "hsigmoid",
-    "conv_projection",
-    "mse_cost",
-    "regression_cost",
+    'full_matrix_projection',
+    'AggregateLevel',
+    'ExpandLevel',
+    'identity_projection',
+    'dotmul_projection',
+    'dotmul_operator',
+    'repeat_layer',
+    'seq_reshape_layer',
+    'table_projection',
+    'mixed_layer',
+    'data_layer',
+    'embedding_layer',
+    'fc_layer',
+    'grumemory',
+    'pooling_layer',
+    'lstmemory',
+    'last_seq',
+    'first_seq',
+    'cos_sim',
+    'hsigmoid',
+    'conv_projection',
+    'mse_cost',
+    'regression_cost',
     'classification_cost',
-    "LayerOutput",
+    'LayerOutput',
     'img_conv_layer',
     'img_pool_layer',
     'batch_norm_layer',
@@ -121,6 +121,8 @@ __all__ = [
     'smooth_l1_cost',
     'layer_support',
     'multiplex_layer',
+    'dropout_layer',
+    'prelu_layer',
 ]
 
 
@@ -129,26 +131,26 @@ class LayerType(object):
     """
     Layer type enumerations.
     """
 
-    DATA = "data"
-    MIXED_LAYER = "mixed"
-    LSTMEMORY = "lstmemory"
-    GRUMEMORY = "gated_recurrent"
-    SEQUENCE_LAST_INSTANCE = "seqlastins"
-    SEQUENCE_FIRST_INSTANCE = "seqfirstins"
-    SEQUENCE_RESHAPE = "seqreshape"
-    POOLING_MAX = "max"
+    DATA = 'data'
+    MIXED_LAYER = 'mixed'
+    LSTMEMORY = 'lstmemory'
+    GRUMEMORY = 'gated_recurrent'
+    SEQUENCE_LAST_INSTANCE = 'seqlastins'
+    SEQUENCE_FIRST_INSTANCE = 'seqfirstins'
+    SEQUENCE_RESHAPE = 'seqreshape'
+    POOLING_MAX = 'max'
     POOLING_AVG = 'average'
-    FC_LAYER = "fc"
+    FC_LAYER = 'fc'
     COST = 'cost'
     COSINE_SIM_VEC = 'cos_vm'
     COSINE_SIM = 'cos'
     HSIGMOID = 'hsigmoid'
-    CONV_LAYER = "conv"
-    CONVTRANS_LAYER = "convt"
-    EXCONV_LAYER = "exconv"
-    EXCONVTRANS_LAYER = "exconvt"
-    CUDNNCONV_LAYER = "cudnn_conv"
-    POOL_LAYER = "pool"
+    CONV_LAYER = 'conv'
+    CONVTRANS_LAYER = 'convt'
+    EXCONV_LAYER = 'exconv'
+    EXCONVTRANS_LAYER = 'exconvt'
+    CUDNNCONV_LAYER = 'cudnn_conv'
+    POOL_LAYER = 'pool'
     BATCH_NORM_LAYER = 'batch_norm'
     NORM_LAYER = 'norm'
     SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm'
@@ -177,36 +179,38 @@ class LayerType(object):
     EOSID_LAYER = 'eos_id'
     RECURRENT_LAYER = 'recurrent'
 
-    CONV_SHIFT_LAYER = "conv_shift"
-    TENSOR_LAYER = "tensor"
-    SEL_FC_LAYER = "selective_fc"
-    SAMPLING_ID_LAYER = "sampling_id"
-    SLOPE_INTERCEPT_LAYER = "slope_intercept"
-    LINEAR_COMBINATION_LAYER = "convex_comb"
-    BLOCK_EXPAND = "blockexpand"
-    MAXOUT = "maxout"
-    SPP_LAYER = "spp"
-    PAD_LAYER = "pad"
-    MULTIPLEX_LAYER = "multiplex"
-
-    PRINT_LAYER = "print"
-    PRIORBOX_LAYER = "priorbox"
-
-    CTC_LAYER = "ctc"
-    WARP_CTC_LAYER = "warp_ctc"
-    CRF_LAYER = "crf"
-    CRF_DECODING_LAYER = "crf_decoding"
+    CONV_SHIFT_LAYER = 'conv_shift'
+    TENSOR_LAYER = 'tensor'
+    SEL_FC_LAYER = 'selective_fc'
+    SAMPLING_ID_LAYER = 'sampling_id'
+    SLOPE_INTERCEPT_LAYER = 'slope_intercept'
+    LINEAR_COMBINATION_LAYER = 'convex_comb'
+    BLOCK_EXPAND = 'blockexpand'
+    MAXOUT = 'maxout'
+    SPP_LAYER = 'spp'
+    PAD_LAYER = 'pad'
+    MULTIPLEX_LAYER = 'multiplex'
+
+    PRINT_LAYER = 'print'
+    PRIORBOX_LAYER = 'priorbox'
+
+    CTC_LAYER = 'ctc'
+    WARP_CTC_LAYER = 'warp_ctc'
+    CRF_LAYER = 'crf'
+    CRF_DECODING_LAYER = 'crf_decoding'
     NCE_LAYER = 'nce'
 
-    RANK_COST = "rank-cost"
-    LAMBDA_COST = "lambda_cost"
-    HUBER = "huber"
-    CROSS_ENTROPY = "multi-class-cross-entropy"
-    CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm"
-    SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
-    MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
-    SUM_COST = "sum_cost"
-    SMOOTH_L1 = "smooth_l1"
+    RANK_COST = 'rank-cost'
+    LAMBDA_COST = 'lambda_cost'
+    HUBER = 'huber'
+    CROSS_ENTROPY = 'multi-class-cross-entropy'
+    CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
+    SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
+    MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy'
+    SUM_COST = 'sum_cost'
+    SMOOTH_L1 = 'smooth_l1'
+
+    PRELU = 'prelu'
 
     @staticmethod
     def is_layer_type(type_name):
@@ -3768,7 +3772,6 @@ def beam_search(step,
     assert generated_input_index != -1
 
     gipt = input[generated_input_index]
-    assert isinstance(gipt, BaseGeneratedInput)
 
     gipt.bos_id = bos_id
     gipt.eos_id = eos_id
@@ -3788,7 +3791,6 @@ def beam_search(step,
         predict = gipt.after_real_step(step(*args))
 
         eos_layer(input=predict, eos_id=eos_id, name=eos_name)
-
         return predict
 
     tmp = recurrent_group(
@@ -3860,7 +3862,6 @@ def classification_cost(input,
                         label,
                         weight=None,
                         name=None,
-                        top_k=None,
                         evaluator=classification_error_evaluator,
                         layer_attr=None):
     """
@@ -3875,8 +3876,6 @@ def classification_cost(input,
     :param weight: The weight affects the cost, namely the scale of cost.
                    It is an optional argument.
     :type weight: LayerOutput
-    :param top_k: number k in top-k error rate
-    :type top_k: int
     :param evaluator: Evaluator method.
     :param layer_attr: layer's extra attribute.
     :type layer_attr: ExtraLayerAttribute
@@ -3904,7 +3903,7 @@ def classification_cost(input,
         assert isinstance(e.for_classification, bool)
         assert e.for_classification
 
-        e(name=e.__name__, input=input, label=label, weight=weight, top_k=top_k)
+        e(name=e.__name__, input=input, label=label, weight=weight)
 
     if not isinstance(evaluator, collections.Sequence):
         evaluator = [evaluator]
@@ -4725,7 +4724,7 @@ def ctc_layer(input,
                   fc_layer with softmax activation, should be num_classes + 1.
                   The size of ctc_layer should also be num_classes + 1.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -4812,7 +4811,7 @@ def warp_ctc_layer(input,
     - As a native 'softmax' activation is integrated into the warp-ctc library,
       'linear' activation is expected instead in the 'input' layer.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -4873,7 +4872,7 @@ def crf_layer(input,
     A layer for calculating the cost of sequential conditional random field
     model.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -4947,7 +4946,7 @@ def crf_decoding_layer(input,
     this layer will also calculate error. output.value[i] is 1 for incorrect
     decoding or 0 for correct decoding.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -5140,7 +5139,7 @@ def rank_cost(left,
       - :math:`o_i` and :math:`o_j`: the left output and right output.
        Their dimension is one.
 
-    The simple usage:
+    The example usage is:
 
     .. code-block:: python
 
@@ -5197,7 +5196,7 @@ def lambda_cost(input,
     """
     lambdaCost for lambdaRank LTR approach.
 
-    The simple usage:
+    The example usage is:
 
    .. code-block:: python
 
@@ -5255,6 +5254,8 @@ def cross_entropy(input,
     """
     A loss layer for multi-class cross entropy.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = cross_entropy(input=input_layer,
@@ -5301,6 +5302,8 @@ def cross_entropy_with_selfnorm(input,
     A loss layer for multi-class cross entropy with selfnorm.
     Input should be a vector of positive numbers, without normalization.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = cross_entropy_with_selfnorm(input=input_layer,
@@ -5342,6 +5345,8 @@ def sum_cost(input, name=None, layer_attr=None):
     """
     A loss layer which calculates the sum of the input as loss.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = sum_cost(input=input_layer)
@@ -5371,6 +5376,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
     """
     A loss layer for Huber loss.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = huber_cost(input=input_layer,
@@ -5411,6 +5418,8 @@ def multi_binary_label_cross_entropy(input,
     """
     A loss layer for multi binary label cross entropy.
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = multi_binary_label_cross_entropy(input=input_layer,
@@ -5470,6 +5479,8 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
     More details can be found by referring to `Fast R-CNN
     <https://arxiv.org/pdf/1504.08083v2.pdf>`_
 
+    The example usage is:
+
     .. code-block:: python
 
        cost = smooth_l1_cost(input=input_layer,
@@ -5519,6 +5530,8 @@ def multiplex_layer(input, name=None, layer_attr=None):
     where, y is output. :math:`x_{k}` is the k-th input layer and
     :math:`k = x_{0}[i] + 1`.
 
+    The example usage is:
+
     .. code-block:: python
 
        maxid = multiplex_layer(input=layers)
@@ -5551,3 +5564,82 @@ def multiplex_layer(input, name=None, layer_attr=None):
         layer_type=LayerType.MULTIPLEX_LAYER,
         parents=input,
         size=l.config.size)
+
+
+@wrap_name_default("dropout")
+def dropout_layer(input, dropout_rate, name=None):
+    """
+    The example usage is:
+
+    .. code-block:: python
+
+       dropout = dropout_layer(input=input_layer, dropout_rate=0.5)
+
+    :param name: The name of this layer.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param dropout_rate: The probability of dropping out each neuron's output.
+    :type dropout_rate: float
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    return addto_layer(
+        name=name,
+        input=input,
+        act=LinearActivation(),
+        bias_attr=False,
+        layer_attr=ExtraAttr(drop_rate=dropout_rate))
+
+
+@wrap_name_default()
+@layer_support()
+@wrap_param_attr_default()
+def prelu_layer(input,
+                name=None,
+                partial_sum=1,
+                param_attr=None,
+                layer_attr=None):
+    """
+    The Parametric ReLU activation: positive inputs pass through unchanged,
+    while negative inputs are scaled by a learnable weight.
+
+    Reference:
+        Delving Deep into Rectifiers: Surpassing Human-Level Performance on
+        ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf
+
+    .. math::
+       y_i =
+       \\begin{cases}
+       z_i, & \\text{if} \\ z_i > 0 \\\\
+       a_i z_i, & \\text{otherwise}
+       \\end{cases}
+
+    The example usage is:
+
+    .. code-block:: python
+
+       prelu = prelu_layer(input=layers, partial_sum=1)
+
+    :param name: Name of this layer.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param partial_sum: this parameter makes a group of inputs share the same weight.
+
+        - partial_sum = 1: element-wise activation, each element has its own weight.
+        - partial_sum = number of elements in one channel: channel-wise
+          activation, all elements in a channel share the same weight.
+        - partial_sum = number of outputs: all elements share the same weight.
+
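+        For example, an input of size 300 with partial_sum=100 creates
+        300 / 100 = 3 weights, one per group of 100 consecutive elements.
+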
+    :type partial_sum: int
+    :param param_attr: The parameter attribute. See ParameterAttribute for details.
+    :type param_attr: ParameterAttribute|None
+    :param layer_attr: Extra layer configurations. Default is None.
+    :type layer_attr: ExtraLayerAttribute|None
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+
+    assert isinstance(input, LayerOutput), 'prelu_layer only accepts one input'
+    assert isinstance(param_attr, ParameterAttribute)
+
+    l = Layer(
+        name=name,
+        type=LayerType.PRELU,
+        inputs=Input(input.name, **param_attr.attr),
+        partial_sum=partial_sum,
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name=name,
+        layer_type=LayerType.PRELU,
+        parents=input,
+        size=l.config.size)
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index fb533a47e0b0585be6f0e019086993f8b3aa7f38..1bf59ed4840ae69afc5bce49c86a08b60e9603ee 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -26,10 +26,10 @@ from paddle.trainer.config_parser import *
 
 __all__ = [
     'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
-    "img_conv_bn_pool", 'dropout_layer', 'lstmemory_group', 'lstmemory_unit',
-    'small_vgg', 'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group',
-    'simple_gru', 'simple_attention', 'simple_gru2', 'bidirectional_gru',
-    'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs'
+    "img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg',
+    'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'simple_gru',
+    'simple_attention', 'simple_gru2', 'bidirectional_gru', 'text_conv_pool',
+    'bidirectional_lstm', 'inputs', 'outputs'
 ]
 
 ######################################################
@@ -1366,29 +1366,6 @@ def simple_attention(encoded_sequence,
         input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
 
 
-############################################################################
-#                             Miscs                                        #
-############################################################################
-
-
-@wrap_name_default("dropout")
-def dropout_layer(input, dropout_rate, name=None):
-    """
-    @TODO(yuyang18): Add comments.
-
-    :param name:
-    :param input:
-    :param dropout_rate:
-    :return:
-    """
-    return addto_layer(
-        name=name,
-        input=input,
-        act=LinearActivation(),
-        bias_attr=False,
-        layer_attr=ExtraAttr(drop_rate=dropout_rate))
-
-
 def inputs(layers, *args):
     """
     Declare the inputs of network. The order of input should be as same as
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index 981ccbf248391b5db4339570d918404df6033f3d..bef14bffaf648b92e608a6a18cd46d57e850550e 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -5,6 +5,7 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
 test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
-test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer)
+test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
+test_prelu_layer)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
new file mode 100644
index 0000000000000000000000000000000000000000..64d227565f2b21ff43d4391c682ca90c0f47908e
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
@@ -0,0 +1,36 @@
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__prelu_layer_0__"
+  type: "prelu"
+  size: 300
+  active_type: ""
+  inputs {
+    input_layer_name: "input"
+    input_parameter_name: "___prelu_layer_0__.w0"
+  }
+}
+parameters {
+  name: "___prelu_layer_0__.w0"
+  size: 300
+  initial_mean: 0.0
+  initial_std: 0.057735026919
+  initial_strategy: 0
+  initial_smart: true
+}
+input_layer_names: "input"
+output_layer_names: "__prelu_layer_0__"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "__prelu_layer_0__"
+  input_layer_names: "input"
+  output_layer_names: "__prelu_layer_0__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e3057f323db22ffc3911cce30ec2e8bb95e3dbe
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
@@ -0,0 +1,6 @@
+from paddle.trainer_config_helpers import *
+
+data = data_layer(name='input', size=300)
+prelu = prelu_layer(input=data)
+
+outputs(prelu)
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 815635f5dd4654fe3a31a9244e6e4473c397dd2f..aeed9ebd7d4d64efa5d0bf1638742a485c0fa44a 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """
 `paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
-we want to make Paddle a plain Python package. The model config package defined
+we want to make Paddle a plain Python package. The model config package defines
 how to configure a neural network topology in Paddle Python code.
 The primary usage shows below.
 
@@ -30,7 +30,6 @@ The primary usage shows below.
 
     # use prediction instance where needed.
     parameters = paddle.parameters.create(cost)
 """
-
 import collections
 import copy
 import re
@@ -44,9 +43,10 @@ __all__ = ['data', 'parse_network']
 
 
 def __need_to_keep__(name):
-    if name in ['StaticInput', 'LayerType', 'layer_support']:
-        return False
-    return True
+    return name in [
+        'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
+        'layer_support'
+    ]
 
 
 def __need_to_wrap__(name):
@@ -54,6 +54,8 @@ def __need_to_wrap__(name):
 
 
 def __convert_name__(inname):
+    if __need_to_keep__(inname):
+        return inname
     if inname == 'maxid_layer':
         return 'max_id'
     elif inname.endswith('memory') or inname.endswith(
@@ -74,8 +76,6 @@ def __convert_name__(inname):
 
 for name in v1_layers.__all__:
     obj = getattr(v1_layers, name)
-    if not __need_to_keep__(name):
-        continue
     new_name = __convert_name__(name)
     if callable(obj) and __need_to_wrap__(name):
         globals()[new_name] = __convert_to_v2__(obj, new_name, __name__)
@@ -107,7 +107,7 @@ __data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__)
 data = __convert_to_v2__(__data_layer__, 'name', __name__)
 
 
-def __get_used_layers__(output_layers, extra_layers=None):
+def __get_used_layers__(output_layers):
     layer_names = set()
     parents = {}
 
@@ -132,6 +132,13 @@ def __get_used_layers__(output_layers, extra_layers=None):
             add_parent(mem.layer_name, mem.boot_layer_name)
             add_parent(mem.link_name, mem.layer_name)
 
+        if sub_model.HasField('generator'):
+            # according to the implementation of text generation
+            # in recurrent layer group, the generated word must be
+            # the first out link
+            add_parent(sub_model.out_links[0].layer_name,
+                       sub_model.generator.eos_layer_name)
+
     def dfs_travel(layer_name):
         if layer_name in layer_names:
             return
@@ -247,9 +254,9 @@ def __trim_submodel__(old_submodel, layer_names, input_layer_names,
 def parse_network(output_layers, extra_layers=None):
     if not isinstance(output_layers, collections.Sequence):
         output_layers = [output_layers]
-    if extra_layers is not None and not isinstance(extra_layers,
-                                                   collections.Sequence):
-        extra_layers = [extra_layers]
+    if extra_layers is not None:
+        if not isinstance(extra_layers, collections.Sequence):
+            extra_layers = [extra_layers]
     else:
         extra_layers = []
 
@@ -262,18 +269,29 @@ def parse_network(output_layers, extra_layers=None):
 
     model_config = ModelConfig()
     model_config.type = cp.g_config.model_config.type
+
+    for layer in output_layers:
+        model_config.output_layer_names.append(layer.full_name)
+        output_layer_names.add(layer.full_name)
+
     for l in cp.g_config.model_config.layers:
         if l.name not in layer_names:
             continue
         model_config.layers.extend([l])
        if l.type == 'data':
+            if l.name in model_config.output_layer_names:
+                """
+                In text generation, the outlink to save the generated word
+                indices is a data_layer defined in recurrent_group. Such a
+                data_layer is certain to be an output of the network in a text
+                generation task, so this statement excludes it from the
+                network's inputs; otherwise an error would occur during data
+                feeding.
+                """
+                continue
             model_config.input_layer_names.append(l.name)
             input_layer_names.add(l.name)
 
-    for layer in output_layers:
-        model_config.output_layer_names.append(layer.full_name)
-        output_layer_names.add(layer.full_name)
-
     for e in cp.g_config.model_config.evaluators:
         if e.name in evaluator_names:
             model_config.evaluators.extend([e])
diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py
index f3bb4d5f10dd6c5b220161e32dfc3a94642ac7a2..a20e878d0817d0a75e9c47a44f8765deca99225c 100644
--- a/python/paddle/v2/topology.py
+++ b/python/paddle/v2/topology.py
@@ -31,7 +31,6 @@ class Topology(object):
     def __init__(self, layers, extra_layers=None):
         def __check__(layers):
             if not isinstance(layers, collections.Sequence):
-                __check_layer_type__(layers)
                 layers = [layers]
             for layer in layers:
                 __check_layer_type__(layer)
@@ -91,6 +90,7 @@ class Topology(object):
         [('image', dense_vector(768)), ('label', integer_value(10))]
         """
         data_layers = self.data_layers()
+
         return [(nm, data_layers[nm].data_type)
                 for nm in self.proto().input_layer_names]
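
A minimal end-to-end sketch of the new StringPiece API (illustrative only; it relies on nothing beyond the functions declared in paddle/strings/stringpiece.h above):

    #include <iostream>

    #include "paddle/strings/stringpiece.h"

    int main() {
      std::string url = "http://www.paddlepaddle.org/index.html";
      paddle::StringPiece piece(url);  // views url; no bytes are copied

      if (paddle::HasPrefix(piece, "http://")) {
        piece = paddle::TrimPrefix(piece, "http://");
      }

      // piece now views "www.paddlepaddle.org/index.html".
      size_t slash = paddle::Find(piece, '/', 0);
      if (slash != paddle::StringPiece::npos) {
        paddle::StringPiece host = paddle::SubStr(piece, 0, slash);   // "www.paddlepaddle.org"
        paddle::StringPiece path = paddle::SkipPrefix(piece, slash);  // "/index.html"
        std::cout << host << "\n" << path << "\n";  // uses the operator<< above
      }
      return 0;
    }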