diff --git a/.travis.yml b/.travis.yml index 2c46da71e757da0b7d9f3ed933b91303738d697f..64961adcf28cbb24ead67ca6989fd2700956f2d5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ cache: directories: - $HOME/.ccache - $HOME/.cache/pip + - $TRAVIS_BUILD_DIR/build/third_party sudo: required dist: trusty os: @@ -41,7 +42,9 @@ before_install: - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - - paddle/scripts/travis/$JOB.sh + - | + timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout + RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: on_success: change diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index fe1fe5f6f03b315cf30d96e171dca53e32efa040..6c85b1804bb9c5f3a8bc46bb3f54cc62c56cca70 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -18,6 +18,7 @@ func main() { etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", "comma separated endpoint string for pserver to connect to etcd") etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls") + numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job") logLevel := flag.String("log-level", "info", "log level, possible values: debug, info, warning, error, fatal, panic") flag.Parse() @@ -29,7 +30,7 @@ func main() { log.SetLevel(level) timeout := time.Second * time.Duration((*etcdTimeout)) - s, err := pserver.NewService(*etcdEndpoint, timeout) + s, err := pserver.NewService(*etcdEndpoint, *numPservers, timeout) if err != nil { panic(err) } diff --git a/go/pserver/service.go b/go/pserver/service.go index 7e2b841dd8e8efda8b3033645b4b1b6a596e53c4..f966595fdccbf23e23f94a857503ce05815164ef 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -73,7 +73,7 @@ type Service struct { // NewService creates a new service, will bypass etcd registration if no // endpoints specified. -func NewService(endpoints string, timeout time.Duration) (*Service, error) { +func NewService(endpoints string, numPservers int, timeout time.Duration) (*Service, error) { s := &Service{opt: newOptimizer(sgd, 0.005)} s.paramMap = make(map[string]Parameter) s.initialized = make(chan struct{}) @@ -103,6 +103,22 @@ func NewService(endpoints string, timeout time.Duration) (*Service, error) { log.Debugf("inited client to %s", s.etcdEndpoints) break } + // init /ps_desired using transaction, for multiple pservers may want to write + // it at the same time. + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + _, err := s.initDesiredPsercers(ctx, numPservers) + cancel() + if err != nil { + log.Warn(err) + time.Sleep(s.etcdTimeout) + continue + } + break + } + // TODO: when implementing extending or reducing pservers, /ps_desired is + // changed, then we need to watch /ps_desired node for events. For now, just + // write once when init and read from it. // wait and set s.desired init value for { ctx, cancel := context.WithTimeout(context.Background(), time.Second) @@ -141,6 +157,16 @@ func NewService(endpoints string, timeout time.Duration) (*Service, error) { return s, nil } +func (s *Service) initDesiredPsercers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) { + return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error { + dsStr := c.Get(PsDesired) + if dsStr == "" { + c.Put(PsDesired, strconv.Itoa(numPservers)) + } + return nil + }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) +} + // registerPserverEtcd registers pserver node on etcd using transaction. func (s *Service) registerPserverEtcd(ctx context.Context) (*clientv3.TxnResponse, error) { return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error { diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 673cfa19ac35116288a2481b85858b6f88f3378e..e3c3155aa902c941058ea1b15488360df6c06175 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -2,3 +2,5 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) + +cc_test(variable_test SRCS variable_test.cc) diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index e5c84d7abe9d476f285c8c5cd904d2e570eb0e4f..36eef02370e0196c2af2c05f49176b70ce69235a 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -1,5 +1,3 @@ -//#include -//#include #include #include diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h new file mode 100644 index 0000000000000000000000000000000000000000..b33e10e6820129a874f5355d14d8a3e990186025 --- /dev/null +++ b/paddle/framework/variable.h @@ -0,0 +1,68 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#pragma once + +#include +#include +#include + +#include "paddle/platform/assert.h" + +namespace paddle { +namespace framework { + +class Variable { + public: + template + const T& Get() const { + PADDLE_ASSERT(holder_ != nullptr); + PADDLE_ASSERT(std::type_index(typeid(T)) == + std::type_index(holder_->Type())); + return *static_cast(holder_->Ptr()); + } + + template + T* GetMutable() { + if (holder_ == nullptr || + std::type_index(typeid(T)) != std::type_index(holder_->Type())) { + holder_.reset(new PlaceholderImpl(new T())); + } + return static_cast(holder_->Ptr()); + } + + private: + struct Placeholder { + virtual ~Placeholder() {} + virtual const std::type_info& Type() const = 0; + virtual void* Ptr() const = 0; + }; + + // Placeholder hides type T, so it doesn't appear as a template + // parameter of Variable. + template + struct PlaceholderImpl : public Placeholder { + PlaceholderImpl(T* ptr) : ptr_(ptr), type_(typeid(T)) {} + + virtual const std::type_info& Type() const { return type_; } + virtual void* Ptr() const { return static_cast(ptr_.get()); } + + std::unique_ptr ptr_; + const std::type_info& type_; + }; + + std::unique_ptr + holder_; // pointers to a PlaceholderImpl object indeed. +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/variable.md b/paddle/framework/variable.md new file mode 100644 index 0000000000000000000000000000000000000000..f44d5ea46e7ce98dd443d684ad42308496bc4179 --- /dev/null +++ b/paddle/framework/variable.md @@ -0,0 +1,52 @@ +# Design Doc: Variable + + +Variable is also known as *blob* in MxNet and Caffe2. It is the input and output type of operators, where a neural network is a graph of operators. + +## Requirements: Lazy Memory Allocation + +For the flexibility of a DL system, a variable should be able to contain any typed value -- a tensor in most cases, but could also be some integer IDs or a scope of other variables in the case of RNN. + +To use the minimum amount of memory, we'd like that a variable to allocate memory when it has to, or, lazy memory allocation. Let's take the following example: + +```cpp +Variable vr, v1, v2; + +Tensor* t1 = new Tensor(); +Tensor* t2 = new Tensor(); + +Randomize( + /* malloc */ v1.GetMutable().mutable_data(DDim(100,200)), + /* size */ t1.Size()); + +Randomize( + /* malloc */ v2.GetMutable().mutable_data(DDim(200,300)), + /* size */ t2.Size()); + +Mult( + /*result*/ vr.GetMutable().mutable_data(SizeOfMult(v1, v2)), + /*input1*/ v1.Get().data(), + /*input2*/ v2.Get().data()); +``` + +We see that a variable holds nothing until `Variable::GetMutable()` allocates a tensor and puts it in the variable. Similarly, a tensor gets its memory until `Tensor::mutable_data()`. + +This syntax for lazy memory allocation when we call `Randomize` and `Mult`, those functions that mutate the variable, so it saves us some line of C++ code. + + +## Implementation: Type Hiding + +To make memory allocation lazy, we cannot assume that we know the type held by a variable at definition time. In other words, `class Variable` cannot be a template `template class Variable`. + +Because we don't know the type `T`, we cannot save a `T*` as `Variable's` data member. Instead, we save an interface object `Placeholder`, who can return the pointer to the saved object via `Placeholder::Ptr()` as `void*`. + +But anyway, Variable needs to know `T` so could it `delete(ptr)` and so could `Variable::Get` checks the expected type and the saved object's type. + +We save `T` in `PlaceholderImpl`, the implementation of `Placeholder`. Please be aware that `PlaceholderImpl` is a class template and `T` is passed in as a template parameter. + +Because `PlaceholderImpl` knows `T`, it can save and return `typeid(T)` for the type comparison in `Variable::Get` and `Variable::GetMutable`. + + +## Conclusion + +The technique type hiding utilizes C++ class templates, interface and derivation, and C++ RTTI (typeid). This combination saves us from definition something like `caffe2::TypeMata`, which takes hundreds of lines of C++ code. diff --git a/paddle/framework/variable_test.cc b/paddle/framework/variable_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..aea03bcf5719dacc01d2d78b52b33e8a0b29b5e5 --- /dev/null +++ b/paddle/framework/variable_test.cc @@ -0,0 +1,40 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include +#include + +#include "gtest/gtest.h" +#include "paddle/framework/variable.h" + +TEST(Variable, GetMutable) { + using paddle::framework::Variable; + + struct Tensor { + int content_; + }; + + std::unique_ptr v(new Variable()); + + Tensor* t = v->GetMutable(); + t->content_ = 1234; + + const Tensor& tt = v->Get(); + EXPECT_EQ(1234, tt.content_); + + std::string* s = v->GetMutable(); + *s = "hello"; + + const std::string& ss = v->Get(); + EXPECT_EQ("hello", ss); +} diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index c7d7b14518ebb8415014a78fc1a3bafa8c386191..7abe2ab89e0798672149e28a8d02f7a58b6de3ea 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -2,3 +2,4 @@ nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) +cc_test(must_check_test SRCS must_check_test.cc) diff --git a/paddle/utils/Compiler.h b/paddle/platform/must_check.h similarity index 78% rename from paddle/utils/Compiler.h rename to paddle/platform/must_check.h index cebca5a2a3766110b83231eb0705e48800a7bda6..4fcc62afc05b14949fc43266f0d05be1f1b7891a 100644 --- a/paddle/utils/Compiler.h +++ b/paddle/platform/must_check.h @@ -10,24 +10,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -/** - * This header defines some useful attribute by each compiler. It is the - * abstract layer of compilers. - */ -#ifdef __GNUC__ -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#else -#define GCC_VERSION -#endif - /** * __must_check macro. It make the function's return value must be used, * otherwise it will raise a compile warning. And also Paddle treat all compile * warnings as errors. */ -#if GCC_VERSION >= 30400 +#ifdef __GNUC__ +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 30400 #define __must_check __attribute__((warn_unused_result)) #else #define __must_check #endif +#else +#define __must_check +#endif diff --git a/paddle/platform/must_check_test.cc b/paddle/platform/must_check_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..6ee3ea49acdc4384b5d5df353bfa1290856e982c --- /dev/null +++ b/paddle/platform/must_check_test.cc @@ -0,0 +1,10 @@ +#include +#include + +int __must_check SomeFunctionMustCheck() { return 0; } + +TEST(MustCheck, all) { + // This line should not be compiled, because the + // return value of SomeFunctionMustCheck marked as __must_check + // SomeFunctionMustCheck(); +} \ No newline at end of file diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index 88264d8c262b463f0f1ff6a29a2d265e2f3bec15..a44bd35357fde41c379134bed6b7fb242efe49e5 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -7,6 +7,7 @@ cd $TRAVIS_BUILD_DIR/build # Compile Documentation only. cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF + mkdir output make -j `nproc` find .. -name '*whl' | xargs pip install # install all wheels. diff --git a/paddle/utils/Error.h b/paddle/utils/Error.h index cda1b5c37dada8d0c6c77fc2fb03bb614d5301b5..f3d535c69c53fa350612459560dd9ac7c279aa72 100644 --- a/paddle/utils/Error.h +++ b/paddle/utils/Error.h @@ -19,7 +19,7 @@ limitations under the License. */ #include #include #include -#include "Compiler.h" +#include "paddle/platform/must_check.h" namespace paddle { diff --git a/python/setup.py.in b/python/setup.py.in index 2e22f640cb55677b6814b7f26a71457a96449de7..86fc0fc5c0318b03659bf84f8ad9e2a114467c74 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -13,6 +13,7 @@ packages=['paddle', setup_requires=["requests", "numpy", "protobuf==3.1", + "recordio", "matplotlib", "rarfile"]