diff --git a/.gitignore b/.gitignore index c84b2fc8c79d6e2c9c83e2b830ab176295846fd0..9622ab78e0e0556ec2b4cc974fee93ff680d54d2 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,5 @@ cmake-build-* python/paddle/v2/framework/core.so CMakeFiles cmake_install.cmake - +paddle/.timestamp +python/paddlepaddle.egg-info/ diff --git a/Dockerfile b/Dockerfile index 06a3d8930769bca2599a7afedb3683b2207cb302..8ac123bf9c0f24b47b741611f3b80213c61b82e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,7 @@ RUN apt-get update && \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-matplotlib gcc-4.8 g++-4.8 \ - automake locales clang-format-3.8 swig doxygen cmake \ + automake locales clang-format swig doxygen cmake \ liblapack-dev liblapacke-dev libboost-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \ net-tools && \ diff --git a/go/glide.lock b/go/glide.lock index be1fb24d772a6524cb798c6169c23ff03e9fed7b..1ecdd217520e0a62b546b4c7048a25f4316d3f37 100644 --- a/go/glide.lock +++ b/go/glide.lock @@ -1,5 +1,5 @@ hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582 -updated: 2017-08-03T21:46:51.744995189Z +updated: 2017-08-07T23:37:48.867469328Z imports: - name: github.com/beorn7/perks version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 @@ -10,7 +10,7 @@ imports: - name: github.com/cockroachdb/cmux version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92 - name: github.com/coreos/etcd - version: c31bec0f29facff13f7c3e3d948e55dd6689ed42 + version: d0d1a87aa96ae14914751d42264262cb69eda170 subpackages: - alarm - auth @@ -24,6 +24,7 @@ imports: - error - etcdserver - etcdserver/api + - etcdserver/api/etcdhttp - etcdserver/api/v2http - etcdserver/api/v2http/httptypes - etcdserver/api/v3client @@ -210,11 +211,6 @@ testImports: version: 04cdfd42973bb9c8589fd6a731800cf222fde1a9 subpackages: - spew -- name: github.com/docker/docker - version: b6d164e6c46d8115b146e4c3ac93784e9ef8b49e - subpackages: - - pkg/ioutils - - pkg/longpath - name: github.com/pmezard/go-difflib version: d8ed2627bdf02c080bf22230dbb337003b7aba2d subpackages: diff --git a/go/master/service_test.go b/go/master/service_test.go index 5f91910ecc8cf32289e71e2e41e8b283acc115e6..2d00c22d6feb7177da5c19c557fd16d7925ef6d1 100644 --- a/go/master/service_test.go +++ b/go/master/service_test.go @@ -1,24 +1,30 @@ package master_test import ( + "io/ioutil" + "net/url" "os" + "strings" "testing" "time" "github.com/PaddlePaddle/Paddle/go/master" "github.com/coreos/etcd/clientv3" "github.com/coreos/etcd/embed" - "github.com/docker/docker/pkg/ioutils" "github.com/stretchr/testify/assert" ) func TestNewServiceWithEtcd(t *testing.T) { // setup an embed etcd server - etcdDir, err := ioutils.TempDir("", "") + etcdDir, err := ioutil.TempDir("", "") if err != nil { t.Fatal(err) } cfg := embed.NewConfig() + lpurl, _ := url.Parse("http://localhost:0") + lcurl, _ := url.Parse("http://localhost:0") + cfg.LPUrls = []url.URL{*lpurl} + cfg.LCUrls = []url.URL{*lcurl} cfg.Dir = etcdDir e, err := embed.StartEtcd(cfg) if err != nil { @@ -30,15 +36,13 @@ func TestNewServiceWithEtcd(t *testing.T) { t.Fatal(err) } }() - select { - case <-e.Server.ReadyNotify(): - t.Log("Server is ready!") - case <-time.After(60 * time.Second): - e.Server.Stop() // trigger a shutdown - t.Fatal("Server took too long to start!") - } - ep := []string{"127.0.0.1:2379"} + <-e.Server.ReadyNotify() + + port := strings.Split(e.Clients[0].Addr().String(), ":")[1] + endpoint := "127.0.0.1:" + port + + ep := []string{endpoint} masterAddr := 
"127.0.0.1:3306" store, err := master.NewEtcdClient(ep, masterAddr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, 30) if err != nil { diff --git a/go/pserver/client/c/cclient.go b/go/pserver/client/c/cclient.go index 14ad0774550f6e5a5d8610d6007904cd2820432c..a49cd01522b8b49a74f21fcb97e9eeb1fbb2d272 100644 --- a/go/pserver/client/c/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -90,8 +90,12 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte { type selector bool -func (s selector) Select() bool { - return bool(s) +func (s selector) Select() (bool, error) { + return bool(s), nil +} + +func (s selector) Done() error { + return nil } type lister []client.Server @@ -114,11 +118,10 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client { - // TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters) +func paddle_new_etcd_pserver_client(etcdEndpoints *C.char) C.paddle_pserver_client { addr := C.GoString(etcdEndpoints) etcdClient := client.NewEtcd(addr) - c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0)) + c := client.NewClient(etcdClient, etcdClient.Desired(), etcdClient) return add(c) } @@ -136,7 +139,12 @@ func paddle_pserver_client_release(client C.paddle_pserver_client) { //export paddle_begin_init_params func paddle_begin_init_params(client C.paddle_pserver_client) C.int { c := get(client) - if selected := c.BeginInitParams(); selected { + selected, err := c.BeginInitParams() + if err != nil { + panic(err) + } + + if selected { return 1 } return 0 diff --git a/go/pserver/client/client.go b/go/pserver/client/client.go index 15adda4735b022c16cb22715fb690b3740e58b76..20d91e77034e1a0c6825bc401175e6dc1afec52f 100644 --- a/go/pserver/client/client.go +++ b/go/pserver/client/client.go @@ -27,9 +27,13 @@ import ( // TODO(helin): add RPC call retry logic -// Selector selects if the client should initialize parameter servers. +// Selector selects if the client should initialize parameters and +// reports the initialization process done. type Selector interface { - Select() bool + // Select selects if the client should initialize parameter servers. + Select() (bool, error) + // Done indicates the initialization process is done. + Done() error } // Server is the identification of a parameter Server. @@ -115,7 +119,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) { // servers. Other trainers will be blocked until the initialization is // done, and they need to get the initialized parameters from // parameter servers using GetParams. 
-func (c *Client) BeginInitParams() bool { +func (c *Client) BeginInitParams() (bool, error) { return c.sel.Select() } diff --git a/go/pserver/client/client_test.go b/go/pserver/client/client_test.go index 1243ebd6836550d58144b5033e2755ae8594e948..c3d88e926d7cb5f3027be26a270bee6f2db65f31 100644 --- a/go/pserver/client/client_test.go +++ b/go/pserver/client/client_test.go @@ -124,8 +124,12 @@ func initEtcdClient() { type selector bool -func (s selector) Select() bool { - return bool(s) +func (s selector) Select() (bool, error) { + return bool(s), nil +} + +func (s selector) Done() error { + return nil } type lister []client.Server @@ -135,7 +139,11 @@ func (l lister) List() []client.Server { } func testClient(t *testing.T, c *client.Client) { - selected := c.BeginInitParams() + selected, err := c.BeginInitParams() + if err != nil { + t.Fatal(err) + } + if !selected { t.Fatal("should be selected.") } diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go index 977ae5af37e2b7d647ae16af9c4403f916b0216d..f9071caaa8f5ac32d426b1d4344a30262202b96d 100644 --- a/go/pserver/client/etcd_client.go +++ b/go/pserver/client/etcd_client.go @@ -16,53 +16,60 @@ package client import ( "context" + "errors" + "fmt" "strconv" "strings" "time" "github.com/PaddlePaddle/Paddle/go/pserver" "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/clientv3/concurrency" log "github.com/sirupsen/logrus" ) const ( defaultEtcdTimeout time.Duration = 5 * time.Second + + initLockPath = "/init_ps/lock" + initDonePath = "/init_ps/done" + initDoneVal = "1" ) -// EtcdClient is used by pserver client that is a part of trainer process. +// Etcd is used by pserver client that is a part of trainer process. // TODO: -// 1. add watcher to watch the change state of pservers) -// 1. add etcd lock) -type EtcdClient struct { +// 1. add watcher to watch the change state of pservers. +type Etcd struct { client *clientv3.Client timeout time.Duration endpoints []string + lock *concurrency.Mutex } // Desired read ps desired number from etcd. -func (p *EtcdClient) Desired() int { +func (e *Etcd) Desired() int { var psDesired int for { - ctx, cancel := context.WithTimeout(context.Background(), p.timeout) - resp, err := p.client.Get(ctx, pserver.PsDesired) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + resp, err := e.client.Get(ctx, pserver.PsDesired) cancel() if err != nil { log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } kvs := resp.Kvs if len(kvs) == 0 { log.Infoln("Waiting for ps desired registered ...") - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) if err != nil { log.Errorf("psDesired %d invalid %v", psDesired, err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } @@ -73,26 +80,26 @@ func (p *EtcdClient) Desired() int { } // List return the pserver list read from etcd. 
-func (p *EtcdClient) List() []Server { - psDesired := p.Desired() +func (e *Etcd) List() []Server { + psDesired := e.Desired() servers := make([]Server, psDesired) for { for i := 0; i < psDesired; i++ { - ctx, cancel := context.WithTimeout(context.Background(), p.timeout) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) psKey := pserver.PsPath + strconv.Itoa(i) log.Debugf("checking %s", psKey) - resp, err := p.client.Get(ctx, psKey) + resp, err := e.client.Get(ctx, psKey) cancel() if err != nil { log.Infof("Get psKey= %s error, %v", psKey, err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } kvs := resp.Kvs if len(kvs) == 0 { log.Infof("Waiting for ps addr registered ...") - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } @@ -100,7 +107,7 @@ func (p *EtcdClient) List() []Server { // TODO(Longfei) check the ps address if psAddr == "" { log.Infof("Get psKey = %s, psAddr is empty", psKey) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } log.Debugf("got value (%s) for key: %s", psAddr, psKey) @@ -113,7 +120,7 @@ func (p *EtcdClient) List() []Server { } // NewEtcd create a etcd client to return the state of pserver on etcd. -func NewEtcd(endpoints string) *EtcdClient { +func NewEtcd(endpoints string) *Etcd { ep := strings.Split(endpoints, ",") var cli *clientv3.Client var err error @@ -130,10 +137,118 @@ func NewEtcd(endpoints string) *EtcdClient { break } log.Infof("Connected to etcd: %s\n", endpoints) - client := &EtcdClient{ + client := &Etcd{ client: cli, timeout: defaultEtcdTimeout, endpoints: ep, } return client } + +// Select indicates if the current trainer is selected to initialize +// the pserver parameters. +func (e *Etcd) Select() (bool, error) { + sess, err := concurrency.NewSession(e.client, concurrency.WithTTL(5)) + if err != nil { + return false, err + } + + lock := concurrency.NewMutex(sess, initLockPath) + log.Infof("Trying to acquire lock at %s.", initLockPath) + // Do not use timeout context here, since we don't know how + // long does it take for other trainers to initialize the + // parameters. + err = lock.Lock(context.Background()) + if err != nil { + return false, err + } + log.Infof("Successfully acquired lock at %s.", initLockPath) + + get := clientv3.OpGet(initDonePath) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + tresp, err := e.client.Txn(ctx).If(lock.IsOwner()).Then(get).Commit() + cancel() + if err != nil { + return false, err + } + + if !tresp.Succeeded { + return false, errors.New("no longer the owner of the lock") + } + + resp := tresp.Responses[0].GetResponseRange() + + if len(resp.Kvs) == 0 { + // Key value not set, select current trainer. + e.lock = lock + log.Infoln("Trainer selected.") + return true, nil + } + + if string(resp.Kvs[0].Value) == initDoneVal { + log.Infoln("Initialization is already done.") + ctx, cancel = context.WithTimeout(context.Background(), e.timeout) + err = lock.Unlock(ctx) + cancel() + if err != nil { + log.Errorln(err) + } + return false, nil + } + + return false, fmt.Errorf("key %s have unexpected value: %v", initDonePath, resp.Kvs[0].Value) +} + +// Done indicates the parameter initialization process is done. 
+func (e *Etcd) Done() error { + if e.lock == nil { + return errors.New("lock is nil, Done called unexpectedly") + } + + put := clientv3.OpPut(initDonePath, initDoneVal) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + tresp, err := e.client.Txn(ctx).If(e.lock.IsOwner()).Then(put).Commit() + cancel() + if err != nil { + return err + } + + if !tresp.Succeeded { + return errors.New("no longer the owner of the lock") + } + + ctx, cancel = context.WithTimeout(context.Background(), e.timeout) + err = e.lock.Unlock(ctx) + cancel() + if err != nil { + log.Errorln(err) + } else { + e.lock = nil + } + + return nil +} + +// Close closes the etcd client. +func (e *Etcd) Close() error { + var err error + if e.lock != nil { + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + err = e.lock.Unlock(ctx) + cancel() + if err == nil { + e.lock = nil + } + } + + cErr := e.client.Close() + if cErr != nil { + if err != nil { + log.Errorln(cErr) + return err + } + return cErr + } + + return err +} diff --git a/go/pserver/client/etcd_client_test.go b/go/pserver/client/etcd_client_test.go new file mode 100644 index 0000000000000000000000000000000000000000..08742433e7a266fbd39e34f4b92ac4cc4caeb0fb --- /dev/null +++ b/go/pserver/client/etcd_client_test.go @@ -0,0 +1,106 @@ +package client_test + +import ( + "io/ioutil" + "net/url" + "os" + "strings" + "sync" + "testing" + + "github.com/PaddlePaddle/Paddle/go/pserver/client" + "github.com/coreos/etcd/embed" +) + +func TestSelector(t *testing.T) { + etcdDir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatal(err) + } + cfg := embed.NewConfig() + lpurl, _ := url.Parse("http://localhost:0") + lcurl, _ := url.Parse("http://localhost:0") + cfg.LPUrls = []url.URL{*lpurl} + cfg.LCUrls = []url.URL{*lcurl} + cfg.Dir = etcdDir + e, err := embed.StartEtcd(cfg) + if err != nil { + t.Fatal(err) + } + + defer func() { + e.Close() + if err := os.RemoveAll(etcdDir); err != nil { + t.Fatal(err) + } + }() + + <-e.Server.ReadyNotify() + + port := strings.Split(e.Clients[0].Addr().String(), ":")[1] + endpoint := "127.0.0.1:" + port + + var mu sync.Mutex + selectedCount := 0 + var wg sync.WaitGroup + selectAndDone := func(c *client.Etcd) { + defer wg.Done() + + selected, err := c.Select() + if err != nil { + panic(err) + } + + if selected { + mu.Lock() + selectedCount++ + mu.Unlock() + err = c.Done() + if err != nil { + t.Fatal(err) + } + } + } + + c0 := client.NewEtcd(endpoint) + c1 := client.NewEtcd(endpoint) + c2 := client.NewEtcd(endpoint) + c3 := client.NewEtcd(endpoint) + wg.Add(3) + go selectAndDone(c0) + go selectAndDone(c1) + go selectAndDone(c2) + wg.Wait() + + // simulate trainer crashed and restarted after the + // initialization process. 
+ wg.Add(1) + go selectAndDone(c3) + wg.Wait() + + mu.Lock() + if selectedCount != 1 { + t.Fatal("selected count wrong:", selectedCount) + } + mu.Unlock() + + err = c0.Close() + if err != nil { + t.Fatal(err) + } + + err = c1.Close() + if err != nil { + t.Fatal(err) + } + + err = c2.Close() + if err != nil { + t.Fatal(err) + } + + err = c3.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f6ad5b2e4258553fc1a4eeb869b9d4d02cae9e26..f6df89369c52797f7269c41f635756582fadbc47 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) +cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor) +cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) + cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) @@ -40,11 +43,13 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward - fc_op - sgd_op - add_op - mean_op - cross_entropy_op - fill_zeros_like_op - recurrent_op) + fc_op + sgd_op + add_op + mean_op + cross_entropy_op + recurrent_op + uniform_random_op + gaussian_random_op + fill_zeros_like_op) endif(WITH_PYTHON) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 13706f8b562a1d68fe0d603f51c2fb47b4e18164..437a44a8aafa650d654a1a77c60613abe07679fe 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/framework/backward.h" + #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" @@ -132,8 +133,9 @@ std::shared_ptr BackwardRecursive( std::shared_ptr grad_op = OpRegistry::CreateGradOp(forwardOp); for (std::string& grad_input : grad_op->inputs_) { if (no_grad_names.count(grad_input)) { - std::string prefix = - grad_input.substr(0, grad_input.size() - kGradVarSuffix.size()); + // +1 for \0 + std::string prefix = grad_input.substr( + 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); grad_input = prefix + kZeroVarSuffix; // If part of input gradient of that operator is not calculated, fill @@ -166,7 +168,7 @@ std::shared_ptr Backward( std::unordered_set no_grad_names; no_grad_names.reserve(no_grad_vars.size()); - no_grad_names.insert(kEmptyVarName + kGradVarSuffix); + no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); for (auto& name : no_grad_vars) { no_grad_names.insert(name + kGradVarSuffix); diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 6c6e12ca254553a8fc02cadbe3a99989ee848943..1677a3ed4c85ef293f0aadc64a4caa809cbd6ced 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -17,16 +17,21 @@ #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" -#include "paddle/operators/type_alias.h" namespace paddle { namespace framework { +using OperatorBase = framework::OperatorBase; +using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; +using OpProto = framework::OpProto; +using OpAttrChecker = framework::OpAttrChecker; +using Scope = framework::Scope; +using DeviceContext = platform::DeviceContext; + class EmptyOp : public OperatorBase { public: void InferShape(const Scope &scope) const override {} - void 
Run(const Scope &scope, - const platform::DeviceContext &dev_ctx) const override {} + void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {} }; class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { @@ -71,7 +76,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { } }; -class FcOp : public ops::NetOp { +class FcOp : public operators::NetOp { public: void Init() override { AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")}, @@ -143,6 +148,7 @@ class AddOpMaker : public OpProtoAndCheckerMaker { } // namespace paddle namespace f = paddle::framework; +namespace ops = paddle::operators; using EnforceNotMet = paddle::platform::EnforceNotMet; REGISTER_OP(rowwise_add, f::EmptyOp, f::RowWiseAddOpMaker); REGISTER_GRADIENT_OP(rowwise_add, rowwise_add_grad, f::EmptyOp); @@ -165,10 +171,10 @@ TEST(Backward, simple_op_grad) { ASSERT_EQ(4UL, gop->inputs_.size()); ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]); ASSERT_EQ("rowwise_add_grad", gop->type_); - ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]); - ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]); + ASSERT_EQ(f::GradVarName("X"), gop->outputs_[0]); + ASSERT_EQ(f::GradVarName("b"), gop->outputs_[1]); - ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix)); + ASSERT_EQ(f::GradVarName("X"), gop->Output(f::GradVarName("X"))); } TEST(Backward, simple_op_not_need_grad) { @@ -176,7 +182,7 @@ TEST(Backward, simple_op_not_need_grad) { ASSERT_NE(fwd, nullptr); auto gop = f::Backward(*fwd, {"X"}); ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(), - "X" + f::kGradVarSuffix), + f::GradVarName("X")), gop->outputs_.end()); auto no_input_gop = f::Backward(*fwd, {"X", "b"}); @@ -244,18 +250,18 @@ TEST(Backward, net_input_of_network_not_need_grad) { all_output.erase(f::kEmptyVarName); for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { - ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end()); + ASSERT_NE(all_output.find(f::GradVarName(out)), all_output.end()); } // Not Generated X - ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end()); + ASSERT_EQ(all_output.find(f::GradVarName("X")), all_output.end()); ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); ASSERT_EQ(3UL, first_fc_grad->ops_.size()); ASSERT_EQ(f::kEmptyVarName, - first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix)); + first_fc_grad->ops_[2]->Output(f::GradVarName("A"))); } TEST(Backward, net_shared_weight) { @@ -307,15 +313,15 @@ TEST(Backward, op_part_of_output_are_not_need) { ASSERT_EQ(1UL, fill_zero.inputs_.size()); ASSERT_EQ("Z", fill_zero.inputs_[0]); ASSERT_EQ(1UL, fill_zero.outputs_.size()); - ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.outputs_[0]); auto &d_many_out = *net->ops_[1]; ASSERT_EQ("many_output_op_grad", d_many_out.type_); ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG - ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix)); - ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix)); - ASSERT_EQ("X" + f::kGradVarSuffix, - d_many_out.Output("x" + f::kGradVarSuffix)); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, + d_many_out.Input(f::GradVarName("z"))); + ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y"))); + ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x"))); } TEST(Backward, op_part_of_input_are_not_need) 
{ @@ -325,10 +331,9 @@ TEST(Backward, op_part_of_input_are_not_need) { ASSERT_EQ(grad_mul.type_, "mul_grad"); ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL); ASSERT_EQ(grad_mul.outputs_.size(), 2UL); - ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName); - ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix); - ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix), - "out" + f::kGradVarSuffix); + ASSERT_EQ(grad_mul.Output(f::GradVarName("A")), f::kEmptyVarName); + ASSERT_EQ(grad_mul.Output(f::GradVarName("B")), f::GradVarName("b")); + ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out")); ASSERT_EQ(grad_mul.Input("A"), "a"); ASSERT_EQ(grad_mul.Input("B"), "b"); ASSERT_EQ(grad_mul.Input("Out"), "out"); diff --git a/paddle/framework/details/lod_tensor.cc b/paddle/framework/details/lod_tensor.cc new file mode 100644 index 0000000000000000000000000000000000000000..9ad3979e5b511517f75d2d43004f97ee1576953b --- /dev/null +++ b/paddle/framework/details/lod_tensor.cc @@ -0,0 +1,62 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/lod_tensor.h" + +#include + +namespace paddle { +namespace framework { +namespace details { + +using LOD = LODTensor::LOD; + +std::shared_ptr SliceLOD(const LOD &lod, size_t level_begin, + size_t level_end) { + auto new_lod = std::make_shared(); + new_lod->reserve(level_end - level_begin); + for (size_t i = level_begin; i < level_end; i++) { + new_lod->emplace_back(lod[i]); + } + return new_lod; +} + +std::shared_ptr SliceLOD(const LOD &lod, size_t level, size_t elem_begin, + size_t elem_end, bool tensor_shared) { + // slice the lod. + auto new_lod = std::make_shared(); + new_lod->reserve(lod.size() - level); + auto start = lod.at(level)[elem_begin]; + auto end = lod.at(level)[elem_end]; + + for (auto it = lod.begin() + level; it != lod.end(); it++) { + auto it_begin = std::find(it->begin(), it->end(), start); + auto it_end = std::find(it_begin, it->end(), end); + PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info"); + PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info"); + new_lod->emplace_back(it_begin, it_end + 1); + if (!tensor_shared) { + // reset offset if tensor is copyed and sliced. + std::transform(new_lod->back().begin(), new_lod->back().end(), + new_lod->back().begin(), + [start](int v) { return v - start; }); + PADDLE_ENFORCE(new_lod->back().front() == 0, "error in slice LOD"); + } + } + return new_lod; +} + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/details/lod_tensor.h b/paddle/framework/details/lod_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..9a6a6cd2ea41f02db991bdc0a2b917433dafed99 --- /dev/null +++ b/paddle/framework/details/lod_tensor.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace framework { +namespace details { + +/* + * Slice levels from LOD. + * + * @lod: LOD to slice. + * @level_begin: level to begin slice. + * @level_end: level to end slice. + */ +std::shared_ptr SliceLOD(const LODTensor::LOD &lod, + size_t level_begin, size_t level_end); + +/* + * Slice elements from a level of LOD. + * + * @lod: LOD to slice. + * @level: which level to slice. + * @elem_begin: element's index to begin slice. + * @elem_end: element's index to end slice. + */ +std::shared_ptr SliceLOD(const LODTensor::LOD &lod, + size_t level, size_t elem_begin, + size_t elem_end, bool tensor_shared); +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index cf7143eba4460e5619188b82ffe23db11a04a236..f1ebbae52f13d9c0fc9408aec8c4160575ad59c0 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -83,21 +83,19 @@ TEST(GradOpBuilder, MutiInOut) { EXPECT_EQ(grad_test_op->Input("Out1"), "out1"); EXPECT_EQ(grad_test_op->Inputs("Out2_mult"), std::vector({"out2_1", "out2_2"})); - EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix), - "out1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out1")), + f::GradVarName("out1")); + EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out2_mult")), std::vector( - {"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix})); + {f::GradVarName("out2_1"), f::GradVarName("out2_2")})); ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); - EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), - "in1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), - std::vector({"in2_1" + f::kGradVarSuffix, - "in2_2" + f::kGradVarSuffix, - "in2_3" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix), - "in3" + f::kGradVarSuffix); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), + std::vector({f::GradVarName("in2_1"), + f::GradVarName("in2_2"), + f::GradVarName("in2_3")})); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In3")), f::GradVarName("in3")); } TEST(GradOpBuilder, IOIgnoredInGradient) { @@ -119,19 +117,18 @@ TEST(GradOpBuilder, IOIgnoredInGradient) { EXPECT_EQ(grad_test_op->Inputs("Out1_mult"), std::vector({"out1_1", "out1_2"})); EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName); - EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out1_mult")), std::vector( - {"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix), - "out2" + f::kGradVarSuffix); + {f::GradVarName("out1_1"), f::GradVarName("out1_2")})); + 
EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out2")), + f::GradVarName("out2")); ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); - EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), - "in1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), std::vector( - {"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix), + {f::GradVarName("in2_1"), f::GradVarName("in2_2")})); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In3_mult")), std::vector( - {"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix})); + {f::GradVarName("in3_1"), f::GradVarName("in3_2")})); } diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc new file mode 100644 index 0000000000000000000000000000000000000000..70045dbf7afd0935e4df852b2f0e3ecd163a9316 --- /dev/null +++ b/paddle/framework/lod_tensor.cc @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/lod_tensor.h" + +#include + +namespace paddle { +namespace framework { + +LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); + // slice levels just need to update LOD info, each level will contains the + // whole tensor_, so no need to modify tensor_. + return LODTensor(tensor_, new_lod); +} + +LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin, + size_t elem_end) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem_begin < NumElements(level), + "element begin [%d] out of range [%d]", elem_begin, + NumElements(level)); + PADDLE_ENFORCE(elem_end < NumElements(level) + 1, + "element end [%d] out of range [%d]", elem_end, + NumElements(level)); + + auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, + true /*tensor_shared*/); + + // slice elements just need to update LOD info, because offsets are not + // changed, so the original tensor_ can be reused. + return LODTensor(tensor_, new_lod); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..4933479b109694312e99595dc8ad6db70259efa6 --- /dev/null +++ b/paddle/framework/lod_tensor.h @@ -0,0 +1,145 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#if (!PADDLE_ONLY_CPU) +#include +#include +#endif + +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace framework { + +/* + * LODTensor (Level of details Tensor) + * see https://en.wikipedia.org/wiki/Level_of_details for reference. + */ +class LODTensor { + public: +// Level save offsets of each unit. +#ifdef PADDLE_ONLY_CPU + using Level = std::vector; +#else + using Level = thrust::device_vector; +#endif + // LOD stores offsets of each level of units, the largest units level first, + // then the smaller units level. Each Level stores the offsets of units in + // Tesor. + typedef std::vector LOD; + + LODTensor() {} + LODTensor(const std::shared_ptr &tensor, + const std::shared_ptr &lod) { + Reset(tensor, lod); + } + + void Reset(const std::shared_ptr &tensor, + const std::shared_ptr &lod) { + tensor_ = tensor; + lod_start_pos_ = lod; + } + + /* + * Get a element from LOD. + */ + size_t lod_element(size_t level, size_t elem) const { + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem < NumElements(level), + "element begin [%d] out of range [%d]", elem, + NumElements(level)); + return (*lod_start_pos_)[level][elem]; + } + + /* + * Number of LODTensor's levels, each level has units of data, for example, + * in the sentence's view, article, paragraph, sentence are 3 levels. + */ + size_t NumLevels() const { + return lod_start_pos_ ? lod_start_pos_->size() : 0UL; + } + /* + * Number of elements in a level. + */ + size_t NumElements(size_t level = 0) const { + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + // the last offset is the end of last element + return lod_start_pos_->at(level).size() - 1; + } + + /* + * Slice of levels[level_begin:level_end], with tensor copied. + */ + template + LODTensor SliceCopied(size_t level_begin, size_t level_end, + const platform::Place &dst_place) const; + + /* + * Slice of levels[level_begin:level_end], with tensor shared. + */ + LODTensor SliceShared(size_t level_begin, size_t level_end) const; + + /* + * Slice of elements of a level, [elem_begin: elem_end], with tensor copied. + * @note: low performance in slice lod_start_pos_. + */ + template + LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end, + const platform::Place &dst_place) const; + + /* + * Slice of elements of a level, [elem_begin: elem_end], with tensor shared. + * @note: low performance in slice lod_start_pos_. + */ + LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const; + + /* + * Copy other's lod_start_pos_, to share LOD info. + * @note: the LOD info should not be changed. + */ + void ShareLOD(const LODTensor &other) { + lod_start_pos_ = other.lod_start_pos_; + } + + /* + * Copy other's lod_start_pos_'s content, free to mutate. + */ + void CopyLOD(const LODTensor &other) { + lod_start_pos_ = std::make_shared(*other.lod_start_pos_); + } + /* + * Determine whether LODTensor has a valid LOD info. 
+ */ + bool HasLOD() const { return bool(lod_start_pos_); } + LOD *lod() const { return lod_start_pos_.get(); } + + std::shared_ptr &tensor() { return tensor_; } + Tensor *raw_tensor() { return tensor_.get(); } + + private: + std::shared_ptr lod_start_pos_; + std::shared_ptr tensor_; +}; + +} // namespace framework +} // namespace paddle + +#include "paddle/framework/lod_tensor_impl.h" diff --git a/paddle/framework/lod_tensor_impl.h b/paddle/framework/lod_tensor_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..0eb6469aea3ae25f035751da985b5bebb489d961 --- /dev/null +++ b/paddle/framework/lod_tensor_impl.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/details/lod_tensor.h" + +namespace paddle { +namespace framework { + +template +LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end, + const platform::Place &dst_place) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); + auto new_tensor = std::make_shared(); + new_tensor->CopyFrom(*tensor_, dst_place); + + return LODTensor(new_tensor, new_lod); +} + +template +LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin, + size_t elem_end, + const platform::Place &dst_place) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem_begin < NumElements(level), + "element begin [%d] out of range [%d]", elem_begin, + NumElements(level)); + PADDLE_ENFORCE(elem_end < NumElements(level) + 1, + "element end [%d] out of range [%d]", elem_end, + NumElements(level)); + + auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, + false /*tensor_shared*/); + + auto start_idx = new_lod->front().front(); + auto end_idx = new_lod->front().back() - 1 /*the next element's start*/; + auto sliced_tensor = tensor_->Slice(start_idx, end_idx); + auto new_tensor = std::make_shared(); + new_tensor->CopyFrom(sliced_tensor, dst_place); + + return LODTensor(new_tensor, new_lod); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..511716375e81e8fd89b071c940ee97327c268b8b --- /dev/null +++ b/paddle/framework/lod_tensor_test.cc @@ -0,0 +1,165 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "paddle/framework/lod_tensor.h" + +#include +#include +#include + +namespace paddle { +namespace framework { + +class LODTensorTester : public ::testing::Test { + public: + virtual void SetUp() override { + lod_tensor.reset(new LODTensor); + // tensor's batch_size: 30 + // 3 levels + // 0 10 20 + // 0 5 10 15 20 + // 0 2 5 7 10 12 15 20 + auto lod = std::make_shared(); + lod->push_back(std::vector{0, 10, 20}); + lod->push_back(std::vector{0, 5, 10, 15, 20}); + lod->push_back(std::vector{0, 2, 5, 7, 10, 12, 15, 17, 20}); + + auto tensor = std::make_shared(); + tensor->Resize({20 /*batch size*/, 128 /*dim*/}); + // malloc memory + tensor->mutable_data(place); + + lod_tensor->Reset(tensor, lod); + } + + protected: + std::unique_ptr lod_tensor; + platform::CPUPlace place; +}; + +TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor->NumLevels(), 3UL); } + +TEST_F(LODTensorTester, NumElements) { + ASSERT_EQ(lod_tensor->NumElements(0), 2UL); + ASSERT_EQ(lod_tensor->NumElements(1), 4UL); + ASSERT_EQ(lod_tensor->NumElements(2), 8UL); +} + +TEST_F(LODTensorTester, SliceShared_Level) { + // slice 1 level + for (size_t level = 0; level < 3UL; ++level) { + auto new_lod_tensor = lod_tensor->SliceShared(level, level + 1); + ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); + ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + } + // slice 2 level + for (size_t level = 0; level < 2UL; ++level) { + auto new_lod_tensor = lod_tensor->SliceShared(level, level + 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.NumElements(1), + lod_tensor->NumElements(level + 1)); + ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + } +} + +TEST_F(LODTensorTester, SliceCopied_Level) { + // slice 1 level + for (size_t level = 0; level < 3UL; ++level) { + auto new_lod_tensor = + lod_tensor->SliceCopied(level, level + 1, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); + ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level)); + // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + // TODO(superjom) add tensor comparation here. + } + // slice 2 level + for (size_t level = 0; level < 2UL; ++level) { + auto new_lod_tensor = + lod_tensor->SliceCopied(level, level + 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.NumElements(1), + lod_tensor->NumElements(level + 1)); + // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + // TODO(superjom) add tensor comparation here. 
+ } +} + +TEST_F(LODTensorTester, SliceShared_Element) { + size_t level = 0; + auto new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); + ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); +} + +TEST_F(LODTensorTester, SliceCopied_Element) { + size_t level = 0; + auto new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); + ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + // LOD is + // 0 5 10 + // 0 2 5 7 10 + new_lod_tensor = lod_tensor->SliceCopied(level, 1, 3, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + + ASSERT_EQ(new_lod_tensor.lod_element(0, 0), 0UL); + ASSERT_EQ(new_lod_tensor.lod_element(0, 1), 5UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 0), 0UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 1), 2UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 2), 5UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 3), 7UL); + + // TODO(superjom) compare the content of these tensors +} + +TEST_F(LODTensorTester, ShareLOD) { + LODTensor new_lod_tensor; + new_lod_tensor.ShareLOD(*lod_tensor); + ASSERT_EQ(new_lod_tensor.lod(), lod_tensor->lod()); +} + +TEST_F(LODTensorTester, CopyLOD) { + LODTensor new_lod_tensor; + new_lod_tensor.CopyLOD(*lod_tensor); + ASSERT_NE(new_lod_tensor.lod(), lod_tensor->lod()); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index b2813da83d9e4c525e66bb1f79b28769627eaec2..6c26183818a9d6996e3d3ce2af74ba36f4711eca 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -260,12 +260,6 @@ class OpRegistry { return CreateOp(op_desc.type(), inputs, outputs, attrs); } - static bool SupportGPU(const std::string& op_type) { - OperatorWithKernel::OpKernelKey key; - key.place_ = platform::GPUPlace(); - return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0; - } - static std::shared_ptr CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 03fabff79b637299f8e133aab29ccb0e145379cf..8949baf60e80d9802693cb4b28c99bb3c258c79c 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -33,19 +33,19 @@ namespace paddle { namespace framework { /// If a variable is a empty variable, that name will be used. 
-const std::string kEmptyVarName = "@EMPTY@"; +constexpr char kEmptyVarName[] = "@EMPTY@"; /// If a variable is a temporary variable, that name will be set in Python, /// but it will be convert to a unique name in scope after OpCreator. -const std::string kTempVarName = "@TEMP@"; +constexpr char kTempVarName[] = "@TEMP@"; /// If a variable's name has a certain suffix, it means that the /// variable is the gradient of another varibale. /// e.g. Variable "x@GRAD" is the gradient of varibale "x". -const std::string kGradVarSuffix = "@GRAD"; +constexpr char kGradVarSuffix[] = "@GRAD"; /// Variables with this suffix are supposed to be filled up with zeros. -const std::string kZeroVarSuffix = "@ZERO"; +constexpr char kZeroVarSuffix[] = "@ZERO"; inline std::string GradVarName(const std::string& var_name) { return var_name + kGradVarSuffix; @@ -88,6 +88,8 @@ class OperatorBase { virtual bool IsNetOp() const { return false; } + virtual bool SupportGPU() const { return false; } + /// rename inputs outputs name void Rename(const std::string& old_name, const std::string& new_name); @@ -118,10 +120,10 @@ class OperatorBase { std::shared_ptr> in_out_idxs_; }; -class OperatorContext { +class InferShapeContext { public: - OperatorContext(const OperatorBase* op, const Scope& scope) - : op_(*op), scope_(scope) {} + InferShapeContext(const OperatorBase& op, const Scope& scope) + : op_(op), scope_(scope) {} size_t InputSize() const { return op_.inputs_.size(); } @@ -232,12 +234,6 @@ class OperatorContext { const Scope& scope_; }; -class InferShapeContext : public OperatorContext { - public: - InferShapeContext(const OperatorBase* op, const Scope& scope) - : OperatorContext(op, scope) {} -}; - template struct EigenDeviceConverter; @@ -253,11 +249,11 @@ struct EigenDeviceConverter { }; #endif -class ExecutionContext : public OperatorContext { +class ExecutionContext : public InferShapeContext { public: - ExecutionContext(const OperatorBase* op, const Scope& scope, + ExecutionContext(const OperatorBase& op, const Scope& scope, const platform::DeviceContext* device_context) - : OperatorContext(op, scope), device_context_(device_context) {} + : InferShapeContext(op, scope), device_context_(device_context) {} template , OpKernelHash>; - void InferShape(const Scope& scope) const { - InferShape(InferShapeContext(this, scope)); + void InferShape(const Scope& scope) const override { + InferShape(InferShapeContext(*this, scope)); } void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const final { auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); - opKernel->Compute(ExecutionContext(this, scope, &dev_ctx)); + opKernel->Compute(ExecutionContext(*this, scope, &dev_ctx)); } static std::unordered_map& @@ -324,6 +320,12 @@ class OperatorWithKernel : public OperatorBase { return g_all_op_kernels; } + bool SupportGPU() const override { + OperatorWithKernel::OpKernelKey key; + key.place_ = platform::GPUPlace(); + return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0; + } + protected: virtual void InferShape(const InferShapeContext& ctx) const = 0; }; diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 9ee2c6af86476ea50def237ed011fcddaa41daad..5fd6754e56caf0952e350200eff7be835900c962 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -18,13 +18,11 @@ limitations under the License. 
*/ #include "paddle/framework/backward.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/scope.h" #include "paddle/framework/tensor_py.h" #include "paddle/operators/net_op.h" -#include "paddle/operators/type_alias.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/string/to_string.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -42,8 +40,14 @@ USE_OP(softmax); USE_OP(rowwise_add); USE_OP(fill_zeros_like); USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP(gaussian_random); +USE_OP(uniform_random); + namespace paddle { namespace framework { + +using Tensor = framework::Tensor; + template void ExposeOperator(ClassType &m) { m.def("infer_shape", &ClassType::type::InferShape) @@ -56,6 +60,26 @@ void ExposeOperator(ClassType &m) { [](const typename ClassType::type &op) -> std::vector { return op.outputs_; }) + .def("inputs", + [](const typename ClassType::type &op) -> std::vector { + return op.inputs_; + }) + .def("support_gpu", &ClassType::type::SupportGPU) + .def("temp_outputs", + [](const typename ClassType::type &op) -> std::vector { + auto iter = op.attrs_.find("temporary_index"); + std::vector ret; + if (iter == op.attrs_.end()) { + return ret; + } else { + auto tmp_idx = boost::get>(iter->second); + ret.reserve(tmp_idx.size()); + for (auto &index : tmp_idx) { + ret.push_back(op.outputs_.at(index)); + } + return ret; + } + }) .def("__str__", &ClassType::type::DebugString); } @@ -129,8 +153,8 @@ All parameter, weight, gradient are variables in Paddle. [](Variable &self) -> Tensor * { return self.GetMutable(); }, py::return_value_policy::reference) .def("get_net", - [](Variable &self) -> ops::NetOp * { - return self.GetMutable(); + [](Variable &self) -> operators::NetOp * { + return self.GetMutable(); }, py::return_value_policy::reference); @@ -184,9 +208,13 @@ All parameter, weight, gradient are variables in Paddle. }); // clang-format on - py::class_(m, "GPUPlace").def(py::init()); + py::class_(m, "GPUPlace") + .def(py::init()) + .def("__str__", string::to_string); - py::class_(m, "CPUPlace").def(py::init<>()); + py::class_(m, "CPUPlace") + .def(py::init<>()) + .def("__str__", string::to_string); py::class_> operator_base( m, "Operator"); @@ -201,8 +229,6 @@ All parameter, weight, gradient are variables in Paddle. return OpRegistry::CreateOp(desc); }); - operator_base.def_static("support_gpu", &OpRegistry::SupportGPU); - operator_base.def("backward", [](const OperatorBase &forwardOp, const std::unordered_set &no_grad_vars) { @@ -211,23 +237,24 @@ All parameter, weight, gradient are variables in Paddle. 
ExposeOperator(operator_base); - py::class_> net(m, "Net"); + py::class_> net(m, "Net"); net.def_static("create", - []() -> std::shared_ptr { - auto retv = std::make_shared(); + []() -> std::shared_ptr { + auto retv = std::make_shared(); retv->type_ = "plain_net"; return retv; }) - .def("add_op", &ops::NetOp::AddOp) - .def( - "add_op", - [](ops::NetOp &self, const std::shared_ptr &net) -> void { - self.AddOp(std::static_pointer_cast(net)); - }) - .def("complete_add_op", &ops::NetOp::CompleteAddOp) - .def("complete_add_op", - [](std::shared_ptr &self) { self->CompleteAddOp(); }); + .def("add_op", &operators::NetOp::AddOp) + .def("add_op", + [](operators::NetOp &self, + const std::shared_ptr &net) -> void { + self.AddOp(std::static_pointer_cast(net)); + }) + .def("complete_add_op", &operators::NetOp::CompleteAddOp) + .def("complete_add_op", [](std::shared_ptr &self) { + self->CompleteAddOp(); + }); ExposeOperator(net); diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index c44df05e4b0fceed858fbf4f68eddc407a44c894..b57958591fb752132407c35958db0781d0e023f0 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -18,6 +18,8 @@ limitations under the License. */ #include #include #include +#include + #include "paddle/framework/ddim.h" #include "paddle/memory/memory.h" #include "paddle/platform/device_context.h" diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 20276181b974bb5b3d6cb40fb5e6c1295cf1c02f..7db38d5caeebccf710334e854faf785ef0f64063 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -19,7 +19,7 @@ TEST(Tensor, Dims) { using namespace paddle::framework; using namespace paddle::platform; Tensor tt; - tt.Resize(make_ddim({2, 3, 4})); + tt.Resize({2, 3, 4}); DDim dims = tt.dims(); ASSERT_EQ(arity(dims), 3); for (int i = 0; i < 3; ++i) { diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp index 6360a6e023ebd2f97c442c80c8d7f56b5ec4cbf7..7b0b1c6adbd36a6cbe9b89e9518fbe07fb1db368 100644 --- a/paddle/function/FunctionTest.cpp +++ b/paddle/function/FunctionTest.cpp @@ -93,8 +93,8 @@ TEST(Arguments, Matrix) { MatrixPtr matrix = Matrix::create(100, 200); CheckBufferArg check = [=](const BufferArg& arg) { EXPECT_EQ(arg.shape().ndims(), 2U); - EXPECT_EQ(arg.shape()[0], 100); - EXPECT_EQ(arg.shape()[1], 200); + EXPECT_EQ(arg.shape()[0], 100U); + EXPECT_EQ(arg.shape()[1], 200U); EXPECT_EQ(arg.data(), matrix->getData()); EXPECT_EQ(arg.matrix().getHeight(), matrix->getHeight()); @@ -112,8 +112,8 @@ TEST(Arguments, Matrix) { TEST(Arguments, Vector) { VectorPtr vector = Vector::create(100, false); CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 1); - EXPECT_EQ(arg.shape()[0], 100); + EXPECT_EQ(arg.shape().ndims(), 1U); + EXPECT_EQ(arg.shape()[0], 100U); EXPECT_EQ(arg.data(), vector->getData()); CpuVector inVector = arg.vector(); @@ -131,9 +131,9 @@ TEST(Arguments, Vector) { TEST(Arguments, CpuSparseMatrix) { CpuSparseMatrix sparse(200, 300, 50); CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 2); - EXPECT_EQ(arg.shape()[0], 200); - EXPECT_EQ(arg.shape()[1], 300); + EXPECT_EQ(arg.shape().ndims(), 2U); + EXPECT_EQ(arg.shape()[0], 200U); + EXPECT_EQ(arg.shape()[1], 300U); EXPECT_EQ(arg.data(), sparse.getData()); // CHECK_EQ(arg.sparse().nnz(), 50); // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT); @@ -152,10 +152,10 @@ TEST(Arguments, CpuSparseMatrix) { TEST(Arguments, BufferArg) { BufferArg 
arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3}); CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 3); - EXPECT_EQ(arg.shape()[0], 1); - EXPECT_EQ(arg.shape()[1], 2); - EXPECT_EQ(arg.shape()[2], 3); + EXPECT_EQ(arg.shape().ndims(), 3U); + EXPECT_EQ(arg.shape()[0], 1U); + EXPECT_EQ(arg.shape()[1], 2U); + EXPECT_EQ(arg.shape()[2], 3U); }; BufferArgs argments; diff --git a/paddle/function/TensorShapeTest.cpp b/paddle/function/TensorShapeTest.cpp index e5c698237706e7210d3045bbfd0088af58db2954..e55d516d4ac8e86b05911ac1a4e7095f53e2ea3c 100644 --- a/paddle/function/TensorShapeTest.cpp +++ b/paddle/function/TensorShapeTest.cpp @@ -44,7 +44,7 @@ TEST(TensorShape, GetAndSet) { EXPECT_EQ(t.ndims(), 3U); EXPECT_EQ(t.getElements(), 6U); - EXPECT_EQ(t[1], 2); + EXPECT_EQ(t[1], 2U); t.setDim(1, 100); EXPECT_EQ(t.getElements(), 300U); EXPECT_EQ(t[1], 100U); diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp index e96fd61fc1e9663baf6bb10e58a99f13945a5aae..3b5060e3ce980a255321c35a61be10f928b090a3 100644 --- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -98,10 +98,12 @@ void KmaxSeqScoreLayer::forward(PassType passType) { } // TODO(caoying) - // Here selSubSeqIdx is automatically converted from real to int - // This is very dangerous if user fill this matrix himself, invalid data may - // occur. The selected indices should be stored in - // CpuSparseMatrix with SparseValueType set to NO_VALUE. + // In PaddlePaddle, the currently available matrixes all a have real-typed + // data field, but the selected indices information are actually int-typed + // (with -1 as a special token). Storing indices information in real-typed + // Matrix leads to converting real to int. This is very dangerous if a user + // fills this matrix himself, invalid data may occur. + // The selected indices should be stored in an int-typed matrix. Matrix::resizeOrCreate( output_.value, input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index 410aba663e005872eb4fd6828f98a3a3bd737aac..424f8985539938d58a1a93eba109947780fca84b 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -32,10 +32,13 @@ public: private: // TODO(caoying) - // Here selSubSeqIdx is automatically converted from real to int - // This is very dangerous if user fill this matrix himself, invalid data - // may occur. The selected indices should be stored in CpuSparseMatrix - // with SparseValueType set to NO_VALUE. + // In PaddlePaddle, the currently available matrixes all a have real-typed + // data field, but the selected indices information are actually int-typed + // (with -1 as a special token). Storing indices information in real-typed + // Matrix leads to converting real to int. This is very dangerous if a user + // fills this matrix himself, invalid data may occur. + // The selected indices should be stored in an int-typed matrix. 
+ MatrixPtr startIdsOnCpu_; MatrixPtr endIdsOnCpu_; diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index 0db03002702ca627a605f5b1c96736d6b83395be..c8607d50f58789afa1ae5168ec5b4113863c58c1 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -59,6 +59,13 @@ private: const std::vector>& inputSeqInfo); // if the second input of this layer is on GPU memory, copy it to CPU memory. + // TODO(caoying) + // In PaddlePaddle, the currently available matrixes all a have real-typed + // data field, but the selected indices information are actually int-typed + // (with -1 as a special token). Storing indices information in real-typed + // Matrix leads to converting real to int. This is very dangerous if a user + // fills this matrix himself, invalid data may occur. + // The selected indices should be stored in an int-typed matrix. MatrixPtr selIdsCpu_; // reorganized sequenceStartPositions and subSequenceStartPositions @@ -95,12 +102,7 @@ void SubNestedSequenceLayer::calSelectedRows( for (size_t i = 0; i < seqNum; ++i) { for (size_t j = 0; j < beamSize; ++j) { if (selectedIndices->getElement(i, j) == -1.) break; - // TODO(caoying) - // Here selSubSeqIdx is automatically converted from real to int - // This is very dangerous if user fill this matrix himself, invalid data - // may occur. The selected indices should be stored in - // CpuSparseMatrix with SparseValueType set to NO_VALUE. - int selSubSeqIdx = selectedIndices->getElement(i, j); + size_t selSubSeqIdx = selectedIndices->getElement(i, j); CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] - @@ -139,7 +141,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " << "must be a nested sequence."; const MatrixPtr selectedIndices = getInputValue(1); - CHECK_EQ(inputSeq.getNumSequences(), selectedIndices->getHeight()); + CHECK_EQ(size_t(inputSeq.getNumSequences()), selectedIndices->getHeight()); if (dynamic_cast(selectedIndices.get())) { /* diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/gserver/tests/test_KmaxSeqScore.cpp index f958b4974d45ef65f8f374148a31ad3a6ce7632f..a51fe390c74d74cd5f3d07df62b715b239335548 100644 --- a/paddle/gserver/tests/test_KmaxSeqScore.cpp +++ b/paddle/gserver/tests/test_KmaxSeqScore.cpp @@ -88,7 +88,7 @@ void checkLayerOut(vector> groundTruth, TEST(Layer, kmaxSeqScoreLayer) { const size_t maxBeamSize = 100; - int beamSize = 1 + (rand() % maxBeamSize); + size_t beamSize = 1 + (rand() % maxBeamSize); vector seqStartPosition; vector subSeqStartPosition; diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 531c3c8affac3109a9b795b0d699fb0652c1660e..799345aa47b69dbb31eef2e1bcc0ca39942e103d 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -45,16 +45,15 @@ cc_library(net_op SRCS net_op.cc DEPS op_registry) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) op_library(add_op SRCS add_op.cc add_op.cu) -cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) op_library(mean_op SRCS mean_op.cc mean_op.cu) -cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op) op_library(mul_op SRCS mul_op.cc mul_op.cu) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu) op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) 
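Several hunks above change indices and test expectations from int to size_t (or to unsigned literals such as 100U). A small sketch, with illustrative names only, of the pitfall those changes avoid: comparing a signed int against an unsigned value triggers -Wsign-compare, and a negative signed value converts to a huge unsigned one.

#include <cstddef>
#include <iostream>

int main() {
  int selected = -1;         // e.g. the "no selection" marker used in the layers above
  std::size_t height = 200;  // e.g. a Matrix height, which is unsigned
  // if (selected < height) ...  // -Wsign-compare: selected is converted to size_t first
  std::cout << static_cast<std::size_t>(selected) << "\n";  // 18446744073709551615 on 64-bit
  // Hence the explicit casts in the diff, e.g. CHECK_EQ(size_t(numSequences), height).
  return 0;
}
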
+op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu) op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) @@ -66,3 +65,5 @@ op_library(fc_op op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS op_desc tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) +op_library(uniform_random_op + SRCS uniform_random_op.cc uniform_random_op.cu) diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index fb85093bb2f4ef7950bd3bab3d0b7b9348763448..086245ef62d759ab20a3684ddbc015f6c6258639 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -17,9 +17,9 @@ limitations under the License. */ namespace paddle { namespace operators { -class AddOp : public OperatorWithKernel { +class AddOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "Inputs of AddOp must all be set"); @@ -31,9 +31,9 @@ class AddOp : public OperatorWithKernel { } }; -class AddOpMaker : public OpProtoAndCheckerMaker { +class AddOpMaker : public framework::OpProtoAndCheckerMaker { public: - AddOpMaker(OpProto *proto, OpAttrChecker *op_checker) + AddOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of add op"); AddInput("Y", "The second input of add op"); @@ -46,14 +46,17 @@ The equation is: Out = X + Y } }; -class AddOpGrad : public OperatorWithKernel { +class AddOpGrad : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override {} + void InferShape(const framework::InferShapeContext &ctx) const override {} }; } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker); REGISTER_GRADIENT_OP(add_two, add_two_grad, ops::AddOpGrad); -REGISTER_OP_CPU_KERNEL(add_two, ops::AddKernel); + +REGISTER_OP_CPU_KERNEL(add_two, + ops::AddKernel); diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index 9bd08634da96c5595d6dd702ad9afafb94632b03..cec5f558cbc161124620ad4241d6bd8a5324277c 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -16,4 +16,6 @@ #include "paddle/framework/op_registry.h" #include "paddle/operators/add_op.h" -REGISTER_OP_GPU_KERNEL(add_two, ops::AddKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(add_two, + ops::AddKernel); diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index 9db19a61381fdb11350276d51d3ebbf083672022..d76c10957e943deb970b1d79a1507a36669314e3 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -13,15 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class AddKernel : public OpKernel { +class AddKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input0 = context.Input(0); auto input1 = context.Input(1); auto output = context.Output(0); diff --git a/paddle/operators/add_op_test.cc b/paddle/operators/add_op_test.cc deleted file mode 100644 index 3d52f5498323dbb7ca0ff25d038947f0ddb2017e..0000000000000000000000000000000000000000 --- a/paddle/operators/add_op_test.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#define private public -#include -USE_OP(add_two); -// USE_OP(add_two_grad); - -TEST(AddOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("add_two"); - ASSERT_NE(it, protos.end()); - auto& op_creators = paddle::framework::OpRegistry::op_creators(); - auto it1 = op_creators.find("add_two_grad"); - ASSERT_NE(it1, op_creators.end()); -} diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 942b919079bf06caeb6d185efb31d9d28d193008..c813d54e17fa48aa4447ef76b918b7355be52b09 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -17,9 +17,9 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -class OnehotCrossEntropyOp : public OperatorWithKernel { +class OnehotCrossEntropyOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of OnehotCrossEntropyOp must be two"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, @@ -37,9 +37,9 @@ class OnehotCrossEntropyOp : public OperatorWithKernel { } }; -class OnehotCrossEntropyGradientOp : public OperatorWithKernel { +class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { auto X_grad = ctx.Output(framework::GradVarName("X")); auto X = ctx.Input("X"); @@ -48,9 +48,10 @@ class OnehotCrossEntropyGradientOp : public OperatorWithKernel { } }; -class OnehotCrossEntropyOpMaker : public OpProtoAndCheckerMaker { +class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { public: - OnehotCrossEntropyOpMaker(OpProto *proto, OpAttrChecker *op_checker) + OnehotCrossEntropyOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of OnehotCrossEntropyOp"); AddInput("label", "The second input of OnehotCrossEntropyOp"); @@ -66,11 +67,14 @@ OnehotCrossEntropy Operator. } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp, ops::OnehotCrossEntropyOpMaker); -REGISTER_OP_CPU_KERNEL(onehot_cross_entropy, - ops::OnehotCrossEntropyOpKernel); - +REGISTER_OP_CPU_KERNEL( + onehot_cross_entropy, + ops::OnehotCrossEntropyOpKernel); +REGISTER_GRADIENT_OP(onehot_cross_entropy, onehot_cross_entropy_grad, + ops::OnehotCrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL( onehot_cross_entropy_grad, - ops::OnehotCrossEntropyGradientOpKernel); + ops::OnehotCrossEntropyGradientOpKernel); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index ec73721a810fa86d65409f643401eb77248ad5de..4bbc8f093a794d46737a16488684a6a0cc25e285 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -14,3 +14,8 @@ #define EIGEN_USE_GPU #include "paddle/operators/cross_entropy_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + onehot_cross_entropy, + ops::OnehotCrossEntropyOpKernel); diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index e02e3e2945af13fe283f95f7faa03b2a76d06125..d1bbc2cb66d6ce84ddcdcb87648f23c6ce77b748 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -13,11 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; + template T tolerable_value(T x) { static_assert(std::is_floating_point::value, @@ -38,9 +40,9 @@ T tolerable_value(T x) { } template -class OnehotCrossEntropyOpKernel : public OpKernel { +class OnehotCrossEntropyOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto X = ctx.Input("X"); const T* Xdata = X->data(); const int* label_data = ctx.Input(1)->data(); @@ -61,9 +63,9 @@ class OnehotCrossEntropyOpKernel : public OpKernel { }; template -class OnehotCrossEntropyGradientOpKernel : public OpKernel { +class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto X = ctx.Input("X"); auto dX = ctx.Output(framework::GradVarName("X")); auto dY = ctx.Input(framework::GradVarName("Y")); diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc index b5cf236bac6bb5abe061f7b4ad469d20e0af76a9..01a1a81206f160386467b3c789a41206d89576b6 100644 --- a/paddle/operators/fc_op.cc +++ b/paddle/operators/fc_op.cc @@ -12,11 +12,16 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "type_alias.h" +#include "paddle/operators/net_op.h" + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using OpRegistry = framework::OpRegistry; + class FullyConnectedOp : public NetOp { public: void Init() override { @@ -39,9 +44,10 @@ class FullyConnectedOp : public NetOp { } }; -class FullyConnectedOpMaker : public OpProtoAndCheckerMaker { +class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker { public: - FullyConnectedOpMaker(OpProto *proto, OpAttrChecker *op_checker) + FullyConnectedOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "the input of fc operator"); AddInput("W", "the weight of fc operator"); @@ -66,4 +72,5 @@ USE_OP(rowwise_add); USE_OP(sigmoid); USE_OP(softmax); +namespace ops = paddle::operators; REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 6dcc9372b2ee25c7e653282e7763e97d56be6262..3759a886780e555ccdc6286c4b200a5d14214691 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -50,8 +50,8 @@ The output will have the same size with input. 
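The cross-entropy kernels above pass their log() results through tolerable_value. The header is only partially visible here, so the following is a hedged reconstruction of the idea rather than the exact Paddle helper: clamp plus and minus infinity to a large finite constant so that a single log(0) does not turn the whole loss into inf or nan.

#include <limits>
#include <type_traits>

// Sketch of a tolerable_value-style clamp (assumed semantics).
template <typename T>
T TolerableValue(T x) {
  static_assert(std::is_floating_point<T>::value,
                "TolerableValue works only on floating point values");
  const T kApproInf = static_cast<T>(1e20);
  if (x == std::numeric_limits<T>::infinity()) return kApproInf;
  if (x == -std::numeric_limits<T>::infinity()) return -kApproInf;
  return x;  // ordinary values (and NaN) pass through unchanged in this sketch
}
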
} // namespace operators } // namespace paddle -REGISTER_OP(fill_zeros_like, paddle::operators::FillZerosLikeOp, - paddle::operators::FillZerosLikeOpMaker); +namespace ops = paddle::operators; +REGISTER_OP(fill_zeros_like, ops::FillZerosLikeOp, ops::FillZerosLikeOpMaker); REGISTER_OP_CPU_KERNEL( fill_zeros_like, - paddle::operators::FillZerosLikeKernel); + ops::FillZerosLikeKernel); diff --git a/paddle/operators/fill_zeros_like_op.cu b/paddle/operators/fill_zeros_like_op.cu index 4f1054cf47e35572dbbc51ca742994065a027919..fdbcf520a0d7b4ddfe3fc1837a21e0ce88b8e8fa 100644 --- a/paddle/operators/fill_zeros_like_op.cu +++ b/paddle/operators/fill_zeros_like_op.cu @@ -16,6 +16,7 @@ #include "paddle/framework/op_registry.h" #include "paddle/operators/fill_zeros_like_op.h" +namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( fill_zeros_like, - paddle::operators::FillZerosLikeKernel); + ops::FillZerosLikeKernel); diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index dfaed2c9aaf2bf5c1a9b803fc9c8b9ea0e5c5d4e..f846c7a8ab15e2cd997564edb36660a1360227a8 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..ef417ae2f06e8a9f10aed80674015e2ee448f4a3 --- /dev/null +++ b/paddle/operators/gaussian_random_op.cc @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class GaussianRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + float mean = context.op_.GetAttr("mean"); + float std = context.op_.GetAttr("std"); + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + + // TODO(dzh): attribute does not support unsigned int. + // And we need a global random seed configuration. + int seed = context.op_.GetAttr("seed"); + if (seed == 0) { + seed = std::random_device()(); + } + std::mt19937 g(seed); + std::normal_distribution distribution(mean, std); + ssize_t size = framework::product(tensor->dims()); + for (int i = 0; i < size; ++i) { + data[i] = distribution(g); + } + } +}; + +class GaussianRandomOp : public framework::OperatorWithKernel { + protected: + void InferShape(const framework::InferShapeContext& context) const override { + auto* tensor = context.Output(0); + auto dims = GetAttr>("dims"); + PADDLE_ENFORCE(dims.size() > 0UL, + "dims can be one int or array. 
dims must be set."); + tensor->Resize(framework::make_ddim(dims)); + } +}; + +class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { + public: + GaussianRandomOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "output matrix of random op"); + AddComment(R"DOC( +GaussianRandom operator. +Use to initialize tensor with gaussian random generator. +)DOC"); + + AddAttr>("dims", "The dimension of random tensor."); + AddAttr("mean", "mean value of random.").SetDefault(.0f); + AddAttr("std", "minimum value of random value.").SetDefault(1.0f); + AddAttr("seed", + "Random seed of generator." + "0 means use system wide seed") + .SetDefault(0); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(gaussian_random, ops::GaussianRandomOp, ops::GaussianRandomOpMaker); +REGISTER_OP_CPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..54e4ae5d2b255f72582b9826685bfacf6c565fab --- /dev/null +++ b/paddle/operators/gaussian_random_op.cu @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include "paddle/platform/dynload/curand.h" +#include "paddle/platform/gpu_info.h" + +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class GaussianRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + float mean = context.op_.GetAttr("mean"); + float std = context.op_.GetAttr("std"); + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + + int seed = context.op_.GetAttr("seed"); + if (seed == 0) { + seed = std::random_device()(); + } + curandGenerator_t g; + PADDLE_ENFORCE(platform::dynload::curandCreateGenerator( + &g, CURAND_RNG_PSEUDO_DEFAULT)); + PADDLE_ENFORCE( + platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed)); + curandGenerateNormal(g, data, framework::product(tensor->dims()), mean, + std); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); \ No newline at end of file diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index 8ab4e82ac4b795126af7707ce19c6c00da48ee56..2ea049cb3605f4dedabb992ebc0e8aa276ad5e9a 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -17,9 +17,9 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -class MeanOp : public OperatorWithKernel { +class MeanOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of AddOp must be one"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "input should be set"); @@ -28,9 +28,9 @@ class MeanOp : public OperatorWithKernel { } }; -class MeanOpMaker : public OpProtoAndCheckerMaker { +class MeanOpMaker : public framework::OpProtoAndCheckerMaker { public: - MeanOpMaker(OpProto *proto, OpAttrChecker *op_checker) + MeanOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input of mean op"); AddOutput("Out", "The output of mean op").IgnoreGradient(); @@ -38,10 +38,10 @@ class MeanOpMaker : public OpProtoAndCheckerMaker { } }; -class MeanGradOp : public OperatorWithKernel { +class MeanGradOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { - ctx.Output("X" + framework::kGradVarSuffix) + void InferShape(const framework::InferShapeContext &ctx) const override { + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("X")->dims()); } }; @@ -49,7 +49,10 @@ class MeanGradOp : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker); -REGISTER_OP_CPU_KERNEL(mean, ops::MeanKernel); +REGISTER_OP_CPU_KERNEL(mean, + ops::MeanKernel); REGISTER_GRADIENT_OP(mean, mean_grad, ops::MeanGradOp); -REGISTER_OP_CPU_KERNEL(mean_grad, ops::MeanGradKernel); +REGISTER_OP_CPU_KERNEL(mean_grad, + ops::MeanGradKernel); diff --git a/paddle/operators/mean_op.cu b/paddle/operators/mean_op.cu index 8b97b0154ccdc8c41a90f7580af829c5c8663b60..7af624d81dc5ffbb5c31b4d6f6eb8f9f8652a431 100644 --- a/paddle/operators/mean_op.cu +++ b/paddle/operators/mean_op.cu @@ -16,5 +16,8 @@ #include "paddle/operators/mean_op.h" -REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel); -REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(mean, + ops::MeanKernel); +REGISTER_OP_GPU_KERNEL(mean_grad, + ops::MeanGradKernel); diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index 40a1e2d099acad90b1bbac50f62ea7c4f691c1b4..e8595a14faa7c1b03734f814c78f9cbf1819fbb5 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -13,15 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenScalar = framework::EigenScalar; +template +using EigenVector = framework::EigenVector; + template -class MeanKernel : public OpKernel { +class MeanKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input(0); auto output = context.Output(0); @@ -36,13 +45,13 @@ class MeanKernel : public OpKernel { }; template -class MeanGradKernel : public OpKernel { +class MeanGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { - auto OG = context.Input("Out" + framework::kGradVarSuffix); + void Compute(const framework::ExecutionContext& context) const override { + auto OG = context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE(framework::product(OG->dims()) == 1, "Mean Gradient should be scalar"); - auto IG = context.Output("X" + framework::kGradVarSuffix); + auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); T ig_size = (T)framework::product(IG->dims()); diff --git a/paddle/operators/mean_op_test.cc b/paddle/operators/mean_op_test.cc deleted file mode 100644 index 375dcd50e130355c60f82b9d39d1b94fb2c911b0..0000000000000000000000000000000000000000 --- a/paddle/operators/mean_op_test.cc +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include - -USE_OP(mean); - -TEST(MeanOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("mean"); - ASSERT_NE(it, protos.end()); -} diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index ccab9a994cc7aa9e389bd259e4c7365a06e93aa1..db81fd555d1c7bea7c0c3bbd70266b4952ed3724 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -17,9 +17,9 @@ namespace paddle { namespace operators { -class MulOp : public OperatorWithKernel { +class MulOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs"); auto dim0 = ctx.Input(0)->dims(); auto dim1 = ctx.Input(1)->dims(); @@ -37,9 +37,9 @@ class MulOp : public OperatorWithKernel { } }; -class MulOpMaker : public OpProtoAndCheckerMaker { +class MulOpMaker : public framework::OpProtoAndCheckerMaker { public: - MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of mul op"); AddInput("Y", "The second input of mul op"); @@ -52,9 +52,9 @@ The equation is: Out = X * Y } }; -class MulOpGrad : public OperatorWithKernel { +class MulOpGrad : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override {} + void InferShape(const framework::InferShapeContext &ctx) const override {} std::string DebugString() const override { LOG(INFO) << "MulGrad"; return ""; @@ -64,7 +64,8 @@ class MulOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker); REGISTER_GRADIENT_OP(mul, mul_grad, ops::MulOpGrad); -REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); +REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index 1dc04c4297daed7a7861a09cf6b99446c296ffa5..43debbc21a365a15c914e60e151f7782b82080cb 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -15,4 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/mul_op.h" -REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index 7ecd6e8ac01c9efeabe9d2873da39503966ba8df..ab12631c03453a18fbb067e2d12c2bc332acd567 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -13,16 +13,21 @@ limitations under the License. 
*/ #pragma once - -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + template -class MulKernel : public OpKernel { +class MulKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { Eigen::array, 1> dim_pair = { {Eigen::IndexPair(1, 0)}}; @@ -40,5 +45,6 @@ class MulKernel : public OpKernel { Z.device(place) = X.contract(Y, dim_pair); } }; + } // namespace operators } // namespace paddle diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index fbc98e09923bda7f3baee04e02df9076247bff0b..a466c4f30fe87db4ad2a44518e083b57f3cbc2ed 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -15,7 +15,6 @@ */ #include "paddle/operators/net_op.h" -#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index bb2d02b56f48ac4b2f3b1ca742ae6d6141d3454e..792b336675fc97659d9a23358cf3d48ede56e54e 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -14,13 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/framework/op_desc.pb.h" -#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/scope.h" -#include "paddle/operators/type_alias.h" -#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -65,6 +59,15 @@ class NetOp : public framework::OperatorBase { } } + bool SupportGPU() const override { + for (auto& op : ops_) { + if (!op->SupportGPU()) { + return false; + } + } + return true; + } + /** * @brief Add an operator by ptr */ diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index c0a345464a34329d42c7bf753ca94fd07195b8e0..76bf79f9b51fd759da2d02cd90fa458a32be4178 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -2,31 +2,27 @@ #include -#include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" - namespace paddle { namespace operators { +using Scope = framework::Scope; +using DeviceContext = platform::DeviceContext; static int infer_shape_cnt = 0; static int run_cnt = 0; -class TestOp : public OperatorBase { +class TestOp : public framework::OperatorBase { public: - void InferShape(const framework::Scope& scope) const override { - ++infer_shape_cnt; - } - void Run(const framework::Scope& scope, - const paddle::platform::DeviceContext& dev_ctx) const override { + void InferShape(const Scope& scope) const override { ++infer_shape_cnt; } + void Run(const Scope& scope, + const platform::DeviceContext& dev_ctx) const override { ++run_cnt; } }; -class EmptyOp : public OperatorBase { +class EmptyOp : public framework::OperatorBase { public: void InferShape(const Scope& scope) const override {} - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override {} + void Run(const Scope& scope, const DeviceContext& dev_ctx) const override {} }; template @@ -72,7 +68,7 @@ TEST(OpKernel, all) { net->Run(scope, dev_ctx); ASSERT_EQ(2, infer_shape_cnt); ASSERT_EQ(2, run_cnt); - ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); + ASSERT_THROW(net->AddOp(op2), 
platform::EnforceNotMet); } TEST(NetOp, insert_op) { diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 5e9c15ca0e6a7c56611a0fadda6c3c0839f309e6..243837420562634c3d99fd0acf234ebd53539735 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -14,17 +14,19 @@ #include "paddle/operators/recurrent_op.h" -#include #include #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" -#include "paddle/platform/enforce.h" namespace paddle { namespace operators { +using Scope = framework::Scope; +using Variable = framework::Variable; +using Tensor = framework::Tensor; + void RecurrentAlgorithm::InferShape(const Scope& scope) const { seq_len_ = scope.FindVar((arg_->inlinks[0]).external) ->GetMutable() @@ -135,10 +137,11 @@ void RecurrentOp::Init() { alg_.Init(std::move(arg)); } -class RecurrentAlgorithmProtoAndCheckerMaker : public OpProtoAndCheckerMaker { +class RecurrentAlgorithmProtoAndCheckerMaker + : public framework::OpProtoAndCheckerMaker { public: - RecurrentAlgorithmProtoAndCheckerMaker(OpProto* proto, - OpAttrChecker* op_checker) + RecurrentAlgorithmProtoAndCheckerMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { const auto& name = RecurrentOp::kArgName; // inputs and outputs stored in proto diff --git a/paddle/operators/recurrent_op_test.cc b/paddle/operators/recurrent_op_test.cc index 3607d14bf875dc2892fbbdc4dbc9ccf87c1b9784..6ce28a2b52e3b90596a68714bfdbc07d2d4814d5 100644 --- a/paddle/operators/recurrent_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -27,6 +27,10 @@ namespace operators { using framework::make_ddim; using framework::DDim; +using framework::Tensor; +using framework::Variable; +using framework::Scope; +using framework::OpRegistry; class RecurrentOpTest : public ::testing::Test { protected: @@ -164,7 +168,7 @@ class RecurrentOpTest : public ::testing::Test { // father scope Scope scope_; - std::shared_ptr rnn_op_; + std::shared_ptr rnn_op_; }; TEST_F(RecurrentOpTest, Run) { diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index 32c6c2dd4efa85359b4e95471e8ba09e56afec57..7e4770630ed2a49214194689aa489e6ab8e476da 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -18,7 +18,9 @@ namespace paddle { namespace operators { namespace rnn { -namespace fmw = paddle::framework; +namespace f = paddle::framework; + +using Tensor = framework::Tensor; void SegmentInputs(const std::vector& step_scopes, const std::vector& inlinks, const size_t seq_len, @@ -30,10 +32,10 @@ void SegmentInputs(const std::vector& step_scopes, inlinks[i].external); Tensor* input = input_var->GetMutable(); - fmw::DDim dims = input->dims(); + f::DDim dims = input->dims(); PADDLE_ENFORCE(static_cast(dims[0]) == seq_len, "all the inlinks must have same length"); - fmw::DDim step_dims = slice_ddim(dims, 1, dims.size()); + f::DDim step_dims = slice_ddim(dims, 1, dims.size()); for (size_t j = 0; j < seq_len; j++) { Tensor* step_input = step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable(); @@ -58,11 +60,10 @@ void ConcatOutputs(const std::vector& step_scopes, auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal); PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope", outlinks[i].internal); - fmw::DDim step_dims = - step_scope_var->template GetMutable()->dims(); + f::DDim step_dims = step_scope_var->template 
GetMutable()->dims(); std::vector dims_vec = vectorize(step_dims); dims_vec.insert(dims_vec.begin(), seq_len); - output->Resize(fmw::make_ddim(dims_vec)); + output->Resize(f::make_ddim(dims_vec)); } else { output->mutable_data(platform::CPUPlace()); for (size_t j = 0; j < seq_len; j++) { @@ -104,7 +105,7 @@ void LinkMemories(const std::vector& scopes, } void InitArgument(const ArgumentName& name, Argument* arg, - const OperatorBase& op) { + const framework::OperatorBase& op) { arg->step_net = op.Input(name.step_net); arg->step_scopes = op.Output(name.step_scopes); diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h index 379754b98fcead6debe0a60efa62fce4b7761940..17941c503cfcc83415b8bc635623a2c2ce2981c3 100644 --- a/paddle/operators/rnn/recurrent_op_utils.h +++ b/paddle/operators/rnn/recurrent_op_utils.h @@ -17,12 +17,13 @@ #include #include "paddle/framework/operator.h" -#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { namespace rnn { +using Scope = framework::Scope; + /** * Memory of a RNN (same as the role of `Momory` in PaddlePaddle). * @@ -86,7 +87,7 @@ void LinkMemories(const std::vector& step_scopes, const int offset, bool infer_shape_mode); void InitArgument(const ArgumentName& name, Argument* arg, - const OperatorBase& op); + const framework::OperatorBase& op); } // namespace rnn } // namespace operators diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index 8d1a36f2b332faad516ced012a409ca428bbf689..55ed1c2f4c316656de94b24dd95b053a89d7e74e 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -13,12 +13,13 @@ limitations under the License. */ #include "paddle/operators/rowwise_add_op.h" + namespace paddle { namespace operators { -class RowWiseAddOp : public OperatorWithKernel { +class RowWiseAddOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 2UL, "Two inputs is needed by rowwise add"); auto dim0 = ctx.Input(0)->dims(); @@ -32,9 +33,10 @@ class RowWiseAddOp : public OperatorWithKernel { } }; -class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { +class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker { public: - RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) + RowWiseAddOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The left input of row-wise add op, must be matrix"); AddInput("b", "The right input of row-wise add op, must be vector"); @@ -50,6 +52,7 @@ for i in xrange(X.shape[0]): } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(rowwise_add, ops::RowWiseAddOp, ops::RowWiseAddOpMaker); -REGISTER_OP_CPU_KERNEL(rowwise_add, - ops::RowWiseAddKernel); +REGISTER_OP_CPU_KERNEL( + rowwise_add, ops::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index f76faa0a3a93a1ac277a1d1aa83c3fa6c3944648..86f80b81228a69ac4c05a4693901570f2b9966e0 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -15,5 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/rowwise_add_op.h" -REGISTER_OP_GPU_KERNEL(rowwise_add, - ops::RowWiseAddKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + rowwise_add, 
ops::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index b52524c47c7b80d8ddc6a94a4a6d03db8034088d..82e9d70e959441869b958c1241fa5f5beef4c50c 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -13,15 +13,24 @@ limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; +template +using EigenMatrix = framework::EigenMatrix; + template -class RowWiseAddKernel : public OpKernel { +class RowWiseAddKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto out = context.Output(0); out->mutable_data(context.GetPlace()); diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index e0532f2f090aecead499ccef8afb117876be5c78..f9a28ff8a6a06c5c239c4e6ec21eacb410cc162f 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -17,9 +17,9 @@ limitations under the License. */ namespace paddle { namespace operators { -class SGDOp : public OperatorWithKernel { +class SGDOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of SGDOp must be one"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "inputs[0] mast be set"); @@ -31,9 +31,9 @@ class SGDOp : public OperatorWithKernel { } }; -class SGDOpMaker : public OpProtoAndCheckerMaker { +class SGDOpMaker : public framework::OpProtoAndCheckerMaker { public: - SGDOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("param", "input parameter"); AddInput("grad", "input gradient"); @@ -51,5 +51,7 @@ param_out = param - learning_rate * grad; } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(sgd, ops::SGDOp, ops::SGDOpMaker); -REGISTER_OP_CPU_KERNEL(sgd, ops::SGDOpKernel); +REGISTER_OP_CPU_KERNEL(sgd, + ops::SGDOpKernel); diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index 72629ccfbb8bc8ec53045289bd985c721c62fa10..f5ba6d3c29f8dfbfdea4fbf2c3d5fd7f5b358666 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -15,4 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/sgd_op.h" -REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(sgd, + ops::SGDOpKernel); diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index bf5b195933fce7faa46bcc96032e784076178cf7..bfb449d0b029409eda4177fc7643810ee6a1df3d 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -13,15 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class SGDOpKernel : public OpKernel { +class SGDOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto param = ctx.Input("param"); auto grad = ctx.Input("grad"); auto param_out = ctx.Output(0); diff --git a/paddle/operators/sgd_op_test.cc b/paddle/operators/sgd_op_test.cc deleted file mode 100644 index 75137259f5e608b259b073101353e5818bb17c92..0000000000000000000000000000000000000000 --- a/paddle/operators/sgd_op_test.cc +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -USE_OP(sgd); -TEST(SGDOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("sgd"); - ASSERT_NE(it, protos.end()); -} diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index 1eb795faa858796f7a34aa495b43d043fdb5dd43..bc5e0bbb183f9bdf0a3fa8a5a02499282fbd6b98 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -13,21 +13,23 @@ limitations under the License. 
*/ #include "paddle/operators/sigmoid_op.h" + namespace paddle { namespace operators { -class SigmoidOp : public OperatorWithKernel { +class SigmoidOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 1, "Sigmoid Op only have one input"); PADDLE_ENFORCE(ctx.OutputSize() == 1, "Sigmoid Op only have one output"); ctx.Output(0)->Resize(ctx.Input(0)->dims()); } }; -class SigmoidOpMaker : public OpProtoAndCheckerMaker { +class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { public: - SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SigmoidOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "sigmoid input"); AddOutput("Y", "sigmoid output"); @@ -35,9 +37,9 @@ class SigmoidOpMaker : public OpProtoAndCheckerMaker { } }; -class SigmoidOpGrad : public OperatorWithKernel { +class SigmoidOpGrad : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output(0)->Resize(ctx.Input(0)->dims()); } }; @@ -45,9 +47,11 @@ class SigmoidOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker); REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, ops::SigmoidOpGrad); -REGISTER_OP_CPU_KERNEL(sigmoid, ops::SigmoidKernel); -REGISTER_OP_CPU_KERNEL(sigmoid_grad, - ops::SigmoidGradKernel); +REGISTER_OP_CPU_KERNEL(sigmoid, + ops::SigmoidKernel); +REGISTER_OP_CPU_KERNEL( + sigmoid_grad, ops::SigmoidGradKernel); diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu index e80ba081f2ff805664cf92f3cb47e9ad51889058..1a50dfe14a7b9e2614aadb7729de9f9e461e9905 100644 --- a/paddle/operators/sigmoid_op.cu +++ b/paddle/operators/sigmoid_op.cu @@ -15,6 +15,9 @@ #define EIGEN_USE_GPU #include "paddle/operators/sigmoid_op.h" -REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel); -REGISTER_OP_GPU_KERNEL(sigmoid_grad, - ops::SigmoidGradKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(sigmoid, + ops::SigmoidKernel); +REGISTER_OP_GPU_KERNEL( + sigmoid_grad, ops::SigmoidGradKernel); diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index d513261e74423ce93a50eaaaec1c7d5fadb8f4a8..7af879b2091e4a7f80a3a64be029394156650c23 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -13,16 +13,21 @@ limitations under the License. 
*/ #pragma once - -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class SigmoidKernel : public OpKernel { +class SigmoidKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input(0); auto output = context.Output(0); output->mutable_data(context.GetPlace()); @@ -37,9 +42,9 @@ class SigmoidKernel : public OpKernel { }; template -class SigmoidGradKernel : public OpKernel { +class SigmoidGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto Y_t = context.Input("Y"); auto dY_t = context.Input(framework::GradVarName("Y")); auto dX_t = context.Output(framework::GradVarName("X")); diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index c08e1b153c05baa474bcd344c1e87405193cb688..3dd4e86918a86f408e7867d15b4fdc8f9cbbb5ce 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -17,9 +17,9 @@ limitations under the License. */ namespace paddle { namespace operators { -class SoftmaxOp : public OperatorWithKernel { +class SoftmaxOp : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, "Only one input is need for softmax"); PADDLE_ENFORCE_EQ(ctx.Input("X")->dims().size(), 2UL, @@ -30,9 +30,10 @@ class SoftmaxOp : public OperatorWithKernel { } }; -class SoftmaxOpMaker : public OpProtoAndCheckerMaker { +class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { public: - SoftmaxOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SoftmaxOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "input of softmax"); AddOutput("Y", "output of softmax"); @@ -40,9 +41,9 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker { } }; -class SoftmaxOpGrad : public OperatorWithKernel { +class SoftmaxOpGrad : public framework::OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL, "Input of SoftmaxOpGrad should be 3, X, Y, YG"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, @@ -61,8 +62,11 @@ class SoftmaxOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; + REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); -REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel); +REGISTER_OP_CPU_KERNEL(softmax, + ops::SoftmaxKernel); REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); -REGISTER_OP_CPU_KERNEL(softmax_grad, - ops::SoftmaxGradKernel); +REGISTER_OP_CPU_KERNEL( + softmax_grad, ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu index b79228580a7ea0f70b62eb2dc7a61cf85bc0b5fb..2e99a89699dbdcafc8055c47debf9e49f10507e6 100644 --- a/paddle/operators/softmax_op.cu +++ b/paddle/operators/softmax_op.cu @@ -1,4 +1,4 @@ -/* 
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,9 +13,11 @@ limitations under the License. */ #define EIGEN_USE_GPU -#include "paddle/framework/op_registry.h" #include "paddle/operators/softmax_op.h" -REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel); -REGISTER_OP_GPU_KERNEL(softmax_grad, - ops::SoftmaxGradKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(softmax, + ops::SoftmaxKernel); +REGISTER_OP_GPU_KERNEL( + softmax_grad, ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index b2dbcf57edf1a64da8da0d9a4c14d708eec17f3f..4fa6b59540498638c3b7df639ae10a66c0fa1c16 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -13,19 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - -#include "paddle/framework/ddim.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/tensor.h" -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + template -class SoftmaxKernel : public OpKernel { +class SoftmaxKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input("X"); auto output = context.Output("Y"); output->mutable_data(context.GetPlace()); @@ -62,9 +64,9 @@ class SoftmaxKernel : public OpKernel { }; template -class SoftmaxGradKernel : public OpKernel { +class SoftmaxGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { std::shared_ptr scale_ = std::make_shared(); auto Y = context.Input("Y"); diff --git a/paddle/operators/type_alias.h b/paddle/operators/type_alias.h deleted file mode 100644 index eac12d35dd8d2977191218167ebb0a6e638d5d73..0000000000000000000000000000000000000000 --- a/paddle/operators/type_alias.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#pragma once - -#include "paddle/framework/eigen.h" -#include "paddle/framework/op_registry.h" -#include "paddle/operators/net_op.h" - -namespace paddle { -namespace operators { - -using OpKernel = framework::OpKernel; -using OperatorBase = framework::OperatorBase; -using InferShapeContext = framework::InferShapeContext; -using ExecutionContext = framework::ExecutionContext; -using Variable = framework::Variable; -template -using EigenScalar = framework::EigenScalar; -template -using EigenVector = framework::EigenVector; -template -using EigenMatrix = framework::EigenMatrix; -template -using EigenTensor = framework::EigenTensor; -using Tensor = framework::Tensor; -using Scope = framework::Scope; -using OperatorWithKernel = framework::OperatorWithKernel; -using OperatorBase = framework::OperatorBase; -using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; -using OpProto = framework::OpProto; -using OpAttrChecker = framework::OpAttrChecker; -using CPUPlace = platform::CPUPlace; -using GPUPlace = platform::GPUPlace; -using OpRegistry = framework::OpRegistry; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..405b84b76d2e24db25d2ff16e99495f2f132ef09 --- /dev/null +++ b/paddle/operators/uniform_random_op.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. +// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. 
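Both new random ops use the same seeding convention: a seed attribute of 0 means "draw a nondeterministic seed from the system". A standalone sketch of that convention, using the standard-library pieces the CPU kernel below relies on (the function name and signature here are illustrative, not part of the op):

#include <cstddef>
#include <random>
#include <vector>

// Fill a buffer with uniform samples in [lo, hi); seed == 0 picks a random seed.
std::vector<float> UniformFill(std::size_t n, float lo, float hi, unsigned int seed) {
  if (seed == 0) {
    seed = std::random_device()();
  }
  std::minstd_rand engine(seed);
  std::uniform_real_distribution<float> dist(lo, hi);
  std::vector<float> out(n);
  for (auto& v : out) {
    v = dist(engine);
  }
  return out;
}
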
+template +class CPUUniformRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + std::uniform_real_distribution dist( + static_cast(context.op_.GetAttr("min")), + static_cast(context.op_.GetAttr("max"))); + for (ssize_t i = 0; i < framework::product(tensor->dims()); ++i) { + data[i] = dist(engine); + } + } +}; + +class UniformRandomOp : public framework::OperatorWithKernel { + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), + "uniform_random's min must less then max"); + auto* tensor = ctx.Output(0); + auto dims = GetAttr>("dims"); + tensor->Resize(framework::make_ddim(dims)); + } +}; + +class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { + public: + UniformRandomOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "The output tensor of uniform random op"); + AddComment(R"DOC(Uniform random operator. + +Used to initialize tensor with uniform random generator. +)DOC"); + AddAttr>("dims", "the dimension of random tensor"); + AddAttr("min", "Minimum value of uniform random").SetDefault(-1.0f); + AddAttr("max", "Maximun value of uniform random").SetDefault(1.0f); + AddAttr("seed", + "Random seed of uniform random. " + "0 means generate a seed by system") + .SetDefault(0); + } +}; +} // namespace operators +} // namespace paddle + +REGISTER_OP(uniform_random, paddle::operators::UniformRandomOp, + paddle::operators::UniformRandomOpMaker); +REGISTER_OP_CPU_KERNEL(uniform_random, + paddle::operators::CPUUniformRandomKernel); diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..f1a63e52ec0d3d46a505a89d7d7916bf93a58221 --- /dev/null +++ b/paddle/operators/uniform_random_op.cu @@ -0,0 +1,70 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +template +struct UniformGenerator { + T min_, max_; + unsigned int seed_; + + __host__ __device__ UniformGenerator(T min, T max, int seed) + : min_(min), max_(max), seed_(seed) {} + + __host__ __device__ T operator()(const unsigned int n) const { + thrust::minstd_rand rng; + rng.seed(seed_); + thrust::uniform_real_distribution dist(min_, max_); + rng.discard(n); + return dist(rng); + } +}; + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. 
+// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. +template +class GPUUniformRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + if (seed == 0) { + seed = std::random_device()(); + } + T min = static_cast(context.op_.GetAttr("min")); + T max = static_cast(context.op_.GetAttr("max")); + thrust::counting_iterator index_sequence_begin(0); + ssize_t N = framework::product(tensor->dims()); + thrust::transform(index_sequence_begin, index_sequence_begin + N, + thrust::device_ptr(data), + UniformGenerator(min, max, seed)); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_GPU_KERNEL(uniform_random, + paddle::operators::GPUUniformRandomKernel); diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index bd77bb7daa50e0b273f110624ddf6f4b79a3ceab..4154aad15c39119e2f155cb2c7b5177b5aa78022 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -8,7 +8,7 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) -cc_test(enforce_test SRCS enforce_test.cc) +cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece) IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index d2adb997de8e36922d5056b20f238a82eee74f8c..337a059fb1494d500be0fd2437e59c863ae1563c 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -15,11 +15,12 @@ limitations under the License. */ #pragma once #include -#include #include #include #include #include +#include "paddle/string/printf.h" +#include "paddle/string/to_string.h" #ifndef PADDLE_ONLY_CPU @@ -194,8 +195,8 @@ inline void throw_on_error(T e) { #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ - #__VAL0, #__VAL1, std::to_string(__VAL0), \ - std::to_string(__VAL1), \ + #__VAL0, #__VAL1, paddle::string::to_string(__VAL0), \ + paddle::string::to_string(__VAL1), \ paddle::string::Sprintf("" __VA_ARGS__)); } // namespace platform diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index 4dfb69754608cb1120baa295072c3d031a4e1a7b..80bdee3d9dfbe38ef707a6ba60cdb7f7b99714de 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -9,10 +9,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include +#include #include #include "gtest/gtest.h" #include "paddle/platform/enforce.h" +#include "paddle/string/piece.h" + +using StringPiece = paddle::string::Piece; +using paddle::string::HasPrefix; TEST(ENFORCE, OK) { PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); @@ -22,19 +28,15 @@ TEST(ENFORCE, OK) { } TEST(ENFORCE, FAILED) { - bool in_catch = false; + bool caught_exception = false; try { PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); } catch (paddle::platform::EnforceNotMet error) { - // your error handling code here - in_catch = true; - std::string msg = "Enforce is not ok 123 at all"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "Enforce is not ok 123 at all")); } - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE, NO_ARG_OK) { @@ -47,41 +49,27 @@ TEST(ENFORCE, NO_ARG_OK) { TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) { int a = 2; - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_EQ(a, 1 + 3); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce a == 1 + 3 failed, 2 != 4"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + HasPrefix(StringPiece(error.what()), "enforce a == 1 + 3 failed, 2 != 4"); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { int a = 2; - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their"); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = - "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + HasPrefix(StringPiece(error.what()), + "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_NE, OK) { @@ -89,42 +77,32 @@ TEST(ENFORCE_NE, OK) { PADDLE_ENFORCE_NE(1.0, 2UL); } TEST(ENFORCE_NE, FAIL) { - bool in_catch = false; + bool caught_exception = false; try { // 2UL here to check data type compatible PADDLE_ENFORCE_NE(1.0, 1UL); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1.0 != 1UL failed, 1.000000 == 1"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), + "enforce 1.0 != 1UL failed, 1 == 1")) + << error.what() << " does not have expected prefix"; } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); } TEST(ENFORCE_GT, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { - // 2UL here to check data type compatible PADDLE_ENFORCE_GT(1, 2UL); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2")); } - - ASSERT_TRUE(in_catch); + 
EXPECT_TRUE(caught_exception); } TEST(ENFORCE_GE, OK) { @@ -134,21 +112,16 @@ TEST(ENFORCE_GE, OK) { PADDLE_ENFORCE_GE(3.21, 2UL); } TEST(ENFORCE_GE, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_GE(1, 2UL); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 >= 2UL failed, 1 < 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 >= 2UL failed, 1 < 2")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_LE, OK) { @@ -159,21 +132,16 @@ TEST(ENFORCE_LE, OK) { PADDLE_ENFORCE_LE(2UL, 3.2); } TEST(ENFORCE_LE, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_GT(1, 2UL); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_LT, OK) { @@ -182,21 +150,15 @@ TEST(ENFORCE_LT, OK) { PADDLE_ENFORCE_LT(2UL, 3); } TEST(ENFORCE_LT, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_LT(1UL, 0.12); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1UL < 0.12 failed, 1 >= 0.12"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), + "enforce 1UL < 0.12 failed, 1 >= 0.12")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_NOT_NULL, OK) { @@ -205,20 +167,50 @@ TEST(ENFORCE_NOT_NULL, OK) { delete a; } TEST(ENFORCE_NOT_NULL, FAIL) { - bool in_catch = false; - int* a{nullptr}; - + bool caught_exception = false; try { + int* a = nullptr; PADDLE_ENFORCE_NOT_NULL(a); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "a should not be null"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), "a should not be null")); + } + EXPECT_TRUE(caught_exception); +} + +struct Dims { + size_t dims_[4]; + + bool operator==(const Dims& o) const { + for (size_t i = 0; i < 4; ++i) { + if (dims_[i] != o.dims_[i]) return false; } + return true; } +}; - ASSERT_TRUE(in_catch); +std::ostream& operator<<(std::ostream& os, const Dims& d) { + for (size_t i = 0; i < 4; ++i) { + if (i == 0) { + os << "["; + } + os << d.dims_[i]; + if (i == 4 - 1) { + os << "]"; + } else { + os << ", "; + } + } + return os; } + +TEST(ENFORCE_USER_DEFINED_CLASS, EQ) { + Dims a{{1, 2, 3, 4}}, b{{1, 2, 3, 4}}; + PADDLE_ENFORCE_EQ(a, b); +} + +TEST(ENFORCE_USER_DEFINED_CLASS, NE) { + Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}}; + ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet); +} \ No newline at end of file diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt deleted file mode 100644 index 8e6b258e00c0012876cda8ffc5b340322d51e894..0000000000000000000000000000000000000000 --- 
a/paddle/pybind/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - fc_op - sgd_op - add_op - mean_op - cross_entropy_op - recurrent_op - fill_zeros_like_op) diff --git a/paddle/string/CMakeLists.txt b/paddle/string/CMakeLists.txt index 5becf62672d0c606c98ea1a1a4383df97088ab05..60667b72873f9422aec1807972a81ab680de2e64 100644 --- a/paddle/string/CMakeLists.txt +++ b/paddle/string/CMakeLists.txt @@ -2,3 +2,4 @@ cc_library(stringpiece SRCS piece.cc) cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags) cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags) +cc_test(to_string_test SRCS to_string_test.cc) diff --git a/paddle/string/to_string.h b/paddle/string/to_string.h new file mode 100644 index 0000000000000000000000000000000000000000..4f478b6a36b23bdba8ef3ddae94b3eadf18716c2 --- /dev/null +++ b/paddle/string/to_string.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include + +namespace paddle { +namespace string { +template +inline std::string to_string(T v) { + std::ostringstream sout; + sout << v; + return sout.str(); +} + +// Faster std::string/const char* type +template <> +inline std::string to_string(std::string v) { + return v; +} + +template <> +inline std::string to_string(const char* v) { + return std::string(v); +} + +} // namespace string +} // namespace paddle diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..5ff1b007f1875c7b920a08bd13b8d98cdc5138d3 --- /dev/null +++ b/paddle/string/to_string_test.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/string/to_string.h" +#include + +constexpr char kOutputString[] = "User Defined Output"; +class UserDefinedClass { +public: +}; + +std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) { + s << kOutputString; + return s; +} + +TEST(to_string, normal) { + using namespace paddle::string; + ASSERT_EQ("10", to_string(10)); + ASSERT_EQ("abc", to_string("abc")); + ASSERT_EQ("1.2", to_string(1.2)); +} + +TEST(to_string, user_defined) { + using namespace paddle::string; + UserDefinedClass instance; + ASSERT_EQ(kOutputString, to_string(instance)); +} \ No newline at end of file diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index e1558e3fdfbcf296be0ee64202132f53bf901be9..af1dceed0284c70d68b61b9682b0cb23c28043d6 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -50,8 +50,8 @@ void NewRemoteParameterUpdater::init( // create parameter server client. if (useEtcd_) { - parameterClient_ = paddle_new_etcd_pserver_client( - (char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); + parameterClient_ = + paddle_new_etcd_pserver_client((char *)pserverSpec_.c_str()); } else { parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 541639ac21661529b0b1f2cc8d8fa25605052c8c..f6850e06512d196d51e454bc22cfa3cda8bba84a 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -13,6 +13,7 @@ py_test(test_protobuf SRCS test_protobuf.py) py_test(test_add_two_op SRCS test_add_two_op.py) py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_softmax_op SRCS test_softmax_op.py) +py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(gradient_checker SRCS gradient_checker.py) @@ -20,4 +21,8 @@ py_test(gradient_checker SRCS gradient_checker.py) py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) + py_test(test_operator SRCS test_operator.py) + +py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) +py_test(test_uniform_random_op SRCS test_uniform_random_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index cfd29932f5b46920815819c5a75d62a0138e21a2..aacc5e88feeb65e08093a35ef85837c916cfd39e 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -1,16 +1,31 @@ +import unittest + +import numpy import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator -import numpy -import unittest __all__ = ['get_numeric_gradient'] +def create_op(op_type): + kwargs = dict() + for in_name in Operator.get_op_input_names(op_type): + kwargs[in_name] = in_name + for out_name in Operator.get_op_output_names(op_type): + kwargs[out_name] = out_name + + return Operator(op_type, **kwargs) + + +def grad_var_name(var_name): + return var_name + "@GRAD" + + def get_numeric_gradient(op, input_values, output_name, input_to_check, - delta=1e-2, + delta=0.005, local_scope=None): """ Get Numeric Gradient for an operator's input. 
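The hunk above only shows the signature of get_numeric_gradient (with its default delta tightened to 0.005); a minimal sketch of the finite-difference idea it relies on follows. This is an illustration, not Paddle's implementation: the diff does not show whether the helper uses a one-sided or a central difference, and run_forward below is a hypothetical callable standing in for "run the forward op and return the output named output_name as a numpy array".

import numpy

def numeric_gradient(run_forward, x, delta=0.005):
    # Central-difference estimate of d(sum of output)/dx, one element at a time.
    grad = numpy.zeros_like(x)
    flat_x = x.flatten()        # work on a copy of the input
    flat_g = grad.reshape(-1)   # flat view into grad
    for i in range(flat_x.size):
        orig = flat_x[i]
        flat_x[i] = orig + delta
        pos = run_forward(flat_x.reshape(x.shape)).sum()
        flat_x[i] = orig - delta
        neg = run_forward(flat_x.reshape(x.shape)).sum()
        flat_x[i] = orig
        flat_g[i] = (pos - neg) / (2.0 * delta)
    return grad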
@@ -76,6 +91,119 @@ def get_numeric_gradient(op, return gradient_flat.reshape(tensor_to_check.get_dims()) +class GradientChecker(unittest.TestCase): + def assert_is_close(self, numeric_grads, scope, max_relative_error, + msg_prefix): + for name in numeric_grads: + b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + a = numeric_grads[name] + + abs_a = numpy.abs(a) + # if abs_a is nearly zero, then use abs error for a, not relative + # error. + abs_a[abs_a < 1e-3] = 1 + + diff_mat = numpy.abs(a - b) / abs_a + max_diff = numpy.max(diff_mat) + + def err_msg(): + offset = numpy.argmax(diff_mat > max_relative_error) + return "%s Variable %s max gradient diff %f over limit %f, the first " \ + "error element is %d" % ( + msg_prefix, name, max_diff, max_relative_error, offset) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) + + def check_grad(self, + forward_op, + input_vars, + inputs_to_check, + output_name, + no_grad_set=None, + only_cpu=False, + max_relative_error=0.005): + """ + :param forward_op: used to create backward_op + :param input_vars: numpy value of input variable. The following + computation will use these variables. + :param inputs_to_check: inputs var names that should check gradient. + :param output_name: output name that used to + :param max_relative_error: The relative tolerance parameter. + :param no_grad_set: used when create backward ops + :param only_cpu: only compute and check gradient on cpu kernel. + :return: + """ + if no_grad_set is None: + no_grad_set = set() + + tmp_outs = forward_op.temp_outputs() + no_tmp_out = filter(lambda name: name not in tmp_outs, + forward_op.outputs()) + if len(no_tmp_out) != 1: + raise ValueError("non temp out_names should be 1") + + in_names = forward_op.inputs() + for no_grad in no_grad_set: + if no_grad not in in_names: + raise ValueError("no_grad should be in in_names") + + backward_op = core.Operator.backward(forward_op, no_grad_set) + + places = [core.CPUPlace()] + if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): + places.append(core.GPUPlace(0)) + + numeric_grad = dict() + # get numeric gradient + for check_name in inputs_to_check: + numeric_grad[check_name] = \ + get_numeric_gradient(forward_op, input_vars, output_name, + check_name) + + # get operator gradient according to different device + for place in places: + scope = core.Scope() + ctx = core.DeviceContext.create(place) + + # create input var and set value + for name, value in input_vars.iteritems(): + if name not in in_names: + raise ValueError(name + " not in op.inputs_") + var = scope.new_var(name).get_tensor() + var.set_dims(value.shape) + var.set(value, place) + + # create output var + for out_name in forward_op.outputs(): + scope.new_var(out_name).get_tensor() + + # infer the shape of output var and compute/set value of output var + forward_op.infer_shape(scope) + forward_op.run(scope, ctx) + + # create output grad var + # set shape as the output var + # set value of this grad to ones + for name in forward_op.outputs(): + out_tensor = scope.find_var(name).get_tensor() + grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() + grad_tensor.set_dims(out_tensor.shape()) + data = 1.0 * numpy.ones(out_tensor.shape()) + grad_tensor.set(data, place) + + # create input grad var + for name in backward_op.outputs(): + scope.new_var(name).get_tensor() + + # infer the shape of input gradient var and compute/set it's value + # with backward op + backward_op.infer_shape(scope) + backward_op.run(scope, ctx) + + 
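            # the backward op has now written the analytic gradients
            # ("<name>@GRAD" tensors) into the scope; compare them against the
            # numeric gradients computed above for this place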
self.assert_is_close(numeric_grad, scope, max_relative_error, + "Gradient Check On %s" % str(place)) + + if __name__ == '__main__': class GetNumericGradientTest(unittest.TestCase): @@ -87,4 +215,28 @@ if __name__ == '__main__': arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X') self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2) + def test_softmax_op(self): + def stable_softmax(x): + """Compute the softmax of vector x in a numerically stable way.""" + shiftx = x - numpy.max(x) + exps = numpy.exp(shiftx) + return exps / numpy.sum(exps) + + def label_softmax_grad(Y, dY): + dX = Y * 0.0 + for i in range(Y.shape[0]): + d = numpy.dot(Y[i, :], dY[i, :]) + dX[i, :] = Y[i, :] * (dY[i, :] - d) + return dX + + softmax_op = Operator("softmax", X="X", Y="Y") + + X = numpy.random.random((2, 2)).astype("float32") + Y = numpy.apply_along_axis(stable_softmax, 1, X) + dY = numpy.ones(Y.shape) + dX = label_softmax_grad(Y, dY) + + arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X') + numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2) + unittest.main() diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index da6bed0fcd690d5a7f53f44d0181c75f12e5d074..dd65e0f2dc23d3f657ff16c55fb297dae210b2d7 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -1,6 +1,5 @@ -import paddle.v2.framework.core as core -import unittest import numpy +import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator @@ -24,7 +23,7 @@ class OpTestMeta(type): scope = core.Scope() kwargs = dict() places = [core.CPUPlace()] - if core.is_compile_gpu() and core.Operator.support_gpu(self.type): + if core.is_compile_gpu(): places.append(core.GPUPlace(0)) for place in places: @@ -53,6 +52,8 @@ class OpTestMeta(type): kwargs[attr_name] = self.attrs[attr_name] op = Operator(self.type, **kwargs) + if isinstance(place, core.GPUPlace) and not op.support_gpu(): + return op.infer_shape(scope) diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index b26e25d58b59bd1cb16e9ba2a1cccd27799b15f2..4815192e255c6e0429db3f50918a76a773b30131 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -1,9 +1,10 @@ import unittest import numpy from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op -class TestSGD(unittest.TestCase): +class TestCrossEntropy(unittest.TestCase): __metaclass__ = OpTestMeta def setUp(self): @@ -20,7 +21,18 @@ class TestSGD(unittest.TestCase): self.outputs = {'Y': numpy.array(Y).astype("float32")} -# TODO(superjom) add gradient check +class CrossEntropyGradOpTest(GradientChecker): + def test_softmax_grad(self): + op = create_op("onehot_cross_entropy") + batch_size = 100 + class_num = 10 + inputs = { + "X": numpy.random.uniform( + 0.1, 1.0, [batch_size, class_num]).astype("float32"), + "label": (class_num / 2) * numpy.ones(batch_size).astype("int32") + } + self.check_grad(op, inputs, set("X"), "Y") + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py new file mode 100644 index 0000000000000000000000000000000000000000..f95ed70b58d611b3233a21d3f2a34c864ae4d1b3 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -0,0 +1,36 @@ 
+import unittest +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator +import numpy + + +class GaussianRandomTest(unittest.TestCase): + def test_cpu(self): + self.gaussian_random_test(place=core.CPUPlace()) + + def test_gpu(self): + if core.is_compile_gpu(): + self.gaussian_random_test(place=core.GPUPlace(0)) + + def gaussian_random_test(self, place): + scope = core.Scope() + scope.new_var("Out").get_tensor() + + op = Operator( + "gaussian_random", + Out="Out", + dims=[1000, 784], + mean=.0, + std=1., + seed=10) + + op.infer_shape(scope) + context = core.DeviceContext.create(place) + op.run(scope, context) + tensor = numpy.array(scope.find_var("Out").get_tensor()) + self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) + self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index d20e085b8e43488480edf07b6cd4edcd861883f3..e670d93653e07d35e5019c9daac45c214eddf367 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -1,9 +1,8 @@ import unittest import numpy as np -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +from gradient_checker import GradientChecker, create_op from op_test_util import OpTestMeta @@ -25,62 +24,11 @@ class TestSoftmaxOp(unittest.TestCase): } -class TestSoftmaxGradOp(unittest.TestCase): - def test_softmax_grad(self): - op = Operator('softmax', X="X", Y="Y") - backward_op = core.Operator.backward(op, set()) - self.assertEqual(backward_op.type(), "softmax_grad") - expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).''' - self.assertEqual(expected, str(backward_op)) - - batch_size = 3 - class_num = 5 - # Initialize X and add 1e-2 for numerical stability - Y = np.random.rand(batch_size, class_num).astype(np.float32) - Y = Y + 1e-2 - dY = np.random.rand(batch_size, class_num).astype(np.float32) - - # Reference implementation of cross entropy with soft labels - def label_softmax_grad(Y, dY): - dX = Y * 0.0 - for i in range(batch_size): - d = np.dot(Y[i, :], dY[i, :]) - dX[i, :] = Y[i, :] * (dY[i, :] - d) - return dX - - expected = label_softmax_grad(Y, dY) - - scope = core.Scope() - places = [] - places.append(core.CPUPlace()) - if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) - - for place in places: - y = scope.new_var("Y") - y_tensor = y.get_tensor() - y_tensor.set_dims([batch_size, class_num]) - y_tensor.alloc_float(place) - y_tensor.set(Y, place) - - dy = scope.new_var("Y@GRAD") - dy_tensor = dy.get_tensor() - dy_tensor.set_dims([batch_size, class_num]) - dy_tensor.alloc_float(place) - dy_tensor.set(dY, place) - - x = scope.new_var("X") - dx = scope.new_var("X@GRAD") - - tensor = scope.find_var("X@GRAD").get_tensor() - backward_op.infer_shape(scope) - self.assertEqual([batch_size, class_num], tensor.shape()) - - ctx = core.DeviceContext.create(place) - backward_op.run(scope, ctx) - actual = np.array(tensor) - - np.testing.assert_almost_equal(actual, expected, decimal=3) +class SoftmaxGradOpTest(GradientChecker): + def test_softmax(self): + op = create_op("softmax") + inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")} + self.check_grad(op, inputs, set("X"), "Y") if __name__ == '__main__': diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py 
b/python/paddle/v2/framework/tests/test_uniform_random_op.py new file mode 100644 index 0000000000000000000000000000000000000000..c3d2bb44da3977c0899b2609a8efe15b7e1789f2 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py @@ -0,0 +1,35 @@ +import unittest +from paddle.v2.framework.op import Operator +import paddle.v2.framework.core as core +import numpy + + +class UniformRandomTest(unittest.TestCase): + def test_uniform_random_cpu(self): + self.uniform_random_test(place=core.CPUPlace()) + + def test_uniform_random_gpu(self): + if core.is_compile_gpu(): + self.uniform_random_test(place=core.GPUPlace(0)) + + def uniform_random_test(self, place): + scope = core.Scope() + scope.new_var("X").get_tensor() + + op = Operator( + "uniform_random", + Out="X", + dims=[1000, 784], + min=-5.0, + max=10.0, + seed=10) + + op.infer_shape(scope) + ctx = core.DeviceContext.create(place) + op.run(scope, ctx) + tensor = numpy.array(scope.find_var("X").get_tensor()) + self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1) + + +if __name__ == '__main__': + unittest.main()
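The mean asserted in the test above follows directly from the attributes: a uniform draw on [min, max] = [-5.0, 10.0] has expectation (min + max) / 2 = 2.5, which is what tensor.mean() is compared against with a delta of 0.1. A natural extension, sketched here with plain numpy rather than through the operator, would also check the spread, since the standard deviation of that distribution is (max - min) / sqrt(12), roughly 4.33, mirroring how test_gaussian_random_op.py checks both mean and std.

import numpy

# Same shape and range as the operator test above; pure-numpy stand-in.
samples = numpy.random.uniform(-5.0, 10.0, size=(1000, 784))
expected_std = (10.0 - (-5.0)) / numpy.sqrt(12.0)   # ~4.33
assert abs(samples.mean() - 2.5) < 0.1
assert abs(samples.std() - expected_std) < 0.1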