diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index efb4dcb2dfbc63bb6905961b054cdef860cf4573..980a97a07c996eca2e8c126a6ad5ab7f340fa1e5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,9 +22,11 @@ hooks: - id: clang-formater - repo: https://github.com/PaddlePaddle/pre-commit-golang - sha: 16398aeccf263adaf53b2495eed0406347d76281 + sha: 8337620115c25ff8333f1b1a493bd031049bd7c0 hooks: - - id: go-fmt - types: [go] - - id: gometalinter - types: [go] + - id: go-fmt + types: + - go + - id: gometalinter + types: + - go diff --git a/go/master/c/client.go b/go/master/c/client.go index a2b18e4b474e039e661a3ae130379b41e76f29bd..b5759c30b1d7f7dc33e162e959c7de165e02e1da 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -18,7 +18,6 @@ package main #include #include #include - #define PADDLE_MASTER_OK 0 #define PADDLE_MASTER_ERROR -1 @@ -101,6 +100,12 @@ func paddle_release_master_client(client C.paddle_master_client) { remove(client) } +//export paddle_start_get_records +func paddle_start_get_records(client C.paddle_master_client, pass C.int) { + c := get(client) + c.StartGetRecords(int(pass)) +} + //export paddle_set_dataset func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int) C.int { c := get(client) @@ -121,15 +126,19 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int // paddle_next_record gets the nexts training record. // -// returns number of bytes of the records if success, -1 if failed. +// returns number of bytes of the records if success, -1 if failed, -2 if pass end. // //export paddle_next_record func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { c := get(client) r, err := c.NextRecord() if err != nil { - // Error - // TODO: return the type of error? + // NOTE: use errors to indicate pass ends + if err.Error() == master.ErrAllTaskFailed.Error() || + err.Error() == master.ErrNoMoreAvailable.Error() || + err.Error() == master.ErrPassBefore.Error() { + return -2 + } *record = (*C.uchar)(nil) return -1 } diff --git a/go/master/client.go b/go/master/client.go index bbf3768d96ead1911508486410d2402ea0ac8b12..62801b9b7fe85fe27147b12160f48d988623d547 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -16,7 +16,6 @@ package master import ( "os" - "sync" "time" "github.com/PaddlePaddle/Paddle/go/connection" @@ -27,9 +26,9 @@ import ( // Client is the client of the master server. type Client struct { - conn *connection.Conn - ch chan record - initChOnce sync.Once + conn *connection.Conn + ch chan record + bufSize int } type record struct { @@ -46,11 +45,7 @@ func WithBuffer(bufSize int) func(*Client) error { if bufSize <= 0 { return nil } - - c.initChOnce.Do(func() { - c.ch = make(chan record, bufSize) - go c.getRecords() - }) + c.bufSize = bufSize return nil } } @@ -104,25 +99,41 @@ func NewClient(opts ...func(*Client) error) (*Client, error) { if err != nil { return nil, err } - } - + c.ch = make(chan record, c.bufSize) + // FIXME: connection is created asyncrosly in monitorMaster go routine, + // ensure the connection is ready for use before calling c.addClient. + time.Sleep(time.Second) return c, nil } -func (c *Client) getRecords() { +// StartGetRecords must be called at beginning of each pass +func (c *Client) StartGetRecords(passID int) { + go c.getRecords(passID) +} + +func (c *Client) getRecords(passID int) { for { - t, err := c.getTask() + t, err := c.getTask(passID) if err != nil { - log.Errorf("Get task failed, sleep 3 seconds and continue, %s", err) - time.Sleep(3 * time.Second) - continue + if err.Error() == ErrPassBefore.Error() || + err.Error() == ErrNoMoreAvailable.Error() || + err.Error() == ErrAllTaskFailed.Error() { + c.ch <- record{nil, err} + break + } + if err.Error() == ErrPassAfter.Error() { + // wait util last pass finishes + time.Sleep(time.Second * 3) + continue + } + log.Errorf("getTask error: %s", err) } for _, chunk := range t.Chunks { - f, err := os.Open(chunk.Path) - if err != nil { - log.Errorln(err) + f, e := os.Open(chunk.Path) + if e != nil { + log.Errorln(e) continue } @@ -178,18 +189,21 @@ func (c *Client) monitorMaster(addrCh <-chan string) { } } -// SetDataset set dataset for the master server to dispatch. +// SetDataset sets dataset to dispatch for the master server. +// +// SetDataset can be call multiple times at one pass. But only the first call +// will be honored. // -// SetDataset can be call multiple times from different nodes. But -// only the first call will be honored. +// After all tasks are done, another call of SetDataset will start another pass. func (c *Client) SetDataset(globPaths []string) error { - return c.conn.Call("Service.SetDataset", globPaths, nil) + err := c.conn.Call("Service.SetDataset", globPaths, nil) + return err } // getTask gets a new task from the master server. -func (c *Client) getTask() (Task, error) { +func (c *Client) getTask(passID int) (Task, error) { var t Task - err := c.conn.Call("Service.GetTask", 0, &t) + err := c.conn.Call("Service.GetTask", passID, &t) return t, err } @@ -208,12 +222,6 @@ func (c *Client) taskFailed(meta TaskMeta) error { // NextRecord will block until the next record is available. It is // thread-safe. func (c *Client) NextRecord() ([]byte, error) { - c.initChOnce.Do(func() { - // initialize with in case WithBuffer is not used. - c.ch = make(chan record, 0) - go c.getRecords() - }) - r := <-c.ch return r.r, r.err } diff --git a/go/master/client_internal_test.go b/go/master/client_internal_test.go index ee305e2c80f54ebee2e5011ca7ff0cf5e0612f41..d5f3d79464655540a29eaa6395057aa5795c4615 100644 --- a/go/master/client_internal_test.go +++ b/go/master/client_internal_test.go @@ -54,22 +54,22 @@ func TestGetFinishTask(t *testing.T) { panic(err) } go func(l net.Listener) { - s, err := NewService(&InMemStore{}, chunkPerTask, time.Second, 1) - if err != nil { - panic(err) + s, sErr := NewService(&InMemStore{}, chunkPerTask, time.Second, 1) + if sErr != nil { + panic(sErr) } server := rpc.NewServer() - err = server.Register(s) - if err != nil { - panic(err) + sErr = server.Register(s) + if sErr != nil { + panic(sErr) } mux := http.NewServeMux() mux.Handle(rpc.DefaultRPCPath, server) - err = http.Serve(l, mux) - if err != nil { - panic(err) + sErr = http.Serve(l, mux) + if sErr != nil { + panic(sErr) } }(l) @@ -103,6 +103,7 @@ func TestGetFinishTask(t *testing.T) { ch := make(chan string, 1) ch <- addr go c.monitorMaster(ch) + err = c.SetDataset([]string{path}) if err != nil { panic(err) @@ -111,44 +112,47 @@ func TestGetFinishTask(t *testing.T) { checkOnePass := func(i int) { var tasks []Task for idx := 0; idx < totalTask; idx++ { - task, err := c.getTask() - if err != nil { - t.Fatalf("Error: %v, pass: %d\n", err, i) + task, cErr := c.getTask(i) + if cErr != nil && cErr.Error() != ErrNoMoreAvailable.Error() && cErr.Error() != ErrPassAfter.Error() { + t.Fatalf("error: %v, pass: %d\n", cErr, i) } tasks = append(tasks, task) } - _, err = c.getTask() - if err == nil { + // getting task before task finishes should return error + _, cErr := c.getTask(i) + if cErr == nil { t.Fatalf("Should get error, pass: %d\n", i) } - err = c.taskFinished(tasks[0].Meta.ID) - if err != nil { - t.Fatalf("Error: %v, pass: %d\n", err, i) + cErr = c.taskFinished(tasks[0].Meta.ID) + if cErr != nil { + t.Fatalf("Error: %v, pass: %d\n", cErr, i) } - - err = c.taskFailed(tasks[0].Meta) - if err != nil { - t.Fatalf("Error: %v, pass: %d\n", err, i) + // call taskFailed once won't put the task to failed queue, just ensure + // the call + cErr = c.taskFailed(tasks[0].Meta) + if cErr != nil { + t.Fatalf("Error: %v, pass: %d\n", cErr, i) } tasks = tasks[1:] - task, err := c.getTask() - if err != nil { - t.Fatal(err) + _, cErr = c.getTask(i) + if cErr != nil && cErr.Error() != ErrNoMoreAvailable.Error() && cErr.Error() != ErrPassAfter.Error() { + t.Fatalf("Should be ErrNoMoreAvailable or ErrPassAfter: %s", cErr) } - tasks = append(tasks, task) for _, task := range tasks { - err = c.taskFinished(task.Meta.ID) - if err != nil { - t.Fatalf("Error: %v, pass: %d\n", err, i) + cErr = c.taskFinished(task.Meta.ID) + if cErr != nil { + t.Fatal(cErr) } } } for i := 0; i < 10; i++ { + // init pass data + c.StartGetRecords(i) checkOnePass(i) } } diff --git a/go/master/client_test.go b/go/master/client_test.go index a3a434ae7e855c1cd2043d196435b42d2520f003..79b9cc844d1ff938915a622bf19a7d772682becf 100644 --- a/go/master/client_test.go +++ b/go/master/client_test.go @@ -20,8 +20,10 @@ import ( "net/http" "net/rpc" "os" + "runtime" "strconv" "strings" + "sync" "testing" "time" @@ -29,6 +31,18 @@ import ( "github.com/PaddlePaddle/recordio" ) +// tool function for testing output goroutine ids +func goid() int { + var buf [64]byte + n := runtime.Stack(buf[:], false) + idField := strings.Fields(strings.TrimPrefix(string(buf[:n]), "goroutine "))[0] + id, err := strconv.Atoi(idField) + if err != nil { + panic(fmt.Sprintf("cannot get goroutine id: %v", err)) + } + return id +} + func TestNextRecord(t *testing.T) { const ( path = "/tmp/master_client_TestFull" @@ -45,7 +59,7 @@ func TestNextRecord(t *testing.T) { panic(err) } go func(l net.Listener) { - s, err := master.NewService(&master.InMemStore{}, 10, time.Second, 1) + s, err := master.NewService(&master.InMemStore{}, 1, time.Second*60, 1) if err != nil { panic(err) } @@ -69,7 +83,7 @@ func TestNextRecord(t *testing.T) { panic(err) } - w := recordio.NewWriter(f, -1, -1) + w := recordio.NewWriter(f, 1, -1) for i := 0; i < total; i++ { _, err = w.Write([]byte{byte(i)}) if err != nil { @@ -87,32 +101,49 @@ func TestNextRecord(t *testing.T) { panic(err) } - c, err := master.NewClient(master.WithAddr(fmt.Sprintf(":%d", p)), master.WithBuffer(10)) - if err != nil { - panic(err) - } - - err = c.SetDataset([]string{path}) - if err != nil { - panic(err) - } - - for pass := 0; pass < 50; pass++ { - received := make(map[byte]bool) - for i := 0; i < total; i++ { - r, err := c.NextRecord() - if err != nil { - t.Fatal(pass, i, "Read error:", err) + // start several client to test task fetching + var wg sync.WaitGroup + for i := 0; i < 4; i++ { + wg.Add(1) + // test for multiple concurrent clients + go func() { + defer wg.Done() + // each go-routine needs a single client connection instance + c, e := master.NewClient(master.WithAddr(fmt.Sprintf(":%d", p)), master.WithBuffer(1)) + if e != nil { + t.Fatal(e) } - - if len(r) != 1 { - t.Fatal(pass, i, "Length should be 1.", r) + e = c.SetDataset([]string{path}) + if e != nil { + panic(e) } - - if received[r[0]] { - t.Fatal(pass, i, "Received duplicate.", received, r) + // test for n passes + for pass := 0; pass < 10; pass++ { + c.StartGetRecords(pass) + + received := make(map[byte]bool) + taskid := 0 + for { + r, e := c.NextRecord() + if e != nil { + // ErrorPassAfter will wait, else break for next pass + if e.Error() == master.ErrPassBefore.Error() || + e.Error() == master.ErrNoMoreAvailable.Error() { + break + } + t.Fatal(pass, taskid, "Read error:", e) + } + if len(r) != 1 { + t.Fatal(pass, taskid, "Length should be 1.", r) + } + if received[r[0]] { + t.Fatal(pass, taskid, "Received duplicate.", received, r) + } + taskid++ + received[r[0]] = true + } } - received[r[0]] = true - } + }() } + wg.Wait() } diff --git a/go/master/service.go b/go/master/service.go index d1ec8939e18e8f4a7b4578a9399e2fa9f24325f3..1f2112ecfb925ee8bb9545f0bb1100efd3ad11ca 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -19,6 +19,7 @@ import ( "compress/gzip" "encoding/gob" "errors" + "math/rand" "os" "path/filepath" "sync" @@ -33,6 +34,18 @@ const ( dialTimeout = 5 * time.Second ) +// ErrAllTaskFailed occur when tasks are in done or failed state. +var ErrAllTaskFailed = errors.New("all task finished") + +// ErrNoMoreAvailable occur when no task in todo and yet not all done or fail. +var ErrNoMoreAvailable = errors.New("no more available task") + +// ErrPassBefore client side pass number does not match with master counter. +var ErrPassBefore = errors.New("pass number smaller than master") + +// ErrPassAfter client side pass number does not match with master counter. +var ErrPassAfter = errors.New("pass number larger than master") + // Store is the interface for save and load the master state. type Store interface { Save([]byte) error @@ -75,17 +88,26 @@ type Service struct { chunksPerTask int timeoutDur time.Duration failureMax int - ready chan struct{} store Store - mu sync.Mutex - initDone bool - taskQueues taskQueues + ready chan struct{} + initDone bool + + mu sync.Mutex + taskQueues taskQueues + currPass int + jobTasks []taskEntry + savingTrainer string } func partition(chunks []Chunk, chunksPerTask int) []taskEntry { - id := 0 + // generate uniq id across job using nanosecond + randint + counter + // FIXME(typhoonzero): this is a workaround, use uuid + randStart := rand.Int() + counter := 0 + timestamp := time.Now().Nanosecond() + id := timestamp + randStart + counter if chunksPerTask <= 0 { chunksPerTask = 1 } @@ -95,7 +117,8 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { for i, c := range chunks { if i%chunksPerTask == 0 && len(cur.Task.Chunks) > 0 { cur.Task.Meta.ID = id - id++ + counter++ + id = timestamp + randStart + counter result = append(result, cur) cur.Task.Chunks = nil } @@ -266,19 +289,21 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error { return err } - s.taskQueues.Todo = partition(chunks, s.chunksPerTask) + s.jobTasks = partition(chunks, s.chunksPerTask) + s.taskQueues.Todo = s.jobTasks err = s.snapshot() if err != nil { log.Errorln(err) return err } - close(s.ready) s.initDone = true return nil } +// processFailedTask retry s.failureMax times for failed task. +// return true if all task are done or failed. func (s *Service) processFailedTask(t taskEntry, epoch int) { if t.Task.Meta.Epoch != epoch { // new epoch, task launched after the @@ -302,8 +327,9 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) { return } - log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) + log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure) s.taskQueues.Todo = append(s.taskQueues.Todo, t) + return } func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { @@ -331,37 +357,30 @@ func (s *Service) logFields() log.Fields { } // GetTask gets a new task from the service. -func (s *Service) GetTask(_ int, task *Task) error { +// passID is the client side pass count +func (s *Service) GetTask(passID int, task *Task) error { select { case <-s.ready: } s.mu.Lock() defer s.mu.Unlock() + if passID < s.currPass { + return ErrPassBefore + } + if passID > s.currPass { + // Client may get run to pass after master when one client faster than the + // other + return ErrPassAfter + } if len(s.taskQueues.Todo) == 0 { - if len(s.taskQueues.Done) == 0 { - if len(s.taskQueues.Pending) == 0 { - err := errors.New("all task failed") - log.WithFields(s.logFields()).Warningln("All tasks failed.") - return err - } - - // TODO(helin): client need to retry in this - // error case. Gotcha: RPC client can't - // compare returned error with predefined - // errors like io.EOF, because the error - // instance deserialized from RPC is a - // different instance than the error defined - // in package. So we need to figure out a way - // for client to check this error correctly. - err := errors.New("no more available task") - log.WithFields(s.logFields()).Warningln("No more available task.") - return err + if len(s.taskQueues.Done) == 0 && len(s.taskQueues.Pending) == 0 { + log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass") + return ErrAllTaskFailed } - s.taskQueues.Todo = s.taskQueues.Done - s.taskQueues.Done = nil - log.WithFields(s.logFields()).Infoln("No more todo task, but trainer is requesting task to do. Move all done task to todo.") + log.WithFields(s.logFields()).Warningln("No more available task.") + return ErrNoMoreAvailable } t := s.taskQueues.Todo[0] @@ -381,7 +400,7 @@ func (s *Service) GetTask(_ int, task *Task) error { } // TaskFinished tell the service that a task is finished. -func (s *Service) TaskFinished(taskID int, _ *int) error { +func (s *Service) TaskFinished(taskID int, dummy *int) error { select { case <-s.ready: } @@ -401,11 +420,14 @@ func (s *Service) TaskFinished(taskID int, _ *int) error { delete(s.taskQueues.Pending, taskID) log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID) - - if len(s.taskQueues.Pending) == 0 && len(s.taskQueues.Todo) == 0 { - log.WithFields(s.logFields()).Infoln("No more todo and pending task, start a new pass.") - s.taskQueues.Todo = append(s.taskQueues.Todo, s.taskQueues.Done...) - s.taskQueues.Done = nil + if len(s.taskQueues.Todo) == 0 && len(s.taskQueues.Pending) == 0 { + // increase master side pass count if all tasks finished + s.currPass++ + s.taskQueues.Todo = s.jobTasks + s.taskQueues.Done = []taskEntry{} + // TODO(typhoonzero): deal with failed tasks + s.taskQueues.Failed = []taskEntry{} + log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.currPass) } err := s.snapshot() @@ -416,7 +438,7 @@ func (s *Service) TaskFinished(taskID int, _ *int) error { } // TaskFailed tells the service that a task is failed. -func (s *Service) TaskFailed(meta TaskMeta, _ *int) error { +func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error { select { case <-s.ready: } diff --git a/go/master/service_internal_test.go b/go/master/service_internal_test.go index 69a882fc33668a8cdefa30ae394f6c605f3bf099..bd1a939a55553b558181d91a757c487d0f97b40b 100644 --- a/go/master/service_internal_test.go +++ b/go/master/service_internal_test.go @@ -44,7 +44,8 @@ func TestPartionIndex(t *testing.T) { cs := make([]Chunk, 100) ts := partition(cs, 20) for i := range ts { - if ts[i].Task.Meta.ID != i { + // test auto increament ids + if i > 0 && ts[i].Task.Meta.ID != ts[i-1].Task.Meta.ID+1 { t.Error(ts[i], i) } } diff --git a/go/pserver/client/c/test/test_train.py b/go/pserver/client/c/test/test_train.py index 17082cf8926deb1ebd3b20e2e37c3895aee30b81..85cb399590f7a5e7e73285ca87c49ea5f24afb32 100644 --- a/go/pserver/client/c/test/test_train.py +++ b/go/pserver/client/c/test/test_train.py @@ -6,16 +6,19 @@ import cPickle as pickle etcd_ip = os.getenv("MASTER_IP", "127.0.0.1") etcd_endpoint = "http://" + etcd_ip + ":2379" +print "connecting to master, etcd endpoints: ", etcd_endpoint +master_client = master.client(etcd_endpoint, 5, 64) def cloud_reader(): - print "connecting to master, etcd endpoints: ", etcd_endpoint - master_client = master.client(etcd_endpoint, 5, 64) + global master_client master_client.set_dataset( - ["/pfs/dlnel/public/dataset/uci_housing/uci_housing-*-of-*"]) + ["/pfs/dlnel/public/dataset/uci_housing/uci_housing-*"], passes=30) while 1: r, e = master_client.next_record() if not r: + if e != -2: # other errors + print "get record error:", e break yield pickle.loads(r) @@ -27,10 +30,12 @@ def main(): # network config x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) y_predict = paddle.layer.fc(input=x, - param_attr=paddle.attr.Param(name='w'), + param_attr=paddle.attr.Param( + name='w', learning_rate=1e-3), size=1, act=paddle.activation.Linear(), - bias_attr=paddle.attr.Param(name='b')) + bias_attr=paddle.attr.Param( + name='b', learning_rate=1e-3)) y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) cost = paddle.layer.mse_cost(input=y_predict, label=y) @@ -40,7 +45,6 @@ def main(): # create optimizer of new remote updater to pserver optimizer = paddle.optimizer.Momentum(momentum=0, learning_rate=1e-3) - print "etcd endoint: ", etcd_endpoint trainer = paddle.trainer.SGD(cost=cost, parameters=parameters, update_equation=optimizer, @@ -51,6 +55,8 @@ def main(): # event_handler to print training and testing info def event_handler(event): if isinstance(event, paddle.event.EndIteration): + # FIXME: for cloud data reader, pass number is managed by master + # should print the server side pass number if event.batch_id % 100 == 0: print "Pass %d, Batch %d, Cost %f" % ( event.pass_id, event.batch_id, event.cost) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 433edbfda742d3be9915eade7b0a455398a501dc..21cb7c7265e0052630b68954fa25f9189e641e7b 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -3,7 +3,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory) +cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) @@ -29,7 +29,5 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) -proto_library(net_proto SRCS net_proto.proto DEPS op_proto) -# cc_library(net SRCS net.cc DEPS operator net_proto op_registry fc_op) -cc_library(net SRCS net.cc DEPS operator net_proto op_registry) +cc_library(net SRCS net.cc DEPS op_registry) cc_test(net_op_test SRCS net_op_test.cc DEPS net add_op mul_op sigmoid_op softmax_op fc_op) diff --git a/paddle/framework/detail/tensor-inl.h b/paddle/framework/detail/tensor-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..2acae1b0e20865e786137be09a3973b31b9fba25 --- /dev/null +++ b/paddle/framework/detail/tensor-inl.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/memory/memcpy.h" + +namespace paddle { +namespace framework { + +template +inline void Tensor::check_memory_size() const { + PADDLE_ENFORCE(holder_ != nullptr, + "Tenosr holds no memory. Call Tensor::mutable_data first."); + PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, + "Tensor's dims_ is out of bound. Call Tensor::mutable_data " + "first to re-allocate memory."); +} + +template +inline const T* Tensor::data() const { + check_memory_size(); + return reinterpret_cast( + reinterpret_cast(holder_->ptr()) + offset_); +} + +template +inline T* Tensor::data() { + check_memory_size(); + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + + offset_); +} + +template +inline T* Tensor::mutable_data(DDim dims, platform::Place place) { + static_assert(std::is_pod::value, "T must be POD"); + Resize(dims); + return mutable_data(place); +} + +template +inline T* Tensor::mutable_data(platform::Place place) { + static_assert(std::is_pod::value, "T must be POD"); + PADDLE_ENFORCE(product(dims_) > 0, + "Tensor's numel must be larger than zero to call " + "Tensor::mutable_data. Call Tensor::set_dim first."); + /* some versions of boost::variant don't have operator!= */ + size_t size = product(dims_) * sizeof(T); + if (holder_ == nullptr || !(holder_->place() == place) || + holder_->size() < size + offset_) { + if (platform::is_cpu_place(place)) { + holder_.reset(new PlaceholderImpl( + boost::get(place), size)); + } +#ifndef PADDLE_ONLY_CPU + else if (platform::is_gpu_place(place)) { + holder_.reset(new PlaceholderImpl( + boost::get(place), size)); + } +#endif + offset_ = 0; + } + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + + offset_); +} + +template +inline void Tensor::ShareDataWith(const Tensor& src) { + src.check_memory_size(); + *this = src; +} + +template +inline void Tensor::CopyFrom(const Tensor& src, + const platform::CPUDeviceContext& ctx) { + src.check_memory_size(); + Resize(src.dims()); + + auto src_place = src.holder_->place(); + auto src_ptr = static_cast(src.data()); + + auto dst_place = ctx.GetPlace(); + auto dst_ptr = static_cast(mutable_data(dst_place)); + + auto size = product(src.dims_) * sizeof(T); + + if (platform::is_cpu_place(src_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, + boost::get(src_place), src_ptr, size); + } +#ifndef PADDLE_ONLY_CPU + else if (platform::is_gpu_place(src_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, + boost::get(src_place), src_ptr, size, 0); + } +#endif +} + +#ifndef PADDLE_ONLY_CPU +template +inline void Tensor::CopyFrom(const Tensor& src, + const platform::CUDADeviceContext& ctx) { + src.check_memory_size(); + Resize(src.dims()); + + auto src_place = src.holder_->place(); + auto src_ptr = static_cast(src.data()); + + auto dst_place = ctx.GetPlace(); + auto dst_ptr = static_cast(mutable_data(dst_place)); + + auto size = product(src.dims_) * sizeof(T); + + if (platform::is_cpu_place(src_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, + boost::get(src_place), src_ptr, size, + ctx.stream()); + } else if (platform::is_gpu_place(src_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, + boost::get(src_place), src_ptr, size, + ctx.stream()); + } +} +#endif + +template +inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { + check_memory_size(); + PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero."); + PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound."); + PADDLE_ENFORCE(begin_idx < end_idx, + "Begin index must be less than end index."); + PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1."); + int base = product(dims_) / dims_[0]; + Tensor dst; + dst.holder_ = holder_; + DDim dst_dims = dims_; + dst_dims[0] = end_idx - begin_idx; + dst.Resize(dst_dims); + dst.offset_ = offset_ + begin_idx * base * sizeof(T); + return dst; +} + +inline void Tensor::Resize(const DDim& dims) { dims_ = dims; } + +inline const DDim& Tensor::dims() const { return dims_; } + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/net.cc b/paddle/framework/net.cc index bc23b63b35d37eea01ae6b9b8891e9cd94615898..2cd378c6b21303d1a24206ba3010b0d035aaa766 100644 --- a/paddle/framework/net.cc +++ b/paddle/framework/net.cc @@ -20,17 +20,7 @@ namespace paddle { namespace framework { -std::shared_ptr AddBackwardOp(std::shared_ptr ForwardOps) { - auto grad_ops = std::make_shared(); - for (auto& op : ForwardOps->ops_) { - auto op_grad = OpRegistry::CreateGradOp(op); - grad_ops->AddOp(op_grad); - } - grad_ops->CompleteAddOp(); - return grad_ops; -} - -void PlainNet::CompleteAddOp(bool calc) { +void NetOp::CompleteAddOp(bool calc) { add_op_done_ = true; if (!calc) return; std::unordered_set input_set; @@ -70,7 +60,7 @@ void PlainNet::CompleteAddOp(bool calc) { attrs_["temporary_index"] = tmp_index; } -std::string PlainNet::DebugString() const { +std::string NetOp::DebugString() const { std::ostringstream os; os << OperatorBase::DebugString() << std::endl; for (auto& op : ops_) { @@ -82,5 +72,7 @@ std::string PlainNet::DebugString() const { return os.str(); } +bool NetOp::IsNetOp() const { return true; } + } // namespace framework } // namespace paddle diff --git a/paddle/framework/net.h b/paddle/framework/net.h index 3264f1f565e3efc188e7835cb9b44e5741e1eea8..089c1355951f59d51db16d4b4bdce4282d6e5c25 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -37,21 +37,7 @@ namespace framework { * This is the base class of network, all the networks should implement the APIs * it defines. */ -class Net : public OperatorBase { - public: - virtual void AddOp(const std::shared_ptr& op) = 0; - virtual void CompleteAddOp(bool calc) = 0; -}; - -using NetPtr = std::shared_ptr; - -/** - * @brief a basic implementation of Net. - * - * PlainNet is a very simple Net, it create a list of operators, and run them - * sequentially following the order they added. - */ -class PlainNet : public Net { +class NetOp : public OperatorBase { public: /** * Infer all the operators' input and output variables' shapes, will be called @@ -80,15 +66,17 @@ class PlainNet : public Net { /** * @brief Add an operator by ptr */ - void AddOp(const std::shared_ptr& op) override { + void AddOp(const std::shared_ptr& op) { PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); ops_.push_back(op); } - void CompleteAddOp(bool calculate = true) override; + void CompleteAddOp(bool calculate = true); std::string DebugString() const override; + bool IsNetOp() const override; + std::vector> ops_; private: @@ -100,7 +88,5 @@ class PlainNet : public Net { } }; -std::shared_ptr AddBackwardOp(std::shared_ptr ForwardOps); - } // namespace framework } // namespace paddle diff --git a/paddle/framework/net_op_test.cc b/paddle/framework/net_op_test.cc index d924058624bf334b015797c4e4f882db10203049..8048311fe54ee1827fb5b91577478a1d30803e43 100644 --- a/paddle/framework/net_op_test.cc +++ b/paddle/framework/net_op_test.cc @@ -40,7 +40,7 @@ void AssertSameVectorWithoutOrder(const std::vector& expected, } TEST(OpKernel, all) { - auto net = std::make_shared(); + auto net = std::make_shared(); ASSERT_NE(net, nullptr); auto op1 = std::make_shared(); @@ -71,28 +71,21 @@ TEST(OpKernel, all) { ASSERT_EQ(2, run_cnt); ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); } -TEST(AddBackwardOp, TestGradOp) { - auto net = std::make_shared(); - ASSERT_NE(net, nullptr); - net->AddOp(framework::OpRegistry::CreateOp("mul", {"X", "Y"}, {"Out"}, {})); - net->AddOp( - framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {"Out"}, {})); - net->AddOp(framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {""}, {})); - auto grad_ops = AddBackwardOp(net); - for (auto& op : grad_ops->ops_) { - op->DebugString(); - } -} -// TODO(zhihong): add fc grad without registering. -// TEST(AddBackwardOp, TestNoGradOp) { -// auto net = std::make_shared(); -// ASSERT_NE(net, nullptr); -// net->AddOp(framework::OpRegistry::CreateOp("fc", {"X", "W", "b"}, {"Y"}, -// {})); auto grad_ops = AddBackwardOp(net); for (auto& op : grad_ops->ops_) { -// op->DebugString(); -// } -// } +//! TODO(yuyang18): Refine Backward Op. +// TEST(AddBackwardOp, TestGradOp) { +// auto net = std::make_shared(); +// ASSERT_NE(net, nullptr); +// net->AddOp(framework::OpRegistry::CreateOp("mul", {"X", "Y"}, {"Out"}, {})); +// net->AddOp( +// framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {"Out"}, {})); +// net->AddOp(framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {""}, +// {})); +// auto grad_ops = AddBackwardOp(net); +// for (auto& op : grad_ops->ops_) { +// op->DebugString(); +// } +//} } // namespace framework } // namespace paddle diff --git a/paddle/framework/net_proto.proto b/paddle/framework/net_proto.proto deleted file mode 100644 index 0779f49fe2a9a6d0d1ea5ec11ba3befeb0a67fa1..0000000000000000000000000000000000000000 --- a/paddle/framework/net_proto.proto +++ /dev/null @@ -1,15 +0,0 @@ -syntax="proto2"; -package paddle.framework; - -import "op_proto.proto"; - -message NetDesc { - // network identification - optional string name = 1; - // operator contains in network - repeated OpProto operators = 2; - // network type to run with. e.g "plainNet", "DAG" - optional string net_type = 3; - // num worker always - optional int32 num_workers = 4; -} diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index f16deae028d76dc40d6bc589648b461c430c3c98..384f0f631dd9b9a4dd7c0c628340afe668bc248f 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -403,15 +403,16 @@ class GradOpRegisterHelper { STATIC_ASSERT_GLOBAL_NAMESPACE( \ __reg_op_kernel_##type##_##DEVICE_TYPE##__, \ "REGISTER_OP_KERNEL must be in global namespace"); \ - struct __op_kernel_register__##type##__ { \ - __op_kernel_register__##type##__() { \ + struct __op_kernel_register__##type##__##DEVICE_TYPE##__ { \ + __op_kernel_register__##type##__##DEVICE_TYPE##__() { \ ::paddle::framework::OperatorWithKernel::OpKernelKey key; \ key.place_ = PlaceType(); \ ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \ .reset(new __VA_ARGS__()); \ } \ }; \ - static __op_kernel_register__##type##__ __reg_kernel_##type##__; \ + static __op_kernel_register__##type##__##DEVICE_TYPE##__ \ + __reg_kernel_##type##__##DEVICE_TYPE##__; \ int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; } // (type, KernelType) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index f59314f8288d37f0c645b99811b1355f9a496c00..0a8c82ee47521713fa96cb423ceca4de858c260c 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -90,15 +90,17 @@ class OperatorBase { virtual void Run(const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const = 0; - // Get a input with argument's name described in `op_proto` + virtual bool IsNetOp() const { return false; } + + //! Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; - // Get a input which has multiple variables. - // TODO add a vector_view to prevent memory copy. + //! Get a input which has multiple variables. + //! TODO add a vector_view to prevent memory copy. std::vector Inputs(const std::string& name) const; - // Get a output with argument's name described in `op_proto` + //! Get a output with argument's name described in `op_proto` const std::string& Output(const std::string& name) const; - // Get an output which has multiple variables. - // TODO add a vector_view to prevent memory copy. + //! Get an output which has multiple variables. + //! TODO add a vector_view to prevent memory copy. std::vector Outputs(const std::string& name) const; public: @@ -199,7 +201,9 @@ class OperatorWithKernel : public OperatorBase { place_ = dev_ctx.GetPlace(); } - bool operator==(const OpKernelKey& o) const { return place_ == o.place_; } + bool operator==(const OpKernelKey& o) const { + return platform::places_are_same_class(place_, o.place_); + } }; struct OpKernelHash { diff --git a/paddle/framework/tensor.cc b/paddle/framework/tensor.cc index 964f15ab66bca7da75824e192e61600c29e572c0..ea7b2a1f7b17d9abc2c2e14de5ecd1cf4a1a5027 100644 --- a/paddle/framework/tensor.cc +++ b/paddle/framework/tensor.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include +#include "paddle/framework/tensor.h" namespace paddle { namespace framework {} diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index a36f375d2e42ee3c46ddef42954335cba7eb88f2..d3f56b31cd350fac746b8fd5a37f278b26db3e7d 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -20,6 +20,7 @@ limitations under the License. */ #include #include "paddle/framework/ddim.h" #include "paddle/memory/memory.h" +#include "paddle/platform/device_context.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -31,9 +32,11 @@ template struct CastToPyBufferImpl; } // namespace details } // namespace pybind + namespace framework { class Tensor { + public: template friend struct paddle::pybind::details::CastToPyBufferImpl; @@ -46,106 +49,84 @@ class Tensor { public: Tensor() : offset_(0) {} + /*! Return a pointer to mutable memory block. */ template - const T* data() const { - EnforceSufficientMemory(); - return reinterpret_cast( - reinterpret_cast(holder_->ptr()) + offset_); - } + inline T* data(); + /*! Return a pointer to constant memory block. */ template - T* data() { - EnforceSufficientMemory(); - return reinterpret_cast(reinterpret_cast(holder_->ptr()) + - offset_); - } - - template ::value>::type* = nullptr> - T* mutable_data(DDim dims, platform::Place place) { - Resize(dims); - return mutable_data(place); - } - - template ::value>::type* = nullptr> - T* mutable_data(platform::Place place) { - PADDLE_ENFORCE(product(dims_) > 0, - "Tensor's numel must be larger than zero to call " - "Tensor::mutable_data. Call Tensor::set_dim first."); - if (holder_ == nullptr || - !(holder_->place() == - place) /* some versions of boost::variant don't have operator!= */ - || holder_->size() < product(dims_) * sizeof(T) + offset_) { - if (platform::is_cpu_place(place)) { - holder_.reset(new PlaceholderImpl( - boost::get(place), product(dims_) * sizeof(T))); - } else if (platform::is_gpu_place(place)) { -#ifdef PADDLE_ONLY_CPU - PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); -#else - holder_.reset(new PlaceholderImpl( - boost::get(place), product(dims_) * sizeof(T))); -#endif - } else { - PADDLE_THROW("Unknown 'place'."); - } - offset_ = 0; - } - return reinterpret_cast(reinterpret_cast(holder_->ptr()) + - offset_); - } + inline const T* data() const; + + /** + * @brief Return a pointer to mutable memory block. + * @note If not exist, then allocation. + */ + template + inline T* mutable_data(platform::Place place); + + /** + * @brief Return a pointer to mutable memory block. + * + * @param[in] dims The dimensions of the memory block. + * @param[in] place The place of the memory block. + * + * @note If not exist, then allocation. + */ + template + inline T* mutable_data(DDim dims, platform::Place place); + /*! Return the dimensions of the memory block. */ + inline const DDim& dims() const; + + /*! Resize the dimensions of the memory block. */ + inline void Resize(const DDim& dims); + + /*! The internal of two tensors share the same memory block. */ + template + inline void ShareDataWith(const Tensor& src); + + /** + * @brief Copy the content of external tensor to a new place. + * + * @param[in] src The external tensor. + * @param[in] ctx The device context contains place where to store. + * + * @note CopyFrom supports CPU <-> GPU, GPU <-> GPU. + */ template - void ShareDataWith(const Tensor& src) { - src.EnforceSufficientMemory(); - *this = src; - } + inline void CopyFrom(const Tensor& src, + const platform::CPUDeviceContext& ctx); +#ifndef PADDLE_ONLY_CPU template - void CopyFrom(const Tensor& src, platform::Place dst_place) { - PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) && - platform::is_cpu_place(dst_place), - "Tensor::CopyFrom only support CPU now."); - src.EnforceSufficientMemory(); - size_t size = product(src.dims_) * sizeof(T); - Resize(src.dims()); - const void* src_ptr = static_cast(src.data()); - void* dst_ptr = static_cast(mutable_data(dst_place)); - memcpy(dst_ptr, src_ptr, size); - } + inline void CopyFrom(const Tensor& src, + const platform::CUDADeviceContext& ctx); +#endif + /** + * @brief Return the slice of the tensor. + * + * @param[in] begin_idx The begin index of the slice. + * @param[in] end_idx The end index of the slice. + */ template - Tensor Slice(const int& begin_idx, const int& end_idx) const { - EnforceSufficientMemory(); - PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero."); - PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound."); - PADDLE_ENFORCE(begin_idx < end_idx, - "Begin index must be less than end index."); - PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1."); - int base = product(dims_) / dims_[0]; - Tensor dst; - dst.holder_ = holder_; - DDim dst_dims = dims_; - dst_dims[0] = end_idx - begin_idx; - dst.Resize(dst_dims); - dst.offset_ = offset_ + begin_idx * base * sizeof(T); - return dst; - } - - void Resize(const DDim& dims) { dims_ = dims; } - - const DDim& dims() const { return dims_; } + inline Tensor Slice(const int& begin_idx, const int& end_idx) const; private: - // Placeholder hides type T, so it doesn't appear as a template - // parameter of Variable. + template + inline void check_memory_size() const; + + private: + /** + * @note Placeholder hides type T, so it doesn't appear as a template + * parameter of Variable. + */ struct Placeholder { virtual ~Placeholder() {} virtual void* ptr() const = 0; - virtual platform::Place place() const = 0; virtual size_t size() const = 0; virtual std::type_index type() const = 0; + virtual platform::Place place() const = 0; }; template @@ -156,33 +137,38 @@ class Tensor { place_(place), size_(size) {} - virtual void* ptr() const { return static_cast(ptr_.get()); } virtual size_t size() const { return size_; } - virtual paddle::platform::Place place() const { return place_; } + virtual platform::Place place() const { return place_; } + virtual void* ptr() const { return static_cast(ptr_.get()); } virtual std::type_index type() const { return std::type_index(typeid(T)); } + /*! the pointer of memory block. */ std::unique_ptr> ptr_; - platform::Place place_; // record the place of ptr_. - size_t size_; // size of the memory block. + + /*! the place of memory block. */ + platform::Place place_; + + /*! the size of memory block. */ + size_t size_; }; - template - inline void EnforceSufficientMemory() const { - PADDLE_ENFORCE(holder_ != nullptr, - "Tenosr holds no memory. Call Tensor::mutable_data first."); - PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, - "Tensor's dims_ is out of bound. Call Tensor::mutable_data " - "first to re-allocate memory."); - } - - std::shared_ptr holder_; // holds the memory block if allocated. + /*! holds the memory block if allocated. */ + std::shared_ptr holder_; + + /*! points to dimensions of memory block. */ DDim dims_; - // A PlaceHolder may be shared by more than one tensor. Some of them may be - // slices of the others. So the offset_ is introduced here to indicate the - // byte offset between PlaceHolder::ptr_ and where tensor's data really - // begins. + + /** + * @brief A PlaceHolder may be shared by more than one tensor. + * + * @note Some of them may be slices of the others. So the offset_ + * is introduced here to indicate the byte offset between + * PlaceHolder::ptr_ and where the tensor data really begins. + */ size_t offset_; }; } // namespace framework } // namespace paddle + +#include "paddle/framework/detail/tensor-inl.h" diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 530a8fb05eb42ca887b30b52aee9c1a1948171dd..fd7143cfaa6ee9c9f1430dab743aa6b67fdd461e 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -72,7 +72,8 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); EXPECT_EQ(p1, p2); } -#ifdef __CUDACC__ + +#ifndef PADDLE_ONLY_CPU { Tensor src_tensor; float* p1 = nullptr; @@ -123,7 +124,7 @@ TEST(Tensor, ShareDataWith) { ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } -#ifdef __CUDACC__ +#ifndef PADDLE_ONLY_CPU { Tensor src_tensor; Tensor dst_tensor; @@ -160,7 +161,7 @@ TEST(Tensor, Slice) { EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); } -#ifdef __CUDACC__ +#ifndef PADDLE_ONLY_CPU { Tensor src_tensor; src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); @@ -188,25 +189,74 @@ TEST(Tensor, Slice) { TEST(Tensor, CopyFrom) { using namespace paddle::framework; using namespace paddle::platform; + { + Tensor src_tensor; + Tensor dst_tensor; + + int* src_ptr = src_tensor.mutable_data(make_ddim({3, 3}), CPUPlace()); + + int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + memcpy(src_ptr, arr, 9 * sizeof(int)); - Tensor src_tensor; - int* src_ptr = src_tensor.mutable_data(make_ddim({3, 3}), CPUPlace()); - int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - memcpy(src_ptr, arr, 9 * sizeof(int)); - Tensor dst_tensor; - dst_tensor.CopyFrom(src_tensor, CPUPlace()); - const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], dst_ptr[i]); + auto* cpu_ctx = new paddle::platform::CPUDeviceContext(); + dst_tensor.CopyFrom(src_tensor, *cpu_ctx); + + const int* dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + Tensor slice_tensor = src_tensor.Slice(1, 2); + dst_tensor.CopyFrom(slice_tensor, *cpu_ctx); + const int* slice_ptr = slice_tensor.data(); + dst_ptr = dst_tensor.data(); + ASSERT_NE(dst_ptr, slice_ptr); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(dst_ptr[i], slice_ptr[i]); + } } +#ifndef PADDLE_ONLY_CPU + { + Tensor src_tensor; + Tensor gpu_tensor; + Tensor dst_tensor; + + int* src_ptr = src_tensor.mutable_data(make_ddim({3, 3}), CPUPlace()); + + int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + memcpy(src_ptr, arr, 9 * sizeof(int)); + + // CPU Tensor to GPU Tensor + auto gpu_ctx = new paddle::platform::CUDADeviceContext(0); + gpu_tensor.CopyFrom(src_tensor, *gpu_ctx); + + // GPU Tensor to CPU Tensor + auto cpu_ctx = new paddle::platform::CPUDeviceContext(); + dst_tensor.CopyFrom(gpu_tensor, *cpu_ctx); + + // Compare Tensors + const int* dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + Tensor slice_tensor = src_tensor.Slice(1, 2); + + // CPU Slice Tensor to GPU Tensor + gpu_tensor.CopyFrom(slice_tensor, *gpu_ctx); - Tensor slice_tensor = src_tensor.Slice(1, 2); - dst_tensor.CopyFrom(slice_tensor, CPUPlace()); - const int* slice_ptr = slice_tensor.data(); - dst_ptr = dst_tensor.data(); - ASSERT_NE(dst_ptr, slice_ptr); - for (size_t i = 0; i < 3; ++i) { - EXPECT_EQ(dst_ptr[i], slice_ptr[i]); + // GPU Tensor to CPU Tensor + dst_tensor.CopyFrom(gpu_tensor, *cpu_ctx); + + // Compare Slice Tensors + const int* slice_ptr = slice_tensor.data(); + dst_ptr = dst_tensor.data(); + ASSERT_NE(dst_ptr, slice_ptr); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(dst_ptr[i], slice_ptr[i]); + } } +#endif } diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index 27c1b4033b53b059d38ed88694b20b429cbb4cce..bb44970109c05d239e6b92d90b2079b752fa0104 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -27,12 +27,11 @@ BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator, system_allocator_(std::move(system_allocator)) {} BuddyAllocator::~BuddyAllocator() { - DLOG(INFO) << "BuddyAllocator Disconstructor makes sure that all of these " - "have actually been freed"; + VLOG(3) << "BuddyAllocator Disconstructor makes sure that all of these " + "have actually been freed"; while (!pool_.empty()) { auto block = static_cast(std::get<2>(*pool_.begin())); - DLOG(INFO) << "Free from block (" << block << ", " << max_chunk_size_ - << ")"; + VLOG(3) << "Free from block (" << block << ", " << max_chunk_size_ << ")"; system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); cache_.invalidate(block); @@ -52,12 +51,11 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { // acquire the allocator lock std::lock_guard lock(mutex_); - DLOG(INFO) << "Allocate " << unaligned_size << " bytes from chunk size " - << size; + VLOG(3) << "Allocate " << unaligned_size << " bytes from chunk size " << size; // if the allocation is huge, send directly to the system allocator if (size > max_chunk_size_) { - DLOG(INFO) << "Allocate from system allocator."; + VLOG(3) << "Allocate from system allocator."; return SystemAlloc(size); } @@ -72,9 +70,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { return nullptr; } } else { - DLOG(INFO) << "Allocation from existing memory block " << std::get<2>(*it) - << " at address " - << reinterpret_cast(std::get<2>(*it))->data(); + VLOG(3) << "Allocation from existing memory block " << std::get<2>(*it) + << " at address " + << reinterpret_cast(std::get<2>(*it))->data(); } total_used_ += size; @@ -91,10 +89,10 @@ void BuddyAllocator::Free(void* p) { // Acquire the allocator lock std::lock_guard lock(mutex_); - DLOG(INFO) << "Free from address " << block; + VLOG(3) << "Free from address " << block; if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) { - DLOG(INFO) << "Free directly from system allocator"; + VLOG(3) << "Free directly from system allocator"; system_allocator_->Free(block, block->total_size(cache_), block->index(cache_)); @@ -111,8 +109,8 @@ void BuddyAllocator::Free(void* p) { // Trying to merge the right buddy if (block->has_right_buddy(cache_)) { - DLOG(INFO) << "Merging this block " << block << " with its right buddy " - << block->right_buddy(cache_); + VLOG(3) << "Merging this block " << block << " with its right buddy " + << block->right_buddy(cache_); auto right_buddy = block->right_buddy(cache_); @@ -129,8 +127,8 @@ void BuddyAllocator::Free(void* p) { // Trying to merge the left buddy if (block->has_left_buddy(cache_)) { - DLOG(INFO) << "Merging this block " << block << " with its left buddy " - << block->left_buddy(cache_); + VLOG(3) << "Merging this block " << block << " with its left buddy " + << block->left_buddy(cache_); auto left_buddy = block->left_buddy(cache_); @@ -146,8 +144,8 @@ void BuddyAllocator::Free(void* p) { } // Dumping this block into pool - DLOG(INFO) << "Inserting free block (" << block << ", " - << block->total_size(cache_) << ")"; + VLOG(3) << "Inserting free block (" << block << ", " + << block->total_size(cache_) << ")"; pool_.insert( IndexSizeAddress(block->index(cache_), block->total_size(cache_), block)); @@ -166,7 +164,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) { size_t index = 0; void* p = system_allocator_->Alloc(index, size); - DLOG(INFO) << "Allocated " << p << " from system allocator."; + VLOG(3) << "Allocated " << p << " from system allocator."; if (p == nullptr) return nullptr; @@ -192,8 +190,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { if (p == nullptr) return pool_.end(); - DLOG(INFO) << "Creating and inserting new block " << p - << " from system allocator"; + VLOG(3) << "Creating and inserting new block " << p + << " from system allocator"; static_cast(p)->init(cache_, MemoryBlock::FREE_CHUNK, index, max_chunk_size_, nullptr, nullptr); @@ -237,19 +235,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, auto block = static_cast(std::get<2>(*it)); pool_.erase(it); - DLOG(INFO) << "Split block (" << block << ", " << block->total_size(cache_) - << ") into"; + VLOG(3) << "Split block (" << block << ", " << block->total_size(cache_) + << ") into"; block->split(cache_, size); - DLOG(INFO) << "Left block (" << block << ", " << block->total_size(cache_) - << ")"; + VLOG(3) << "Left block (" << block << ", " << block->total_size(cache_) + << ")"; block->set_type(cache_, MemoryBlock::ARENA_CHUNK); // the rest of memory if exist if (block->has_right_buddy(cache_)) { if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) { - DLOG(INFO) << "Insert right block (" << block->right_buddy(cache_) << ", " - << block->right_buddy(cache_)->total_size(cache_) << ")"; + VLOG(3) << "Insert right block (" << block->right_buddy(cache_) << ", " + << block->right_buddy(cache_)->total_size(cache_) << ")"; pool_.insert( IndexSizeAddress(block->right_buddy(cache_)->index(cache_), @@ -276,7 +274,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() { return; } - DLOG(INFO) << "Return block " << block << " to fallback allocator."; + VLOG(3) << "Return block " << block << " to fallback allocator."; system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); cache_.invalidate(block); @@ -312,7 +310,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() { MemoryBlock* block = static_cast(std::get<2>(*pool)); - DLOG(INFO) << "Return block " << block << " to base allocator."; + VLOG(3) << "Return block " << block << " to base allocator."; system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); cache_.invalidate(block); diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index 5e0d64707299acb22aacff0fad237c135f614d9c..fd4d5e7082c06e481e74515c9cd7f7c13f1cff4b 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -29,10 +29,10 @@ void Free(Place, void*); template size_t Used(Place); -template ::value>::type* = nullptr> +template class PODDeleter { + static_assert(std::is_pod::value, "T must be POD"); + public: PODDeleter(Place place) : place_(place) {} void operator()(T* ptr) { Free(place_, static_cast(ptr)); } diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 8d415fbd2e72af556e21f89c37d31b9fad130e3d..1424b0284372d8dfe9eb93ee251b121a48b19b0b 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -13,17 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/add_op.h" -#include "paddle/framework/op_registry.h" -#include "paddle/framework/tensor.h" namespace paddle { namespace operators { -class AddOp : public framework::OperatorWithKernel { +class AddOp : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override { + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override { PADDLE_ENFORCE(inputs.size() == 2, "Input size of AddOp must be two"); PADDLE_ENFORCE(outputs.size() == 1, "Output size of AddOp must be one"); PADDLE_ENFORCE( @@ -35,10 +32,10 @@ protected: } }; -class AddOpMaker : public framework::OpProtoAndCheckerMaker { +class AddOpMaker : public OpProtoAndCheckerMaker { public: - AddOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of add op"); AddInput("Y", "The second input of add op"); AddOutput("Out", "The output of add op"); @@ -50,11 +47,10 @@ The equation is: Out = X + Y } }; -class AddOpGrad : public framework::OperatorWithKernel { +class AddOpGrad : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override {} + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override {} std::string DebugString() const override { LOG(INFO) << "AddOpGrad"; return ""; @@ -64,7 +60,6 @@ protected: } // namespace operators } // namespace paddle -REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker); -REGISTER_GRADIENT_OP(add_two, add_two_grad, paddle::operators::AddOpGrad); -REGISTER_OP_CPU_KERNEL( - add_two, paddle::operators::AddKernel); +REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker); +REGISTER_GRADIENT_OP(add_two, add_two_grad, ops::AddOpGrad); +REGISTER_OP_CPU_KERNEL(add_two, ops::AddKernel); diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index 2e5a755f92e4d1fa487152ed453fe3b2823062ed..79d8de6cd46e1c72b14b0554c7be7b4eee281f4c 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -1,5 +1,4 @@ -#include "paddle/operators/add_op.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/add_op.h" -REGISTER_OP_GPU_KERNEL(add_two, - paddle::operators::AddKernel); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(add_two, ops::AddKernel); diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index 39d54a63bd16cdafeec1cfcd86ef5d142382e880..0c39433788e1e07e30aaadc4766028219b05bfa5 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -13,27 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "glog/logging.h" -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { template -class AddKernel : public framework::OpKernel { +class AddKernel : public OpKernel { public: - void Compute(const framework::KernelContext& context) const override { - auto input0 = context.Input(0)->Get(); - auto input1 = context.Input(1)->Get(); - auto* output = context.Output(0)->GetMutable(); + void Compute(const KernelContext& context) const override { + auto input0 = context.Input(0)->Get(); + auto input1 = context.Input(1)->Get(); + auto output = context.Output(0)->GetMutable(); output->mutable_data(context.GetPlace()); - framework::EigenVector::Flatten(*output).device( + EigenVector::Flatten(*output).device( *(context.GetEigenDevice())) = - framework::EigenVector::Flatten(input0) + - framework::EigenVector::Flatten(input1); + EigenVector::Flatten(input0) + EigenVector::Flatten(input1); } }; diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 7d7bb09f3d63bef49913c3c7501082c509c45653..46c88d4d1a28eeedd02eb699562244651ead6d68 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -13,17 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/cross_entropy_op.h" -#include "paddle/framework/op_registry.h" -#include "paddle/framework/tensor.h" namespace paddle { namespace operators { -class OnehotCrossEntropyOp : public framework::OperatorWithKernel { +class OnehotCrossEntropyOp : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override { + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override { PADDLE_ENFORCE(inputs.size() == 2, "Input size of OnehotCrossEntropyOp must be two"); PADDLE_ENFORCE(outputs.size() == 1, @@ -35,15 +32,14 @@ protected: PADDLE_ENFORCE(inputs[0]->dims().size() == 2, "X's dimension must be 2."); PADDLE_ENFORCE(outputs[0]->dims().size() == 1, "label's dimension must be 1."); - outputs[0]->Resize(framework::make_ddim({inputs[0]->dims()[0]})); + outputs[0]->Resize({inputs[0]->dims()[0]}); } }; -class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { +class OnehotCrossEntropyOpMaker : public OpProtoAndCheckerMaker { public: - OnehotCrossEntropyOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { + OnehotCrossEntropyOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of OnehotCrossEntropyOp"); AddInput("label", "The second input of OnehotCrossEntropyOp"); AddOutput("Y", "The output of OnehotCrossEntropyOp"); @@ -59,9 +55,7 @@ OnehotCrossEntropy Operator. } // namespace paddle REGISTER_OP(onehot_cross_entropy, - paddle::operators::OnehotCrossEntropyOp, - paddle::operators::OnehotCrossEntropyOpMaker); -REGISTER_OP_CPU_KERNEL( - onehot_cross_entropy, - paddle::operators::OnehotCrossEntropyOpKernel<::paddle::platform::CPUPlace, - float>); + ops::OnehotCrossEntropyOp, + ops::OnehotCrossEntropyOpMaker); +REGISTER_OP_CPU_KERNEL(onehot_cross_entropy, + ops::OnehotCrossEntropyOpKernel); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index 1bcdcb7ea650a361cad376ecdd5e96fe8e8f7c94..19e4b74596a0f59edd04db830ec6f6f481373465 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -1,6 +1,4 @@ #include "paddle/operators/cross_entropy_op.h" -#include "paddle/framework/op_registry.h" REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, - paddle::operators::OnehotCrossEntropyOpKernel< - ::paddle::platform::GPUPlace, float>); \ No newline at end of file + ops::OnehotCrossEntropyOpKernel); \ No newline at end of file diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index ad2c7f34e1fd91b97287b4c5f4004d5b79ea4f82..0383df46be3a3cea7dde8f1b45857e64d5a2f2d8 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -13,23 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "glog/logging.h" -#include "paddle/framework/operator.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { template -class OnehotCrossEntropyOpKernel : public framework::OpKernel { +class OnehotCrossEntropyOpKernel : public OpKernel { public: constexpr T LOG_THRESHOLD() const { return static_cast(1e-20); } - void Compute(const framework::KernelContext& context) const override { - auto X = context.Input(0)->Get(); + void Compute(const KernelContext& context) const override { + auto X = context.Input(0)->Get(); const T* X_data = X.data(); - const int* label_data = - context.Input(1)->Get().data(); - auto* Y = context.Output(0)->GetMutable(); + const int* label_data = context.Input(1)->Get().data(); + auto* Y = context.Output(0)->GetMutable(); Y->mutable_data(context.GetPlace()); diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc index 01e96f4c4817466e3266ca57a0d0ae2368b3e097..c4a9f5937f4fa8c60989bea1726cedbb73330156 100644 --- a/paddle/operators/fc_op.cc +++ b/paddle/operators/fc_op.cc @@ -12,41 +12,38 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/framework/net.h" -#include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" +#include "type_alias.h" namespace paddle { namespace operators { -class FullyConnectedOp : public framework::PlainNet { +class FullyConnectedOp : public NetOp { public: void Init() override { - AddOp(framework::OpRegistry::CreateOp("mul", - { - Input("X"), Input("W"), - }, - {Output("before_act")}, - {})); + AddOp(OpRegistry::CreateOp("mul", + { + Input("X"), Input("W"), + }, + {Output("before_act")}, + {})); auto b = Input("b"); - if (b != framework::OperatorBase::EMPTY_VAR_NAME()) { - AddOp(framework::OpRegistry::CreateOp("rowwise_add", - {Output("before_act"), Input("b")}, - {Output("before_act")}, - {})); + if (b != EMPTY_VAR_NAME()) { + AddOp(OpRegistry::CreateOp("rowwise_add", + {Output("before_act"), Input("b")}, + {Output("before_act")}, + {})); } auto activation = GetAttr("activation"); - AddOp(framework::OpRegistry::CreateOp( + AddOp(OpRegistry::CreateOp( activation, {Output("before_act")}, {Output("Y")}, {})); CompleteAddOp(false); } }; -class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker { +class FullyConnectedOpMaker : public OpProtoAndCheckerMaker { public: - FullyConnectedOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) + FullyConnectedOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "the input of fc operator"); AddInput("W", "the weight of fc operator"); @@ -71,6 +68,4 @@ USE_OP(rowwise_add); USE_OP(sigmoid); USE_OP(softmax); -REGISTER_OP(fc, - paddle::operators::FullyConnectedOp, - paddle::operators::FullyConnectedOpMaker); +REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index cd74c8b976d18ffecd50077cc81e1fce56bea155..22c1b78005358a934c57d487f5b0cff133f61f0c 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -13,17 +13,14 @@ limitations under the License. */ #include "paddle/operators/mul_op.h" -#include "paddle/framework/op_registry.h" -#include "paddle/framework/tensor.h" namespace paddle { namespace operators { -class MulOp : public framework::OperatorWithKernel { +class MulOp : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override { + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override { PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs"); auto dim0 = inputs[0]->dims(); auto dim1 = inputs[1]->dims(); @@ -37,10 +34,10 @@ protected: } }; -class MulOpMaker : public framework::OpProtoAndCheckerMaker { +class MulOpMaker : public OpProtoAndCheckerMaker { public: - MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { + MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of mul op"); AddInput("Y", "The second input of mul op"); AddOutput("Out", "The output of mul op"); @@ -52,11 +49,10 @@ The equation is: Out = X * Y } }; -class MulOpGrad : public framework::OperatorWithKernel { +class MulOpGrad : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override {} + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override {} std::string DebugString() const override { LOG(INFO) << "MulGrad"; return ""; @@ -66,8 +62,7 @@ protected: } // namespace operators } // namespace paddle -REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker); -REGISTER_GRADIENT_OP(mul, mul_grad, paddle::operators::MulOpGrad); +REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker); +REGISTER_GRADIENT_OP(mul, mul_grad, ops::MulOpGrad); -REGISTER_OP_CPU_KERNEL( - mul, paddle::operators::MulKernel); +REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index 3ee581dc77dc08e6e47b240588811fbc7c6ea303..c27fc886ce7238a13c8ef86bce673a2b54949a9d 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -13,8 +13,5 @@ limitations under the License. */ #include "paddle/operators/mul_op.h" -#include "paddle/framework/op_registry.h" -REGISTER_OP_GPU_KERNEL(mul, - paddle::operators::MulKernel); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); \ No newline at end of file diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index e6bad7fb9da2d489666aa67f032552e48a86c6cb..467975044638a3f034ceec84173e8d3fed43cc0c 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -14,30 +14,27 @@ #pragma once -#include "glog/logging.h" -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { template -class MulKernel : public framework::OpKernel { +class MulKernel : public OpKernel { public: - void Compute(const framework::KernelContext& context) const override { + void Compute(const KernelContext& context) const override { Eigen::array, 1> dim_pair = { {Eigen::IndexPair(1, 0)}}; - auto input0 = context.Input(0)->Get(); - auto input1 = context.Input(1)->Get(); - auto* output = context.Output(0)->GetMutable(); + auto input0 = context.Input(0)->Get(); + auto input1 = context.Input(1)->Get(); + auto* output = context.Output(0)->GetMutable(); output->mutable_data(context.GetPlace()); - framework::EigenMatrix::From(*output).device( - *(context.GetEigenDevice())) = - framework::EigenMatrix::From(input0).contract( - framework::EigenMatrix::From(input1), dim_pair); + EigenMatrix::From(*output).device(*(context.GetEigenDevice())) = + EigenMatrix::From(input0).contract(EigenMatrix::From(input1), + dim_pair); } }; } // namespace operators diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index e04d69fa72a2f54cc1cc0829d12e0da1609b3383..4129422fa744b2a7cf135b681efa73ffb2ebcdcc 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -13,15 +13,13 @@ limitations under the License. */ #include "paddle/operators/rowwise_add_op.h" -#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { -class RowWiseAddOp : public framework::OperatorWithKernel { +class RowWiseAddOp : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override { + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override { PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs is needed by rowwise add"); auto dim0 = inputs[0]->dims(); auto dim1 = inputs[1]->dims(); @@ -34,11 +32,10 @@ protected: } }; -class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker { +class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { public: - RowWiseAddOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { + RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The left input of row-wise add op, must be matrix"); AddInput("b", "The right input of row-wise add op, must be vector"); AddOutput("Out", "The output of row-wise add op"); @@ -53,9 +50,6 @@ for i in xrange(X.shape[0]): } // namespace operators } // namespace paddle -REGISTER_OP(rowwise_add, - paddle::operators::RowWiseAddOp, - paddle::operators::RowWiseAddOpMaker); -REGISTER_OP_CPU_KERNEL( - rowwise_add, - paddle::operators::RowWiseAddKernel); +REGISTER_OP(rowwise_add, ops::RowWiseAddOp, ops::RowWiseAddOpMaker); +REGISTER_OP_CPU_KERNEL(rowwise_add, + ops::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index 5dfac4fd2cf9b7da24dcfa5e7583b9ece12bad1e..4b33e38ebabe853e179fe70ef7fde0a80b9050e2 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -1,6 +1,4 @@ -#include "paddle/framework/op_registry.h" #include "paddle/operators/rowwise_add_op.h" -REGISTER_OP_GPU_KERNEL( - rowwise_add, - paddle::operators::RowWiseAddKernel); +REGISTER_OP_GPU_KERNEL(rowwise_add, + ops::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index dc47fe7c847bd0c8c179ac0a5f44b8cc541b47cb..4596925e9322f373c822608fd9aa6ecee6144d4c 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -13,25 +13,23 @@ limitations under the License. */ #pragma once -#include "glog/logging.h" -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { template -class RowWiseAddKernel : public framework::OpKernel { +class RowWiseAddKernel : public OpKernel { public: - void Compute(const framework::KernelContext& context) const override { - auto in0 = context.Input(0)->Get(); - auto in1 = context.Input(1)->Get(); - auto* out = context.Output(0)->GetMutable(); + void Compute(const KernelContext& context) const override { + auto in0 = context.Input(0)->Get(); + auto in1 = context.Input(1)->Get(); + auto* out = context.Output(0)->GetMutable(); out->mutable_data(context.GetPlace()); - auto input = framework::EigenMatrix::From(in0); - auto bias = framework::EigenVector::From(in1); - auto output = framework::EigenMatrix::From(*out); + auto input = EigenMatrix::From(in0); + auto bias = EigenVector::From(in1); + auto output = EigenMatrix::From(*out); const int bias_size = bias.dimension(0); const int rest_size = input.size() / bias_size; diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index 66ab1e001142bfb005d3c2e2ea29e01a32dce507..f6c654a9e7083704e353c276e0abc975f4e61ef9 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -13,17 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/sgd_op.h" -#include "paddle/framework/op_registry.h" -#include "paddle/framework/tensor.h" namespace paddle { namespace operators { -class SGDOp : public framework::OperatorWithKernel { +class SGDOp : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override { + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override { PADDLE_ENFORCE(inputs.size() == 2, "Input size of SGDOp must be two"); PADDLE_ENFORCE(outputs.size() == 1, "Output size of SGDOp must be one"); PADDLE_ENFORCE(inputs[0] != nullptr, "inputs[0] mast be set"); @@ -35,10 +32,10 @@ protected: } }; -class SGDOpMaker : public framework::OpProtoAndCheckerMaker { +class SGDOpMaker : public OpProtoAndCheckerMaker { public: - SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { + SGDOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("param", "input parameter"); AddInput("grad", "input gradient"); AddOutput("param_out", "output parameter"); @@ -55,7 +52,5 @@ param_out = param - learning_rate * grad; } // namespace operators } // namespace paddle -REGISTER_OP(sgd, paddle::operators::SGDOp, paddle::operators::SGDOpMaker); -typedef paddle::operators::SGDOpKernel<::paddle::platform::CPUPlace, float> - SGDOpKernel_CPU_float; -REGISTER_OP_CPU_KERNEL(sgd, SGDOpKernel_CPU_float); +REGISTER_OP(sgd, ops::SGDOp, ops::SGDOpMaker); +REGISTER_OP_CPU_KERNEL(sgd, ops::SGDOpKernel); diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index 400425db10896e3970fc7468e34aba596a536184..f8f5b90cab460b4457cfb0a88bfc012bafe0fbc2 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -1,5 +1,3 @@ #include "paddle/operators/sgd_op.h" -#include "paddle/framework/op_registry.h" -typedef paddle::operators::SGDOpKernel<::paddle::platform::GPUPlace, float> SGDOpKernel_GPU_float; -REGISTER_OP_GPU_KERNEL(sgd, SGDOpKernel_GPU_float); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel); \ No newline at end of file diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index 4b2d214618e5c7c15695bd66604139d805255c47..65179d323bd991b8b4e196c069a11cd901c62082 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -13,28 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "glog/logging.h" -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { template -class SGDOpKernel : public framework::OpKernel { +class SGDOpKernel : public OpKernel { public: - void Compute(const framework::KernelContext& ctx) const override { - auto param = ctx.Input("param")->Get(); - auto grad = ctx.Input("grad")->Get(); - auto* param_out = ctx.Output(0)->GetMutable(); + void Compute(const KernelContext& ctx) const override { + auto param = ctx.Input("param")->Get(); + auto grad = ctx.Input("grad")->Get(); + auto* param_out = ctx.Output(0)->GetMutable(); float lr = ctx.op_.GetAttr("learning_rate"); param_out->mutable_data(ctx.GetPlace()); - framework::EigenVector::Flatten(*param_out) - .device(*(ctx.GetEigenDevice())) = - framework::EigenVector::Flatten(param) - - lr * framework::EigenVector::Flatten(grad); + EigenVector::Flatten(*param_out).device(*(ctx.GetEigenDevice())) = + EigenVector::Flatten(param) - lr * EigenVector::Flatten(grad); } }; diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index bf63af28b003daad0ab8c223e71a561437ee663a..716f1d9c4dbc45e2d5569f8d634b06fd988a149c 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -13,37 +13,33 @@ limitations under the License. */ #include "paddle/operators/sigmoid_op.h" -#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { -class SigmoidOp : public framework::OperatorWithKernel { +class SigmoidOp : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override { + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override { PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input"); PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output"); outputs[0]->Resize(inputs[0]->dims()); } }; -class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { +class SigmoidOpMaker : public OpProtoAndCheckerMaker { public: - SigmoidOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : framework::OpProtoAndCheckerMaker(proto, op_checker) { + SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "sigmoid input"); AddOutput("Y", "sigmoid output"); AddComment("Sigmoid function"); } }; -class SigmoidOpGrad : public framework::OperatorWithKernel { +class SigmoidOpGrad : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override {} + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override {} std::string DebugString() const override { LOG(INFO) << "SigmoidGrad"; return ""; @@ -53,11 +49,7 @@ protected: } // namespace operators } // namespace paddle -REGISTER_OP(sigmoid, - paddle::operators::SigmoidOp, - paddle::operators::SigmoidOpMaker); -REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, paddle::operators::SigmoidOpGrad); +REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker); +REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, ops::SigmoidOpGrad); -REGISTER_OP_CPU_KERNEL( - sigmoid, - paddle::operators::SigmoidKernel); +REGISTER_OP_CPU_KERNEL(sigmoid, ops::SigmoidKernel); diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu index ed344b2bfd4a9eeef2ce79746bec608469503c9c..f679b20418f04eff4310efe4e121963ce5a235e0 100644 --- a/paddle/operators/sigmoid_op.cu +++ b/paddle/operators/sigmoid_op.cu @@ -1,5 +1,3 @@ #include "paddle/operators/sigmoid_op.h" -#include "paddle/framework/op_registry.h" -REGISTER_OP_GPU_KERNEL( - sigmoid, paddle::operators::SigmoidKernel); +REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel); diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index 2b9356246c471853b53af1d73f8b2a3c206db7ad..896a6f5d83e0f96de50e3aaae6f545172bf5da14 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -14,25 +14,23 @@ #pragma once -#include "glog/logging.h" -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { template -class SigmoidKernel : public framework::OpKernel { +class SigmoidKernel : public OpKernel { public: - void Compute(const framework::KernelContext& context) const override { - auto input = context.Input(0)->Get(); - auto* output = context.Output(0)->GetMutable(); + void Compute(const KernelContext& context) const override { + auto input = context.Input(0)->Get(); + auto* output = context.Output(0)->GetMutable(); output->mutable_data(context.GetPlace()); - framework::EigenVector::Flatten(*output).device( + EigenVector::Flatten(*output).device( *(context.GetEigenDevice())) = - 1.0 / (1.0 + (-1.0 * framework::EigenVector::Flatten(input)).exp()); + 1.0 / (1.0 + (-1.0 * EigenVector::Flatten(input)).exp()); } }; } // namespace operators diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 82f72fa19f690bebdff01629e75d17eecd6ada74..df60b62fa6ac8d67c9dadc40ec49aaedab92bc88 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -12,16 +12,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/softmax_op.h" -#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { -class SoftmaxOp : public framework::OperatorWithKernel { +class SoftmaxOp : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override { + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override { PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax"); PADDLE_ENFORCE(inputs[0]->dims().size() == 2, "The input of softmax op must be matrix"); @@ -31,10 +29,9 @@ protected: } }; -class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { +class SoftmaxOpMaker : public OpProtoAndCheckerMaker { public: - SoftmaxOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) + SoftmaxOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "input of softmax"); AddOutput("Y", "output of softmax"); @@ -42,11 +39,10 @@ public: } }; -class SoftmaxOpGrad : public framework::OperatorWithKernel { +class SoftmaxOpGrad : public OperatorWithKernel { protected: - void InferShape( - const std::vector &inputs, - const std::vector &outputs) const override {} + void InferShape(const std::vector &inputs, + const std::vector &outputs) const override {} std::string DebugString() const override { LOG(INFO) << "SoftmaxOpGrad"; return ""; @@ -56,9 +52,6 @@ protected: } // namespace operators } // namespace paddle -namespace ops = paddle::operators; - REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); -REGISTER_GRADIENT_OP(softmax, softmax_grad, paddle::operators::SoftmaxOpGrad); -REGISTER_OP_CPU_KERNEL(softmax, - ops::SoftmaxKernel); +REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); +REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu index 60676191eb9460868a266d0e4f70357fa78bec2c..a1f6944a369fe5148ffcfeabf3bf7063dcbc2664 100644 --- a/paddle/operators/softmax_op.cu +++ b/paddle/operators/softmax_op.cu @@ -1,5 +1,4 @@ #include "paddle/framework/op_registry.h" #include "paddle/operators/softmax_op.h" -REGISTER_OP_GPU_KERNEL( - softmax, paddle::operators::SoftmaxKernel); +REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel); diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 500c188dbfcf28ae52c2d5b06466539e115acc4a..625a87b58560231572c1cca2a21bd0c47c8cb296 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -14,23 +14,21 @@ #pragma once -#include "glog/logging.h" -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { template -class SoftmaxKernel : public framework::OpKernel { +class SoftmaxKernel : public OpKernel { public: - void Compute(const framework::KernelContext& context) const override { - auto input = context.Input(0)->Get(); - auto* output = context.Output(0)->GetMutable(); + void Compute(const KernelContext& context) const override { + auto input = context.Input(0)->Get(); + auto* output = context.Output(0)->GetMutable(); output->mutable_data(context.GetPlace()); - auto logits = framework::EigenMatrix::From(input); - auto softmax = framework::EigenMatrix::From(*output); + auto logits = EigenMatrix::From(input); + auto softmax = EigenMatrix::From(*output); const int kBatchDim = 0; const int kClassDim = 1; diff --git a/paddle/operators/type_alias.h b/paddle/operators/type_alias.h new file mode 100644 index 0000000000000000000000000000000000000000..b712e457ff60e8b30b87c0d549693d53e9f05d59 --- /dev/null +++ b/paddle/operators/type_alias.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/net.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using OpKernel = framework::OpKernel; +using KernelContext = framework::KernelContext; +template +using EigenVector = framework::EigenVector; +template +using EigenMatrix = framework::EigenMatrix; +template +using EigenTensor = framework::EigenTensor; +using Tensor = framework::Tensor; +using OperatorWithKernel = framework::OperatorWithKernel; +using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; +using OpProto = framework::OpProto; +using OpAttrChecker = framework::OpAttrChecker; +using CPUPlace = platform::CPUPlace; +using GPUPlace = platform::GPUPlace; +using NetOp = framework::NetOp; +using OpRegistry = framework::OpRegistry; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index a3a10fc07fed1cb245dc721d18ea927ea25393a1..fd4adbd9deca12ad6c3a59cfd5d30fb0cb6fcf98 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -58,11 +58,6 @@ struct EnforceNotMet : public std::exception { // For more details, please check https://stackoverflow.com/a/43870188/724872. #define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) -template -inline void throw_on_error(T e) { - throw_on_error(e, ""); -} - template inline typename std::enable_if::type throw_on_error( int stat, const Args&... args) { @@ -132,6 +127,11 @@ inline typename std::enable_if::type throw_on_error( #endif // PADDLE_ONLY_CPU +template +inline void throw_on_error(T e) { + throw_on_error(e, ""); +} + #define PADDLE_THROW(...) \ do { \ throw ::paddle::platform::EnforceNotMet( \ diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 0b152d03c0641113370fd634aed05ce6b3fb6cf2..ccefcd2511ca0f132d127463166f9f40779a1d85 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -146,22 +146,22 @@ All parameter, weight, gradient are variables in Paddle. }); ExposeOperator(operator_base); - using PlainNetPtr = std::shared_ptr; - py::class_ net(m, "Net"); + py::class_> net(m, "Net"); net.def_static("create", - []() -> std::shared_ptr { - auto retv = std::make_shared(); + []() -> std::shared_ptr { + auto retv = std::make_shared(); retv->type_ = "plain_net"; return retv; }) - .def("add_op", &pd::PlainNet::AddOp) + .def("add_op", &pd::NetOp::AddOp) .def("add_op", - [](PlainNetPtr& self, const PlainNetPtr& net) -> void { - self->AddOp(std::static_pointer_cast(net)); + [](pd::NetOp& self, const std::shared_ptr& net) -> void { + self.AddOp(std::static_pointer_cast(net)); }) - .def("complete_add_op", &pd::PlainNet::CompleteAddOp) - .def("complete_add_op", [](PlainNetPtr& self) { self->CompleteAddOp(); }); + .def("complete_add_op", &pd::NetOp::CompleteAddOp) + .def("complete_add_op", + [](std::shared_ptr& self) { self->CompleteAddOp(); }); ExposeOperator(net); m.def("unique_integer", UniqueIntegerGenerator); diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py index 9b9f979bb615f37ec1dc9baa154d28741b1400d5..ecba87191045cff6c05014010e60575741238f8d 100644 --- a/python/paddle/trainer_config_helpers/attrs.py +++ b/python/paddle/trainer_config_helpers/attrs.py @@ -272,7 +272,7 @@ class ExtraLayerAttribute(object): for key in self.attr: if not hasattr(self, 'can_%s' % key) or \ not getattr(self, 'can_%s' % key): - raise NotImplementedError("Layer %s cannot support %s" % + raise NotImplementedError("Layer %s does not support %s" % (layer_name, key)) @staticmethod diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 21eba71527e60833e0c69b344ecc639626faa529..14f072fc55109d770edf469ad7c574b8dda8a434 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -865,7 +865,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): @wrap_name_default("embedding") @wrap_param_attr_default() -@layer_support(ERROR_CLIPPING) +@layer_support(ERROR_CLIPPING, DROPOUT) def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): """ Define a embedding Layer. @@ -1320,7 +1320,7 @@ def pooling_layer(input, @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) @wrap_act_default(param_names=["act", 'state_act'], act=TanhActivation()) @wrap_name_default("lstmemory") -@layer_support(DROPOUT) +@layer_support() def lstmemory(input, name=None, size=None, @@ -1429,7 +1429,7 @@ def lstmemory(input, @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) @wrap_act_default(param_names=["act"], act=TanhActivation()) @wrap_name_default("gru") -@layer_support(DROPOUT) +@layer_support() def grumemory(input, size=None, name=None, @@ -1793,7 +1793,7 @@ def repeat_layer(input, @wrap_name_default("seqreshape") @wrap_act_default(act=IdentityActivation()) @wrap_bias_attr_default(has_bias=False) -@layer_support() +@layer_support(ERROR_CLIPPING, DROPOUT) def seq_reshape_layer(input, reshape_size, act=None, @@ -2703,7 +2703,7 @@ def img_cmrnorm_layer(input, default_factory=lambda _: ParamAttr(initial_mean=1.0, initial_std=0.)) @wrap_act_default(act=ReluActivation()) @wrap_name_default("batch_norm") -@layer_support(DROPOUT) +@layer_support(DROPOUT, ERROR_CLIPPING) def batch_norm_layer(input, act=None, name=None, @@ -2783,15 +2783,6 @@ def batch_norm_layer(input, :return: LayerOutput object. :rtype: LayerOutput """ - if not isinstance(act, ReluActivation): - logger.log(logging.WARN, - "%s is not recommend for batch normalization's activation, " - "maybe the relu is better" % act.name) - - if not isinstance(input.activation, LinearActivation): - logger.log(logging.WARN, - "The activation should be inside batch normalization, the " - "previous layer's activation may be Linear") if num_channels is None: if input.num_filters is not None: @@ -2861,7 +2852,7 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): @wrap_name_default("addto") @wrap_act_default(act=LinearActivation()) @wrap_bias_attr_default(has_bias=False) -@layer_support(DROPOUT) +@layer_support(DROPOUT, ERROR_CLIPPING) def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): """ AddtoLayer. @@ -2940,7 +2931,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): @wrap_act_default(act=IdentityActivation()) @wrap_name_default("concat") -@layer_support() +@layer_support(DROPOUT, ERROR_CLIPPING) def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): """ Concat all input vector into one huge vector. @@ -3024,7 +3015,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): @wrap_name_default("seqconcat") @wrap_act_default(act=IdentityActivation()) @wrap_bias_attr_default(has_bias=False) -@layer_support() +@layer_support(DROPOUT, ERROR_CLIPPING) def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, bias_attr=None): """ @@ -3177,7 +3168,7 @@ def memory(name, @wrap_act_default(param_names=['state_act'], act=TanhActivation()) @wrap_act_default(act=TanhActivation()) @wrap_name_default('lstm_step') -@layer_support(ERROR_CLIPPING, DROPOUT) +@layer_support() def lstm_step_layer(input, state, size=None, @@ -4480,7 +4471,7 @@ def tensor_layer(a, @wrap_param_attr_default() @wrap_bias_attr_default() @wrap_act_default() -@layer_support() +@layer_support(DROPOUT, ERROR_CLIPPING) def selective_fc_layer(input, size, select=None, @@ -5974,7 +5965,7 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): """ The crop layer crops images by offset and shape. User can set crop shape by args 'shape' explicitly or by reference input layer. - + The example usage is: .. code-block:: python diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index 645f3cc0dce70752c20569523e4bab440861f6a1..111496618dfa997246d0a067b0cd4c7dad74f9dc 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -166,55 +166,37 @@ def cluster_files_reader(files_pattern, return reader -def convert(output_path, - reader, - num_shards, - name_prefix, - max_lines_to_shuffle=1000): +def convert(output_path, reader, line_count, name_prefix): import recordio """ Convert data from reader to recordio format files. :param output_path: directory in which output files will be saved. :param reader: a data reader, from which the convert program will read data instances. - :param num_shards: the number of shards that the dataset will be partitioned into. :param name_prefix: the name prefix of generated files. :param max_lines_to_shuffle: the max lines numbers to shuffle before writing. """ - assert num_shards >= 1 - assert max_lines_to_shuffle >= 1 - - def open_writers(): - w = [] - for i in range(0, num_shards): - n = "%s/%s-%05d-of-%05d" % (output_path, name_prefix, i, - num_shards - 1) - w.append(recordio.writer(n)) - - return w - - def close_writers(w): - for i in range(0, num_shards): - w[i].close() + assert line_count >= 1 + indx_f = 0 - def write_data(w, lines): + def write_data(indx_f, lines): random.shuffle(lines) - for i, d in enumerate(lines): + filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f) + writer = recordio.writer(filename) + for l in lines: # FIXME(Yancey1989): # dumps with protocol: pickle.HIGHEST_PROTOCOL - o = pickle.dumps(d) - w[i % num_shards].write(o) + writer.write(cPickle.dumps(l)) + writer.close() - w = open_writers() lines = [] - for i, d in enumerate(reader()): lines.append(d) - if i % max_lines_to_shuffle == 0 and i >= max_lines_to_shuffle: - write_data(w, lines) + if i % line_count == 0 and i >= line_count: + write_data(indx_f, lines) lines = [] + indx_f += 1 continue - write_data(w, lines) - close_writers(w) + write_data(indx_f, lines) diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 40134a3270c3579fd2f6a891af66ff241050f60c..4dcc3ab57e7e6dfbe040ac61025e55b9e48b4415 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -35,6 +35,13 @@ class Inference(object): name = param.getName() assert isinstance(val, api.Vector) val.copyFromNumpyArray(parameters.get(name).flatten()) + # the setValueUpdated function is called in randomize, zeroMem, + # load function in paddle/parameter/Parameter.cpp. But in the + # inference mode, the setValueUpdated is never called, it will + # cause the parameter will not be dispatched + # in MultiGradientMachine for multi-GPU. So setValueUpdated is + # called here, but it's better to call this function in one place. + param.setValueUpdated() self.__gradient_machine__ = gm self.__data_types__ = topo.data_type() diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py index 3ac62d116bc7d0f68069d41c9f2b3da687c19ca3..b658a81630733fea3976b812afe819d76de4cb25 100644 --- a/python/paddle/v2/master/client.py +++ b/python/paddle/v2/master/client.py @@ -49,7 +49,6 @@ class client(object): def set_dataset(self, paths): holder_type = ctypes.c_char_p * len(paths) holder = holder_type() - print paths for idx, path in enumerate(paths): c_ptr = ctypes.c_char_p(path) holder[idx] = c_ptr