提交 b0505fda 编写于 作者: Y Yu Yang

Merge branch 'develop' into feature/unittest_for_inputs

...@@ -22,9 +22,11 @@ ...@@ -22,9 +22,11 @@
hooks: hooks:
- id: clang-formater - id: clang-formater
- repo: https://github.com/PaddlePaddle/pre-commit-golang - repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: 16398aeccf263adaf53b2495eed0406347d76281 sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
hooks: hooks:
- id: go-fmt - id: go-fmt
types: [go] types:
- id: gometalinter - go
types: [go] - id: gometalinter
types:
- go
...@@ -18,7 +18,6 @@ package main ...@@ -18,7 +18,6 @@ package main
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#define PADDLE_MASTER_OK 0 #define PADDLE_MASTER_OK 0
#define PADDLE_MASTER_ERROR -1 #define PADDLE_MASTER_ERROR -1
...@@ -101,6 +100,12 @@ func paddle_release_master_client(client C.paddle_master_client) { ...@@ -101,6 +100,12 @@ func paddle_release_master_client(client C.paddle_master_client) {
remove(client) remove(client)
} }
//export paddle_start_get_records
func paddle_start_get_records(client C.paddle_master_client, pass C.int) {
c := get(client)
c.StartGetRecords(int(pass))
}
//export paddle_set_dataset //export paddle_set_dataset
func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int) C.int { func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int) C.int {
c := get(client) c := get(client)
...@@ -121,15 +126,19 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int ...@@ -121,15 +126,19 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int
// paddle_next_record gets the nexts training record. // paddle_next_record gets the nexts training record.
// //
// returns number of bytes of the records if success, -1 if failed. // returns number of bytes of the records if success, -1 if failed, -2 if pass end.
// //
//export paddle_next_record //export paddle_next_record
func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int {
c := get(client) c := get(client)
r, err := c.NextRecord() r, err := c.NextRecord()
if err != nil { if err != nil {
// Error // NOTE: use errors to indicate pass ends
// TODO: return the type of error? if err.Error() == master.ErrAllTaskFailed.Error() ||
err.Error() == master.ErrNoMoreAvailable.Error() ||
err.Error() == master.ErrPassBefore.Error() {
return -2
}
*record = (*C.uchar)(nil) *record = (*C.uchar)(nil)
return -1 return -1
} }
......
...@@ -16,7 +16,6 @@ package master ...@@ -16,7 +16,6 @@ package master
import ( import (
"os" "os"
"sync"
"time" "time"
"github.com/PaddlePaddle/Paddle/go/connection" "github.com/PaddlePaddle/Paddle/go/connection"
...@@ -27,9 +26,9 @@ import ( ...@@ -27,9 +26,9 @@ import (
// Client is the client of the master server. // Client is the client of the master server.
type Client struct { type Client struct {
conn *connection.Conn conn *connection.Conn
ch chan record ch chan record
initChOnce sync.Once bufSize int
} }
type record struct { type record struct {
...@@ -46,11 +45,7 @@ func WithBuffer(bufSize int) func(*Client) error { ...@@ -46,11 +45,7 @@ func WithBuffer(bufSize int) func(*Client) error {
if bufSize <= 0 { if bufSize <= 0 {
return nil return nil
} }
c.bufSize = bufSize
c.initChOnce.Do(func() {
c.ch = make(chan record, bufSize)
go c.getRecords()
})
return nil return nil
} }
} }
...@@ -104,25 +99,41 @@ func NewClient(opts ...func(*Client) error) (*Client, error) { ...@@ -104,25 +99,41 @@ func NewClient(opts ...func(*Client) error) (*Client, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
} }
c.ch = make(chan record, c.bufSize)
// FIXME: connection is created asyncrosly in monitorMaster go routine,
// ensure the connection is ready for use before calling c.addClient.
time.Sleep(time.Second)
return c, nil return c, nil
} }
func (c *Client) getRecords() { // StartGetRecords must be called at beginning of each pass
func (c *Client) StartGetRecords(passID int) {
go c.getRecords(passID)
}
func (c *Client) getRecords(passID int) {
for { for {
t, err := c.getTask() t, err := c.getTask(passID)
if err != nil { if err != nil {
log.Errorf("Get task failed, sleep 3 seconds and continue, %s", err) if err.Error() == ErrPassBefore.Error() ||
time.Sleep(3 * time.Second) err.Error() == ErrNoMoreAvailable.Error() ||
continue err.Error() == ErrAllTaskFailed.Error() {
c.ch <- record{nil, err}
break
}
if err.Error() == ErrPassAfter.Error() {
// wait util last pass finishes
time.Sleep(time.Second * 3)
continue
}
log.Errorf("getTask error: %s", err)
} }
for _, chunk := range t.Chunks { for _, chunk := range t.Chunks {
f, err := os.Open(chunk.Path) f, e := os.Open(chunk.Path)
if err != nil { if e != nil {
log.Errorln(err) log.Errorln(e)
continue continue
} }
...@@ -178,18 +189,21 @@ func (c *Client) monitorMaster(addrCh <-chan string) { ...@@ -178,18 +189,21 @@ func (c *Client) monitorMaster(addrCh <-chan string) {
} }
} }
// SetDataset set dataset for the master server to dispatch. // SetDataset sets dataset to dispatch for the master server.
//
// SetDataset can be call multiple times at one pass. But only the first call
// will be honored.
// //
// SetDataset can be call multiple times from different nodes. But // After all tasks are done, another call of SetDataset will start another pass.
// only the first call will be honored.
func (c *Client) SetDataset(globPaths []string) error { func (c *Client) SetDataset(globPaths []string) error {
return c.conn.Call("Service.SetDataset", globPaths, nil) err := c.conn.Call("Service.SetDataset", globPaths, nil)
return err
} }
// getTask gets a new task from the master server. // getTask gets a new task from the master server.
func (c *Client) getTask() (Task, error) { func (c *Client) getTask(passID int) (Task, error) {
var t Task var t Task
err := c.conn.Call("Service.GetTask", 0, &t) err := c.conn.Call("Service.GetTask", passID, &t)
return t, err return t, err
} }
...@@ -208,12 +222,6 @@ func (c *Client) taskFailed(meta TaskMeta) error { ...@@ -208,12 +222,6 @@ func (c *Client) taskFailed(meta TaskMeta) error {
// NextRecord will block until the next record is available. It is // NextRecord will block until the next record is available. It is
// thread-safe. // thread-safe.
func (c *Client) NextRecord() ([]byte, error) { func (c *Client) NextRecord() ([]byte, error) {
c.initChOnce.Do(func() {
// initialize with in case WithBuffer is not used.
c.ch = make(chan record, 0)
go c.getRecords()
})
r := <-c.ch r := <-c.ch
return r.r, r.err return r.r, r.err
} }
......
...@@ -54,22 +54,22 @@ func TestGetFinishTask(t *testing.T) { ...@@ -54,22 +54,22 @@ func TestGetFinishTask(t *testing.T) {
panic(err) panic(err)
} }
go func(l net.Listener) { go func(l net.Listener) {
s, err := NewService(&InMemStore{}, chunkPerTask, time.Second, 1) s, sErr := NewService(&InMemStore{}, chunkPerTask, time.Second, 1)
if err != nil { if sErr != nil {
panic(err) panic(sErr)
} }
server := rpc.NewServer() server := rpc.NewServer()
err = server.Register(s) sErr = server.Register(s)
if err != nil { if sErr != nil {
panic(err) panic(sErr)
} }
mux := http.NewServeMux() mux := http.NewServeMux()
mux.Handle(rpc.DefaultRPCPath, server) mux.Handle(rpc.DefaultRPCPath, server)
err = http.Serve(l, mux) sErr = http.Serve(l, mux)
if err != nil { if sErr != nil {
panic(err) panic(sErr)
} }
}(l) }(l)
...@@ -103,6 +103,7 @@ func TestGetFinishTask(t *testing.T) { ...@@ -103,6 +103,7 @@ func TestGetFinishTask(t *testing.T) {
ch := make(chan string, 1) ch := make(chan string, 1)
ch <- addr ch <- addr
go c.monitorMaster(ch) go c.monitorMaster(ch)
err = c.SetDataset([]string{path}) err = c.SetDataset([]string{path})
if err != nil { if err != nil {
panic(err) panic(err)
...@@ -111,44 +112,47 @@ func TestGetFinishTask(t *testing.T) { ...@@ -111,44 +112,47 @@ func TestGetFinishTask(t *testing.T) {
checkOnePass := func(i int) { checkOnePass := func(i int) {
var tasks []Task var tasks []Task
for idx := 0; idx < totalTask; idx++ { for idx := 0; idx < totalTask; idx++ {
task, err := c.getTask() task, cErr := c.getTask(i)
if err != nil { if cErr != nil && cErr.Error() != ErrNoMoreAvailable.Error() && cErr.Error() != ErrPassAfter.Error() {
t.Fatalf("Error: %v, pass: %d\n", err, i) t.Fatalf("error: %v, pass: %d\n", cErr, i)
} }
tasks = append(tasks, task) tasks = append(tasks, task)
} }
_, err = c.getTask() // getting task before task finishes should return error
if err == nil { _, cErr := c.getTask(i)
if cErr == nil {
t.Fatalf("Should get error, pass: %d\n", i) t.Fatalf("Should get error, pass: %d\n", i)
} }
err = c.taskFinished(tasks[0].Meta.ID) cErr = c.taskFinished(tasks[0].Meta.ID)
if err != nil { if cErr != nil {
t.Fatalf("Error: %v, pass: %d\n", err, i) t.Fatalf("Error: %v, pass: %d\n", cErr, i)
} }
// call taskFailed once won't put the task to failed queue, just ensure
err = c.taskFailed(tasks[0].Meta) // the call
if err != nil { cErr = c.taskFailed(tasks[0].Meta)
t.Fatalf("Error: %v, pass: %d\n", err, i) if cErr != nil {
t.Fatalf("Error: %v, pass: %d\n", cErr, i)
} }
tasks = tasks[1:] tasks = tasks[1:]
task, err := c.getTask() _, cErr = c.getTask(i)
if err != nil { if cErr != nil && cErr.Error() != ErrNoMoreAvailable.Error() && cErr.Error() != ErrPassAfter.Error() {
t.Fatal(err) t.Fatalf("Should be ErrNoMoreAvailable or ErrPassAfter: %s", cErr)
} }
tasks = append(tasks, task)
for _, task := range tasks { for _, task := range tasks {
err = c.taskFinished(task.Meta.ID) cErr = c.taskFinished(task.Meta.ID)
if err != nil { if cErr != nil {
t.Fatalf("Error: %v, pass: %d\n", err, i) t.Fatal(cErr)
} }
} }
} }
for i := 0; i < 10; i++ { for i := 0; i < 10; i++ {
// init pass data
c.StartGetRecords(i)
checkOnePass(i) checkOnePass(i)
} }
} }
...@@ -20,8 +20,10 @@ import ( ...@@ -20,8 +20,10 @@ import (
"net/http" "net/http"
"net/rpc" "net/rpc"
"os" "os"
"runtime"
"strconv" "strconv"
"strings" "strings"
"sync"
"testing" "testing"
"time" "time"
...@@ -29,6 +31,18 @@ import ( ...@@ -29,6 +31,18 @@ import (
"github.com/PaddlePaddle/recordio" "github.com/PaddlePaddle/recordio"
) )
// tool function for testing output goroutine ids
func goid() int {
var buf [64]byte
n := runtime.Stack(buf[:], false)
idField := strings.Fields(strings.TrimPrefix(string(buf[:n]), "goroutine "))[0]
id, err := strconv.Atoi(idField)
if err != nil {
panic(fmt.Sprintf("cannot get goroutine id: %v", err))
}
return id
}
func TestNextRecord(t *testing.T) { func TestNextRecord(t *testing.T) {
const ( const (
path = "/tmp/master_client_TestFull" path = "/tmp/master_client_TestFull"
...@@ -45,7 +59,7 @@ func TestNextRecord(t *testing.T) { ...@@ -45,7 +59,7 @@ func TestNextRecord(t *testing.T) {
panic(err) panic(err)
} }
go func(l net.Listener) { go func(l net.Listener) {
s, err := master.NewService(&master.InMemStore{}, 10, time.Second, 1) s, err := master.NewService(&master.InMemStore{}, 1, time.Second*60, 1)
if err != nil { if err != nil {
panic(err) panic(err)
} }
...@@ -69,7 +83,7 @@ func TestNextRecord(t *testing.T) { ...@@ -69,7 +83,7 @@ func TestNextRecord(t *testing.T) {
panic(err) panic(err)
} }
w := recordio.NewWriter(f, -1, -1) w := recordio.NewWriter(f, 1, -1)
for i := 0; i < total; i++ { for i := 0; i < total; i++ {
_, err = w.Write([]byte{byte(i)}) _, err = w.Write([]byte{byte(i)})
if err != nil { if err != nil {
...@@ -87,32 +101,49 @@ func TestNextRecord(t *testing.T) { ...@@ -87,32 +101,49 @@ func TestNextRecord(t *testing.T) {
panic(err) panic(err)
} }
c, err := master.NewClient(master.WithAddr(fmt.Sprintf(":%d", p)), master.WithBuffer(10)) // start several client to test task fetching
if err != nil { var wg sync.WaitGroup
panic(err) for i := 0; i < 4; i++ {
} wg.Add(1)
// test for multiple concurrent clients
err = c.SetDataset([]string{path}) go func() {
if err != nil { defer wg.Done()
panic(err) // each go-routine needs a single client connection instance
} c, e := master.NewClient(master.WithAddr(fmt.Sprintf(":%d", p)), master.WithBuffer(1))
if e != nil {
for pass := 0; pass < 50; pass++ { t.Fatal(e)
received := make(map[byte]bool)
for i := 0; i < total; i++ {
r, err := c.NextRecord()
if err != nil {
t.Fatal(pass, i, "Read error:", err)
} }
e = c.SetDataset([]string{path})
if len(r) != 1 { if e != nil {
t.Fatal(pass, i, "Length should be 1.", r) panic(e)
} }
// test for n passes
if received[r[0]] { for pass := 0; pass < 10; pass++ {
t.Fatal(pass, i, "Received duplicate.", received, r) c.StartGetRecords(pass)
received := make(map[byte]bool)
taskid := 0
for {
r, e := c.NextRecord()
if e != nil {
// ErrorPassAfter will wait, else break for next pass
if e.Error() == master.ErrPassBefore.Error() ||
e.Error() == master.ErrNoMoreAvailable.Error() {
break
}
t.Fatal(pass, taskid, "Read error:", e)
}
if len(r) != 1 {
t.Fatal(pass, taskid, "Length should be 1.", r)
}
if received[r[0]] {
t.Fatal(pass, taskid, "Received duplicate.", received, r)
}
taskid++
received[r[0]] = true
}
} }
received[r[0]] = true }()
}
} }
wg.Wait()
} }
...@@ -19,6 +19,7 @@ import ( ...@@ -19,6 +19,7 @@ import (
"compress/gzip" "compress/gzip"
"encoding/gob" "encoding/gob"
"errors" "errors"
"math/rand"
"os" "os"
"path/filepath" "path/filepath"
"sync" "sync"
...@@ -33,6 +34,18 @@ const ( ...@@ -33,6 +34,18 @@ const (
dialTimeout = 5 * time.Second dialTimeout = 5 * time.Second
) )
// ErrAllTaskFailed occur when tasks are in done or failed state.
var ErrAllTaskFailed = errors.New("all task finished")
// ErrNoMoreAvailable occur when no task in todo and yet not all done or fail.
var ErrNoMoreAvailable = errors.New("no more available task")
// ErrPassBefore client side pass number does not match with master counter.
var ErrPassBefore = errors.New("pass number smaller than master")
// ErrPassAfter client side pass number does not match with master counter.
var ErrPassAfter = errors.New("pass number larger than master")
// Store is the interface for save and load the master state. // Store is the interface for save and load the master state.
type Store interface { type Store interface {
Save([]byte) error Save([]byte) error
...@@ -75,17 +88,26 @@ type Service struct { ...@@ -75,17 +88,26 @@ type Service struct {
chunksPerTask int chunksPerTask int
timeoutDur time.Duration timeoutDur time.Duration
failureMax int failureMax int
ready chan struct{}
store Store store Store
mu sync.Mutex ready chan struct{}
initDone bool initDone bool
taskQueues taskQueues
mu sync.Mutex
taskQueues taskQueues
currPass int
jobTasks []taskEntry
savingTrainer string savingTrainer string
} }
func partition(chunks []Chunk, chunksPerTask int) []taskEntry { func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
id := 0 // generate uniq id across job using nanosecond + randint + counter
// FIXME(typhoonzero): this is a workaround, use uuid
randStart := rand.Int()
counter := 0
timestamp := time.Now().Nanosecond()
id := timestamp + randStart + counter
if chunksPerTask <= 0 { if chunksPerTask <= 0 {
chunksPerTask = 1 chunksPerTask = 1
} }
...@@ -95,7 +117,8 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { ...@@ -95,7 +117,8 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
for i, c := range chunks { for i, c := range chunks {
if i%chunksPerTask == 0 && len(cur.Task.Chunks) > 0 { if i%chunksPerTask == 0 && len(cur.Task.Chunks) > 0 {
cur.Task.Meta.ID = id cur.Task.Meta.ID = id
id++ counter++
id = timestamp + randStart + counter
result = append(result, cur) result = append(result, cur)
cur.Task.Chunks = nil cur.Task.Chunks = nil
} }
...@@ -266,19 +289,21 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error { ...@@ -266,19 +289,21 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error {
return err return err
} }
s.taskQueues.Todo = partition(chunks, s.chunksPerTask) s.jobTasks = partition(chunks, s.chunksPerTask)
s.taskQueues.Todo = s.jobTasks
err = s.snapshot() err = s.snapshot()
if err != nil { if err != nil {
log.Errorln(err) log.Errorln(err)
return err return err
} }
close(s.ready) close(s.ready)
s.initDone = true s.initDone = true
return nil return nil
} }
// processFailedTask retry s.failureMax times for failed task.
// return true if all task are done or failed.
func (s *Service) processFailedTask(t taskEntry, epoch int) { func (s *Service) processFailedTask(t taskEntry, epoch int) {
if t.Task.Meta.Epoch != epoch { if t.Task.Meta.Epoch != epoch {
// new epoch, task launched after the // new epoch, task launched after the
...@@ -302,8 +327,9 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) { ...@@ -302,8 +327,9 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) {
return return
} }
log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure)
s.taskQueues.Todo = append(s.taskQueues.Todo, t) s.taskQueues.Todo = append(s.taskQueues.Todo, t)
return
} }
func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
...@@ -331,37 +357,30 @@ func (s *Service) logFields() log.Fields { ...@@ -331,37 +357,30 @@ func (s *Service) logFields() log.Fields {
} }
// GetTask gets a new task from the service. // GetTask gets a new task from the service.
func (s *Service) GetTask(_ int, task *Task) error { // passID is the client side pass count
func (s *Service) GetTask(passID int, task *Task) error {
select { select {
case <-s.ready: case <-s.ready:
} }
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
if passID < s.currPass {
return ErrPassBefore
}
if passID > s.currPass {
// Client may get run to pass after master when one client faster than the
// other
return ErrPassAfter
}
if len(s.taskQueues.Todo) == 0 { if len(s.taskQueues.Todo) == 0 {
if len(s.taskQueues.Done) == 0 { if len(s.taskQueues.Done) == 0 && len(s.taskQueues.Pending) == 0 {
if len(s.taskQueues.Pending) == 0 { log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass")
err := errors.New("all task failed") return ErrAllTaskFailed
log.WithFields(s.logFields()).Warningln("All tasks failed.")
return err
}
// TODO(helin): client need to retry in this
// error case. Gotcha: RPC client can't
// compare returned error with predefined
// errors like io.EOF, because the error
// instance deserialized from RPC is a
// different instance than the error defined
// in package. So we need to figure out a way
// for client to check this error correctly.
err := errors.New("no more available task")
log.WithFields(s.logFields()).Warningln("No more available task.")
return err
} }
s.taskQueues.Todo = s.taskQueues.Done log.WithFields(s.logFields()).Warningln("No more available task.")
s.taskQueues.Done = nil return ErrNoMoreAvailable
log.WithFields(s.logFields()).Infoln("No more todo task, but trainer is requesting task to do. Move all done task to todo.")
} }
t := s.taskQueues.Todo[0] t := s.taskQueues.Todo[0]
...@@ -381,7 +400,7 @@ func (s *Service) GetTask(_ int, task *Task) error { ...@@ -381,7 +400,7 @@ func (s *Service) GetTask(_ int, task *Task) error {
} }
// TaskFinished tell the service that a task is finished. // TaskFinished tell the service that a task is finished.
func (s *Service) TaskFinished(taskID int, _ *int) error { func (s *Service) TaskFinished(taskID int, dummy *int) error {
select { select {
case <-s.ready: case <-s.ready:
} }
...@@ -401,11 +420,14 @@ func (s *Service) TaskFinished(taskID int, _ *int) error { ...@@ -401,11 +420,14 @@ func (s *Service) TaskFinished(taskID int, _ *int) error {
delete(s.taskQueues.Pending, taskID) delete(s.taskQueues.Pending, taskID)
log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID) log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID)
if len(s.taskQueues.Todo) == 0 && len(s.taskQueues.Pending) == 0 {
if len(s.taskQueues.Pending) == 0 && len(s.taskQueues.Todo) == 0 { // increase master side pass count if all tasks finished
log.WithFields(s.logFields()).Infoln("No more todo and pending task, start a new pass.") s.currPass++
s.taskQueues.Todo = append(s.taskQueues.Todo, s.taskQueues.Done...) s.taskQueues.Todo = s.jobTasks
s.taskQueues.Done = nil s.taskQueues.Done = []taskEntry{}
// TODO(typhoonzero): deal with failed tasks
s.taskQueues.Failed = []taskEntry{}
log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.currPass)
} }
err := s.snapshot() err := s.snapshot()
...@@ -416,7 +438,7 @@ func (s *Service) TaskFinished(taskID int, _ *int) error { ...@@ -416,7 +438,7 @@ func (s *Service) TaskFinished(taskID int, _ *int) error {
} }
// TaskFailed tells the service that a task is failed. // TaskFailed tells the service that a task is failed.
func (s *Service) TaskFailed(meta TaskMeta, _ *int) error { func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error {
select { select {
case <-s.ready: case <-s.ready:
} }
......
...@@ -44,7 +44,8 @@ func TestPartionIndex(t *testing.T) { ...@@ -44,7 +44,8 @@ func TestPartionIndex(t *testing.T) {
cs := make([]Chunk, 100) cs := make([]Chunk, 100)
ts := partition(cs, 20) ts := partition(cs, 20)
for i := range ts { for i := range ts {
if ts[i].Task.Meta.ID != i { // test auto increament ids
if i > 0 && ts[i].Task.Meta.ID != ts[i-1].Task.Meta.ID+1 {
t.Error(ts[i], i) t.Error(ts[i], i)
} }
} }
......
...@@ -6,16 +6,19 @@ import cPickle as pickle ...@@ -6,16 +6,19 @@ import cPickle as pickle
etcd_ip = os.getenv("MASTER_IP", "127.0.0.1") etcd_ip = os.getenv("MASTER_IP", "127.0.0.1")
etcd_endpoint = "http://" + etcd_ip + ":2379" etcd_endpoint = "http://" + etcd_ip + ":2379"
print "connecting to master, etcd endpoints: ", etcd_endpoint
master_client = master.client(etcd_endpoint, 5, 64)
def cloud_reader(): def cloud_reader():
print "connecting to master, etcd endpoints: ", etcd_endpoint global master_client
master_client = master.client(etcd_endpoint, 5, 64)
master_client.set_dataset( master_client.set_dataset(
["/pfs/dlnel/public/dataset/uci_housing/uci_housing-*-of-*"]) ["/pfs/dlnel/public/dataset/uci_housing/uci_housing-*"], passes=30)
while 1: while 1:
r, e = master_client.next_record() r, e = master_client.next_record()
if not r: if not r:
if e != -2: # other errors
print "get record error:", e
break break
yield pickle.loads(r) yield pickle.loads(r)
...@@ -27,10 +30,12 @@ def main(): ...@@ -27,10 +30,12 @@ def main():
# network config # network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x, y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(name='w'), param_attr=paddle.attr.Param(
name='w', learning_rate=1e-3),
size=1, size=1,
act=paddle.activation.Linear(), act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(name='b')) bias_attr=paddle.attr.Param(
name='b', learning_rate=1e-3))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y) cost = paddle.layer.mse_cost(input=y_predict, label=y)
...@@ -38,9 +43,8 @@ def main(): ...@@ -38,9 +43,8 @@ def main():
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
# create optimizer of new remote updater to pserver # create optimizer of new remote updater to pserver
optimizer = paddle.optimizer.Momentum(momentum=0) optimizer = paddle.optimizer.Momentum(momentum=0, learning_rate=1e-3)
print "etcd endoint: ", etcd_endpoint
trainer = paddle.trainer.SGD(cost=cost, trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters, parameters=parameters,
update_equation=optimizer, update_equation=optimizer,
...@@ -51,6 +55,8 @@ def main(): ...@@ -51,6 +55,8 @@ def main():
# event_handler to print training and testing info # event_handler to print training and testing info
def event_handler(event): def event_handler(event):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
# FIXME: for cloud data reader, pass number is managed by master
# should print the server side pass number
if event.batch_id % 100 == 0: if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f" % ( print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost) event.pass_id, event.batch_id, event.cost)
......
...@@ -37,7 +37,7 @@ std::vector<std::string> Evaluator::getNames() const { ...@@ -37,7 +37,7 @@ std::vector<std::string> Evaluator::getNames() const {
double Evaluator::getValue(const std::string name) const { double Evaluator::getValue(const std::string name) const {
paddle::Error err; paddle::Error err;
double v = m->rawPtr->getValue(name, &err); double v = m->rawPtr->getValue(name, &err);
if (err) { if (!err.isOK()) {
throw std::runtime_error(err.msg()); throw std::runtime_error(err.msg());
} }
return v; return v;
......
...@@ -3,7 +3,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3) ...@@ -3,7 +3,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory) cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
...@@ -29,7 +29,5 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc. ...@@ -29,7 +29,5 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init) add_dependencies(framework_py_proto framework_py_proto_init)
proto_library(net_proto SRCS net_proto.proto DEPS op_proto) cc_library(net SRCS net.cc DEPS op_registry)
# cc_library(net SRCS net.cc DEPS operator net_proto op_registry fc_op)
cc_library(net SRCS net.cc DEPS operator net_proto op_registry)
cc_test(net_op_test SRCS net_op_test.cc DEPS net add_op mul_op sigmoid_op softmax_op fc_op) cc_test(net_op_test SRCS net_op_test.cc DEPS net add_op mul_op sigmoid_op softmax_op fc_op)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/memory/memcpy.h"
namespace paddle {
namespace framework {
template <typename T>
inline void Tensor::check_memory_size() const {
PADDLE_ENFORCE(holder_ != nullptr,
"Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.");
}
template <typename T>
inline const T* Tensor::data() const {
check_memory_size<T>();
return reinterpret_cast<const T*>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
template <typename T>
inline T* Tensor::data() {
check_memory_size<T>();
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
template <typename T>
inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
static_assert(std::is_pod<T>::value, "T must be POD");
Resize(dims);
return mutable_data<T>(place);
}
template <typename T>
inline T* Tensor::mutable_data(platform::Place place) {
static_assert(std::is_pod<T>::value, "T must be POD");
PADDLE_ENFORCE(product(dims_) > 0,
"Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
/* some versions of boost::variant don't have operator!= */
size_t size = product(dims_) * sizeof(T);
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), size));
}
#ifndef PADDLE_ONLY_CPU
else if (platform::is_gpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
boost::get<platform::GPUPlace>(place), size));
}
#endif
offset_ = 0;
}
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
template <typename T>
inline void Tensor::ShareDataWith(const Tensor& src) {
src.check_memory_size<T>();
*this = src;
}
template <typename T>
inline void Tensor::CopyFrom(const Tensor& src,
const platform::CPUDeviceContext& ctx) {
src.check_memory_size<T>();
Resize(src.dims());
auto src_place = src.holder_->place();
auto src_ptr = static_cast<const void*>(src.data<T>());
auto dst_place = ctx.GetPlace();
auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
auto size = product(src.dims_) * sizeof(T);
if (platform::is_cpu_place(src_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
#ifndef PADDLE_ONLY_CPU
else if (platform::is_gpu_place(src_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::GPUPlace>(src_place), src_ptr, size, 0);
}
#endif
}
#ifndef PADDLE_ONLY_CPU
template <typename T>
inline void Tensor::CopyFrom(const Tensor& src,
const platform::CUDADeviceContext& ctx) {
src.check_memory_size<T>();
Resize(src.dims());
auto src_place = src.holder_->place();
auto src_ptr = static_cast<const void*>(src.data<T>());
auto dst_place = ctx.GetPlace();
auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
auto size = product(src.dims_) * sizeof(T);
if (platform::is_cpu_place(src_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size,
ctx.stream());
} else if (platform::is_gpu_place(src_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr,
boost::get<platform::GPUPlace>(src_place), src_ptr, size,
ctx.stream());
}
}
#endif
template <typename T>
inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
check_memory_size<T>();
PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero.");
PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound.");
PADDLE_ENFORCE(begin_idx < end_idx,
"Begin index must be less than end index.");
PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1.");
int base = product(dims_) / dims_[0];
Tensor dst;
dst.holder_ = holder_;
DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx;
dst.Resize(dst_dims);
dst.offset_ = offset_ + begin_idx * base * sizeof(T);
return dst;
}
inline void Tensor::Resize(const DDim& dims) { dims_ = dims; }
inline const DDim& Tensor::dims() const { return dims_; }
} // namespace framework
} // namespace paddle
...@@ -20,17 +20,7 @@ ...@@ -20,17 +20,7 @@
namespace paddle { namespace paddle {
namespace framework { namespace framework {
std::shared_ptr<PlainNet> AddBackwardOp(std::shared_ptr<PlainNet> ForwardOps) { void NetOp::CompleteAddOp(bool calc) {
auto grad_ops = std::make_shared<PlainNet>();
for (auto& op : ForwardOps->ops_) {
auto op_grad = OpRegistry::CreateGradOp(op);
grad_ops->AddOp(op_grad);
}
grad_ops->CompleteAddOp();
return grad_ops;
}
void PlainNet::CompleteAddOp(bool calc) {
add_op_done_ = true; add_op_done_ = true;
if (!calc) return; if (!calc) return;
std::unordered_set<std::string> input_set; std::unordered_set<std::string> input_set;
...@@ -70,7 +60,7 @@ void PlainNet::CompleteAddOp(bool calc) { ...@@ -70,7 +60,7 @@ void PlainNet::CompleteAddOp(bool calc) {
attrs_["temporary_index"] = tmp_index; attrs_["temporary_index"] = tmp_index;
} }
std::string PlainNet::DebugString() const { std::string NetOp::DebugString() const {
std::ostringstream os; std::ostringstream os;
os << OperatorBase::DebugString() << std::endl; os << OperatorBase::DebugString() << std::endl;
for (auto& op : ops_) { for (auto& op : ops_) {
...@@ -82,5 +72,7 @@ std::string PlainNet::DebugString() const { ...@@ -82,5 +72,7 @@ std::string PlainNet::DebugString() const {
return os.str(); return os.str();
} }
bool NetOp::IsNetOp() const { return true; }
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -37,21 +37,7 @@ namespace framework { ...@@ -37,21 +37,7 @@ namespace framework {
* This is the base class of network, all the networks should implement the APIs * This is the base class of network, all the networks should implement the APIs
* it defines. * it defines.
*/ */
class Net : public OperatorBase { class NetOp : public OperatorBase {
public:
virtual void AddOp(const std::shared_ptr<OperatorBase>& op) = 0;
virtual void CompleteAddOp(bool calc) = 0;
};
using NetPtr = std::shared_ptr<Net>;
/**
* @brief a basic implementation of Net.
*
* PlainNet is a very simple Net, it create a list of operators, and run them
* sequentially following the order they added.
*/
class PlainNet : public Net {
public: public:
/** /**
* Infer all the operators' input and output variables' shapes, will be called * Infer all the operators' input and output variables' shapes, will be called
...@@ -80,15 +66,17 @@ class PlainNet : public Net { ...@@ -80,15 +66,17 @@ class PlainNet : public Net {
/** /**
* @brief Add an operator by ptr * @brief Add an operator by ptr
*/ */
void AddOp(const std::shared_ptr<OperatorBase>& op) override { void AddOp(const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed");
ops_.push_back(op); ops_.push_back(op);
} }
void CompleteAddOp(bool calculate = true) override; void CompleteAddOp(bool calculate = true);
std::string DebugString() const override; std::string DebugString() const override;
bool IsNetOp() const override;
std::vector<std::shared_ptr<OperatorBase>> ops_; std::vector<std::shared_ptr<OperatorBase>> ops_;
private: private:
...@@ -100,7 +88,5 @@ class PlainNet : public Net { ...@@ -100,7 +88,5 @@ class PlainNet : public Net {
} }
}; };
std::shared_ptr<PlainNet> AddBackwardOp(std::shared_ptr<PlainNet> ForwardOps);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -40,7 +40,7 @@ void AssertSameVectorWithoutOrder(const std::vector<T>& expected, ...@@ -40,7 +40,7 @@ void AssertSameVectorWithoutOrder(const std::vector<T>& expected,
} }
TEST(OpKernel, all) { TEST(OpKernel, all) {
auto net = std::make_shared<PlainNet>(); auto net = std::make_shared<NetOp>();
ASSERT_NE(net, nullptr); ASSERT_NE(net, nullptr);
auto op1 = std::make_shared<TestOp>(); auto op1 = std::make_shared<TestOp>();
...@@ -71,28 +71,21 @@ TEST(OpKernel, all) { ...@@ -71,28 +71,21 @@ TEST(OpKernel, all) {
ASSERT_EQ(2, run_cnt); ASSERT_EQ(2, run_cnt);
ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet);
} }
TEST(AddBackwardOp, TestGradOp) {
auto net = std::make_shared<PlainNet>();
ASSERT_NE(net, nullptr);
net->AddOp(framework::OpRegistry::CreateOp("mul", {"X", "Y"}, {"Out"}, {}));
net->AddOp(
framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {"Out"}, {}));
net->AddOp(framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {""}, {}));
auto grad_ops = AddBackwardOp(net);
for (auto& op : grad_ops->ops_) {
op->DebugString();
}
}
// TODO(zhihong): add fc grad without registering. //! TODO(yuyang18): Refine Backward Op.
// TEST(AddBackwardOp, TestNoGradOp) { // TEST(AddBackwardOp, TestGradOp) {
// auto net = std::make_shared<PlainNet>(); // auto net = std::make_shared<NetOp>();
// ASSERT_NE(net, nullptr); // ASSERT_NE(net, nullptr);
// net->AddOp(framework::OpRegistry::CreateOp("fc", {"X", "W", "b"}, {"Y"}, // net->AddOp(framework::OpRegistry::CreateOp("mul", {"X", "Y"}, {"Out"}, {}));
// {})); auto grad_ops = AddBackwardOp(net); for (auto& op : grad_ops->ops_) { // net->AddOp(
// op->DebugString(); // framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {"Out"}, {}));
// } // net->AddOp(framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {""},
// } // {}));
// auto grad_ops = AddBackwardOp(net);
// for (auto& op : grad_ops->ops_) {
// op->DebugString();
// }
//}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
syntax="proto2";
package paddle.framework;
import "op_proto.proto";
message NetDesc {
// network identification
optional string name = 1;
// operator contains in network
repeated OpProto operators = 2;
// network type to run with. e.g "plainNet", "DAG"
optional string net_type = 3;
// num worker always
optional int32 num_workers = 4;
}
...@@ -403,15 +403,16 @@ class GradOpRegisterHelper { ...@@ -403,15 +403,16 @@ class GradOpRegisterHelper {
STATIC_ASSERT_GLOBAL_NAMESPACE( \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op_kernel_##type##_##DEVICE_TYPE##__, \ __reg_op_kernel_##type##_##DEVICE_TYPE##__, \
"REGISTER_OP_KERNEL must be in global namespace"); \ "REGISTER_OP_KERNEL must be in global namespace"); \
struct __op_kernel_register__##type##__ { \ struct __op_kernel_register__##type##__##DEVICE_TYPE##__ { \
__op_kernel_register__##type##__() { \ __op_kernel_register__##type##__##DEVICE_TYPE##__() { \
::paddle::framework::OperatorWithKernel::OpKernelKey key; \ ::paddle::framework::OperatorWithKernel::OpKernelKey key; \
key.place_ = PlaceType(); \ key.place_ = PlaceType(); \
::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \ ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \
.reset(new __VA_ARGS__()); \ .reset(new __VA_ARGS__()); \
} \ } \
}; \ }; \
static __op_kernel_register__##type##__ __reg_kernel_##type##__; \ static __op_kernel_register__##type##__##DEVICE_TYPE##__ \
__reg_kernel_##type##__##DEVICE_TYPE##__; \
int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; } int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; }
// (type, KernelType) // (type, KernelType)
......
...@@ -90,15 +90,17 @@ class OperatorBase { ...@@ -90,15 +90,17 @@ class OperatorBase {
virtual void Run(const std::shared_ptr<Scope>& scope, virtual void Run(const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& dev_ctx) const = 0; const platform::DeviceContext& dev_ctx) const = 0;
// Get a input with argument's name described in `op_proto` virtual bool IsNetOp() const { return false; }
//! Get a input with argument's name described in `op_proto`
const std::string& Input(const std::string& name) const; const std::string& Input(const std::string& name) const;
// Get a input which has multiple variables. //! Get a input which has multiple variables.
// TODO add a vector_view to prevent memory copy. //! TODO add a vector_view to prevent memory copy.
std::vector<std::string> Inputs(const std::string& name) const; std::vector<std::string> Inputs(const std::string& name) const;
// Get a output with argument's name described in `op_proto` //! Get a output with argument's name described in `op_proto`
const std::string& Output(const std::string& name) const; const std::string& Output(const std::string& name) const;
// Get an output which has multiple variables. //! Get an output which has multiple variables.
// TODO add a vector_view to prevent memory copy. //! TODO add a vector_view to prevent memory copy.
std::vector<std::string> Outputs(const std::string& name) const; std::vector<std::string> Outputs(const std::string& name) const;
public: public:
...@@ -199,7 +201,9 @@ class OperatorWithKernel : public OperatorBase { ...@@ -199,7 +201,9 @@ class OperatorWithKernel : public OperatorBase {
place_ = dev_ctx.GetPlace(); place_ = dev_ctx.GetPlace();
} }
bool operator==(const OpKernelKey& o) const { return place_ == o.place_; } bool operator==(const OpKernelKey& o) const {
return platform::places_are_same_class(place_, o.place_);
}
}; };
struct OpKernelHash { struct OpKernelHash {
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <paddle/framework/tensor.h> #include "paddle/framework/tensor.h"
namespace paddle { namespace paddle {
namespace framework {} namespace framework {}
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include <typeindex> #include <typeindex>
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
...@@ -31,9 +32,11 @@ template <bool less, size_t i, typename... args> ...@@ -31,9 +32,11 @@ template <bool less, size_t i, typename... args>
struct CastToPyBufferImpl; struct CastToPyBufferImpl;
} // namespace details } // namespace details
} // namespace pybind } // namespace pybind
namespace framework { namespace framework {
class Tensor { class Tensor {
public:
template <bool less, size_t i, typename... args> template <bool less, size_t i, typename... args>
friend struct paddle::pybind::details::CastToPyBufferImpl; friend struct paddle::pybind::details::CastToPyBufferImpl;
...@@ -46,106 +49,84 @@ class Tensor { ...@@ -46,106 +49,84 @@ class Tensor {
public: public:
Tensor() : offset_(0) {} Tensor() : offset_(0) {}
/*! Return a pointer to mutable memory block. */
template <typename T> template <typename T>
const T* data() const { inline T* data();
EnforceSufficientMemory<T>();
return reinterpret_cast<const T*>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
/*! Return a pointer to constant memory block. */
template <typename T> template <typename T>
T* data() { inline const T* data() const;
EnforceSufficientMemory<T>();
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) + /**
offset_); * @brief Return a pointer to mutable memory block.
} * @note If not exist, then allocation.
*/
template <typename T, // must be POD types template <typename T>
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr> inline T* mutable_data(platform::Place place);
T* mutable_data(DDim dims, platform::Place place) {
Resize(dims); /**
return mutable_data<T>(place); * @brief Return a pointer to mutable memory block.
} *
* @param[in] dims The dimensions of the memory block.
template <typename T, // must be POD types * @param[in] place The place of the memory block.
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr> *
T* mutable_data(platform::Place place) { * @note If not exist, then allocation.
PADDLE_ENFORCE(product(dims_) > 0, */
"Tensor's numel must be larger than zero to call " template <typename T>
"Tensor::mutable_data. Call Tensor::set_dim first."); inline T* mutable_data(DDim dims, platform::Place place);
if (holder_ == nullptr ||
!(holder_->place() ==
place) /* some versions of boost::variant don't have operator!= */
|| holder_->size() < product(dims_) * sizeof(T) + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
} else if (platform::is_gpu_place(place)) {
#ifdef PADDLE_ONLY_CPU
PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
#else
holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
#endif
} else {
PADDLE_THROW("Unknown 'place'.");
}
offset_ = 0;
}
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
/*! Return the dimensions of the memory block. */
inline const DDim& dims() const;
/*! Resize the dimensions of the memory block. */
inline void Resize(const DDim& dims);
/*! The internal of two tensors share the same memory block. */
template <typename T>
inline void ShareDataWith(const Tensor& src);
/**
* @brief Copy the content of external tensor to a new place.
*
* @param[in] src The external tensor.
* @param[in] ctx The device context contains place where to store.
*
* @note CopyFrom supports CPU <-> GPU, GPU <-> GPU.
*/
template <typename T> template <typename T>
void ShareDataWith(const Tensor& src) { inline void CopyFrom(const Tensor& src,
src.EnforceSufficientMemory<T>(); const platform::CPUDeviceContext& ctx);
*this = src;
}
#ifndef PADDLE_ONLY_CPU
template <typename T> template <typename T>
void CopyFrom(const Tensor& src, platform::Place dst_place) { inline void CopyFrom(const Tensor& src,
PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) && const platform::CUDADeviceContext& ctx);
platform::is_cpu_place(dst_place), #endif
"Tensor::CopyFrom only support CPU now.");
src.EnforceSufficientMemory<T>();
size_t size = product(src.dims_) * sizeof(T);
Resize(src.dims());
const void* src_ptr = static_cast<const void*>(src.data<T>());
void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
memcpy(dst_ptr, src_ptr, size);
}
/**
* @brief Return the slice of the tensor.
*
* @param[in] begin_idx The begin index of the slice.
* @param[in] end_idx The end index of the slice.
*/
template <typename T> template <typename T>
Tensor Slice(const int& begin_idx, const int& end_idx) const { inline Tensor Slice(const int& begin_idx, const int& end_idx) const;
EnforceSufficientMemory<T>();
PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero.");
PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound.");
PADDLE_ENFORCE(begin_idx < end_idx,
"Begin index must be less than end index.");
PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1.");
int base = product(dims_) / dims_[0];
Tensor dst;
dst.holder_ = holder_;
DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx;
dst.Resize(dst_dims);
dst.offset_ = offset_ + begin_idx * base * sizeof(T);
return dst;
}
void Resize(const DDim& dims) { dims_ = dims; }
const DDim& dims() const { return dims_; }
private: private:
// Placeholder hides type T, so it doesn't appear as a template template <typename T>
// parameter of Variable. inline void check_memory_size() const;
private:
/**
* @note Placeholder hides type T, so it doesn't appear as a template
* parameter of Variable.
*/
struct Placeholder { struct Placeholder {
virtual ~Placeholder() {} virtual ~Placeholder() {}
virtual void* ptr() const = 0; virtual void* ptr() const = 0;
virtual platform::Place place() const = 0;
virtual size_t size() const = 0; virtual size_t size() const = 0;
virtual std::type_index type() const = 0; virtual std::type_index type() const = 0;
virtual platform::Place place() const = 0;
}; };
template <typename T, typename PlaceType> template <typename T, typename PlaceType>
...@@ -156,33 +137,38 @@ class Tensor { ...@@ -156,33 +137,38 @@ class Tensor {
place_(place), place_(place),
size_(size) {} size_(size) {}
virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
virtual size_t size() const { return size_; } virtual size_t size() const { return size_; }
virtual paddle::platform::Place place() const { return place_; } virtual platform::Place place() const { return place_; }
virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
virtual std::type_index type() const { return std::type_index(typeid(T)); } virtual std::type_index type() const { return std::type_index(typeid(T)); }
/*! the pointer of memory block. */
std::unique_ptr<T, memory::PODDeleter<T, PlaceType>> ptr_; std::unique_ptr<T, memory::PODDeleter<T, PlaceType>> ptr_;
platform::Place place_; // record the place of ptr_.
size_t size_; // size of the memory block. /*! the place of memory block. */
platform::Place place_;
/*! the size of memory block. */
size_t size_;
}; };
template <typename T> /*! holds the memory block if allocated. */
inline void EnforceSufficientMemory() const { std::shared_ptr<Placeholder> holder_;
PADDLE_ENFORCE(holder_ != nullptr,
"Tenosr holds no memory. Call Tensor::mutable_data first."); /*! points to dimensions of memory block. */
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.");
}
std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated.
DDim dims_; DDim dims_;
// A PlaceHolder may be shared by more than one tensor. Some of them may be
// slices of the others. So the offset_ is introduced here to indicate the /**
// byte offset between PlaceHolder::ptr_ and where tensor's data really * @brief A PlaceHolder may be shared by more than one tensor.
// begins. *
* @note Some of them may be slices of the others. So the offset_
* is introduced here to indicate the byte offset between
* PlaceHolder::ptr_ and where the tensor data really begins.
*/
size_t offset_; size_t offset_;
}; };
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
#include "paddle/framework/detail/tensor-inl.h"
...@@ -72,7 +72,8 @@ TEST(Tensor, MutableData) { ...@@ -72,7 +72,8 @@ TEST(Tensor, MutableData) {
p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), CPUPlace()); p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), CPUPlace());
EXPECT_EQ(p1, p2); EXPECT_EQ(p1, p2);
} }
#ifdef __CUDACC__
#ifndef PADDLE_ONLY_CPU
{ {
Tensor src_tensor; Tensor src_tensor;
float* p1 = nullptr; float* p1 = nullptr;
...@@ -123,7 +124,7 @@ TEST(Tensor, ShareDataWith) { ...@@ -123,7 +124,7 @@ TEST(Tensor, ShareDataWith) {
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>()); ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
} }
#ifdef __CUDACC__ #ifndef PADDLE_ONLY_CPU
{ {
Tensor src_tensor; Tensor src_tensor;
Tensor dst_tensor; Tensor dst_tensor;
...@@ -160,7 +161,7 @@ TEST(Tensor, Slice) { ...@@ -160,7 +161,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
} }
#ifdef __CUDACC__ #ifndef PADDLE_ONLY_CPU
{ {
Tensor src_tensor; Tensor src_tensor;
src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace()); src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());
...@@ -188,25 +189,74 @@ TEST(Tensor, Slice) { ...@@ -188,25 +189,74 @@ TEST(Tensor, Slice) {
TEST(Tensor, CopyFrom) { TEST(Tensor, CopyFrom) {
using namespace paddle::framework; using namespace paddle::framework;
using namespace paddle::platform; using namespace paddle::platform;
{
Tensor src_tensor;
Tensor dst_tensor;
int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());
int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int));
Tensor src_tensor; auto* cpu_ctx = new paddle::platform::CPUDeviceContext();
int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace()); dst_tensor.CopyFrom<int>(src_tensor, *cpu_ctx);
int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int)); const int* dst_ptr = dst_tensor.data<int>();
Tensor dst_tensor; ASSERT_NE(src_ptr, dst_ptr);
dst_tensor.CopyFrom<int>(src_tensor, CPUPlace()); for (size_t i = 0; i < 9; ++i) {
const int* dst_ptr = dst_tensor.data<int>(); EXPECT_EQ(src_ptr[i], dst_ptr[i]);
ASSERT_NE(src_ptr, dst_ptr); }
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]); Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
dst_tensor.CopyFrom<int>(slice_tensor, *cpu_ctx);
const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
} }
#ifndef PADDLE_ONLY_CPU
{
Tensor src_tensor;
Tensor gpu_tensor;
Tensor dst_tensor;
int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());
int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int));
// CPU Tensor to GPU Tensor
auto gpu_ctx = new paddle::platform::CUDADeviceContext(0);
gpu_tensor.CopyFrom<int>(src_tensor, *gpu_ctx);
// GPU Tensor to CPU Tensor
auto cpu_ctx = new paddle::platform::CPUDeviceContext();
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_ctx);
// Compare Tensors
const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}
Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
// CPU Slice Tensor to GPU Tensor
gpu_tensor.CopyFrom<int>(slice_tensor, *gpu_ctx);
Tensor slice_tensor = src_tensor.Slice<int>(1, 2); // GPU Tensor to CPU Tensor
dst_tensor.CopyFrom<int>(slice_tensor, CPUPlace()); dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_ctx);
const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>(); // Compare Slice Tensors
ASSERT_NE(dst_ptr, slice_ptr); const int* slice_ptr = slice_tensor.data<int>();
for (size_t i = 0; i < 3; ++i) { dst_ptr = dst_tensor.data<int>();
EXPECT_EQ(dst_ptr[i], slice_ptr[i]); ASSERT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
} }
#endif
} }
...@@ -207,8 +207,8 @@ Error __must_check backward(Argument& act) { ...@@ -207,8 +207,8 @@ Error __must_check backward(Argument& act) {
argument_.value->setData(act.value->getData() + offset, 1UL, size); argument_.value->setData(act.value->getData() + offset, 1UL, size);
argument_.grad->setData(act.grad->getData() + offset, 1UL, size); argument_.grad->setData(act.grad->getData() + offset, 1UL, size);
Error status = softmax_.backward(argument_); Error err = softmax_.backward(argument_);
if (!status) return status; if (!err.isOK()) return err;
} }
return Error(); return Error();
} }
......
...@@ -27,12 +27,11 @@ BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator, ...@@ -27,12 +27,11 @@ BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator,
system_allocator_(std::move(system_allocator)) {} system_allocator_(std::move(system_allocator)) {}
BuddyAllocator::~BuddyAllocator() { BuddyAllocator::~BuddyAllocator() {
DLOG(INFO) << "BuddyAllocator Disconstructor makes sure that all of these " VLOG(3) << "BuddyAllocator Disconstructor makes sure that all of these "
"have actually been freed"; "have actually been freed";
while (!pool_.empty()) { while (!pool_.empty()) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin())); auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
DLOG(INFO) << "Free from block (" << block << ", " << max_chunk_size_ VLOG(3) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
<< ")";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
...@@ -52,12 +51,11 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { ...@@ -52,12 +51,11 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// acquire the allocator lock // acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
DLOG(INFO) << "Allocate " << unaligned_size << " bytes from chunk size " VLOG(3) << "Allocate " << unaligned_size << " bytes from chunk size " << size;
<< size;
// if the allocation is huge, send directly to the system allocator // if the allocation is huge, send directly to the system allocator
if (size > max_chunk_size_) { if (size > max_chunk_size_) {
DLOG(INFO) << "Allocate from system allocator."; VLOG(3) << "Allocate from system allocator.";
return SystemAlloc(size); return SystemAlloc(size);
} }
...@@ -72,9 +70,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { ...@@ -72,9 +70,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
return nullptr; return nullptr;
} }
} else { } else {
DLOG(INFO) << "Allocation from existing memory block " << std::get<2>(*it) VLOG(3) << "Allocation from existing memory block " << std::get<2>(*it)
<< " at address " << " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data(); << reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
} }
total_used_ += size; total_used_ += size;
...@@ -91,10 +89,10 @@ void BuddyAllocator::Free(void* p) { ...@@ -91,10 +89,10 @@ void BuddyAllocator::Free(void* p) {
// Acquire the allocator lock // Acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
DLOG(INFO) << "Free from address " << block; VLOG(3) << "Free from address " << block;
if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) { if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
DLOG(INFO) << "Free directly from system allocator"; VLOG(3) << "Free directly from system allocator";
system_allocator_->Free(block, block->total_size(cache_), system_allocator_->Free(block, block->total_size(cache_),
block->index(cache_)); block->index(cache_));
...@@ -111,8 +109,8 @@ void BuddyAllocator::Free(void* p) { ...@@ -111,8 +109,8 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the right buddy // Trying to merge the right buddy
if (block->has_right_buddy(cache_)) { if (block->has_right_buddy(cache_)) {
DLOG(INFO) << "Merging this block " << block << " with its right buddy " VLOG(3) << "Merging this block " << block << " with its right buddy "
<< block->right_buddy(cache_); << block->right_buddy(cache_);
auto right_buddy = block->right_buddy(cache_); auto right_buddy = block->right_buddy(cache_);
...@@ -129,8 +127,8 @@ void BuddyAllocator::Free(void* p) { ...@@ -129,8 +127,8 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the left buddy // Trying to merge the left buddy
if (block->has_left_buddy(cache_)) { if (block->has_left_buddy(cache_)) {
DLOG(INFO) << "Merging this block " << block << " with its left buddy " VLOG(3) << "Merging this block " << block << " with its left buddy "
<< block->left_buddy(cache_); << block->left_buddy(cache_);
auto left_buddy = block->left_buddy(cache_); auto left_buddy = block->left_buddy(cache_);
...@@ -146,8 +144,8 @@ void BuddyAllocator::Free(void* p) { ...@@ -146,8 +144,8 @@ void BuddyAllocator::Free(void* p) {
} }
// Dumping this block into pool // Dumping this block into pool
DLOG(INFO) << "Inserting free block (" << block << ", " VLOG(3) << "Inserting free block (" << block << ", "
<< block->total_size(cache_) << ")"; << block->total_size(cache_) << ")";
pool_.insert( pool_.insert(
IndexSizeAddress(block->index(cache_), block->total_size(cache_), block)); IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
...@@ -166,7 +164,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) { ...@@ -166,7 +164,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
size_t index = 0; size_t index = 0;
void* p = system_allocator_->Alloc(index, size); void* p = system_allocator_->Alloc(index, size);
DLOG(INFO) << "Allocated " << p << " from system allocator."; VLOG(3) << "Allocated " << p << " from system allocator.";
if (p == nullptr) return nullptr; if (p == nullptr) return nullptr;
...@@ -192,8 +190,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { ...@@ -192,8 +190,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
if (p == nullptr) return pool_.end(); if (p == nullptr) return pool_.end();
DLOG(INFO) << "Creating and inserting new block " << p VLOG(3) << "Creating and inserting new block " << p
<< " from system allocator"; << " from system allocator";
static_cast<MemoryBlock*>(p)->init(cache_, MemoryBlock::FREE_CHUNK, index, static_cast<MemoryBlock*>(p)->init(cache_, MemoryBlock::FREE_CHUNK, index,
max_chunk_size_, nullptr, nullptr); max_chunk_size_, nullptr, nullptr);
...@@ -237,19 +235,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, ...@@ -237,19 +235,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
auto block = static_cast<MemoryBlock*>(std::get<2>(*it)); auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
pool_.erase(it); pool_.erase(it);
DLOG(INFO) << "Split block (" << block << ", " << block->total_size(cache_) VLOG(3) << "Split block (" << block << ", " << block->total_size(cache_)
<< ") into"; << ") into";
block->split(cache_, size); block->split(cache_, size);
DLOG(INFO) << "Left block (" << block << ", " << block->total_size(cache_) VLOG(3) << "Left block (" << block << ", " << block->total_size(cache_)
<< ")"; << ")";
block->set_type(cache_, MemoryBlock::ARENA_CHUNK); block->set_type(cache_, MemoryBlock::ARENA_CHUNK);
// the rest of memory if exist // the rest of memory if exist
if (block->has_right_buddy(cache_)) { if (block->has_right_buddy(cache_)) {
if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) { if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
DLOG(INFO) << "Insert right block (" << block->right_buddy(cache_) << ", " VLOG(3) << "Insert right block (" << block->right_buddy(cache_) << ", "
<< block->right_buddy(cache_)->total_size(cache_) << ")"; << block->right_buddy(cache_)->total_size(cache_) << ")";
pool_.insert( pool_.insert(
IndexSizeAddress(block->right_buddy(cache_)->index(cache_), IndexSizeAddress(block->right_buddy(cache_)->index(cache_),
...@@ -276,7 +274,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() { ...@@ -276,7 +274,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
return; return;
} }
DLOG(INFO) << "Return block " << block << " to fallback allocator."; VLOG(3) << "Return block " << block << " to fallback allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
...@@ -312,7 +310,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() { ...@@ -312,7 +310,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool)); MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
DLOG(INFO) << "Return block " << block << " to base allocator."; VLOG(3) << "Return block " << block << " to base allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
......
...@@ -29,10 +29,10 @@ void Free(Place, void*); ...@@ -29,10 +29,10 @@ void Free(Place, void*);
template <typename Place> template <typename Place>
size_t Used(Place); size_t Used(Place);
template <typename T, /* must be POD types */ template <typename T, typename Place>
typename Place /* platform::GPUPlace or platform::CPUPlace */,
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
class PODDeleter { class PODDeleter {
static_assert(std::is_pod<T>::value, "T must be POD");
public: public:
PODDeleter(Place place) : place_(place) {} PODDeleter(Place place) : place_(place) {}
void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); } void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }
......
...@@ -13,17 +13,14 @@ See the License for the specific language governing permissions and ...@@ -13,17 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/add_op.h" #include "paddle/operators/add_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class AddOp : public framework::OperatorWithKernel { class AddOp : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "Input size of AddOp must be two"); PADDLE_ENFORCE(inputs.size() == 2, "Input size of AddOp must be two");
PADDLE_ENFORCE(outputs.size() == 1, "Output size of AddOp must be one"); PADDLE_ENFORCE(outputs.size() == 1, "Output size of AddOp must be one");
PADDLE_ENFORCE( PADDLE_ENFORCE(
...@@ -35,10 +32,10 @@ protected: ...@@ -35,10 +32,10 @@ protected:
} }
}; };
class AddOpMaker : public framework::OpProtoAndCheckerMaker { class AddOpMaker : public OpProtoAndCheckerMaker {
public: public:
AddOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) AddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of add op"); AddInput("X", "The first input of add op");
AddInput("Y", "The second input of add op"); AddInput("Y", "The second input of add op");
AddOutput("Out", "The output of add op"); AddOutput("Out", "The output of add op");
...@@ -50,11 +47,10 @@ The equation is: Out = X + Y ...@@ -50,11 +47,10 @@ The equation is: Out = X + Y
} }
}; };
class AddOpGrad : public framework::OperatorWithKernel { class AddOpGrad : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {}
const std::vector<framework::Tensor *> &outputs) const override {}
std::string DebugString() const override { std::string DebugString() const override {
LOG(INFO) << "AddOpGrad"; LOG(INFO) << "AddOpGrad";
return ""; return "";
...@@ -64,7 +60,6 @@ protected: ...@@ -64,7 +60,6 @@ protected:
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker); REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker);
REGISTER_GRADIENT_OP(add_two, add_two_grad, paddle::operators::AddOpGrad); REGISTER_GRADIENT_OP(add_two, add_two_grad, ops::AddOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(add_two, ops::AddKernel<ops::CPUPlace, float>);
add_two, paddle::operators::AddKernel<paddle::platform::CPUPlace, float>);
#include "paddle/operators/add_op.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/add_op.h"
REGISTER_OP_GPU_KERNEL(add_two, REGISTER_OP_GPU_KERNEL(add_two, ops::AddKernel<ops::GPUPlace, float>);
paddle::operators::AddKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file
...@@ -13,27 +13,24 @@ See the License for the specific language governing permissions and ...@@ -13,27 +13,24 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "glog/logging.h" #include "paddle/operators/type_alias.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place, typename T> template <typename Place, typename T>
class AddKernel : public framework::OpKernel { class AddKernel : public OpKernel {
public: public:
void Compute(const framework::KernelContext& context) const override { void Compute(const KernelContext& context) const override {
auto input0 = context.Input(0)->Get<framework::Tensor>(); auto input0 = context.Input(0)->Get<Tensor>();
auto input1 = context.Input(1)->Get<framework::Tensor>(); auto input1 = context.Input(1)->Get<Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>(); auto output = context.Output(0)->GetMutable<Tensor>();
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
framework::EigenVector<T>::Flatten(*output).device( EigenVector<T>::Flatten(*output).device(
*(context.GetEigenDevice<Place>())) = *(context.GetEigenDevice<Place>())) =
framework::EigenVector<T>::Flatten(input0) + EigenVector<T>::Flatten(input0) + EigenVector<T>::Flatten(input1);
framework::EigenVector<T>::Flatten(input1);
} }
}; };
......
...@@ -13,17 +13,14 @@ See the License for the specific language governing permissions and ...@@ -13,17 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/cross_entropy_op.h" #include "paddle/operators/cross_entropy_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class OnehotCrossEntropyOp : public framework::OperatorWithKernel { class OnehotCrossEntropyOp : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, PADDLE_ENFORCE(inputs.size() == 2,
"Input size of OnehotCrossEntropyOp must be two"); "Input size of OnehotCrossEntropyOp must be two");
PADDLE_ENFORCE(outputs.size() == 1, PADDLE_ENFORCE(outputs.size() == 1,
...@@ -35,15 +32,14 @@ protected: ...@@ -35,15 +32,14 @@ protected:
PADDLE_ENFORCE(inputs[0]->dims().size() == 2, "X's dimension must be 2."); PADDLE_ENFORCE(inputs[0]->dims().size() == 2, "X's dimension must be 2.");
PADDLE_ENFORCE(outputs[0]->dims().size() == 1, PADDLE_ENFORCE(outputs[0]->dims().size() == 1,
"label's dimension must be 1."); "label's dimension must be 1.");
outputs[0]->Resize(framework::make_ddim({inputs[0]->dims()[0]})); outputs[0]->Resize({inputs[0]->dims()[0]});
} }
}; };
class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { class OnehotCrossEntropyOpMaker : public OpProtoAndCheckerMaker {
public: public:
OnehotCrossEntropyOpMaker(framework::OpProto *proto, OnehotCrossEntropyOpMaker(OpProto *proto, OpAttrChecker *op_checker)
framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) {
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of OnehotCrossEntropyOp"); AddInput("X", "The first input of OnehotCrossEntropyOp");
AddInput("label", "The second input of OnehotCrossEntropyOp"); AddInput("label", "The second input of OnehotCrossEntropyOp");
AddOutput("Y", "The output of OnehotCrossEntropyOp"); AddOutput("Y", "The output of OnehotCrossEntropyOp");
...@@ -59,9 +55,7 @@ OnehotCrossEntropy Operator. ...@@ -59,9 +55,7 @@ OnehotCrossEntropy Operator.
} // namespace paddle } // namespace paddle
REGISTER_OP(onehot_cross_entropy, REGISTER_OP(onehot_cross_entropy,
paddle::operators::OnehotCrossEntropyOp, ops::OnehotCrossEntropyOp,
paddle::operators::OnehotCrossEntropyOpMaker); ops::OnehotCrossEntropyOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(onehot_cross_entropy,
onehot_cross_entropy, ops::OnehotCrossEntropyOpKernel<ops::CPUPlace, float>);
paddle::operators::OnehotCrossEntropyOpKernel<::paddle::platform::CPUPlace,
float>);
#include "paddle/operators/cross_entropy_op.h" #include "paddle/operators/cross_entropy_op.h"
#include "paddle/framework/op_registry.h"
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, REGISTER_OP_GPU_KERNEL(onehot_cross_entropy,
paddle::operators::OnehotCrossEntropyOpKernel< ops::OnehotCrossEntropyOpKernel<ops::GPUPlace, float>);
::paddle::platform::GPUPlace, float>); \ No newline at end of file
\ No newline at end of file
...@@ -13,23 +13,21 @@ See the License for the specific language governing permissions and ...@@ -13,23 +13,21 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "glog/logging.h" #include "paddle/operators/type_alias.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place, typename T> template <typename Place, typename T>
class OnehotCrossEntropyOpKernel : public framework::OpKernel { class OnehotCrossEntropyOpKernel : public OpKernel {
public: public:
constexpr T LOG_THRESHOLD() const { return static_cast<T>(1e-20); } constexpr T LOG_THRESHOLD() const { return static_cast<T>(1e-20); }
void Compute(const framework::KernelContext& context) const override { void Compute(const KernelContext& context) const override {
auto X = context.Input(0)->Get<framework::Tensor>(); auto X = context.Input(0)->Get<Tensor>();
const T* X_data = X.data<T>(); const T* X_data = X.data<T>();
const int* label_data = const int* label_data = context.Input(1)->Get<Tensor>().data<int>();
context.Input(1)->Get<framework::Tensor>().data<int>(); auto* Y = context.Output(0)->GetMutable<Tensor>();
auto* Y = context.Output(0)->GetMutable<framework::Tensor>();
Y->mutable_data<T>(context.GetPlace()); Y->mutable_data<T>(context.GetPlace());
......
...@@ -12,41 +12,38 @@ ...@@ -12,41 +12,38 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/framework/net.h" #include "type_alias.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class FullyConnectedOp : public framework::PlainNet { class FullyConnectedOp : public NetOp {
public: public:
void Init() override { void Init() override {
AddOp(framework::OpRegistry::CreateOp("mul", AddOp(OpRegistry::CreateOp("mul",
{ {
Input("X"), Input("W"), Input("X"), Input("W"),
}, },
{Output("before_act")}, {Output("before_act")},
{})); {}));
auto b = Input("b"); auto b = Input("b");
if (b != framework::OperatorBase::EMPTY_VAR_NAME()) { if (b != EMPTY_VAR_NAME()) {
AddOp(framework::OpRegistry::CreateOp("rowwise_add", AddOp(OpRegistry::CreateOp("rowwise_add",
{Output("before_act"), Input("b")}, {Output("before_act"), Input("b")},
{Output("before_act")}, {Output("before_act")},
{})); {}));
} }
auto activation = GetAttr<std::string>("activation"); auto activation = GetAttr<std::string>("activation");
AddOp(framework::OpRegistry::CreateOp( AddOp(OpRegistry::CreateOp(
activation, {Output("before_act")}, {Output("Y")}, {})); activation, {Output("before_act")}, {Output("Y")}, {}));
CompleteAddOp(false); CompleteAddOp(false);
} }
}; };
class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker { class FullyConnectedOpMaker : public OpProtoAndCheckerMaker {
public: public:
FullyConnectedOpMaker(framework::OpProto *proto, FullyConnectedOpMaker(OpProto *proto, OpAttrChecker *op_checker)
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input of fc operator"); AddInput("X", "the input of fc operator");
AddInput("W", "the weight of fc operator"); AddInput("W", "the weight of fc operator");
...@@ -71,6 +68,4 @@ USE_OP(rowwise_add); ...@@ -71,6 +68,4 @@ USE_OP(rowwise_add);
USE_OP(sigmoid); USE_OP(sigmoid);
USE_OP(softmax); USE_OP(softmax);
REGISTER_OP(fc, REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker);
paddle::operators::FullyConnectedOp,
paddle::operators::FullyConnectedOpMaker);
...@@ -13,17 +13,14 @@ ...@@ -13,17 +13,14 @@
limitations under the License. */ limitations under the License. */
#include "paddle/operators/mul_op.h" #include "paddle/operators/mul_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class MulOp : public framework::OperatorWithKernel { class MulOp : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs"); PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs");
auto dim0 = inputs[0]->dims(); auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims(); auto dim1 = inputs[1]->dims();
...@@ -37,10 +34,10 @@ protected: ...@@ -37,10 +34,10 @@ protected:
} }
}; };
class MulOpMaker : public framework::OpProtoAndCheckerMaker { class MulOpMaker : public OpProtoAndCheckerMaker {
public: public:
MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of mul op"); AddInput("X", "The first input of mul op");
AddInput("Y", "The second input of mul op"); AddInput("Y", "The second input of mul op");
AddOutput("Out", "The output of mul op"); AddOutput("Out", "The output of mul op");
...@@ -52,11 +49,10 @@ The equation is: Out = X * Y ...@@ -52,11 +49,10 @@ The equation is: Out = X * Y
} }
}; };
class MulOpGrad : public framework::OperatorWithKernel { class MulOpGrad : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {}
const std::vector<framework::Tensor *> &outputs) const override {}
std::string DebugString() const override { std::string DebugString() const override {
LOG(INFO) << "MulGrad"; LOG(INFO) << "MulGrad";
return ""; return "";
...@@ -66,8 +62,7 @@ protected: ...@@ -66,8 +62,7 @@ protected:
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker); REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker);
REGISTER_GRADIENT_OP(mul, mul_grad, paddle::operators::MulOpGrad); REGISTER_GRADIENT_OP(mul, mul_grad, ops::MulOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel<ops::CPUPlace, float>);
mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
...@@ -13,8 +13,5 @@ ...@@ -13,8 +13,5 @@
limitations under the License. */ limitations under the License. */
#include "paddle/operators/mul_op.h" #include "paddle/operators/mul_op.h"
#include "paddle/framework/op_registry.h"
REGISTER_OP_GPU_KERNEL(mul, REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<ops::GPUPlace, float>);
paddle::operators::MulKernel<paddle::platform \ No newline at end of file
::GPUPlace, float>);
\ No newline at end of file
...@@ -14,30 +14,27 @@ ...@@ -14,30 +14,27 @@
#pragma once #pragma once
#include "glog/logging.h" #include "paddle/operators/type_alias.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place, typename T> template <typename Place, typename T>
class MulKernel : public framework::OpKernel { class MulKernel : public OpKernel {
public: public:
void Compute(const framework::KernelContext& context) const override { void Compute(const KernelContext& context) const override {
Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = { Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = {
{Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}}; {Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}};
auto input0 = context.Input(0)->Get<framework::Tensor>(); auto input0 = context.Input(0)->Get<Tensor>();
auto input1 = context.Input(1)->Get<framework::Tensor>(); auto input1 = context.Input(1)->Get<Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>(); auto* output = context.Output(0)->GetMutable<Tensor>();
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
framework::EigenMatrix<T>::From(*output).device( EigenMatrix<T>::From(*output).device(*(context.GetEigenDevice<Place>())) =
*(context.GetEigenDevice<Place>())) = EigenMatrix<T>::From(input0).contract(EigenMatrix<T>::From(input1),
framework::EigenMatrix<T>::From(input0).contract( dim_pair);
framework::EigenMatrix<T>::From(input1), dim_pair);
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -13,15 +13,13 @@ ...@@ -13,15 +13,13 @@
limitations under the License. */ limitations under the License. */
#include "paddle/operators/rowwise_add_op.h" #include "paddle/operators/rowwise_add_op.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class RowWiseAddOp : public framework::OperatorWithKernel { class RowWiseAddOp : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs is needed by rowwise add"); PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs is needed by rowwise add");
auto dim0 = inputs[0]->dims(); auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims(); auto dim1 = inputs[1]->dims();
...@@ -34,11 +32,10 @@ protected: ...@@ -34,11 +32,10 @@ protected:
} }
}; };
class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker { class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
public: public:
RowWiseAddOpMaker(framework::OpProto *proto, RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) {
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The left input of row-wise add op, must be matrix"); AddInput("X", "The left input of row-wise add op, must be matrix");
AddInput("b", "The right input of row-wise add op, must be vector"); AddInput("b", "The right input of row-wise add op, must be vector");
AddOutput("Out", "The output of row-wise add op"); AddOutput("Out", "The output of row-wise add op");
...@@ -53,9 +50,6 @@ for i in xrange(X.shape[0]): ...@@ -53,9 +50,6 @@ for i in xrange(X.shape[0]):
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP(rowwise_add, REGISTER_OP(rowwise_add, ops::RowWiseAddOp, ops::RowWiseAddOpMaker);
paddle::operators::RowWiseAddOp, REGISTER_OP_CPU_KERNEL(rowwise_add,
paddle::operators::RowWiseAddOpMaker); ops::RowWiseAddKernel<ops::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
#include "paddle/framework/op_registry.h"
#include "paddle/operators/rowwise_add_op.h" #include "paddle/operators/rowwise_add_op.h"
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(rowwise_add,
rowwise_add, ops::RowWiseAddKernel<ops::GPUPlace, float>);
paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace, float>);
...@@ -13,25 +13,23 @@ ...@@ -13,25 +13,23 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "glog/logging.h" #include "paddle/operators/type_alias.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place, typename T> template <typename Place, typename T>
class RowWiseAddKernel : public framework::OpKernel { class RowWiseAddKernel : public OpKernel {
public: public:
void Compute(const framework::KernelContext& context) const override { void Compute(const KernelContext& context) const override {
auto in0 = context.Input(0)->Get<framework::Tensor>(); auto in0 = context.Input(0)->Get<Tensor>();
auto in1 = context.Input(1)->Get<framework::Tensor>(); auto in1 = context.Input(1)->Get<Tensor>();
auto* out = context.Output(0)->GetMutable<framework::Tensor>(); auto* out = context.Output(0)->GetMutable<Tensor>();
out->mutable_data<T>(context.GetPlace()); out->mutable_data<T>(context.GetPlace());
auto input = framework::EigenMatrix<T>::From(in0); auto input = EigenMatrix<T>::From(in0);
auto bias = framework::EigenVector<T>::From(in1); auto bias = EigenVector<T>::From(in1);
auto output = framework::EigenMatrix<T>::From(*out); auto output = EigenMatrix<T>::From(*out);
const int bias_size = bias.dimension(0); const int bias_size = bias.dimension(0);
const int rest_size = input.size() / bias_size; const int rest_size = input.size() / bias_size;
......
...@@ -13,17 +13,14 @@ See the License for the specific language governing permissions and ...@@ -13,17 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/sgd_op.h" #include "paddle/operators/sgd_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class SGDOp : public framework::OperatorWithKernel { class SGDOp : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "Input size of SGDOp must be two"); PADDLE_ENFORCE(inputs.size() == 2, "Input size of SGDOp must be two");
PADDLE_ENFORCE(outputs.size() == 1, "Output size of SGDOp must be one"); PADDLE_ENFORCE(outputs.size() == 1, "Output size of SGDOp must be one");
PADDLE_ENFORCE(inputs[0] != nullptr, "inputs[0] mast be set"); PADDLE_ENFORCE(inputs[0] != nullptr, "inputs[0] mast be set");
...@@ -35,10 +32,10 @@ protected: ...@@ -35,10 +32,10 @@ protected:
} }
}; };
class SGDOpMaker : public framework::OpProtoAndCheckerMaker { class SGDOpMaker : public OpProtoAndCheckerMaker {
public: public:
SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) SGDOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("param", "input parameter"); AddInput("param", "input parameter");
AddInput("grad", "input gradient"); AddInput("grad", "input gradient");
AddOutput("param_out", "output parameter"); AddOutput("param_out", "output parameter");
...@@ -55,7 +52,5 @@ param_out = param - learning_rate * grad; ...@@ -55,7 +52,5 @@ param_out = param - learning_rate * grad;
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP(sgd, paddle::operators::SGDOp, paddle::operators::SGDOpMaker); REGISTER_OP(sgd, ops::SGDOp, ops::SGDOpMaker);
typedef paddle::operators::SGDOpKernel<::paddle::platform::CPUPlace, float> REGISTER_OP_CPU_KERNEL(sgd, ops::SGDOpKernel<ops::CPUPlace, float>);
SGDOpKernel_CPU_float;
REGISTER_OP_CPU_KERNEL(sgd, SGDOpKernel_CPU_float);
#include "paddle/operators/sgd_op.h" #include "paddle/operators/sgd_op.h"
#include "paddle/framework/op_registry.h"
typedef paddle::operators::SGDOpKernel<::paddle::platform::GPUPlace, float> SGDOpKernel_GPU_float; REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<ops::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(sgd, SGDOpKernel_GPU_float); \ No newline at end of file
\ No newline at end of file
...@@ -13,28 +13,24 @@ See the License for the specific language governing permissions and ...@@ -13,28 +13,24 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "glog/logging.h" #include "paddle/operators/type_alias.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place, typename T> template <typename Place, typename T>
class SGDOpKernel : public framework::OpKernel { class SGDOpKernel : public OpKernel {
public: public:
void Compute(const framework::KernelContext& ctx) const override { void Compute(const KernelContext& ctx) const override {
auto param = ctx.Input("param")->Get<framework::Tensor>(); auto param = ctx.Input("param")->Get<Tensor>();
auto grad = ctx.Input("grad")->Get<framework::Tensor>(); auto grad = ctx.Input("grad")->Get<Tensor>();
auto* param_out = ctx.Output(0)->GetMutable<framework::Tensor>(); auto* param_out = ctx.Output(0)->GetMutable<Tensor>();
float lr = ctx.op_.GetAttr<float>("learning_rate"); float lr = ctx.op_.GetAttr<float>("learning_rate");
param_out->mutable_data<T>(ctx.GetPlace()); param_out->mutable_data<T>(ctx.GetPlace());
framework::EigenVector<T>::Flatten(*param_out) EigenVector<T>::Flatten(*param_out).device(*(ctx.GetEigenDevice<Place>())) =
.device(*(ctx.GetEigenDevice<Place>())) = EigenVector<T>::Flatten(param) - lr * EigenVector<T>::Flatten(grad);
framework::EigenVector<T>::Flatten(param) -
lr * framework::EigenVector<T>::Flatten(grad);
} }
}; };
......
...@@ -13,37 +13,33 @@ ...@@ -13,37 +13,33 @@
limitations under the License. */ limitations under the License. */
#include "paddle/operators/sigmoid_op.h" #include "paddle/operators/sigmoid_op.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class SigmoidOp : public framework::OperatorWithKernel { class SigmoidOp : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input"); PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input");
PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output"); PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output");
outputs[0]->Resize(inputs[0]->dims()); outputs[0]->Resize(inputs[0]->dims());
} }
}; };
class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { class SigmoidOpMaker : public OpProtoAndCheckerMaker {
public: public:
SigmoidOpMaker(framework::OpProto *proto, SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) {
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "sigmoid input"); AddInput("X", "sigmoid input");
AddOutput("Y", "sigmoid output"); AddOutput("Y", "sigmoid output");
AddComment("Sigmoid function"); AddComment("Sigmoid function");
} }
}; };
class SigmoidOpGrad : public framework::OperatorWithKernel { class SigmoidOpGrad : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {}
const std::vector<framework::Tensor *> &outputs) const override {}
std::string DebugString() const override { std::string DebugString() const override {
LOG(INFO) << "SigmoidGrad"; LOG(INFO) << "SigmoidGrad";
return ""; return "";
...@@ -53,11 +49,7 @@ protected: ...@@ -53,11 +49,7 @@ protected:
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP(sigmoid, REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker);
paddle::operators::SigmoidOp, REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, ops::SigmoidOpGrad);
paddle::operators::SigmoidOpMaker);
REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, paddle::operators::SigmoidOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(sigmoid, ops::SigmoidKernel<ops::CPUPlace, float>);
sigmoid,
paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
#include "paddle/operators/sigmoid_op.h" #include "paddle/operators/sigmoid_op.h"
#include "paddle/framework/op_registry.h"
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel<ops::GPUPlace, float>);
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
...@@ -14,25 +14,23 @@ ...@@ -14,25 +14,23 @@
#pragma once #pragma once
#include "glog/logging.h" #include "paddle/operators/type_alias.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place, typename T> template <typename Place, typename T>
class SigmoidKernel : public framework::OpKernel { class SigmoidKernel : public OpKernel {
public: public:
void Compute(const framework::KernelContext& context) const override { void Compute(const KernelContext& context) const override {
auto input = context.Input(0)->Get<framework::Tensor>(); auto input = context.Input(0)->Get<Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>(); auto* output = context.Output(0)->GetMutable<Tensor>();
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
framework::EigenVector<T>::Flatten(*output).device( EigenVector<T>::Flatten(*output).device(
*(context.GetEigenDevice<Place>())) = *(context.GetEigenDevice<Place>())) =
1.0 / (1.0 + (-1.0 * framework::EigenVector<T>::Flatten(input)).exp()); 1.0 / (1.0 + (-1.0 * EigenVector<T>::Flatten(input)).exp());
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -12,16 +12,14 @@ ...@@ -12,16 +12,14 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/softmax_op.h" #include "paddle/operators/softmax_op.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class SoftmaxOp : public framework::OperatorWithKernel { class SoftmaxOp : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax"); PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
PADDLE_ENFORCE(inputs[0]->dims().size() == 2, PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
"The input of softmax op must be matrix"); "The input of softmax op must be matrix");
...@@ -31,10 +29,9 @@ protected: ...@@ -31,10 +29,9 @@ protected:
} }
}; };
class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { class SoftmaxOpMaker : public OpProtoAndCheckerMaker {
public: public:
SoftmaxOpMaker(framework::OpProto *proto, SoftmaxOpMaker(OpProto *proto, OpAttrChecker *op_checker)
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "input of softmax"); AddInput("X", "input of softmax");
AddOutput("Y", "output of softmax"); AddOutput("Y", "output of softmax");
...@@ -42,11 +39,10 @@ public: ...@@ -42,11 +39,10 @@ public:
} }
}; };
class SoftmaxOpGrad : public framework::OperatorWithKernel { class SoftmaxOpGrad : public OperatorWithKernel {
protected: protected:
void InferShape( void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<const framework::Tensor *> &inputs, const std::vector<Tensor *> &outputs) const override {}
const std::vector<framework::Tensor *> &outputs) const override {}
std::string DebugString() const override { std::string DebugString() const override {
LOG(INFO) << "SoftmaxOpGrad"; LOG(INFO) << "SoftmaxOpGrad";
return ""; return "";
...@@ -56,9 +52,6 @@ protected: ...@@ -56,9 +52,6 @@ protected:
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
REGISTER_GRADIENT_OP(softmax, softmax_grad, paddle::operators::SoftmaxOpGrad); REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad);
REGISTER_OP_CPU_KERNEL(softmax, REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<ops::CPUPlace, float>);
ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/softmax_op.h" #include "paddle/operators/softmax_op.h"
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel<ops::GPUPlace, float>);
softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
...@@ -14,23 +14,21 @@ ...@@ -14,23 +14,21 @@
#pragma once #pragma once
#include "glog/logging.h" #include "paddle/operators/type_alias.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename Place, typename T> template <typename Place, typename T>
class SoftmaxKernel : public framework::OpKernel { class SoftmaxKernel : public OpKernel {
public: public:
void Compute(const framework::KernelContext& context) const override { void Compute(const KernelContext& context) const override {
auto input = context.Input(0)->Get<framework::Tensor>(); auto input = context.Input(0)->Get<Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>(); auto* output = context.Output(0)->GetMutable<Tensor>();
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
auto logits = framework::EigenMatrix<T>::From(input); auto logits = EigenMatrix<T>::From(input);
auto softmax = framework::EigenMatrix<T>::From(*output); auto softmax = EigenMatrix<T>::From(*output);
const int kBatchDim = 0; const int kBatchDim = 0;
const int kClassDim = 1; const int kClassDim = 1;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using OpKernel = framework::OpKernel;
using KernelContext = framework::KernelContext;
template <typename T,
int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename T,
int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename T,
size_t D,
int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
using Tensor = framework::Tensor;
using OperatorWithKernel = framework::OperatorWithKernel;
using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker;
using OpProto = framework::OpProto;
using OpAttrChecker = framework::OpAttrChecker;
using CPUPlace = platform::CPUPlace;
using GPUPlace = platform::GPUPlace;
using NetOp = framework::NetOp;
using OpRegistry = framework::OpRegistry;
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
...@@ -87,7 +87,7 @@ class CUDADeviceContext : public DeviceContext { ...@@ -87,7 +87,7 @@ class CUDADeviceContext : public DeviceContext {
"cudaStreamSynchronize failed"); "cudaStreamSynchronize failed");
} }
cudaStream_t stream() { return stream_; } cudaStream_t stream() const { return stream_; }
Eigen::GpuDevice* eigen_device() const { return eigen_device_.get(); } Eigen::GpuDevice* eigen_device() const { return eigen_device_.get(); }
......
...@@ -146,22 +146,22 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -146,22 +146,22 @@ All parameter, weight, gradient are variables in Paddle.
}); });
ExposeOperator(operator_base); ExposeOperator(operator_base);
using PlainNetPtr = std::shared_ptr<pd::PlainNet>; py::class_<pd::NetOp, std::shared_ptr<pd::NetOp>> net(m, "Net");
py::class_<pd::PlainNet, PlainNetPtr> net(m, "Net");
net.def_static("create", net.def_static("create",
[]() -> std::shared_ptr<pd::PlainNet> { []() -> std::shared_ptr<pd::NetOp> {
auto retv = std::make_shared<pd::PlainNet>(); auto retv = std::make_shared<pd::NetOp>();
retv->type_ = "plain_net"; retv->type_ = "plain_net";
return retv; return retv;
}) })
.def("add_op", &pd::PlainNet::AddOp) .def("add_op", &pd::NetOp::AddOp)
.def("add_op", .def("add_op",
[](PlainNetPtr& self, const PlainNetPtr& net) -> void { [](pd::NetOp& self, const std::shared_ptr<pd::NetOp>& net) -> void {
self->AddOp(std::static_pointer_cast<pd::OperatorBase>(net)); self.AddOp(std::static_pointer_cast<pd::OperatorBase>(net));
}) })
.def("complete_add_op", &pd::PlainNet::CompleteAddOp) .def("complete_add_op", &pd::NetOp::CompleteAddOp)
.def("complete_add_op", [](PlainNetPtr& self) { self->CompleteAddOp(); }); .def("complete_add_op",
[](std::shared_ptr<pd::NetOp>& self) { self->CompleteAddOp(); });
ExposeOperator(net); ExposeOperator(net);
m.def("unique_integer", UniqueIntegerGenerator); m.def("unique_integer", UniqueIntegerGenerator);
......
...@@ -76,7 +76,11 @@ void NewRemoteParameterUpdater::init( ...@@ -76,7 +76,11 @@ void NewRemoteParameterUpdater::init(
sgdConfigV2->set_decay(paramConfig.decay_rate()); sgdConfigV2->set_decay(paramConfig.decay_rate());
optimizeConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); optimizeConfigV2.set_lr_policy(paddle::OptimizerConfig::Const);
auto constlr = optimizeConfigV2.mutable_const_lr(); auto constlr = optimizeConfigV2.mutable_const_lr();
constlr->set_learning_rate(paramConfig.learning_rate()); if (paramConfig.has_learning_rate()) {
constlr->set_learning_rate(paramConfig.learning_rate());
} else {
constlr->set_learning_rate(trainerConfig_.learning_rate());
}
if (trainerConfig_.algorithm() == "sgd") { if (trainerConfig_.algorithm() == "sgd") {
optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);
// FIXME: config all algorithms // FIXME: config all algorithms
......
...@@ -126,9 +126,11 @@ public: ...@@ -126,9 +126,11 @@ public:
} }
/** /**
* @brief operator bool, return True if there is something error. * @brief check this status by glog.
* @note It is a temp method used during cleaning Paddle code. It will be
* removed later.
*/ */
operator bool() const { return !this->isOK(); } void check() const { CHECK(this->isOK()) << msg(); }
/** /**
* @brief isOK return True if there is no error. * @brief isOK return True if there is no error.
...@@ -136,13 +138,6 @@ public: ...@@ -136,13 +138,6 @@ public:
*/ */
bool isOK() const { return msg_ == nullptr; } bool isOK() const { return msg_ == nullptr; }
/**
* @brief check this status by glog.
* @note It is a temp method used during cleaning Paddle code. It will be
* removed later.
*/
void check() const { CHECK(this->isOK()) << msg(); }
private: private:
std::shared_ptr<std::string> msg_; std::shared_ptr<std::string> msg_;
}; };
......
...@@ -18,17 +18,17 @@ limitations under the License. */ ...@@ -18,17 +18,17 @@ limitations under the License. */
TEST(Error, testAll) { TEST(Error, testAll) {
paddle::Error error; paddle::Error error;
ASSERT_FALSE(error); ASSERT_TRUE(error.isOK());
error = paddle::Error("I'm the error"); error = paddle::Error("I'm the error");
ASSERT_TRUE(error); ASSERT_FALSE(error.isOK());
ASSERT_STREQ("I'm the error", error.msg()); ASSERT_STREQ("I'm the error", error.msg());
error = paddle::Error("error2"); error = paddle::Error("error2");
ASSERT_TRUE(error); ASSERT_FALSE(error.isOK());
ASSERT_STREQ("error2", error.msg()); ASSERT_STREQ("error2", error.msg());
int i = 3; int i = 3;
auto error3 = paddle::Error("error%d", i); auto error3 = paddle::Error("error%d", i);
ASSERT_TRUE(error3); ASSERT_FALSE(error3.isOK());
ASSERT_STREQ("error3", error3.msg()); ASSERT_STREQ("error3", error3.msg());
} }
...@@ -272,7 +272,7 @@ class ExtraLayerAttribute(object): ...@@ -272,7 +272,7 @@ class ExtraLayerAttribute(object):
for key in self.attr: for key in self.attr:
if not hasattr(self, 'can_%s' % key) or \ if not hasattr(self, 'can_%s' % key) or \
not getattr(self, 'can_%s' % key): not getattr(self, 'can_%s' % key):
raise NotImplementedError("Layer %s cannot support %s" % raise NotImplementedError("Layer %s does not support %s" %
(layer_name, key)) (layer_name, key))
@staticmethod @staticmethod
......
...@@ -865,7 +865,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): ...@@ -865,7 +865,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
@wrap_name_default("embedding") @wrap_name_default("embedding")
@wrap_param_attr_default() @wrap_param_attr_default()
@layer_support(ERROR_CLIPPING) @layer_support(ERROR_CLIPPING, DROPOUT)
def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):
""" """
Define a embedding Layer. Define a embedding Layer.
...@@ -1320,7 +1320,7 @@ def pooling_layer(input, ...@@ -1320,7 +1320,7 @@ def pooling_layer(input,
@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
@wrap_act_default(param_names=["act", 'state_act'], act=TanhActivation()) @wrap_act_default(param_names=["act", 'state_act'], act=TanhActivation())
@wrap_name_default("lstmemory") @wrap_name_default("lstmemory")
@layer_support(DROPOUT) @layer_support()
def lstmemory(input, def lstmemory(input,
name=None, name=None,
size=None, size=None,
...@@ -1429,7 +1429,7 @@ def lstmemory(input, ...@@ -1429,7 +1429,7 @@ def lstmemory(input,
@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation()) @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
@wrap_act_default(param_names=["act"], act=TanhActivation()) @wrap_act_default(param_names=["act"], act=TanhActivation())
@wrap_name_default("gru") @wrap_name_default("gru")
@layer_support(DROPOUT) @layer_support()
def grumemory(input, def grumemory(input,
size=None, size=None,
name=None, name=None,
...@@ -1793,7 +1793,7 @@ def repeat_layer(input, ...@@ -1793,7 +1793,7 @@ def repeat_layer(input,
@wrap_name_default("seqreshape") @wrap_name_default("seqreshape")
@wrap_act_default(act=IdentityActivation()) @wrap_act_default(act=IdentityActivation())
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@layer_support() @layer_support(ERROR_CLIPPING, DROPOUT)
def seq_reshape_layer(input, def seq_reshape_layer(input,
reshape_size, reshape_size,
act=None, act=None,
...@@ -2703,7 +2703,7 @@ def img_cmrnorm_layer(input, ...@@ -2703,7 +2703,7 @@ def img_cmrnorm_layer(input,
default_factory=lambda _: ParamAttr(initial_mean=1.0, initial_std=0.)) default_factory=lambda _: ParamAttr(initial_mean=1.0, initial_std=0.))
@wrap_act_default(act=ReluActivation()) @wrap_act_default(act=ReluActivation())
@wrap_name_default("batch_norm") @wrap_name_default("batch_norm")
@layer_support(DROPOUT) @layer_support(DROPOUT, ERROR_CLIPPING)
def batch_norm_layer(input, def batch_norm_layer(input,
act=None, act=None,
name=None, name=None,
...@@ -2783,15 +2783,6 @@ def batch_norm_layer(input, ...@@ -2783,15 +2783,6 @@ def batch_norm_layer(input,
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if not isinstance(act, ReluActivation):
logger.log(logging.WARN,
"%s is not recommend for batch normalization's activation, "
"maybe the relu is better" % act.name)
if not isinstance(input.activation, LinearActivation):
logger.log(logging.WARN,
"The activation should be inside batch normalization, the "
"previous layer's activation may be Linear")
if num_channels is None: if num_channels is None:
if input.num_filters is not None: if input.num_filters is not None:
...@@ -2861,7 +2852,7 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): ...@@ -2861,7 +2852,7 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
@wrap_name_default("addto") @wrap_name_default("addto")
@wrap_act_default(act=LinearActivation()) @wrap_act_default(act=LinearActivation())
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@layer_support(DROPOUT) @layer_support(DROPOUT, ERROR_CLIPPING)
def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
""" """
AddtoLayer. AddtoLayer.
...@@ -2940,7 +2931,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): ...@@ -2940,7 +2931,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
@wrap_act_default(act=IdentityActivation()) @wrap_act_default(act=IdentityActivation())
@wrap_name_default("concat") @wrap_name_default("concat")
@layer_support() @layer_support(DROPOUT, ERROR_CLIPPING)
def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
""" """
Concat all input vector into one huge vector. Concat all input vector into one huge vector.
...@@ -3024,7 +3015,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): ...@@ -3024,7 +3015,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
@wrap_name_default("seqconcat") @wrap_name_default("seqconcat")
@wrap_act_default(act=IdentityActivation()) @wrap_act_default(act=IdentityActivation())
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@layer_support() @layer_support(DROPOUT, ERROR_CLIPPING)
def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
bias_attr=None): bias_attr=None):
""" """
...@@ -3177,7 +3168,7 @@ def memory(name, ...@@ -3177,7 +3168,7 @@ def memory(name,
@wrap_act_default(param_names=['state_act'], act=TanhActivation()) @wrap_act_default(param_names=['state_act'], act=TanhActivation())
@wrap_act_default(act=TanhActivation()) @wrap_act_default(act=TanhActivation())
@wrap_name_default('lstm_step') @wrap_name_default('lstm_step')
@layer_support(ERROR_CLIPPING, DROPOUT) @layer_support()
def lstm_step_layer(input, def lstm_step_layer(input,
state, state,
size=None, size=None,
...@@ -4480,7 +4471,7 @@ def tensor_layer(a, ...@@ -4480,7 +4471,7 @@ def tensor_layer(a,
@wrap_param_attr_default() @wrap_param_attr_default()
@wrap_bias_attr_default() @wrap_bias_attr_default()
@wrap_act_default() @wrap_act_default()
@layer_support() @layer_support(DROPOUT, ERROR_CLIPPING)
def selective_fc_layer(input, def selective_fc_layer(input,
size, size,
select=None, select=None,
...@@ -5974,7 +5965,7 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): ...@@ -5974,7 +5965,7 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
""" """
The crop layer crops images by offset and shape. User can set crop shape by The crop layer crops images by offset and shape. User can set crop shape by
args 'shape' explicitly or by reference input layer. args 'shape' explicitly or by reference input layer.
The example usage is: The example usage is:
.. code-block:: python .. code-block:: python
......
...@@ -166,55 +166,37 @@ def cluster_files_reader(files_pattern, ...@@ -166,55 +166,37 @@ def cluster_files_reader(files_pattern,
return reader return reader
def convert(output_path, def convert(output_path, reader, line_count, name_prefix):
reader,
num_shards,
name_prefix,
max_lines_to_shuffle=1000):
import recordio import recordio
""" """
Convert data from reader to recordio format files. Convert data from reader to recordio format files.
:param output_path: directory in which output files will be saved. :param output_path: directory in which output files will be saved.
:param reader: a data reader, from which the convert program will read data instances. :param reader: a data reader, from which the convert program will read data instances.
:param num_shards: the number of shards that the dataset will be partitioned into.
:param name_prefix: the name prefix of generated files. :param name_prefix: the name prefix of generated files.
:param max_lines_to_shuffle: the max lines numbers to shuffle before writing. :param max_lines_to_shuffle: the max lines numbers to shuffle before writing.
""" """
assert num_shards >= 1 assert line_count >= 1
assert max_lines_to_shuffle >= 1 indx_f = 0
def open_writers():
w = []
for i in range(0, num_shards):
n = "%s/%s-%05d-of-%05d" % (output_path, name_prefix, i,
num_shards - 1)
w.append(recordio.writer(n))
return w
def close_writers(w):
for i in range(0, num_shards):
w[i].close()
def write_data(w, lines): def write_data(indx_f, lines):
random.shuffle(lines) random.shuffle(lines)
for i, d in enumerate(lines): filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f)
writer = recordio.writer(filename)
for l in lines:
# FIXME(Yancey1989): # FIXME(Yancey1989):
# dumps with protocol: pickle.HIGHEST_PROTOCOL # dumps with protocol: pickle.HIGHEST_PROTOCOL
o = pickle.dumps(d) writer.write(cPickle.dumps(l))
w[i % num_shards].write(o) writer.close()
w = open_writers()
lines = [] lines = []
for i, d in enumerate(reader()): for i, d in enumerate(reader()):
lines.append(d) lines.append(d)
if i % max_lines_to_shuffle == 0 and i >= max_lines_to_shuffle: if i % line_count == 0 and i >= line_count:
write_data(w, lines) write_data(indx_f, lines)
lines = [] lines = []
indx_f += 1
continue continue
write_data(w, lines) write_data(indx_f, lines)
close_writers(w)
...@@ -35,6 +35,13 @@ class Inference(object): ...@@ -35,6 +35,13 @@ class Inference(object):
name = param.getName() name = param.getName()
assert isinstance(val, api.Vector) assert isinstance(val, api.Vector)
val.copyFromNumpyArray(parameters.get(name).flatten()) val.copyFromNumpyArray(parameters.get(name).flatten())
# the setValueUpdated function is called in randomize, zeroMem,
# load function in paddle/parameter/Parameter.cpp. But in the
# inference mode, the setValueUpdated is never called, it will
# cause the parameter will not be dispatched
# in MultiGradientMachine for multi-GPU. So setValueUpdated is
# called here, but it's better to call this function in one place.
param.setValueUpdated()
self.__gradient_machine__ = gm self.__gradient_machine__ = gm
self.__data_types__ = topo.data_type() self.__data_types__ = topo.data_type()
......
...@@ -49,7 +49,6 @@ class client(object): ...@@ -49,7 +49,6 @@ class client(object):
def set_dataset(self, paths): def set_dataset(self, paths):
holder_type = ctypes.c_char_p * len(paths) holder_type = ctypes.c_char_p * len(paths)
holder = holder_type() holder = holder_type()
print paths
for idx, path in enumerate(paths): for idx, path in enumerate(paths):
c_ptr = ctypes.c_char_p(path) c_ptr = ctypes.c_char_p(path)
holder[idx] = c_ptr holder[idx] = c_ptr
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册