diff --git a/.travis.yml b/.travis.yml index 44b755ee32d204c883f0d74e7ad0f78380918954..f9b4a7e08315a42a61a58d6c61c45771df962c4d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -50,6 +50,7 @@ before_install: # protobuf version. - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - pip install rarfile + - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: diff --git a/CMakeLists.txt b/CMakeLists.txt index 79210d043648de5d493f0b998eeb885c993a6106..c2218be5efb90142ef7f16b788e141e025105771 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,7 @@ endif(WITH_GPU) add_subdirectory(proto) add_subdirectory(paddle) add_subdirectory(python) +add_subdirectory(go/pserver/cclient) if(WITH_DOC) add_subdirectory(doc) diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 59bd8b91255944a8ef702edd389214c17d0cb35d..c7b017bc07b25bc606fd838a5fb9d3715f4faecb 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -59,6 +59,11 @@ context_projection .. autoclass:: paddle.v2.layer.context_projection :noindex: +row_conv +-------- +.. autoclass:: paddle.v2.layer.row_conv + :noindex: + Image Pooling Layer =================== @@ -346,6 +351,12 @@ sampling_id .. autoclass:: paddle.v2.layer.sampling_id :noindex: +multiplex +--------- +.. autoclass:: paddle.v2.layer.multiplex + :noindex: + + Slicing and Joining Layers ========================== diff --git a/doc/design/cluster_train/pserver_client.md b/doc/design/cluster_train/pserver_client.md index b3e4079010490b69db1de28157f0cab80cad2381..474b8c572cd92fc87e9f7f3f2b19d12cccd158de 100644 --- a/doc/design/cluster_train/pserver_client.md +++ b/doc/design/cluster_train/pserver_client.md @@ -74,14 +74,25 @@ typedef enum { typedef struct { char* name; paddle_element_type element_type; - void* content; + unsigned char* content; int content_len; } paddle_parameter, paddle_gradient; -typedef struct paddle_pserver_client paddle_pserver_client; +typedef int paddle_pserver_client; -paddle_pserver_client* paddle_new_pserver_client(); -void paddle_pserver_client_release(paddle_pserver_client* client); +/** + * @brief creates a pserver client that talks to etcd for coordination. + */ +paddle_pserver_client paddle_new_etcd_pserver_client(char* etcd_addr); + +/** + * @brief creates a pserver client given pserver addresses. + * + * @param pserver_addrs comma-separated pserver addresses. + * @param selected if current pserver client is selected to initialize all parameter servers. + */ +paddle_pserver_client paddle_new_pserver_client(char* pserver_addrs, int selected); +void paddle_pserver_client_release(paddle_pserver_client c); /** * @brief paddle_begin_init_params begins to initialize parameters on @@ -95,7 +106,7 @@ void paddle_pserver_client_release(paddle_pserver_client* client); * @return 1 if the trainer is selected to initialize parameter * servers, otherwise 0. */ -int paddle_begin_init_params(paddle_pserver_client* client); +int paddle_begin_init_params(paddle_pserver_client client); /** * @brief paddle_init_param initializes the parameter on parameter @@ -109,7 +120,7 @@ int paddle_begin_init_params(paddle_pserver_client* client); * @paddle_begin_init_param). Or simply exit the program and wait for * the cluster management system to restart the trainer. 
*/ -int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, const unsigned char* param_config_proto, int config_len); +int paddle_init_param(paddle_pserver_client client, paddle_parameter param, const unsigned char* param_config_proto, int config_len); /** * @brief paddle_finish_init_params tells parameter servers client has * @@ -120,7 +131,7 @@ int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, con * @paddle_begin_init_param). Or simply exit the program and wait for * the cluster management system to restart the trainer. */ -int paddle_finish_init_params(paddle_pserver_client* client); +int paddle_finish_init_params(paddle_pserver_client client); /** * @brief paddle_send_grads sends gradients to parameter servers for * @@ -131,7 +142,7 @@ int paddle_finish_init_params(paddle_pserver_client* client); * @param learning_rate the learning rate for the gradients. * @return 0 if successful, otherwise -1. */ -int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grads, int len); +int paddle_send_grads(paddle_pserver_client client, const paddle_gradient* grads, int len); /** * @brief paddle_get_params gets parameters from parameter servers. * @@ -139,13 +150,15 @@ int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grad * paddle_get_params will block until parameters are initialized on * the parameter servers. * - * @param names the array of names of the parameters to get. - * @param dst the destination array of parameters to save to. + * @param dst the destination array of parameter pointers to save to. + * Each parameter pointer must be pre-populated with the required parameter name, + * and the parameter content must be pre-allocated to the size of the corresponding + * parameter on the pserver. * @param len the length of the names array and the paddle_parameter * array. * @return 0 if successful, otherwise -1. */ -int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_parameter* dst, int len); +int paddle_get_params(paddle_pserver_client client, paddle_parameter** dst, int len); /** * @brief paddle_save_model indicates parameters to save the parameter * @@ -154,5 +167,5 @@ int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_ * @param path the path to save parameters. * @return 0 if successful, otherwise -1. */ -int paddle_save_model(paddle_pserver_client* client, const char* path); +int paddle_save_model(paddle_pserver_client client, const char* path); ``` diff --git a/doc/design/cluster_train/remote_parameter_updater.md b/doc/design/cluster_train/remote_parameter_updater.md new file mode 100644 index 0000000000000000000000000000000000000000..6e8e5938455b869e0f3367794c41250340b37f77 --- /dev/null +++ b/doc/design/cluster_train/remote_parameter_updater.md @@ -0,0 +1,21 @@ +# Design Doc: Remote Parameter Updater for Cluster Train + +For an overview of distributed training, please refer to the [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater that uses the parameter server cclient ([The Client Library of Parameter Server Design Doc](pserver_client.md)) to manage and update parameters. + +## Parameter Updater + +The parameter updater is used by the trainer to manage and update parameters. There are mainly two kinds of parameter updaters: local and remote. Since this design is for cluster training, we will only discuss the remote parameter updater here, as sketched below.
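+ +As a rough usage sketch (pseudocode only, error handling and retries omitted; `addrs`, `selected`, `params`, `config`, `config_len`, `grads`, `dst`, and `n` are illustrative placeholders, not names from the header above), the remote parameter updater is expected to drive the cclient like this: + +```c +paddle_pserver_client c = paddle_new_pserver_client(addrs, selected); +if (paddle_begin_init_params(c)) { + /* only the selected trainer pushes the initial parameter values */ + for (int i = 0; i < n; ++i) { + paddle_init_param(c, params[i], config, config_len); + } + paddle_finish_init_params(c); +} +for (;;) { + /* compute gradients locally, let the pservers apply the update, */ + /* then fetch updated parameters into pre-allocated buffers */ + paddle_send_grads(c, grads, n); + paddle_get_params(c, dst, n); +} +```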
+ +### Remote Parameter Updater + +The remote parameter updater manages parameters through remote parameter servers, using the client that communicates with the pservers ([The Client Library of Parameter Server Design Doc](pserver_client.md)). + +In the PaddlePaddle Python V2 API, the trainer is implemented in Python, and it holds an instance of the parameter updater whose functions it calls directly. In this design, we will also expose the API of RemoteParameterUpdater to Python with SWIG. + +#### Sparse Remote Parameter Updater + +Since we will only implement dense parameter management for now, the mechanism for sparse parameters will be discussed in the next stage. + +### Interface Design + +TBD diff --git a/go/cmake/golang.cmake b/go/cmake/golang.cmake index d38d06de2348821b21109f7dc708314da81111c5..7c85fb6298d02b85ea87af9b6f6e960463443b7d 100644 --- a/go/cmake/golang.cmake +++ b/go/cmake/golang.cmake @@ -17,7 +17,7 @@ function(GO_LIBRARY NAME BUILD_TYPE) endif() file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") - file(RELATIVE_PATH rel ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + file(RELATIVE_PATH rel ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) # find Paddle directory. get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) @@ -32,12 +32,14 @@ function(GO_LIBRARY NAME BUILD_TYPE) # will use the local changes in Paddle rather than checkout Paddle # in github. add_custom_target(copyPaddle - COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}) + COMMAND rm -rf ${PADDLE_IN_GOPATH}/Paddle + COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}/Paddle) add_dependencies(goGet copyPaddle) add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} - -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" + -gcflags=-shared -asmflags=-shared -installsuffix=_shared -a + -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" ${CMAKE_GO_FLAGS} ${GO_SOURCE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/go/cmd/master/master.go b/go/cmd/master/master.go index d1f3d7d76c438670faf6677b01e790c5ebe1f2cb..25cd1cafcdf328094a019638f37f908591f5f374 100644 --- a/go/cmd/master/master.go +++ b/go/cmd/master/master.go @@ -1,80 +1,32 @@ package main import ( - "fmt" "net" "net/http" "net/rpc" - "os" - "path/filepath" "strconv" - "strings" "time" "github.com/namsral/flag" "github.com/PaddlePaddle/Paddle/go/master" - "github.com/PaddlePaddle/recordio" ) func main() { port := flag.Int("port", 8080, "port of the master server.") - dataset := flag.String("training_dataset", "", "dataset: comma separated path to RecordIO paths, supports golb patterns.") + faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).") taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.") taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.") chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.") flag.Parse() - if *dataset == "" { panic("no dataset specified.") - } - if *faultTolerance { panic("fault tolernance not implemented.") - } - - var chunks []master.Chunk - var paths []string - ss := strings.Split(*dataset, ",") - fmt.Println(ss) - for _, s := range ss { - match, err := filepath.Glob(s) - if err != nil { - panic(err) - } - paths = append(paths, match...)
- } - - if len(paths) == 0 { - panic("no valid datset specified.") - } - - idx := 0 - for _, path := range paths { - f, err := os.Open(path) - if err != nil { - panic(err) - } - - index, err := recordio.LoadIndex(f) - if err != nil { - panic(err) - } - f.Close() - count := index.NumChunks() - for i := 0; i < count; i++ { - chunk := master.Chunk{ - Idx: idx, - Path: path, - Index: *index.ChunkIndex(i), - } - chunks = append(chunks, chunk) - } } - s := master.NewService(chunks, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax) + s := master.NewService(*chunkPerTask, *taskTimeoutDur, *taskTimeoutMax) err := rpc.Register(s) if err != nil { panic(err) diff --git a/go/pserver/internal/connection/conn.go b/go/connection/conn.go similarity index 82% rename from go/pserver/internal/connection/conn.go rename to go/connection/conn.go index 1c04f117254054741b7d45fb16462b5ce84a2aea..bc9b5f0617e35f049c3e14f0b441aca2033f9645 100644 --- a/go/pserver/internal/connection/conn.go +++ b/go/connection/conn.go @@ -2,6 +2,7 @@ package connection import ( "errors" + "log" "net/rpc" "sync" ) @@ -21,6 +22,18 @@ func New() *Conn { return c } +// Close closes the connection. +func (c *Conn) Close() error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.client == nil { + return nil + } + + return c.client.Close() +} + // Connect connects the connection to a address. func (c *Conn) Connect(addr string) error { c.mu.Lock() @@ -50,12 +63,20 @@ func (c *Conn) Connect(addr string) error { c.waitConn = nil } } else { + err := client.Close() + if err != nil { + log.Println(err) + } + return errors.New("client already set from a concurrent goroutine") } return nil } +// TODO(helin): refactor Call to be able to perform given retry +// policy. + // Call make a RPC call. // // Call will be blocked until the connection to remote RPC service diff --git a/go/master/client.go b/go/master/client.go new file mode 100644 index 0000000000000000000000000000000000000000..20c66340dc28bc514b6c51583dd94830c42a41bf --- /dev/null +++ b/go/master/client.go @@ -0,0 +1,82 @@ +package master + +import ( + "log" + "time" + + "github.com/PaddlePaddle/Paddle/go/connection" +) + +// Addresser provides the address of the master server. +type Addresser interface { + Address() string +} + +// Client is the client of the master server. +type Client struct { + conn *connection.Conn +} + +// NewClient creates a new Client. +func NewClient(addr Addresser) *Client { + c := &Client{} + c.conn = connection.New() + go c.monitorMaster(addr) + return c +} + +func (c *Client) monitorMaster(addr Addresser) { + lastMaster := "" + monitor := func() { + // get the latest address of the master server, + // and connect to the new address once it changes. + curMaster := addr.Address() + if curMaster != lastMaster { + if curMaster == "" { + err := c.conn.Close() + if err != nil { + log.Println(err) + } + } else { + err := c.conn.Connect(curMaster) + if err != nil { + log.Println(err) + + // connect to addr failed, set + // to last known addr in order + // to retry next time. + curMaster = lastMaster + } + + } + } + + lastMaster = curMaster + } + + monitor() + ticker := time.NewTicker(10 * time.Second) + for _ = range ticker.C { + monitor() + } +} + +// SetDataset sets the dataset for the master server to dispatch. +// +// SetDataset can be called multiple times from different nodes. But +// only the first call will be honored.
+func (c *Client) SetDataset(globPaths []string) error { + return c.conn.Call("Service.SetDataset", globPaths, nil) +} + +// GetTask gets a new task from the master server. +func (c *Client) GetTask() (Task, error) { + var t Task + err := c.conn.Call("Service.GetTask", 0, &t) + return t, err +} + +// TaskFinished tells the master server a task is finished. +func (c *Client) TaskFinished(taskID int) error { + return c.conn.Call("Service.TaskFinished", taskID, nil) +} diff --git a/go/master/client_test.go b/go/master/client_test.go new file mode 100644 index 0000000000000000000000000000000000000000..df708ad7912c07205ae0cee8d2ab1c06d65223cc --- /dev/null +++ b/go/master/client_test.go @@ -0,0 +1,120 @@ +package master_test + +import ( + "fmt" + "net" + "net/http" + "net/rpc" + "os" + "strconv" + "strings" + "testing" + "time" + + log "github.com/sirupsen/logrus" + + "github.com/PaddlePaddle/Paddle/go/master" + "github.com/PaddlePaddle/recordio" +) + +const ( + totalTask = 20 + chunkPerTask = 10 +) + +var port int + +func init() { + log.SetLevel(log.ErrorLevel) + + l, err := net.Listen("tcp", ":0") + if err != nil { + panic(err) + } + + ss := strings.Split(l.Addr().String(), ":") + p, err := strconv.Atoi(ss[len(ss)-1]) + if err != nil { + panic(err) + } + port = p + + go func(l net.Listener) { + s := master.NewService(chunkPerTask, time.Second, 1) + server := rpc.NewServer() + err := server.Register(s) + if err != nil { + panic(err) + } + + mux := http.NewServeMux() + mux.Handle(rpc.DefaultRPCPath, server) + err = http.Serve(l, mux) + if err != nil { + panic(err) + } + }(l) +} + +type addresser string + +func (a addresser) Address() string { + return string(a) +} + +func TestClientFull(t *testing.T) { + const p = "/tmp/master_client_test_0" + f, err := os.Create(p) + if err != nil { + panic(err) + } + + for i := 0; i < totalTask*chunkPerTask; i++ { + w := recordio.NewWriter(f, -1, -1) + w.Write(nil) + // call Close to force RecordIO writing a chunk. + w.Close() + } + f.Close() + + c := master.NewClient(addresser(fmt.Sprintf(":%d", port))) + c.SetDataset([]string{p}) + + checkOnePass := func(i int) { + var tasks []master.Task + for i := 0; i < totalTask; i++ { + task, err := c.GetTask() + if err != nil { + t.Fatal(i, err) + } + tasks = append(tasks, task) + } + + _, err = c.GetTask() + if err == nil { + t.Fatal(i, "should get error.") + } + + err = c.TaskFinished(tasks[0].ID) + if err != nil { + t.Fatal(err) + } + tasks = tasks[1:] + task, err := c.GetTask() + if err != nil { + t.Fatal(err) + } + tasks = append(tasks, task) + + for _, task := range tasks { + err = c.TaskFinished(task.ID) + if err != nil { + t.Fatal(i, err) + } + } + } + + for i := 0; i < 10; i++ { + checkOnePass(i) + } +} diff --git a/go/master/service.go b/go/master/service.go index ab17a62f3854c1e32d731037fcc9857260d03781..1e2a34972bb4763688d7df97d768320cd6f9e9d4 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -2,29 +2,25 @@ package master import ( "errors" - "log" + "os" + "path/filepath" "sync" "time" - "github.com/PaddlePaddle/recordio" -) + log "github.com/sirupsen/logrus" -const ( - targetTaskCount = 300 -) - -// errors -var ( - ErrNoMoreTask = errors.New("no more task for current pass") - ErrPendingTaskNotFound = errors.New("pending task not found") + "github.com/PaddlePaddle/recordio" ) // Service is the master server service. 
type Service struct { - timeoutDur time.Duration - timeoutMax int + chunksPerTask int + timeoutDur time.Duration + timeoutMax int + ready chan struct{} mu sync.Mutex + initDone bool taskQueues taskQueues } @@ -55,7 +51,6 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { if len(cur.Task.Chunks) > 0 { cur.Task.ID = id - id++ result = append(result, cur) } @@ -63,21 +58,21 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { } // NewService creates a new service. -func NewService(chunks []Chunk, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service { +func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service { s := &Service{} + s.chunksPerTask = chunksPerTask s.timeoutDur = timeoutDur s.timeoutMax = timeoutMax s.taskQueues = taskQueues{} s.taskQueues.Pending = make(map[int]taskEntry) - s.taskQueues.Todo = partition(chunks, chunksPerTask) + s.ready = make(chan struct{}) return s } // Chunk is a chunk of data consisted of several data instances. type Chunk struct { - Idx int // index of the chunk within the file Path string - Index recordio.Index // block index + Index recordio.Index // chunk index } // Task is the basic unit of data instances assigned to trainers. @@ -105,74 +100,205 @@ func (s *Service) snapshot() error { return nil } -// GetTask gets a new task from the service. -func (s *Service) GetTask(dummy int, task *Task) error { +func readChunks(globPaths []string) ([]Chunk, error) { + var chunks []Chunk + var paths []string + + for _, s := range globPaths { + match, err := filepath.Glob(s) + if err != nil { + return nil, err + } + paths = append(paths, match...) + } + + if len(paths) == 0 { + return nil, errors.New("no valid dataset specified") + } + + for _, path := range paths { + f, err := os.Open(path) + if err != nil { + return nil, err + } + + index, err := recordio.LoadIndex(f) + if err != nil { + return nil, err + } + err = f.Close() + if err != nil { + return nil, err + } + + count := index.NumChunks() + for i := 0; i < count; i++ { + chunk := Chunk{ + Path: path, + Index: *index.ChunkIndex(i), + } + chunks = append(chunks, chunk) + } + } + + return chunks, nil +} + +// SetDataset sets the dataset to dispatch for the master server. +// +// SetDataset can be called multiple times. But only the first call will +// be honored. +func (s *Service) SetDataset(globPaths []string, dummy *int) error { + if len(globPaths) == 0 { + return errors.New("no dataset specified") + } + s.mu.Lock() defer s.mu.Unlock() + if s.initDone { + // Already initialized. All trainers will call + // SetDataset, but we only handle the first one. Treat + // other calls as successful but do nothing.
+ return nil + } - if len(s.taskQueues.Todo) == 0 { - return ErrNoMoreTask + chunks, err := readChunks(globPaths) + if err != nil { + return err } - t := s.taskQueues.Todo[0] - t.Epoch++ - s.taskQueues.Todo = s.taskQueues.Todo[1:] - s.taskQueues.Pending[t.Task.ID] = t - err := s.snapshot() + s.taskQueues.Todo = partition(chunks, s.chunksPerTask) + + err = s.snapshot() if err != nil { + log.Errorln(err) return err } - time.AfterFunc(s.timeoutDur, func(taskID int, epoch int) func() { - return func() { - s.mu.Lock() - defer s.mu.Unlock() + close(s.ready) + s.initDone = true + return nil +} - t, ok := s.taskQueues.Pending[taskID] - if !ok { - return - } +func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { + return func() { + s.mu.Lock() + defer s.mu.Unlock() - if t.Epoch != epoch { - // new epoch, task launched after the - // schedule of this timeout check. - return + t, ok := s.taskQueues.Pending[taskID] + if !ok { + return + } + + if t.Epoch != epoch { + // new epoch, task launched after the + // schedule of this timeout check. + return + } + + defer func() { + err := s.snapshot() + if err != nil { + log.Errorln(err) } + }() + + delete(s.taskQueues.Pending, t.Task.ID) - defer func() { - err := s.snapshot() - if err != nil { - log.Println(err) - } - }() + t.NumTimeout++ + if t.NumTimeout > s.timeoutMax { + log.Warningf("Task %v failed %d times, discard.\n", t.Task, t.NumTimeout) + s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) + return + } - delete(s.taskQueues.Pending, t.Task.ID) + log.Warningf("Task %v failed %d times, retry.\n", t.Task, t.NumTimeout) + s.taskQueues.Todo = append(s.taskQueues.Todo, t) + } +} - t.NumTimeout++ - if t.NumTimeout > s.timeoutMax { - s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) - return +// GetTask gets a new task from the service. +func (s *Service) GetTask(dummy int, task *Task) error { + select { + case <-s.ready: + } + + s.mu.Lock() + defer s.mu.Unlock() + + if len(s.taskQueues.Todo) == 0 { + if len(s.taskQueues.Done) == 0 { + if len(s.taskQueues.Pending) == 0 { + err := errors.New("all tasks failed") + log.Warningln(err) + return err } - s.taskQueues.Todo = append(s.taskQueues.Todo, t) + // TODO(helin): clients need to retry in this + // error case. Gotcha: RPC client can't + // compare returned error with predefined + // errors like io.EOF, because the error + // instance deserialized from RPC is a + // different instance than the error defined + // in package. So we need to figure out a way + // for the client to check this error correctly. + err := errors.New("no more available task") + log.Warningln(err) + return err } - }(t.Task.ID, t.Epoch)) + s.taskQueues.Todo = s.taskQueues.Done + s.taskQueues.Done = nil + log.Infoln("No more todo tasks, but a trainer is requesting a task. Moving all done tasks to todo.") + } + + t := s.taskQueues.Todo[0] + t.Epoch++ + s.taskQueues.Todo = s.taskQueues.Todo[1:] + s.taskQueues.Pending[t.Task.ID] = t + err := s.snapshot() + if err != nil { + return err + } + + *task = t.Task + log.Infof("Task #%d dispatched\n", task.ID) + + time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.ID, t.Epoch)) return nil } // TaskFinished tell the service that a task is finished.
func (s *Service) TaskFinished(taskID int, dummy *int) error { + select { + case <-s.ready: + } + s.mu.Lock() defer s.mu.Unlock() + log.Infof("Task %d finished\n", taskID) + t, ok := s.taskQueues.Pending[taskID] if !ok { - return ErrPendingTaskNotFound + err := errors.New("pending task not found") + log.Warningln(err) + return err } // task finished, reset timeout t.NumTimeout = 0 s.taskQueues.Done = append(s.taskQueues.Done, t) delete(s.taskQueues.Pending, taskID) - return s.snapshot() + + if len(s.taskQueues.Pending) == 0 && len(s.taskQueues.Todo) == 0 { + log.Infoln("No more todo and pending task, start a new pass.") + s.taskQueues.Todo = append(s.taskQueues.Todo, s.taskQueues.Done...) + s.taskQueues.Done = nil + } + + err := s.snapshot() + if err != nil { + log.Errorln(err) + } + return err } diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt index c017d7465611373309c6c60141fed864f5ccfb5d..e00dd6b14a99f89451220f4b8f00ffb40bf6ca8d 100644 --- a/go/pserver/cclient/CMakeLists.txt +++ b/go/pserver/cclient/CMakeLists.txt @@ -9,5 +9,15 @@ project(cxx_go C Go) include(golang) include(flags) -go_library(client STATIC) +go_library(paddle_pserver_cclient STATIC) + +if(PROJ_ROOT) + add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a + COMMAND cp ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.h ${PROJ_ROOT}/paddle/trainer/ + COMMAND cp ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a ${PROJ_ROOT}/paddle/trainer/ + WORKING_DIRECTORY ${PROJ_ROOT}/paddle + DEPENDS paddle_pserver_cclient) + add_custom_target(paddle_pserver_cclient_lib ALL DEPENDS ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a) +endif(PROJ_ROOT) + add_subdirectory(test) diff --git a/go/pserver/cclient/cclient.go b/go/pserver/cclient/cclient.go index 0b4aa79806b72f4608230d2216d1741389913d95..4476e762dad04009833421056aa5a49efd44ddaa 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/cclient/cclient.go @@ -19,21 +19,9 @@ typedef struct { int content_len; } paddle_parameter, paddle_gradient; -static inline void paddle_release_param(paddle_parameter* param) { - if (param != NULL) { - if (param->name != NULL) { - free(param->name); - } - - if (param->content != NULL) { - free(param->content); - } - - free(param); - } -} - -typedef int client; +typedef int paddle_pserver_client; +#define PSERVER_ERROR -1 +#define PSERVER_OK 0 */ import "C" @@ -48,10 +36,10 @@ import ( var nullPtr = unsafe.Pointer(uintptr(0)) var mu sync.Mutex -var handleMap = make(map[C.client]*pserver.Client) -var curHandle C.client +var handleMap = make(map[C.paddle_pserver_client]*pserver.Client) +var curHandle C.paddle_pserver_client -func add(c *pserver.Client) C.client { +func add(c *pserver.Client) C.paddle_pserver_client { mu.Lock() defer mu.Unlock() client := curHandle @@ -60,13 +48,13 @@ func add(c *pserver.Client) C.client { return client } -func get(client C.client) *pserver.Client { +func get(client C.paddle_pserver_client) *pserver.Client { mu.Lock() defer mu.Unlock() return handleMap[client] } -func remove(client C.client) *pserver.Client { +func remove(client C.paddle_pserver_client) *pserver.Client { mu.Lock() defer mu.Unlock() h := handleMap[client] @@ -100,7 +88,7 @@ func (l lister) List() []pserver.Server { } //export paddle_new_pserver_client -func paddle_new_pserver_client(addrs *C.char, selected int) C.client { +func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_client { a := C.GoString(addrs) as := 
strings.Split(a, ",") servers := make([]pserver.Server, len(as)) @@ -113,27 +101,27 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.client { } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.client { +func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.paddle_pserver_client { // TODO(helin): fault tolerant pserver client using etcd. panic("not implemented.") } //export paddle_pserver_client_release -func paddle_pserver_client_release(client C.client) { +func paddle_pserver_client_release(client C.paddle_pserver_client) { remove(client) } //export paddle_begin_init_params -func paddle_begin_init_params(client C.client) C.int { +func paddle_begin_init_params(client C.paddle_pserver_client) C.int { c := get(client) if selected := c.BeginInitParams(); selected { return 1 } - return 0 + return C.PSERVER_OK } //export paddle_init_param -func paddle_init_param(client C.client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { +func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { et := pserver.ElementType(param.element_type) name := C.GoString(param.name) content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len)) @@ -143,31 +131,41 @@ func paddle_init_param(client C.client, param C.paddle_parameter, param_config u } c := get(client) err := c.InitParam(pc) + if err != nil { + if err.Error() == pserver.AlreadyInitialized { + log.Printf("parameter %s already initialized, treat paddle_init_param as sucessful.\n", name) + return C.PSERVER_OK + } log.Println(err) - return -1 + return C.PSERVER_ERROR } - return 0 + return C.PSERVER_OK } //export paddle_finish_init_params -func paddle_finish_init_params(client C.client) C.int { +func paddle_finish_init_params(client C.paddle_pserver_client) C.int { c := get(client) err := c.FinishInitParams() if err != nil { + if err.Error() == pserver.AlreadyInitialized { + log.Println("parameters already initialized, treat paddle_finish_init_params as sucessful.") + return C.PSERVER_OK + } + log.Println(err) - return -1 + return C.PSERVER_ERROR } - return 0 + return C.PSERVER_OK } //export paddle_send_grads -func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C.int { +func paddle_send_grads(client C.paddle_pserver_client, grads **C.paddle_gradient, total C.int) C.int { var gs []pserver.Gradient for i := 0; i < int(total); i++ { - grad := (*C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) + grad := *(**C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) et := pserver.ElementType(grad.element_type) name := C.GoString(grad.name) content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len)) @@ -178,83 +176,81 @@ func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C err := c.SendGrads(gs) if err != nil { log.Println(err) - return -1 + return C.PSERVER_ERROR } - return 0 + return C.PSERVER_OK } //export paddle_get_params -func paddle_get_params(client C.client, names **C.char, dst **C.paddle_parameter, total C.int) C.int { +func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, total C.int) C.int { var ns []string for i := 0; i < int(total); i++ { - name := *(**C.char)(unsafe.Pointer((uintptr(unsafe.Pointer(names)) + uintptr(i)*unsafe.Sizeof(*names)))) - ns = append(ns, 
C.GoString(name)) + param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) + ns = append(ns, C.GoString(param.name)) } c := get(client) ps, err := c.GetParams(ns) if err != nil { log.Println(err) - return -1 + return C.PSERVER_ERROR } - for i := 0; i < int(total); i++ { - if i >= len(ps) { - break + if len(ps) != len(ns) { + pn := make([]string, len(ps)) + for i, p := range ps { + pn[i] = p.Name } + log.Printf("pserver returned wrong number of parameters. Requested: %s, returned: %s.\n", strings.Join(ns, ", "), strings.Join(pn, ", ")) + return C.PSERVER_ERROR + } + for i := range ps { + if ns[i] != ps[i].Name { + pn := make([]string, len(ps)) + for i, p := range ps { + pn[i] = p.Name + } + log.Printf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.\n", strings.Join(ns, ", "), strings.Join(pn, ", ")) + return C.PSERVER_ERROR + } + } + + for i := 0; i < int(total); i++ { p := ps[i] param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) - nameReady := false - contentAllocated := false if unsafe.Pointer(param) == nullPtr { - param = (*C.paddle_parameter)(C.calloc(1, C.size_t(unsafe.Sizeof(*param)))) + log.Println("must pre-allocate parameter.") + return C.PSERVER_ERROR } else { - if unsafe.Pointer(param.name) != nullPtr { - if n := C.GoString(param.name); n != p.Name { - log.Println("Warning: the pre-allocated parameter name does not match the parameter name, it will be freed.", n, p.Name) - C.free(unsafe.Pointer(param.name)) - } else { - nameReady = true - } - } - if unsafe.Pointer(param.content) != nullPtr { - if int(param.content_len) == len(p.Content) { - contentAllocated = true - } else { - log.Println("Warning: the pre-allocated content len does not match parameter content len, the pre-allocated content will be freed.", param.content_len, len(p.Content)) - C.free(unsafe.Pointer(param.content)) + if int(param.content_len) != len(p.Content) { + log.Printf("the pre-allocated content len does not match parameter content len.
Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content)) + return C.PSERVER_ERROR } } } - if !nameReady { - param.name = C.CString(p.Name) - } - if !contentAllocated { - param.content = (*C.uchar)(C.malloc(C.size_t(len(p.Content)))) - } C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) param.content_len = C.int(len(p.Content)) param.element_type = C.paddle_element_type(p.ElementType) } - return 0 + return C.PSERVER_OK } //export paddle_save_model -func paddle_save_model(client C.client, path *C.char) C.int { +func paddle_save_model(client C.paddle_pserver_client, path *C.char) C.int { p := C.GoString(path) c := get(client) err := c.Save(p) if err != nil { log.Println(err) - return -1 + return C.PSERVER_ERROR } - return 0 + return C.PSERVER_OK } func main() {} // Required but ignored diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/cclient/test/CMakeLists.txt index 16f84648c1de3a8fdb4595c00bdb7608a152ded2..882a894ef2b6d24aed42062a667b69a18204406f 100644 --- a/go/pserver/cclient/test/CMakeLists.txt +++ b/go/pserver/cclient/test/CMakeLists.txt @@ -1,11 +1,22 @@ cmake_minimum_required(VERSION 3.0) -include_directories(${CMAKE_BINARY_DIR}) - add_executable(main main.c) -add_dependencies(main client) +add_dependencies(main paddle_pserver_cclient) +add_executable(test_cclient test_cclient.c) +add_dependencies(test_cclient paddle_pserver_cclient) if(APPLE) set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") +else() + set(CMAKE_EXE_LINKER_FLAGS "-pthread") endif() -target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a) + +if(PROJ_ROOT) + include_directories(${CMAKE_BINARY_DIR}/go/pserver/cclient/) + target_link_libraries(main ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a pthread) + target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a pthread) +else(PROJ_ROOT) + include_directories(${CMAKE_BINARY_DIR}) + target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) + target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) +endif(PROJ_ROOT) diff --git a/go/pserver/cclient/test/main.c b/go/pserver/cclient/test/main.c index f75a2110b947520dfec1265e56eaf2ba7ac3b51b..07e1b86b43299f72e24bb83621847980eaaaf06e 100644 --- a/go/pserver/cclient/test/main.c +++ b/go/pserver/cclient/test/main.c @@ -1,68 +1,90 @@ #include -#include "libclient.h" +#include "libpaddle_pserver_cclient.h" -void fail() { - // TODO(helin): fix: gtest using cmake is not working, using this - // hacky way for now. - printf("test failed.\n"); +// TODO(helin): Fix: gtest using cmake is not working, using this +// hacky way for now. +#define fail() \ + fprintf(stderr, "info: %s:%d: ", __FILE__, __LINE__); \ exit(-1); + +void sendGrads(paddle_pserver_client c) { + unsigned char grad_a[2000] = {2}; + unsigned char grad_b[3000] = {3}; + paddle_gradient grad1 = { + "param_a", PADDLE_ELEMENT_TYPE_FLOAT32, grad_a, 2000}; + paddle_gradient grad2 = { + "param_b", PADDLE_ELEMENT_TYPE_FLOAT32, grad_b, 3000}; + paddle_gradient* grads[2] = {&grad1, &grad2}; + if (paddle_send_grads(c, grads, 2)) { + fail(); + } +} + +void getParams(paddle_pserver_client c) { + paddle_parameter param_a; + paddle_parameter param_b; + char name_a[] = "param_a"; + char name_b[] = "param_b"; + // Must pre-allocate the prameter content before calling paddle_get_params. 
+ unsigned char content_a[2000] = {}; + unsigned char content_b[3000] = {}; + param_a.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + param_a.name = name_a; + param_a.content = content_a; + param_a.content_len = 2000; + param_b.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + param_b.name = name_b; + param_b.content = content_b; + param_b.content_len = 3000; + + paddle_parameter* params[2] = {&param_a, &param_b}; + if (paddle_get_params(c, params, 2)) { + fail(); + } } int main() { char addr[] = "localhost:3000"; - client c = paddle_new_pserver_client(addr, 1); + paddle_pserver_client c = paddle_new_pserver_client(addr, 1); retry: if (paddle_begin_init_params(c)) { paddle_parameter param; char name_a[] = "param_a"; char name_b[] = "param_b"; - unsigned char content[] = {0x00, 0x11, 0x22}; + unsigned char content_a[2000] = {1}; + unsigned char content_b[3000] = {0}; param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; param.name = name_a; - param.content = content; - param.content_len = 3; - if (paddle_init_param(c, param, NULL, 0) != 0) { + param.content = content_a; + param.content_len = 2000; + int error = paddle_init_param(c, param, NULL, 0); + if (error != 0) { goto retry; } - param.element_type = PADDLE_ELEMENT_TYPE_INT32; + + param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; param.name = name_b; - param.content = content; - param.content_len = 3; - if (paddle_init_param(c, param, NULL, 0) != 0) { + param.content = content_b; + param.content_len = 3000; + error = paddle_init_param(c, param, NULL, 0); + if (error != 0) { goto retry; } - if (paddle_finish_init_params(c) != 0) { + + error = paddle_finish_init_params(c); + if (error != 0) { goto retry; } - } else { - fail(); - } - - unsigned char content[] = {0x00, 0x11, 0x22}; - paddle_gradient grads[2] = { - {"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3}, - {"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}}; - - if (!paddle_send_grads(c, grads, 2)) { - fail(); } - paddle_parameter* params[2] = {NULL, NULL}; - char* names[] = {"param_a", "param_b"}; - if (!paddle_get_params(c, names, params, 2)) { - fail(); + int i; + for (i = 0; i < 100; i++) { + sendGrads(c); + getParams(c); } - // get parameters again by reusing the allocated parameter buffers. - if (!paddle_get_params(c, names, params, 2)) { - fail(); - } - - paddle_release_param(params[0]); - paddle_release_param(params[1]); - - if (!paddle_save_model(c, "/tmp/")) { + if (paddle_save_model(c, "/tmp/")) { fail(); } diff --git a/go/pserver/cclient/test/test_cclient.c b/go/pserver/cclient/test/test_cclient.c new file mode 100644 index 0000000000000000000000000000000000000000..0f9c2ef80114d4c5cd887117952f5b7b5d9355f6 --- /dev/null +++ b/go/pserver/cclient/test/test_cclient.c @@ -0,0 +1,117 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "libpaddle_pserver_cclient.h" + +typedef float real; + +void fail() { + // TODO(helin): fix: gtest using cmake is not working, using this + // hacky way for now.
+ printf("test failed.\n"); + exit(-1); +} + +void print_parameter(paddle_gradient* param) { + if (param == NULL) { + printf("param is NULL!!\n"); + } else { + printf("==== parameter ====\n"); + printf("name: %s\n", param->name); + printf("content_len: %d\n", param->content_len); + printf("content_type: %d\n", param->element_type); + int i; + for (i = 0; i < param->content_len / (int)sizeof(real); ++i) { + printf("%f ", ((float*)param->content)[i]); + } + printf("\n\n"); + } +} + +int main() { + char addr[] = "localhost:3000"; + paddle_pserver_client c = paddle_new_pserver_client(addr, 1); + + char* names[] = {"param_a", "param_b"}; + +retry: + printf("init parameter to pserver:\n"); + + real param_content1[] = {0.1, 0.2, 0.3}; + real param_content2[] = {0.4, 0.5, 0.6}; + paddle_parameter** params = + (paddle_parameter**)malloc(sizeof(paddle_parameter*) * 2); + params[0] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); + params[0]->name = names[0]; + params[0]->content = (unsigned char*)param_content1; + params[0]->content_len = 3 * sizeof(real); + params[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + + params[1] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); + params[1]->name = names[1]; + params[1]->content = (unsigned char*)param_content2; + params[1]->content_len = 3 * sizeof(real); + params[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; + + if (paddle_begin_init_params(c)) { + if (paddle_init_param(c, *params[0], NULL, 0) != 0) { + goto retry; + } + if (paddle_init_param(c, *params[1], NULL, 0) != 0) { + goto retry; + } + if (paddle_finish_init_params(c) != 0) { + goto retry; + } + } else { + fail(); + } + + printf("get inited parameters from pserver:\n"); + // get parameters again by reusing the allocated parameter buffers. + if (paddle_get_params(c, params, 2) != 0) { + fail(); + } + print_parameter(params[0]); + print_parameter(params[1]); + + printf("send gradient to pserver:\n"); + real gradient_content1[] = {0.01, 0.02, 0.03}; + real gradinet_content2[] = {0.04, 0.05, 0.06}; + + paddle_gradient** grads = + (paddle_gradient**)malloc(sizeof(paddle_gradient*) * 2); + grads[0] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); + grads[0]->name = names[0]; + grads[0]->content = (unsigned char*)gradient_content1; + grads[0]->content_len = 3 * sizeof(real); + grads[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + + grads[1] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); + grads[1]->name = names[1]; + grads[1]->content = (unsigned char*)gradinet_content2; + grads[1]->content_len = 3 * sizeof(real); + grads[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; + + printf("print gradient sent to pserver:\n"); + print_parameter(grads[0]); + print_parameter(grads[1]); + + if (paddle_send_grads(c, grads, 2) != 0) { + fail(); + } + + printf("get updated parameters from pserver:\n"); + // get parameters again by reusing the allocated parameter buffers. 
+ if (paddle_get_params(c, params, 2) != 0) { + fail(); + } + print_parameter(params[0]); + print_parameter(params[1]); + + if (paddle_save_model(c, "/tmp/") != 0) { + fail(); + } + + return 0; +} diff --git a/go/pserver/cclient/test/test_mnist.py b/go/pserver/cclient/test/test_mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..c3a3af55e2812fa0c965d22ddaba198f43f3c4ad --- /dev/null +++ b/go/pserver/cclient/test/test_mnist.py @@ -0,0 +1,131 @@ +import paddle.v2 as paddle +import gzip + + +def softmax_regression(img): + predict = paddle.layer.fc(input=img, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def multilayer_perceptron(img): + # The first fully-connected layer + hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu()) + # The second fully-connected layer and the corresponding activation function + hidden2 = paddle.layer.fc(input=hidden1, + size=64, + act=paddle.activation.Relu()) + # The third fully-connected layer, note that the hidden size should be 10, + # which is the number of unique digits + predict = paddle.layer.fc(input=hidden2, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def convolutional_neural_network(img): + # first conv layer + conv_pool_1 = paddle.networks.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + num_channel=1, + pool_size=2, + pool_stride=2, + act=paddle.activation.Tanh()) + # second conv layer + conv_pool_2 = paddle.networks.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + num_channel=20, + pool_size=2, + pool_stride=2, + act=paddle.activation.Tanh()) + # The first fully-connected layer + fc1 = paddle.layer.fc(input=conv_pool_2, + size=128, + act=paddle.activation.Tanh()) + # The softmax layer, note that the hidden size should be 10, + # which is the number of unique digits + predict = paddle.layer.fc(input=fc1, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + + # define network topology + images = paddle.layer.data( + name='pixel', type=paddle.data_type.dense_vector(784)) + label = paddle.layer.data( + name='label', type=paddle.data_type.integer_value(10)) + + # Here we can build the prediction network in different ways. Please + # choose one by uncommenting the corresponding line.
+ predict = softmax_regression(images) + #predict = multilayer_perceptron(images) + #predict = convolutional_neural_network(images) + + cost = paddle.layer.classification_cost(input=predict, label=label) + parameters = paddle.parameters.create(cost) + + optimizer = paddle.optimizer.Momentum( + learning_rate=0.1 / 128.0, + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) + + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer, + is_local=False, + pserver_spec="localhost:3000") + + lists = [] + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 1000 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + elif isinstance(event, paddle.event.EndPass): + result = trainer.test(reader=paddle.batch( + paddle.dataset.mnist.test(), batch_size=128)) + print "Test with Pass %d, Cost %f, %s\n" % ( + event.pass_id, result.cost, result.metrics) + lists.append((event.pass_id, result.cost, + result.metrics['classification_error_evaluator'])) + + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=128), + event_handler=event_handler, + num_passes=100) + + # find the best pass + best = sorted(lists, key=lambda list: float(list[1]))[0] + print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) + print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) + + test_creator = paddle.dataset.mnist.test() + test_data = [] + for item in test_creator(): + test_data.append((item[0], )) + if len(test_data) == 100: + break + + # output is a softmax layer. It returns probabilities. + # Shape should be (100, 10) + probs = paddle.infer( + output_layer=predict, parameters=parameters, input=test_data) + print probs.shape + + +if __name__ == '__main__': + main() diff --git a/go/pserver/cclient/test/test_train.py b/go/pserver/cclient/test/test_train.py new file mode 100644 index 0000000000000000000000000000000000000000..3f8d5d793bdeb687c9d234005d9e2eae760cc3a7 --- /dev/null +++ b/go/pserver/cclient/test/test_train.py @@ -0,0 +1,60 @@ +import paddle.v2 as paddle +import paddle.v2.dataset.uci_housing as uci_housing + + +def main(): + # init + paddle.init(use_gpu=False, trainer_count=1) + + # network config + x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) + y_predict = paddle.layer.fc(input=x, + param_attr=paddle.attr.Param(name='w'), + size=1, + act=paddle.activation.Linear(), + bias_attr=paddle.attr.Param(name='b')) + y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) + cost = paddle.layer.mse_cost(input=y_predict, label=y) + + # create parameters + parameters = paddle.parameters.create(cost) + + # create optimizer + optimizer = paddle.optimizer.Momentum(momentum=0) + + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer, + is_local=False, + pserver_spec="localhost:3000") + + # event_handler to print training and testing info + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f" % ( + event.pass_id, event.batch_id, event.cost) + + if isinstance(event, paddle.event.EndPass): + if (event.pass_id + 1) % 10 == 0: + result = trainer.test( + reader=paddle.batch( + uci_housing.test(), batch_size=2), + feeding={'x': 0, + 'y': 1}) + print "Test %d, %.2f" % (event.pass_id, 
result.cost) + + # training + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle( + uci_housing.train(), buf_size=500), + batch_size=2), + feeding={'x': 0, + 'y': 1}, + event_handler=event_handler, + num_passes=30) + + +if __name__ == '__main__': + main() diff --git a/go/pserver/client.go b/go/pserver/client.go index f8bd0aa59f30ec7e2b2d318929af96135d3128ed..afe1eecd015b84684329c0e624f3753852d7a8ce 100644 --- a/go/pserver/client.go +++ b/go/pserver/client.go @@ -6,7 +6,7 @@ import ( "sort" "time" - "github.com/PaddlePaddle/Paddle/go/pserver/internal/connection" + "github.com/PaddlePaddle/Paddle/go/connection" ) // TODO(helin): add RPC call retry logic @@ -47,7 +47,7 @@ func NewClient(l Lister, pserverNum int, sel Selector) *Client { // monitorPservers monitors pserver addresses, and updates connection // when the address changes. func (c *Client) monitorPservers(l Lister, pserverNum int) { - knownServers := make([]Server, pserverNum) + lastServers := make([]Server, pserverNum) ticker := time.NewTicker(10 * time.Second) monitor := func() { curServers := make([]Server, pserverNum) @@ -56,25 +56,37 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) { curServers[l.Index] = l } - for i := range knownServers { - if knownServers[i].Addr != curServers[i].Addr { - err := c.pservers[i].Connect(curServers[i].Addr) + for i := range lastServers { + if lastServers[i].Addr == curServers[i].Addr { + continue + } + + if curServers[i].Addr == "" { + err := c.pservers[i].Close() if err != nil { log.Println(err) - - // connect to addr failed, set - // to last known addr in order - // to retry next time. - curServers[i].Addr = knownServers[i].Addr } + + continue } + + err := c.pservers[i].Connect(curServers[i].Addr) + if err != nil { + log.Println(err) + + // connect to addr failed, set + // to last known addr in order + // to retry next time. + curServers[i].Addr = lastServers[i].Addr + } + } - knownServers = curServers + lastServers = curServers } monitor() - for _ = range ticker.C { + for range ticker.C { monitor() } } @@ -93,16 +105,14 @@ func (c *Client) BeginInitParams() bool { // InitParam initializes the parameter on parameter servers. func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error { - var dummy int - return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, &dummy) + return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, nil) } // FinishInitParams tells parameter servers client has sent all // parameters to parameter servers as initialization. 
func (c *Client) FinishInitParams() error { for _, p := range c.pservers { - var dummy int - err := p.Call("Service.FinishInitParams", dummy, &dummy) + err := p.Call("Service.FinishInitParams", 0, nil) if err != nil { return err } @@ -116,8 +126,7 @@ func (c *Client) SendGrads(grads []Gradient) error { errCh := make(chan error, len(grads)) for _, g := range grads { go func(g Gradient) { - var dummy int - err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, &dummy) + err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, nil) errCh <- err }(g) } @@ -196,8 +205,7 @@ func (c *Client) Save(path string) error { errCh := make(chan error, len(c.pservers)) for _, p := range c.pservers { - var dummy int - err := p.Call("Service.Save", path, &dummy) + err := p.Call("Service.Save", path, nil) errCh <- err } diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index a9a0948a51a31a1c7393f716e3dfc436dbf919af..d0371a26a13fac9daecacd0b6a271caa6d830651 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -117,7 +117,7 @@ func TestClientFull(t *testing.T) { for i := range params { if names[i] != params[i].Name { - t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i]) + t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i].Name) } } } diff --git a/go/pserver/optimizer.c b/go/pserver/optimizer.c index b8da3ec9592053e3efe00e69d73a8ae259a30a2f..f16ba2cbf8e168a434fdcdb4f1e0ba1e98d91c6b 100644 --- a/go/pserver/optimizer.c +++ b/go/pserver/optimizer.c @@ -32,7 +32,13 @@ int update_SGD(void* optimizer, const void* gradient, int num_bytes) { SGD_optimizer* o = (SGD_optimizer*)optimizer; - // TODO + float* parameter = (float*)buffer; + float* grad = (float*)gradient; + + int i; + for (i = 0; i < num_bytes / sizeof(float); ++i) { + parameter[i] -= o->learning_rate * grad[i]; + } return 0; } diff --git a/go/pserver/service.go b/go/pserver/service.go index d5787b9708bb15629a6e6290ffc97ee9885bc8b8..78a2bfaf6347019333bf1c7ee6cdc04d93ab1370 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -9,8 +9,10 @@ import ( // ElementType is the type of elements of a Parameter. type ElementType int -var ErrAlreadyInitialized = errors.New("pserver already initialized") -var ErrUninitialized = errors.New("pserver not fully initialized") +const ( + AlreadyInitialized = "pserver already initialized" + Uninitialized = "pserver not fully initialized" +) // Supported element types const ( @@ -49,7 +51,7 @@ type Service struct { // NewService creates a new service. 
func NewService() *Service { - s := &Service{opt: newOptimizer(sgd, 0.01)} + s := &Service{opt: newOptimizer(sgd, 0.005)} s.paramMap = make(map[string]Parameter) s.initialized = make(chan struct{}) return s } @@ -59,7 +61,7 @@ func NewService() *Service { func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error { select { case <-s.initialized: - return ErrAlreadyInitialized + return errors.New(AlreadyInitialized) default: } @@ -80,7 +82,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { select { case <-s.initialized: - return ErrAlreadyInitialized + return errors.New(AlreadyInitialized) default: } @@ -94,7 +96,7 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error { select { case <-s.initialized: default: - return ErrUninitialized + return errors.New(Uninitialized) } s.mu.Lock() diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index 4c9fac4536e09013916aadb26af3a86a5a775b4f..b746d13e1ca71e697c464f84d915af029d37120c 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -15,8 +15,7 @@ func TestFull(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - var dummy int - err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy) + err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) if err != nil { t.FailNow() } @@ -25,12 +24,12 @@ func TestFull(t *testing.T) { p1.Name = "param_b" p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} p1.ElementType = pserver.Float32 - err = s.InitParam(pserver.ParameterWithConfig{p1, nil}, &dummy) + err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: nil}, nil) if err != nil { t.FailNow() } - err = s.FinishInitParams(0, &dummy) + err = s.FinishInitParams(0, nil) if err != nil { t.FailNow() } @@ -46,11 +45,11 @@ func TestFull(t *testing.T) { } g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) - err = s.SendGrad(g1, &dummy) + err = s.SendGrad(g1, nil) if err != nil { t.FailNow() } - err = s.SendGrad(g2, &dummy) + err = s.SendGrad(g2, nil) if err != nil { t.FailNow() @@ -74,23 +73,21 @@ func TestFull(t *testing.T) { func TestMultipleInit(t *testing.T) { s := pserver.NewService() - var dummy int - err := s.FinishInitParams(0, &dummy) + err := s.FinishInitParams(0, nil) if err != nil { t.FailNow() } - err = s.FinishInitParams(0, &dummy) - if err != pserver.ErrAlreadyInitialized { + err = s.FinishInitParams(0, nil) + if err.Error() != pserver.AlreadyInitialized { t.FailNow() } } func TestUninitialized(t *testing.T) { s := pserver.NewService() - var dummy int - err := s.SendGrad(pserver.Gradient{}, &dummy) - if err != pserver.ErrUninitialized { + err := s.SendGrad(pserver.Gradient{}, nil) + if err.Error() != pserver.Uninitialized { t.FailNow() } } @@ -98,13 +95,14 @@ func TestUninitialized(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) { s := pserver.NewService() ch := make(chan struct{}, 2) + errCh := make(chan error, 2) var wg sync.WaitGroup wg.Add(1) go func() { var param pserver.Parameter err := s.GetParam("param_a", &param) if err != nil { - t.FailNow() + errCh <- err } wg.Done() ch <- struct{}{} }() wg.Add(1) go func() { - var dummy int - err := s.Save("", &dummy) + err := s.Save("", nil) if err != nil { - t.FailNow() + errCh <- err } wg.Done() ch <- struct{}{} }() @@ -127,6 +124,8 @@ func
TestBlockUntilInitialized(t *testing.T) { case <-ch: // some function returned before initialization is completed. t.FailNow() + case <-errCh: + t.FailNow() default: } @@ -134,13 +133,12 @@ func TestBlockUntilInitialized(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - var dummy int - err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy) + err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) if err != nil { t.FailNow() } - err = s.FinishInitParams(0, &dummy) + err = s.FinishInitParams(0, nil) if err != nil { t.FailNow() } diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 071bc36c2ded51ba977350aeae15f6d244cea5be..c9433a38de4d005ebe229c55916401a5f82e9ef3 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -16,7 +16,7 @@ set(API_HEADER Internal.h) add_library(paddle_api STATIC ${API_SOURCES}) -add_dependencies(paddle_api gen_proto_cpp) +add_dependencies(paddle_api gen_proto_cpp paddle_pserver_cclient_lib) INCLUDE(${SWIG_USE_FILE}) INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) @@ -45,7 +45,7 @@ SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS ) IF(APPLE) - SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load") + SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load -framework CoreFoundation -framework Security") ELSE(APPLE) SET(START_GROUP "-Xlinker -start-group") SET(END_GROUP "-Xlinker -end-group") diff --git a/paddle/api/Paddle.i b/paddle/api/Paddle.i index 068ba286c07d8854a1a7c7042224a679b50b4957..3237e73745dca58bed923b20851f0f0039a3487c 100644 --- a/paddle/api/Paddle.i +++ b/paddle/api/Paddle.i @@ -179,6 +179,7 @@ namespace std { %newobject ParameterOptimizer::needSpecialTraversal; %newobject ParameterUpdater::createLocalUpdater; %newobject ParameterUpdater::createRemoteUpdater; +%newobject ParameterUpdater::createNewRemoteUpdater; %feature("director") UpdateCallback; %feature("autodoc", 1); // To generate method stub, for code hint in ide diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index da0f157abd68c73c45f498cf9ef2726aac67c95b..7565ea51fe3e71bf81a28e6e4b5a2bbdd085798c 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -841,6 +841,8 @@ public: static ParameterUpdater* createRemoteUpdater(OptimizationConfig* config, int passCount, bool useSparseUpdater); + static ParameterUpdater* createNewRemoteUpdater( + OptimizationConfig* config, const std::string pserverSpec); ~ParameterUpdater(); /** diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp index 79921ea6e787f3c0ebecaad6a9a54bac92211320..eaf8518ae2beaa93bc40ee944c984d142d2bb951 100644 --- a/paddle/api/ParameterUpdater.cpp +++ b/paddle/api/ParameterUpdater.cpp @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include "PaddleAPI.h" #include "PaddleAPIPrivate.h" +#include "paddle/trainer/NewRemoteParameterUpdater.h" #include "paddle/trainer/RemoteParameterUpdater.h" #include "paddle/trainer/ThreadParameterUpdater.h" @@ -28,6 +29,14 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater( return updater; } +ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( + OptimizationConfig *config, const std::string pserverSpec) { + auto updater = new ParameterUpdater(); + updater->m->updater.reset(new paddle::NewRemoteParameterUpdater( + config->m->getConfig(), pserverSpec)); + return updater; +} + ParameterUpdater *ParameterUpdater::createRemoteUpdater( OptimizationConfig *config, int passCount, bool useSparseUpdater) { auto updater = new ParameterUpdater(); diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 233a53709a80f06dd2a06995b159c1aef10e2788..1f54ac1231c6ac2e19b25bb336292194c63c11e9 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -28,6 +28,7 @@ if(WITH_TESTING) add_simple_unittest(PadOpTest) add_simple_unittest(MulOpTest) add_simple_unittest(CosSimOpTest) + add_simple_unittest(RowConvOpTest) endif() endif() diff --git a/paddle/function/RowConvOp.cpp b/paddle/function/RowConvOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6501e8f4db7fd33891cd80e07a6f36dd0b34532 --- /dev/null +++ b/paddle/function/RowConvOp.cpp @@ -0,0 +1,225 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "RowConvOp.h" +#include <iostream> +#include "paddle/math/Vector.h" + +namespace paddle { + +template <> +void RowConv<DEVICE_TYPE_CPU>(CpuMatrix& out, + const CpuMatrix& in, + const CpuMatrix& filter, + const CpuIVector& seq) { + const int* starts = seq.getData(); + const size_t numSeq = seq.getSize() - 1; + const size_t contextLength = filter.getHeight(); + for (size_t i = 0; i < numSeq; ++i) { + size_t begin = starts[i]; + size_t end = starts[i + 1]; + for (size_t j = begin; j < end; ++j) { + MatrixPtr x; + MatrixPtr w; + if ((j + contextLength) < end) { + x = (const_cast<CpuMatrix&>(in)).subMatrix(j, contextLength); + w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, contextLength); + } else { + x = (const_cast<CpuMatrix&>(in)).subMatrix(j, end - j); + w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, end - j); + } + MatrixPtr y = out.subMatrix(j, 1); + y->addDotMulVMM(*x, *w); + } + } +} + +template <> +void RowConvGrad<DEVICE_TYPE_CPU>(const CpuMatrix& outG, + const CpuMatrix& in, + const CpuMatrix& filter, + CpuMatrix& inG, + CpuMatrix& filterG, + const CpuIVector& seq) { + // gradient w.r.t filter + const int* starts = seq.getData(); + const size_t numSeq = seq.getSize() - 1; + const size_t contextLength = filter.getHeight(); + if (filterG) { + for (size_t i = 0; i < numSeq; ++i) { + size_t begin = starts[i]; + size_t end = starts[i + 1]; + size_t steps = end - begin; + for (size_t j = 0; j < contextLength && (begin + j) < end; ++j) { + MatrixPtr x = + (const_cast<CpuMatrix&>(in)).subMatrix(begin + j, steps - j); + MatrixPtr dy = + (const_cast<CpuMatrix&>(outG)).subMatrix(begin, steps - j); + MatrixPtr dw = filterG.subMatrix(j, 1); + dw->addDotMulVMM(*dy, *x); + } + } + } + + // gradient w.r.t input feature + if (inG) { + for (size_t i = 0; i < numSeq; ++i) { + size_t begin = starts[i]; + size_t end = starts[i + 1]; + size_t steps = end - begin; + for (size_t j = 0; j < steps; ++j) { + MatrixPtr dx = inG.subMatrix(begin + j, 1); + for (size_t t = 0; t < contextLength; ++t) { + if (int(j - t) >= 0) { + MatrixPtr dy = + (const_cast<CpuMatrix&>(outG)).subMatrix(begin + j - t, 1); + MatrixPtr w = (const_cast<CpuMatrix&>(filter)).subMatrix(t, 1); + dx->addDotMul(*dy, *w, 1.0, 1.0); + } + } + } + } + } +} + +/** + * \brief The row convolution is also called lookahead convolution. It was + * first introduced in the Deep Speech 2 system. A bidirectional RNN learns + * a representation for a sequence by performing a forward and a backward + * pass through the entire sequence. However, unlike unidirectional RNNs, + * bidirectional RNNs are challenging to deploy in an online and low-latency + * setting. The lookahead convolution incorporates information from future + * subsequences in a computationally efficient manner to improve + * unidirectional recurrent neural networks. + * + * The connection of row convolution is different from the 1D sequence + * convolution. Assume the future context length is k; then the output at + * timestep t is computed from the input features from the t-th timestep up + * to the (t+k)-th timestep. Assume the hidden dimension of the input + * activations is d; the activations r_t for the new layer at timestep t are: + * + *     r(t,i) = sum_{j=1}^{k+1} W(i,j) * h(t+j-1, i),  for (1 <= i <= d) + * + * The weight shape is: (k + 1) x d + * Function Arguments: + * + * \param inputs[0] The input activations. + * \param inputs[1] The filter (or weight), whose shape is (k+1) x d. + * \param outputs[0] The output activations. + * + * [1] Dario Amodei, et al. Deep Speech 2: End-to-End Speech Recognition in + * English + * and Mandarin. https://arxiv.org/abs/1512.02595 + */
+ +template <DeviceType Device> +class RowConvFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override {} + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + // check + CHECK_EQ(2UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); + // TODO(qingqing): support ASSIGN_TO. + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) + << "SequenceArg required here."; + const auto in = dynamic_cast<const SequenceArg&>(inputs[0]); + auto out = dynamic_cast<const SequenceArg&>(outputs[0]); + auto w = inputs[1]; + CHECK(in.data() && out.data() && in.getSequenceId().data()); + CHECK_EQ(in.shape().ndims(), 2UL); + CHECK(in.shape() == out.shape()); + CHECK_EQ(w.shape()[1], in.shape()[1]); + + auto outMat = out.matrix<Device>(); + const auto inMat = in.matrix<Device>(); + const auto wMat = w.matrix<Device>(); + const auto seqId = in.getSequenceId().vector<int, Device>(); + + RowConv<Device>(outMat, inMat, wMat, seqId); + } +}; + +/** + * \brief The backward of the row convolution function. This function + * calculates the gradient w.r.t the filter and the gradient w.r.t the input + * activations (or data). + * + * Arguments in this Function: + * + * \param inputs[0] The gradient w.r.t output activations. + * \param inputs[1] The input activations. + * \param inputs[2] The filter (or weight), whose shape is (k+1) x d. + * \param outputs[0] The gradient w.r.t input activations. + * \param outputs[1] The gradient w.r.t filter. + * + * Abbreviation: + * w.r.t: with respect to. + */ + +template <DeviceType Device> +class RowConvGradFunc : public FunctionBase { + // TODO(qingqing): split into RowConvDataFunc and RowConvWeightFunc +public: + void init(const FuncConfig& config) override {} + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + // check + CHECK_EQ(3UL, inputs.size()); + CHECK_EQ(2UL, outputs.size()); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + CHECK_EQ(outputs[1].getArgType(), ADD_TO); + CHECK(inputs[0].isSequenceArg() && inputs[1].isSequenceArg() && + outputs[0].isSequenceArg()) + << "SequenceArg required here."; + + const auto outGrad = dynamic_cast<const SequenceArg&>(inputs[0]); + const auto in = dynamic_cast<const SequenceArg&>(inputs[1]); + const auto w = inputs[2]; + auto inGrad = dynamic_cast<const SequenceArg&>(outputs[0]); + auto wGrad = outputs[1]; + + CHECK_EQ(in.shape().ndims(), 2UL); + CHECK(in.shape() == inGrad.shape()); + CHECK(in.shape() == outGrad.shape()); + CHECK_EQ(wGrad.shape()[1], in.shape()[1]); + + const auto outGMat = outGrad.matrix<Device>(); + const auto inMat = in.matrix<Device>(); + const auto wMat = w.matrix<Device>(); + auto inGMat = inGrad.data() + ? inGrad.matrix<Device>() + : typename Tensor<real, Device>::Matrix(nullptr, 0, 0); + auto wGMat = wGrad.data() + ? wGrad.matrix<Device>() + : typename Tensor<real, Device>::Matrix(nullptr, 0, 0); + const auto seqId = in.getSequenceId().vector<int, Device>(); + + RowConvGrad<Device>(outGMat, inMat, wMat, inGMat, wGMat, seqId); + } +}; + +REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc); +REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc); +REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc); +#endif + +} // namespace paddle
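Before the header that declares these functions, it is worth pinning down the arithmetic. The forward rule documented above can be cross-checked with a short NumPy sketch (illustrative only, not part of the patch; the helper name and the dense `starts` offset layout are assumptions mirroring the `seq` argument):

```python
import numpy as np

def row_conv_forward(x, w, starts):
    # x: (h, d) stacked timesteps of all sequences in the mini-batch.
    # w: (context, d) lookahead filter, where context = k + 1.
    # starts: sequence start offsets, e.g. [0, len0, len0 + len1, ...].
    out = np.zeros_like(x)
    context = w.shape[0]
    for begin, end in zip(starts[:-1], starts[1:]):
        for t in range(begin, end):
            steps = min(context, end - t)  # window truncated at sequence end
            # out[t, i] = sum_j w[j, i] * x[t + j, i], matching addDotMulVMM
            out[t] = (w[:steps] * x[t:t + steps]).sum(axis=0)
    return out
```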
diff --git a/paddle/function/RowConvOp.h b/paddle/function/RowConvOp.h new file mode 100644 index 0000000000000000000000000000000000000000..2c5de6151aa2ed73ace1569d880b1c3677c195c4 --- /dev/null +++ b/paddle/function/RowConvOp.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Function.h" + +namespace paddle { + +/** + * \brief The forward of row convolution. + * + * \param[out] out The output data, whose shape is h x d. h is the sum of + * the time steps of all samples in one mini-batch. + * \param[in] in The input data, whose shape is h x d. + * \param[in] filter The filter, whose shape is k x d. The lookahead step + * number plus one equals k. + * \param[in] seq The sequence start positions. + * + */ +template <DeviceType Device> +void RowConv(typename Tensor<real, Device>::Matrix& out, + const typename Tensor<real, Device>::Matrix& in, + const typename Tensor<real, Device>::Matrix& filter, + const typename Tensor<int, Device>::Vector& seq); + +/** + * \brief The backward of row convolution. + * + * \param[in] outG The gradient w.r.t output data. + * \param[in] in The input data. + * \param[in] filter The filter. + * \param[out] inG The gradient w.r.t input data. + * \param[out] filterG The gradient w.r.t filter. + * \param[in] seq The sequence start positions. + * + */ +template <DeviceType Device> +void RowConvGrad(const typename Tensor<real, Device>::Matrix& outG, + const typename Tensor<real, Device>::Matrix& in, + const typename Tensor<real, Device>::Matrix& filter, + typename Tensor<real, Device>::Matrix& inG, + typename Tensor<real, Device>::Matrix& filterG, + const typename Tensor<int, Device>::Vector& seq); +} // namespace paddle
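The two declarations above fix the gradient contracts. The math they promise follows directly from the forward rule: the filter gradient correlates the output gradient with future inputs, and the input gradient convolves the output gradient backwards with the filter. A NumPy sketch (again illustrative only, same assumed layout as the forward sketch):

```python
import numpy as np

def row_conv_backward(dy, x, w, starts):
    # dy, x: (h, d); w: (context, d). Returns (dx, dw).
    dx, dw = np.zeros_like(x), np.zeros_like(w)
    context = w.shape[0]
    for begin, end in zip(starts[:-1], starts[1:]):
        for t in range(begin, end):
            steps = min(context, end - t)
            # dw[j, i] += dy[t, i] * x[t + j, i] for every valid lookahead j
            dw[:steps] += dy[t] * x[t:t + steps]
            # dx[t, i] = sum_j w[j, i] * dy[t - j, i], only for t - j >= begin
            back = min(context, t - begin + 1)
            dx[t] = (w[:back] * dy[t - back + 1:t + 1][::-1]).sum(axis=0)
    return dx, dw
```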
diff --git a/paddle/function/RowConvOpGpu.cu b/paddle/function/RowConvOpGpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..c0b947e224313abaf4fadfb8293dc78ca085ff84 --- /dev/null +++ b/paddle/function/RowConvOpGpu.cu @@ -0,0 +1,344 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "hl_base.h" +#include "RowConvOp.h" + +namespace paddle { + +template <int BLOCK_H, int BLOCK_W> +__global__ void KeRowConv(real* y, const real* x, const real* w, + const int* starts, const int height, const int width, + const int numSeq, const int context) { + + const int tidx = threadIdx.x; + const int tidy = threadIdx.y; + const int blky = blockDim.y; + const int gidx = blockIdx.x * blockDim.x; + + __shared__ real sw[BLOCK_H][BLOCK_W]; + + for (int i = tidy; i < context; i += blky) { + sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0; + } + + __syncthreads(); + + for (int i = 0; i < numSeq; ++i) { + const int start = starts[i]; + const int end = starts[i + 1]; + const int steps = end - start; + for (int j = tidy; j < steps; j += blky) { + real sum = 0; + int off = (start + j) * width; + for (int t = 0; t < context; ++t) { + if ((start + j + t) < end) { + int xoff = off + t * width; + real xVal = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0; + sum += sw[t][tidx] * xVal; + } + } + if (gidx + tidx < width) { + y[off + gidx + tidx] += sum; + } + } + } +} + +__global__ void KeRowConv2(real* y, const real* x, const real* w, + const int* starts, const int height, const int width, + const int numSeq, const int context) { + const int tidx = threadIdx.x; + const int tidy = threadIdx.y; + const int blky = blockDim.y; + const int gidx = blockIdx.x * blockDim.x; + + for (int i = 0; i < numSeq; ++i) { + const int start = starts[i]; + const int end = starts[i + 1]; + const int steps = end - start; + for (int j = tidy; j < steps; j += blky) { + int off = (start + j) * width; + real sum = 0; + for (int t = 0; t < context && (start + j + t) < end; ++t) { + int xoff = off + t * width; + real xd = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0; + real wd = gidx + tidx < width ? w[t * width + gidx + tidx] : 0.0; + sum += wd * xd; + } + if (gidx + tidx < width) { + y[off + gidx + tidx] += sum; + } + } + } +} + + + +template <> +void RowConv<DEVICE_TYPE_GPU>(GpuMatrix& out, + const GpuMatrix& in, + const GpuMatrix& filter, + const GpuIVector& seq) { + const size_t numSeq = seq.getSize() - 1; + const size_t contextLength = filter.getHeight(); + const size_t height = in.getHeight(); + const size_t width = in.getWidth(); + + real* y = out.getData(); + const real* x = in.getData(); + const real* w = filter.getData(); + const int* starts = seq.getData(); + + dim3 dimBlock(32, 32); + dim3 dimGrid(DIVUP(width, dimBlock.x), 1); + + if (contextLength <= 32) { + KeRowConv<32, 32><<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>> + (y, x, w, starts, height, width, numSeq, contextLength); + } else { + KeRowConv2<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>> + (y, x, w, starts, height, width, numSeq, contextLength); + } + CHECK_SYNC("RowConv"); +} + + +template <int BLOCK_H, int BLOCK_W, int CONTEXT> +__global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy, + const int* starts, const int height, const int width, const int numSeq, + const int context) { + + const int tidx = threadIdx.x; + const int tidy = threadIdx.y; + const int blky = blockDim.y; + const int gidx = blockIdx.x * blockDim.x; + + __shared__ real sh_x[BLOCK_W][BLOCK_H]; + __shared__ real sh_dy[BLOCK_W][BLOCK_H + CONTEXT - 1]; + __shared__ real sh_dw[CONTEXT][BLOCK_W]; + + if (tidy < context) { + sh_dw[tidy][tidx] = 0.0; + } + __syncthreads(); + + for (int i = 0; i < numSeq; ++i) { + const int start = starts[i]; + const int end = starts[i + 1]; + const int steps = end - start; + const int size = ((steps + BLOCK_H - 1)/BLOCK_H) * BLOCK_H; + for (int j = tidy; j < size; j += BLOCK_H) { + int xoff = gidx + tidx; + int yoff = start + j; + + // transpose + sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; + sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0; + __syncthreads(); + if (tidy < (context - 1)) { + yoff = yoff - context + 1; + sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0; + } + __syncthreads(); + + for (int t = 0; t < context; t++) { + real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx + context - 1 - t]; + __syncthreads(); + // warp size and blockDim.x is 32. + val += __shfl_down(val, 16); + val += __shfl_down(val, 8); + val += __shfl_down(val, 4); + val += __shfl_down(val, 2); + val += __shfl_down(val, 1); + __syncthreads(); + if (tidx == 0) { + sh_dw[t][tidy] += val; + } + __syncthreads(); + } + } + } + + for (int t = tidy; (t < context) && ((gidx + tidx) < width); t += blky) { + dw[t * width + gidx + tidx] += sh_dw[t][tidx]; + } +} + +template <int BLOCK_H, int BLOCK_W> +__global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy, + const int* starts, const int height, const int width, const int numSeq, + const int context) { + + const int tidx = threadIdx.x; + const int tidy = threadIdx.y; + const int gidx = blockIdx.x * blockDim.x; + + __shared__ real sh_x[BLOCK_H][BLOCK_W]; + __shared__ real sh_dy[BLOCK_H][BLOCK_W]; + + for (int i = 0; i < numSeq; ++i) { + const int start = starts[i]; + const int end = starts[i + 1]; + const int steps = end - start; + + const int size = ((steps + BLOCK_H - 1)/BLOCK_H) * BLOCK_H; + for (int j = tidy; j < size; j += BLOCK_H) { + int xoff = gidx + tidx; + int yoff = start + j; + + // transpose + sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; + __syncthreads(); + + for (int t = 0; t < context; t++) { + sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0; + __syncthreads(); + + real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx]; + __syncthreads(); + // warp size and blockDim.x is 32. + val += __shfl_down(val, 16); + val += __shfl_down(val, 8); + val += __shfl_down(val, 4); + val += __shfl_down(val, 2); + val += __shfl_down(val, 1); + __syncthreads(); + + if (tidx == 0 && (gidx + tidy) < width) { + dw[t*width + gidx + tidy] += val; + } + } + } + } +} + +template <int BLOCK_H, int BLOCK_W> +__global__ void KeRowConvBwData(real* dx, const real* w, const real* dy, + const int* starts, const int height, const int width, const int numSeq, + const int context) { + + const int tidx = threadIdx.x; + const int tidy = threadIdx.y; + const int blky = blockDim.y; + const int gidx = blockIdx.x * blockDim.x; + + __shared__ real sw[BLOCK_H][BLOCK_W]; + + for (int i = tidy; i < context; i += blky) { + sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0; + } + + __syncthreads(); + + for (int i = 0; i < numSeq; ++i) { + const int start = starts[i]; + const int end = starts[i + 1]; + const int steps = end - start; + for (int j = tidy; j < steps; j += blky) { + real sum = 0; + int off = (start + j) * width; + for (int t = 0; t < context && (j - t) >= 0; ++t) { + int dyOff = off - t * width; + real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0; + sum += sw[t][tidx] * dyVal; + } + if (gidx + tidx < width) { + dx[off + gidx + tidx] += sum; + } + } + } +} + +__global__ void KeRowConvBwData2(real* dx, const real* w, const real* dy, + const int* starts, const int height, const int width, const int numSeq, + const int context) { + + const int tidx = threadIdx.x; + const int tidy = threadIdx.y; + const int blky = blockDim.y; + const int gidx = blockIdx.x * blockDim.x; + + for (int i = 0; i < numSeq; ++i) { + const int start = starts[i]; + const int end = starts[i + 1]; + const int steps = end - start; + for (int j = tidy; j < steps; j += blky) { + real sum = 0; + int off = (start + j) * width; + for (int t = 0; t < context && (j - t) >= 0; ++t) { + int dyOff = off - t * width; + real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0; + real wVal = gidx + tidx < width ? w[t * width + gidx + tidx] : 0.0; + sum += wVal * dyVal; + } + if (gidx + tidx < width) { + dx[off + gidx + tidx] += sum; + } + } + } +} + + +template <> +void RowConvGrad<DEVICE_TYPE_GPU>(const GpuMatrix& outG, + const GpuMatrix& in, + const GpuMatrix& filter, + GpuMatrix& inG, + GpuMatrix& filterG, + const GpuIVector& seq) { + const size_t numSeq = seq.getSize() - 1; + const size_t contextLength = filter.getHeight(); + const size_t height = in.getHeight(); + const size_t width = in.getWidth(); + + const real* dy = outG.getData(); + const real* x = in.getData(); + const real* w = filter.getData(); + const int* starts = seq.getData(); + + if (filterG) { + dim3 dimBlock(32, 32); + dim3 dimGrid(DIVUP(width, dimBlock.x), 1); + real* dw = filterG.getData(); + if (contextLength <= 32) { + KeRowConvBwWeight<32, 32, 32> + <<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>> + (dw, x, dy, starts, height, width, numSeq, contextLength); + } else { + KeRowConvBwWeight2<32, 32> + <<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>> + (dw, x, dy, starts, height, width, numSeq, contextLength); + } + } + + if (inG) { + real* dx = inG.getData(); + dim3 dimBlock2(32, 32); + dim3 dimGrid2(DIVUP(width, dimBlock2.x), 1); + if (contextLength <= 64) { + KeRowConvBwData<32, 64> + <<<dimGrid2, dimBlock2, 0, STREAM_DEFAULT>>> + (dx, w, dy, starts, height, width, numSeq, contextLength); + } else { + KeRowConvBwData2 + <<<dimGrid2, dimBlock2, 0, STREAM_DEFAULT>>> + (dx, w, dy, starts, height, width, numSeq, contextLength); + } + } + + CHECK_SYNC("RowConvGrad"); +} + +} // namespace paddle
diff --git a/paddle/function/RowConvOpTest.cpp b/paddle/function/RowConvOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1c95d3ff2cccbf33f4c5f91f6daf340871a8f7b0 --- /dev/null +++ b/paddle/function/RowConvOpTest.cpp @@ -0,0 +1,62 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include <gtest/gtest.h> +#include "FunctionTest.h" + +namespace paddle { + +void testRowConvFw(size_t batchSize, size_t dim, size_t contextLength) { + FunctionCompare test("RowConv", FuncConfig()); + + test.addSequence(SequenceIdArg(TensorShape{batchSize})); + test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim})); + + test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}), + ADD_TO); + + test.run(); +} + +void testRowConvBw(size_t batchSize, size_t dim, size_t contextLength) { + FunctionCompare test("RowConvGrad", FuncConfig()); + + test.addSequence(SequenceIdArg(TensorShape{batchSize})); + test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim})); + test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim})); + + test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}), + ADD_TO); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}), + ADD_TO); + + test.run(); +} + +TEST(RowConv, real) { + for (size_t numSamples : {17, 129, 2020}) { + for (size_t dim : {16, 512, 2560}) { + for (size_t context : {3, 19, 65}) { + VLOG(3) << " numSamples=" << numSamples << " dim=" << dim + << " context length=" << context; + testRowConvFw(numSamples, dim, context); + testRowConvBw(numSamples, dim, context); + } + } + } +} + +} // namespace paddle
diff --git a/paddle/gserver/layers/RowConvLayer.cpp b/paddle/gserver/layers/RowConvLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..54d77999ad5b30a8d9f4feaa02d81417957544a7 --- /dev/null +++ b/paddle/gserver/layers/RowConvLayer.cpp @@ -0,0 +1,106 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/ + +#include "RowConvLayer.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_LAYER(row_conv, RowConvLayer); + +bool RowConvLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + + contexLength_ = config_.inputs(0).row_conv_conf().context_length(); + + CHECK_EQ(inputLayers_.size(), 1UL); + weight_.reset(new Weight(contexLength_, getSize(), parameters_[0])); + createFunction(forward_, "RowConv", FuncConfig()); + createFunction(backward_, "RowConvGrad", FuncConfig()); + + return true; +} + +void RowConvLayer::forward(PassType passType) { + Layer::forward(passType); + MatrixPtr input = getInputValue(0); + size_t height = input->getHeight(); + size_t width = input->getWidth(); + CHECK_EQ(width, getSize()); + resetOutput(height, width); + + const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_); + MatrixPtr w = weight_->getW(); + wDims_ = TensorShape({w->getHeight(), w->getWidth()}); + + MatrixPtr outV = getOutputValue(); + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getInputValue(0), *startPos); + inputs.addArg(*w, wDims_); + outputs.addArg(*getOutputValue(), *startPos, ADD_TO); + + { + REGISTER_TIMER_INFO("RowConvForward", getName().c_str()); + forward_[0]->calc(inputs, outputs); + } + + /* activation */ { + REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); + forwardActivation(); + } +} + +void RowConvLayer::backward(const UpdateCallback& callback) { + /* Do derivation */ { + REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); + backwardActivation(); + } + + const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getOutputGrad(), *startPos); + inputs.addArg(*getInputValue(0), *startPos); + inputs.addArg(*weight_->getW(), wDims_); + + MatrixPtr inGrad = getInputGrad(0); + MatrixPtr wGrad = weight_->getWGrad(); + size_t h = getInputValue(0)->getHeight(); + size_t w = getInputValue(0)->getWidth(); + outputs.addArg( + inGrad ? (*inGrad) : *(Matrix::create(nullptr, h, w, false, useGpu_)), + *startPos, + ADD_TO); + outputs.addArg( + wGrad ? (*wGrad) + : *(Matrix::create(nullptr, contexLength_, w, false, useGpu_)), + wDims_, + ADD_TO); + + { + REGISTER_TIMER_INFO("RowConvBackward", getName().c_str()); + backward_[0]->calc(inputs, outputs); + } + + { + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weight_->getParameterPtr()->incUpdate(callback); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/RowConvLayer.h b/paddle/gserver/layers/RowConvLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..b3bdda2f3542b312eda531695c975ff2dfc29c93 --- /dev/null +++ b/paddle/gserver/layers/RowConvLayer.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" + +namespace paddle { + +/** + * \brief Row Convolution Layer. 
+ */ +class RowConvLayer : public Layer { +public: + explicit RowConvLayer(const LayerConfig& config) : Layer(config) {} + + ~RowConvLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +protected: + // Row convolution weight, contexLength_ * fan_out. + // fan_out is the size of the output feature. + std::unique_ptr<Weight> weight_; + + // The step number to look ahead plus one equals contexLength_. + size_t contexLength_; + TensorShape wDims_; +}; +} // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index e1e8e7fae7ca4c96206d60703db1f35aa1196875..6adffcf53b7966bd6f3d02970e5f07cc9802f469 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1705,6 +1705,26 @@ TEST(Layer, TransLayer) { } } +TEST(Layer, RowConvLayer) { + const int context = 3; + const int size = 512; + + TestConfig config; + config.layerConfig.set_type("row_conv"); + config.layerConfig.set_size(size); + config.layerConfig.set_active_type("sigmoid"); + + config.inputDefs.push_back( + {INPUT_SEQUENCE_DATA, "layer_0", size, context * size}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + RowConvConfig* conv = input->mutable_row_conv_conf(); + conv->set_context_length(context); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "row_conv", 100, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv);
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp index 81fe4ee397351a013c8616ad08fb8cb4b8dae4d0..98ab013548734059060eb06ce1a7cec23dbf1b72 100644 --- a/paddle/parameter/tests/test_argument.cpp +++ b/paddle/parameter/tests/test_argument.cpp @@ -42,7 +42,7 @@ TEST(Argument, poolSequenceWithStride) { CHECK_EQ(outStart[3], 4); CHECK_EQ(outStart[4], 7); - CHECK_EQ(stridePositions->getSize(), 8); + CHECK_EQ(stridePositions->getSize(), 8UL); auto result = reversed ?
strideResultReversed : strideResult; for (int i = 0; i < 8; i++) { CHECK_EQ(stridePositions->getData()[i], result[i]);
diff --git a/paddle/trainer/CMakeLists.txt b/paddle/trainer/CMakeLists.txt index 06c019f0a97757b658d1bc3405246d8f47632aad..9d246b6690134d96e9a262c6ac64d998536128a9 100644 --- a/paddle/trainer/CMakeLists.txt +++ b/paddle/trainer/CMakeLists.txt @@ -4,6 +4,7 @@ set(TRAINER_SOURCES ParameterUpdater.cpp ParamUtil.cpp RemoteParameterUpdater.cpp + NewRemoteParameterUpdater.cpp Tester.cpp Trainer.cpp TrainerInternal.cpp @@ -16,6 +17,7 @@ set(TRAINER_HEADERS ParameterUpdater.h ParamUtil.h RemoteParameterUpdater.h + NewRemoteParameterUpdater.h Tester.h TesterConfig.h Trainer.h @@ -32,7 +34,7 @@ add_style_check_target(paddle_trainer_lib add_style_check_target(paddle_trainer_lib ${TRAINER_HEADERS}) add_dependencies(paddle_trainer_lib - gen_proto_cpp) + gen_proto_cpp paddle_pserver_cclient_lib) macro(add_paddle_exe TARGET_NAME) add_executable(${TARGET_NAME} ${ARGN}) @@ -56,3 +58,10 @@ install(TARGETS paddle_trainer paddle_merge_model set_target_properties(paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) set_target_properties(paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) + +if(APPLE) + set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") +endif() + +target_link_libraries(paddle_trainer ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a) +target_link_libraries(paddle_trainer_lib ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a)
diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f25ce2f7f06f6da0feab27da61b8e49689cbe213 --- /dev/null +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -0,0 +1,86 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "NewRemoteParameterUpdater.h" +#include "Trainer.h" +#include "paddle/utils/Stat.h" + +DECLARE_int32(trainer_id); +DECLARE_string(save_dir); + +namespace paddle { +NewRemoteParameterUpdater::NewRemoteParameterUpdater( + const OptimizationConfig &config, const std::string pserverSpec) + : parameterClient_(-1), + newParameters_(nullptr), + newGradients_(nullptr), + pserverSpec_(pserverSpec) {} + +void NewRemoteParameterUpdater::init( + const std::vector<ParameterPtr> &parameters) { + ParameterUpdater::init(parameters); + + for (auto &para : parameters_) { + para->getBuf(PARAMETER_VALUE)->zeroMem(); + para->getBuf(PARAMETER_GRADIENT)->zeroMem(); + } + + // create parameter server client. + parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), + FLAGS_trainer_id == 0); + + // init new parameter and gradient. + newParameters_ = initNewParameter(PARAMETER_VALUE); + newGradients_ = initNewParameter(PARAMETER_GRADIENT); + + // init parameters: one trainer will get the opportunity to init parameters + // and send them to the parameter servers. Others will get the initialized + // parameters from the parameter servers. + if (paddle_begin_init_params(parameterClient_)) { + LOG(INFO) << "paddle_begin_init_params start"; + for (int i = 0; i < parameterSize(); ++i) { + auto paramConfig = parameters_[i]->getConfig(); + std::string bytes = paramConfig.SerializeAsString(); + const char *array = bytes.data(); + int size = (int)bytes.size(); + paddle_init_param( + parameterClient_, *newParameters_[i], (void *)array, size); + } + paddle_finish_init_params(parameterClient_); + LOG(INFO) << "paddle_begin_init_params done"; + } else { + paddle_get_params(parameterClient_, newParameters_, parameterSize()); + } + + LOG(INFO) << "NewRemoteParameterUpdater initialized"; +} + +void NewRemoteParameterUpdater::updateImpl(Parameter *para) {} + +void NewRemoteParameterUpdater::finishBatch(real cost) { + // send gradients to the parameter servers. + paddle_send_grads(parameterClient_, newGradients_, parameterSize()); + // get the updated parameters from the parameter servers. + paddle_get_params(parameterClient_, newParameters_, parameterSize()); + + // clear gradients after updating the parameters. + for (auto &para : parameters_) { + para->getBuf(PARAMETER_GRADIENT)->zeroMem(); + } +} + +void NewRemoteParameterUpdater::startPass() {} + +bool NewRemoteParameterUpdater::finishPass() { return true; } +}
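The `init` code above races all trainers through the selection handshake from the pserver client design doc: exactly one trainer wins `paddle_begin_init_params`, pushes freshly zeroed parameters with `paddle_init_param`, and calls `paddle_finish_init_params`; every other trainer blocks in `paddle_get_params` until that happens. A toy, single-process Python model of those blocking semantics (the class and its first-caller-wins selection are illustrative simplifications; in the real client the winner is chosen via the `selected` flag or etcd):

```python
import threading

class FakePServer(object):
    """Toy stand-in for the pserver cclient initialization protocol."""

    def __init__(self):
        self._lock = threading.Lock()
        self._initialized = threading.Event()
        self._selected_taken = False
        self._params = {}

    def begin_init_params(self):
        # First caller wins and must initialize; others should wait.
        with self._lock:
            if self._selected_taken:
                return False
            self._selected_taken = True
            return True

    def init_param(self, name, value):
        self._params[name] = value

    def finish_init_params(self):
        self._initialized.set()  # unblocks everyone waiting in get_params

    def get_params(self):
        self._initialized.wait()  # blocks like paddle_get_params
        return dict(self._params)
```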
diff --git a/paddle/trainer/NewRemoteParameterUpdater.h b/paddle/trainer/NewRemoteParameterUpdater.h new file mode 100644 index 0000000000000000000000000000000000000000..f735185f62b3491a63e34cfc4a2ef73dae12243e --- /dev/null +++ b/paddle/trainer/NewRemoteParameterUpdater.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include <functional> +#include <thread> +#include "ParameterUpdater.h" +#include "libpaddle_pserver_cclient.h" +#include "paddle/pserver/ParameterClient2.h" +#include "paddle/utils/Queue.h" +#include "paddle/utils/Util.h" + +namespace paddle { + +/** + * New remote parameter updater for dense parameters that uses the Go pserver + * cclient. + */ +class NewRemoteParameterUpdater : public ParameterUpdater { +public: + NewRemoteParameterUpdater(const OptimizationConfig& config, + const std::string pserverSpec); + ~NewRemoteParameterUpdater() { + releaseNewParameter(newParameters_); + releaseNewParameter(newGradients_); + if (parameterClient_ >= 0) paddle_pserver_client_release(parameterClient_); + } + + /** + * initialize the internal parameter client and itself. + */ + virtual void init(const std::vector<ParameterPtr>& parameters); + /** + * @brief start batch + * + * @note one batch training exhibits stateful feature to help + * to do performance tuning, sgd optimization if necessary. + */ + virtual PassType startBatch(int64_t batchSize) { return PASS_TRAIN; } + + /** + * send parameters to pservers and get returned parameters + * from all pservers if necessary. + */ + virtual void finishBatch(real cost); + virtual void startPass(); + virtual bool finishPass(); + +protected: + /** + * work that needs to be done after finishBatch + */ + virtual void updateImpl(Parameter* para); + +private: + int parameterSize() { return (int)parameters_.size(); } + + /** + * init the parameters of the Go pserver cclient. + * @param type the parameter type (value or gradient) to wrap. + */ + paddle_parameter** initNewParameter(ParameterType type) { + paddle_parameter** new_params = + (paddle_parameter**)malloc(sizeof(paddle_parameter*) * parameterSize()); + for (int i = 0; i < parameterSize(); ++i) { + new_params[i] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); + memset(new_params[i], 0, sizeof(paddle_parameter)); + } + + for (int i = 0; i < parameterSize(); ++i) { + ParameterPtr param = parameters_[i]; + new_params[i]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + new_params[i]->name = (char*)param->getName().c_str(); + new_params[i]->content = + (unsigned char*)(param->getBuf(type).get()->getData()); + new_params[i]->content_len = + (int)param->getBuf(type).get()->getSize() * sizeof(real); + } + return new_params; + } + + void releaseNewParameter(paddle_parameter** newParams) { + if (newParams != nullptr) { + for (int i = 0; i < parameterSize(); ++i) { + free(newParams[i]); + } + free(newParams); + } + } + +protected: + /// internal parameter client object for exchanging data with pserver + paddle_pserver_client parameterClient_; + /// the parameters for the new pserver client + paddle_parameter** newParameters_; + /// the gradients for the new pserver client + paddle_parameter** newGradients_; + /// the specification of the parameter servers, e.g. "host1:port,host2:port" + std::string pserverSpec_; +}; + +} // namespace paddle
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f9b53d6f6553e55406dd000029a598a92fd2fb6..29270829bbc3af6990aaf03a5228ef7f6a892a5c 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -194,6 +194,10 @@ message MaxOutConfig { required uint32 groups = 2; } +message RowConvConfig { + required uint32 context_length = 1; +} + message ProjectionConfig { required string type = 1; required string name = 2; @@ -279,6 +283,7 @@ message LayerInputConfig { optional SppConfig spp_conf = 12; optional PriorBoxConfig priorbox_conf = 13; optional PadConfig pad_conf = 14; + optional RowConvConfig row_conv_conf = 15; } message LayerConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 3a29c91807f023d9cae509ee729b2eeeb1038805..b406a9f3f7703a2a621f8d537a21683d71ba489d 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2081,6 +2081,23 @@ class MaxOutLayer(LayerBase): g_layer_map[input_layer.name].width, out_channels) +@config_layer('row_conv') +class RowConvLayer(LayerBase): + def __init__(self, name, inputs, context_length, **xargs): + super(RowConvLayer, self).__init__( + name, 'row_conv', 0, inputs=inputs, **xargs) + config_assert( + len(self.inputs) == 1, + 'RowConvLayer must have one and only one input') + input_layer = self.get_input_layer(0) + row_conv_conf = self.config.inputs[0].row_conv_conf + row_conv_conf.context_length = context_length + self.set_layer_size(input_layer.size) + psize = context_length * input_layer.size + dims = [context_length, input_layer.size] + self.create_input_parameter(0, psize, dims) + + # key: cost type # value: cost class g_cost_map = {}
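The config class above sizes the layer's single weight as context_length x input_size. A quick worked check, using the values from the test config further below:

```python
# Shape of the row_conv weight created by create_input_parameter above.
context_length, input_size = 19, 2560
dims = [context_length, input_size]
psize = context_length * input_size
assert psize == 48640  # matches the `size` field in test_row_conv.protostr
```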
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 5320f5c32ce00f4780cea16abaee718c95707467..2d8ddbb9007b241eb1986887d8ea6c2de8235c29 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -121,6 +121,7 @@ __all__ = [ 'smooth_l1_cost', 'layer_support', 'multiplex_layer', + 'row_conv_layer', 'dropout_layer', 'prelu_layer', ] @@ -179,17 +180,18 @@ class LayerType(object): EOSID_LAYER = 'eos_id' RECURRENT_LAYER = 'recurrent' - CONV_SHIFT_LAYER = 'conv_shift' - TENSOR_LAYER = 'tensor' - SEL_FC_LAYER = 'selective_fc' - SAMPLING_ID_LAYER = 'sampling_id' - SLOPE_INTERCEPT_LAYER = 'slope_intercept' - LINEAR_COMBINATION_LAYER = 'convex_comb' - BLOCK_EXPAND = 'blockexpand' - MAXOUT = 'maxout' - SPP_LAYER = 'spp' - PAD_LAYER = 'pad' - MULTIPLEX_LAYER = 'multiplex' + CONV_SHIFT_LAYER = "conv_shift" + TENSOR_LAYER = "tensor" + SEL_FC_LAYER = "selective_fc" + SAMPLING_ID_LAYER = "sampling_id" + SLOPE_INTERCEPT_LAYER = "slope_intercept" + LINEAR_COMBINATION_LAYER = "convex_comb" + BLOCK_EXPAND = "blockexpand" + MAXOUT = "maxout" + SPP_LAYER = "spp" + PAD_LAYER = "pad" + MULTIPLEX_LAYER = "multiplex" + ROW_CONV_LAYER = "row_conv" PRINT_LAYER = 'print' PRIORBOX_LAYER = 'priorbox' @@ -5585,6 +5587,79 @@ def dropout_layer(input, dropout_rate, name=None): @wrap_name_default() +@wrap_act_default(act=LinearActivation()) +@wrap_param_attr_default() +@layer_support(DROPOUT) +def row_conv_layer(input, + context_len, + act=None, + name=None, + param_attr=None, + layer_attr=None): + """ + + The row convolution is also called lookahead convolution. It was first + introduced in the paper `Deep Speech 2: End-to-End Speech Recognition + in English and Mandarin <https://arxiv.org/abs/1512.02595>`_ . + + A bidirectional RNN learns a representation for a sequence by performing + a forward and a backward pass through the entire sequence. However, + unlike unidirectional RNNs, bidirectional RNNs are challenging to deploy + in an online and low-latency setting. The lookahead convolution + incorporates information from future subsequences in a computationally + efficient manner to improve unidirectional recurrent neural networks. + + The connection of row convolution is different from the 1D sequence + convolution. Assume the future context length is k; then the output at + timestep t is computed from the input features from the t-th timestep up + to the (t+k)-th timestep. Assume the hidden dimension of the input + activations is d; the activations r_t for the new layer at timestep t are: + + .. math:: + + r_{t,i} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}} + \quad \text{for} \quad (1 \leq i \leq d) + + Note: + The `context_len` is `k + 1`. That is to say, the lookahead step + number plus one equals context_len. + + + .. code-block:: python + + row_conv = row_conv_layer(input=input_layer, context_len=3) + + + :param input: The input layer. + :type input: LayerOutput + :param context_len: The context length equals the lookahead step number + plus one. + :type context_len: int + :param act: Activation Type. Default is linear activation. + :type act: BaseActivation + :param param_attr: The Parameter Attribute. If None, the parameter will be + initialized smartly. It is better to set it yourself. + :type param_attr: ParameterAttribute + :param layer_attr: Extra Layer config. + :type layer_attr: ExtraLayerAttribute|None + :return: LayerOutput object. + :rtype: LayerOutput + + """ + assert isinstance(input, LayerOutput) + assert context_len > 0, "the context_len must be greater than 0."
+ + Layer( + inputs=[Input(input.name, **param_attr.attr)], + name=name, + context_length=context_len, + type=LayerType.ROW_CONV_LAYER, + active_type=act.name, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, LayerType.ROW_CONV_LAYER, input, activation=act, size=input.size) + + @layer_support() @wrap_name_default() @wrap_param_attr_default()
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index bef14bffaf648b92e608a6a18cd46d57e850550e..c24102255f5bbed0f551b2dbfec20be7daf5f5b4 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -6,6 +6,6 @@ img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cos test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer -test_prelu_layer) +test_prelu_layer test_row_conv) export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr new file mode 100644 index 0000000000000000000000000000000000000000..9ec15d2a19ec50a1729f9eeaa6dce8b1153c776b --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr @@ -0,0 +1,41 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 2560 + active_type: "" +} +layers { + name: "__row_conv_layer_0__" + type: "row_conv" + size: 2560 + active_type: "relu" + inputs { + input_layer_name: "data" + input_parameter_name: "___row_conv_layer_0__.w0" + row_conv_conf { + context_length: 19 + } + } +} +parameters { + name: "___row_conv_layer_0__.w0" + size: 48640 + initial_mean: 0.0 + initial_std: 0.229415733871 + dims: 19 + dims: 2560 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "data" +output_layer_names: "__row_conv_layer_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__row_conv_layer_0__" + input_layer_names: "data" + output_layer_names: "__row_conv_layer_0__" + is_recurrent_layer_group: false +} +
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..ab33c496b0663d8472ce4b272be6c5cecbcfc978 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py @@ -0,0 +1,9 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +data = data_layer(name='data', size=2560) + +row_conv = row_conv_layer(input=data, context_len=19, act=ReluActivation()) + +outputs(row_conv)
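The recordio converter added in the next hunk can be driven with any reader that yields picklable records; a usage sketch (the output directory and prefix here are illustrative):

```python
import paddle.v2.dataset.common as common

def demo_reader():
    for i in range(1000):
        yield i  # any picklable record works

# Partitions the reader's output into 4 shard files named
# /tmp/demo/demo-00000-of-00003 ... demo-00003-of-00003.
common.convert('/tmp/demo', demo_reader, num_shards=4, name_prefix='demo')
```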
diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index 418b592a5ac638cc61b86a9b3fbdcee1e3a0bcaf..9c614914b5e372e8e5e3c3c072b18b83edf51e87 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -149,3 +149,57 @@ def cluster_files_reader(files_pattern, yield line return reader + + +def convert(output_path, + reader, + num_shards, + name_prefix, + max_lines_to_shuffle=1000): + """ + Convert data from reader to recordio format files. + + :param output_path: directory in which output files will be saved. + :param reader: a data reader, from which the convert program will read data instances. + :param num_shards: the number of shards that the dataset will be partitioned into. + :param name_prefix: the name prefix of generated files. + :param max_lines_to_shuffle: the maximum number of lines to shuffle before writing. + """ + import recordio + import cPickle as pickle + import random + + assert num_shards >= 1 + assert max_lines_to_shuffle >= 1 + + def open_writers(): + w = [] + for i in range(0, num_shards): + n = "%s/%s-%05d-of-%05d" % (output_path, name_prefix, i, + num_shards - 1) + w.append(recordio.writer(n)) + + return w + + def close_writers(w): + for i in range(0, num_shards): + w[i].close() + + def write_data(w, lines): + random.shuffle(lines) + for i, d in enumerate(lines): + d = pickle.dumps(d, pickle.HIGHEST_PROTOCOL) + w[i % num_shards].write(d) + + w = open_writers() + lines = [] + + for d in reader(): + lines.append(d) + if len(lines) >= max_lines_to_shuffle: + write_data(w, lines) + lines = [] + + write_data(w, lines) + close_writers(w)
diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/v2/dataset/tests/common_test.py index f9815d4f9e1ee3bbe9ccf2dae588c51c262468c1..cfa194eba38ea70311c4deeac2635dc0a0103576 100644 --- a/python/paddle/v2/dataset/tests/common_test.py +++ b/python/paddle/v2/dataset/tests/common_test.py @@ -57,6 +57,38 @@ class TestCommon(unittest.TestCase): for idx, e in enumerate(reader()): self.assertEqual(e, str("0")) + def test_convert(self): + record_num = 10 + num_shards = 4 + + def test_reader(): + def reader(): + for x in xrange(record_num): + yield x + + return reader + + path = tempfile.mkdtemp() + paddle.v2.dataset.common.convert(path, + test_reader(), num_shards, + 'random_images') + + files = glob.glob(path + '/random_images-*') + self.assertEqual(len(files), num_shards) + + recs = [] + for i in range(0, num_shards): + n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1) + r = recordio.reader(n) + while True: + d = r.read() + if d is None: + break + recs.append(d) + + recs.sort() + self.assertEqual(len(recs), record_num) + if __name__ == '__main__': unittest.main()
diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 5e99d4a241b7fe2b0f9ff4ba191db4b341c4d30e..1ef2dceca910e806bddf17c95d1c345a144d9e31 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -45,7 +45,12 @@ class Optimizer(object): return swig_api.ParameterUpdater.createRemoteUpdater( self.__opt_conf__, pass_num, use_sparse_updater) - def create_updater(self, is_local, num_passes, use_sparse_updater): + def __create_new_remote_updater__(self, pserver_spec): + return swig_api.ParameterUpdater.createNewRemoteUpdater( + self.__opt_conf__, pserver_spec) + + def create_updater(self, is_local, num_passes, use_sparse_updater, + pserver_spec): """ create proper parameter_updater by configuration.
:param is_local: create local or remote parameter updater @@ -64,8 +69,12 @@ class Optimizer(object): if is_local: parameter_updater = self.__create_local_updater__() else: - parameter_updater = self.__create_remote_updater__( - num_passes, use_sparse_updater) + if pserver_spec is None: + parameter_updater = self.__create_remote_updater__( + num_passes, use_sparse_updater) + else: + parameter_updater = self.__create_new_remote_updater__( + pserver_spec) return parameter_updater diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 8fdb67cc2688a67ed815af396b214e339195c73f..f9658a8c5df9562073c8a187074a6cb3459ac5d9 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -49,7 +49,8 @@ class SGD(object): parameters, update_equation, extra_layers=None, - is_local=True): + is_local=True, + pserver_spec=None): if not isinstance(parameters, v2_parameters.Parameters): raise TypeError('parameters should be parameters') @@ -63,6 +64,7 @@ class SGD(object): self.__parameters__ = parameters self.__topology_in_proto__ = topology.proto() self.__is_local__ = is_local + self.__pserver_spec__ = pserver_spec self.__use_sparse_updater__ = self.__topology__.use_sparse_updater() # # In local mode, disable sparse_remote_update. @@ -126,7 +128,8 @@ class SGD(object): __check_train_args__(**locals()) self.__parameter_updater__ = self.__optimizer__.create_updater( - self.__is_local__, num_passes, self.__use_sparse_updater__) + self.__is_local__, num_passes, self.__use_sparse_updater__, + self.__pserver_spec__) self.__parameter_updater__.init(self.__gradient_machine__) self.__gradient_machine__.start()
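Taken together, the trainer.py and optimizer.py changes mean a user opts into the new Go parameter server path purely through the `pserver_spec` argument; a hedged sketch (the cost, parameters, and optimizer objects are placeholders for whatever model is being trained):

```python
import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)
# cost, parameters, and optimizer are defined as usual for the model.
trainer = paddle.trainer.SGD(
    cost=cost,
    parameters=parameters,
    update_equation=optimizer,
    is_local=False,                                # remote training
    pserver_spec="localhost:3000,localhost:3001")  # comma-separated pservers
# With pserver_spec set, create_updater() builds a NewRemoteParameterUpdater
# instead of the old RemoteParameterUpdater.
```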