diff --git a/.travis.yml b/.travis.yml index 44b755ee32d204c883f0d74e7ad0f78380918954..f9b4a7e08315a42a61a58d6c61c45771df962c4d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -50,6 +50,7 @@ before_install: # protobuf version. - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - pip install rarfile + - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: diff --git a/CMakeLists.txt b/CMakeLists.txt index 79210d043648de5d493f0b998eeb885c993a6106..c2218be5efb90142ef7f16b788e141e025105771 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,7 @@ endif(WITH_GPU) add_subdirectory(proto) add_subdirectory(paddle) add_subdirectory(python) +add_subdirectory(go/pserver/cclient) if(WITH_DOC) add_subdirectory(doc) diff --git a/doc/design/cluster_train/pserver_client.md b/doc/design/cluster_train/pserver_client.md index b3e4079010490b69db1de28157f0cab80cad2381..474b8c572cd92fc87e9f7f3f2b19d12cccd158de 100644 --- a/doc/design/cluster_train/pserver_client.md +++ b/doc/design/cluster_train/pserver_client.md @@ -74,14 +74,25 @@ typedef enum { typedef struct { char* name; paddle_element_type element_type; - void* content; + unsigned char* content; int content_len; } paddle_parameter, paddle_gradient; -typedef struct paddle_pserver_client paddle_pserver_client; +typedef int paddle_pserver_client; -paddle_pserver_client* paddle_new_pserver_client(); -void paddle_pserver_client_release(paddle_pserver_client* client); +/** + * @brief creates a pserver client that talks to etcd for coordination. + */ +paddle_pserver_client paddle_new_etcd_pserver_client(char* etcd_addr); + +/** + * @brief creates a pserver client given pserver addresses. + * + * @param pserver_addrs comma-separated pserver addresses. + * @param selected if current pserver client is selected to initialize all parameter servers. + */ +paddle_pserver_client paddle_new_pserver_client(char* pserver_addrs, int selected); +void paddle_pserver_client_release(paddle_pserver_client c); /** * @brief paddle_begin_init_params begins to initialize parameters on @@ -95,7 +106,7 @@ void paddle_pserver_client_release(paddle_pserver_client* client); * @return 1 if the trainer is selected to initialize parameter * servers, otherwise 0. */ -int paddle_begin_init_params(paddle_pserver_client* client); +int paddle_begin_init_params(paddle_pserver_client client); /** * @brief paddle_init_param initializes the parameter on parameter @@ -109,7 +120,7 @@ int paddle_begin_init_params(paddle_pserver_client* client); * @paddle_begin_init_param). Or simply exit the program and wait for * the cluster management system to restart the trainer. */ -int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, const unsigned char* param_config_proto, int config_len); +int paddle_init_param(paddle_pserver_client client, paddle_parameter param, const unsigned char* param_config_proto, int config_len); /** * @brief paddle_finish_init_params tells parameter servers client has @@ -120,7 +131,7 @@ int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, con * @paddle_begin_init_param). Or simply exit the program and wait for * the cluster management system to restart the trainer. 
*/ -int paddle_finish_init_params(paddle_pserver_client* client); +int paddle_finish_init_params(paddle_pserver_client client); /** * @brief paddle_send_grads sends gradients to parameter servers for * @@ -131,7 +142,7 @@ int paddle_finish_init_params(paddle_pserver_client* client); * @param learning_rate the learning rate for the gradients. * @return 0 if successful, otherwise -1. */ -int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grads, int len); +int paddle_send_grads(paddle_pserver_client client, const paddle_gradient* grads, int len); /** * @brief paddle_get_params gets parameters from parameter servers. * @@ -139,13 +150,15 @@ int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grad * paddle_get_params will block until parameters are initialized on * the parameter servers. * - * @param names the array of names of the parameters to get. - * @param dst the destination array of parameters to save to. + * @param dst the destination array of parameter pointers to save to. + * Each parameter pointer must be pre-populated with the required parameter name, + * and the parameter content must be pre-allocated to the size of the requested + * parameter on the pserver. * @param len the length of the names array and the paddle_parameter * array. * @return 0 if successful, otherwise -1. */ -int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_parameter* dst, int len); +int paddle_get_params(paddle_pserver_client client, paddle_parameter** dst, int len); /** * @brief paddle_save_model indicates parameters to save the parameter * @@ -154,5 +167,5 @@ int paddle_save_model(paddle_pserver_client* client, const char* path); * @param path the path to save parameters. * @return 0 if successful, otherwise -1. */ -int paddle_save_model(paddle_pserver_client* client, const char* path); +int paddle_save_model(paddle_pserver_client client, const char* path); ``` diff --git a/doc/design/cluster_train/remote_parameter_updater.md b/doc/design/cluster_train/remote_parameter_updater.md new file mode 100644 index 0000000000000000000000000000000000000000..6e8e5938455b869e0f3367794c41250340b37f77 --- /dev/null +++ b/doc/design/cluster_train/remote_parameter_updater.md @@ -0,0 +1,21 @@ +# Design Doc: Remote Parameter Updater for Cluster Train + +For an overview of distributed training, please refer to the [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater that uses the parameter server cclient ([The Client Library of Parameter Server Design Doc](pserver_client.md)) to manage and update parameters. + +## Parameter Updater + +The parameter updater is used by the trainer to manage and update parameters. There are mainly two kinds of parameter updaters: local and remote. Since this design is for cluster training, we will only discuss the remote parameter updater here. + +### Remote Parameter Updater + +The remote parameter updater manages parameters through remote parameter servers, using the client that communicates with the pservers ([The Client Library of Parameter Server Design Doc](pserver_client.md)). + +In the PaddlePaddle Python V2 API, the trainer is implemented in Python; it holds an instance of the parameter updater and calls its functions directly. In this design, we will also expose the API of RemoteParameterUpdater to Python with SWIG. + +#### Sparse Remote Parameter Updater + +Since we will only implement dense parameter management for now, the mechanism for sparse parameters will be discussed in a later stage. 
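As a rough sketch of the dense update cycle described above (illustrative only, not a settled interface: it uses the Go `pserver.Client` from this patch, and the loop bounds, parameter name, and function name are made up):

```go
package updater

import (
	"log"

	"github.com/PaddlePaddle/Paddle/go/pserver"
)

// denseUpdateCycle sketches one training run: optionally initialize
// parameters on the pservers, then repeatedly push gradients and pull
// fresh parameter values.
func denseUpdateCycle(c *pserver.Client, grads []pserver.Gradient, names []string) error {
	if c.BeginInitParams() {
		// This trainer was selected to initialize the parameter servers.
		p := pserver.Parameter{Name: "param_a", ElementType: pserver.Float32}
		if err := c.InitParam(pserver.ParameterWithConfig{Param: p}); err != nil {
			return err
		}
		if err := c.FinishInitParams(); err != nil {
			return err
		}
	}

	for pass := 0; pass < 10; pass++ {
		if err := c.SendGrads(grads); err != nil {
			return err
		}
		params, err := c.GetParams(names)
		if err != nil {
			return err
		}
		log.Printf("pass %d: received %d updated parameters", pass, len(params))
	}
	return nil
}
```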
+ +### Interface Design + +TBD diff --git a/go/cmake/golang.cmake b/go/cmake/golang.cmake index d38d06de2348821b21109f7dc708314da81111c5..7c85fb6298d02b85ea87af9b6f6e960463443b7d 100644 --- a/go/cmake/golang.cmake +++ b/go/cmake/golang.cmake @@ -17,7 +17,7 @@ function(GO_LIBRARY NAME BUILD_TYPE) endif() file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") - file(RELATIVE_PATH rel ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + file(RELATIVE_PATH rel ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) # find Paddle directory. get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) @@ -32,12 +32,14 @@ function(GO_LIBRARY NAME BUILD_TYPE) # will use the local changes in Paddle rather than checkout Paddle # in github. add_custom_target(copyPaddle - COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}) + COMMAND rm -rf ${PADDLE_IN_GOPATH}/Paddle + COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}/Paddle) add_dependencies(goGet copyPaddle) add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} - -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" + -gcflags=-shared -asmflags=-shared -installsuffix=_shared -a + -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" ${CMAKE_GO_FLAGS} ${GO_SOURCE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/go/cmd/master/master.go b/go/cmd/master/master.go index d1f3d7d76c438670faf6677b01e790c5ebe1f2cb..25cd1cafcdf328094a019638f37f908591f5f374 100644 --- a/go/cmd/master/master.go +++ b/go/cmd/master/master.go @@ -1,80 +1,32 @@ package main import ( - "fmt" "net" "net/http" "net/rpc" - "os" - "path/filepath" "strconv" - "strings" "time" "github.com/namsral/flag" "github.com/PaddlePaddle/Paddle/go/master" - "github.com/PaddlePaddle/recordio" ) func main() { port := flag.Int("port", 8080, "port of the master server.") - dataset := flag.String("training_dataset", "", "dataset: comma separated path to RecordIO paths, supports golb patterns.") + faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).") taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.") taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.") chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.") flag.Parse() - if *dataset == "" { - panic("no dataset specified.") - } - if *faultTolerance { panic("fault tolernance not implemented.") - } - - var chunks []master.Chunk - var paths []string - ss := strings.Split(*dataset, ",") - fmt.Println(ss) - for _, s := range ss { - match, err := filepath.Glob(s) - if err != nil { - panic(err) - } - paths = append(paths, match...) 
- } - - if len(paths) == 0 { - panic("no valid datset specified.") - } - - idx := 0 - for _, path := range paths { - f, err := os.Open(path) - if err != nil { - panic(err) - } - - index, err := recordio.LoadIndex(f) - if err != nil { - panic(err) - } - f.Close() - count := index.NumChunks() - for i := 0; i < count; i++ { - chunk := master.Chunk{ - Idx: idx, - Path: path, - Index: *index.ChunkIndex(i), - } - chunks = append(chunks, chunk) - } } - s := master.NewService(chunks, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax) + s := master.NewService(*chunkPerTask, *taskTimeoutDur, *taskTimeoutMax) err := rpc.Register(s) if err != nil { panic(err) diff --git a/go/pserver/internal/connection/conn.go b/go/connection/conn.go similarity index 82% rename from go/pserver/internal/connection/conn.go rename to go/connection/conn.go index 1c04f117254054741b7d45fb16462b5ce84a2aea..bc9b5f0617e35f049c3e14f0b441aca2033f9645 100644 --- a/go/pserver/internal/connection/conn.go +++ b/go/connection/conn.go @@ -2,6 +2,7 @@ package connection import ( "errors" + "log" "net/rpc" "sync" ) @@ -21,6 +22,18 @@ func New() *Conn { return c } +// Close closes the connection. +func (c *Conn) Close() error { + c.mu.Lock() + defer c.mu.Unlock() + + if c.client == nil { + return nil + } + + return c.client.Close() +} + // Connect connects the connection to a address. func (c *Conn) Connect(addr string) error { c.mu.Lock() @@ -50,12 +63,20 @@ func (c *Conn) Connect(addr string) error { c.waitConn = nil } } else { + err := client.Close() + if err != nil { + log.Println(err) + } + return errors.New("client already set from a concurrent goroutine") } return nil } +// TODO(helin): refactor Call to be able to perform given retry +// policy. + // Call make a RPC call. // // Call will be blocked until the connection to remote RPC service diff --git a/go/master/client.go b/go/master/client.go new file mode 100644 index 0000000000000000000000000000000000000000..20c66340dc28bc514b6c51583dd94830c42a41bf --- /dev/null +++ b/go/master/client.go @@ -0,0 +1,82 @@ +package master + +import ( + "log" + "time" + + "github.com/PaddlePaddle/Paddle/go/connection" +) + +// Addresser provides the address of the master server. +type Addresser interface { + Address() string +} + +// Client is the client of the master server. +type Client struct { + conn *connection.Conn +} + +// NewClient creates a new Client. +func NewClient(addr Addresser) *Client { + c := &Client{} + c.conn = connection.New() + go c.monitorMaster(addr) + return c +} + +func (c *Client) monitorMaster(addr Addresser) { + lastMaster := "" + monitor := func() { + // get the latest address of the master server, and + // connect to the new address once the address changes. + curMaster := addr.Address() + if curMaster != lastMaster { + if curMaster == "" { + err := c.conn.Close() + if err != nil { + log.Println(err) + } + } else { + err := c.conn.Connect(curMaster) + if err != nil { + log.Println(err) + + // connect to addr failed, set + // to last known addr in order + // to retry next time. + curMaster = lastMaster + } + + } + } + + lastMaster = curMaster + } + + monitor() + ticker := time.NewTicker(10 * time.Second) + for range ticker.C { + monitor() + } +} + +// SetDataset sets the dataset for the master server to dispatch. +// +// SetDataset can be called multiple times from different nodes. But +// only the first call will be honored. 
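For illustration only (the fixed-address `addresser` mirrors the helper in client_test.go further below, and the glob path is made up), a trainer would drive this client roughly as follows:

```go
package main

import (
	"log"

	"github.com/PaddlePaddle/Paddle/go/master"
)

// addresser satisfies master.Addresser with a fixed address; a real
// deployment would discover the master address, e.g. via etcd.
type addresser string

func (a addresser) Address() string { return string(a) }

func main() {
	c := master.NewClient(addresser("localhost:8080"))

	// Every trainer may call SetDataset; only the first call is honored.
	if err := c.SetDataset([]string{"/data/train-*.recordio"}); err != nil {
		log.Fatal(err)
	}

	for {
		t, err := c.GetTask()
		if err != nil {
			break // no task available right now
		}
		// ... train on the chunks in t ...
		if err := c.TaskFinished(t.ID); err != nil {
			log.Println(err)
		}
	}
}
```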
+func (c *Client) SetDataset(globPaths []string) error { + return c.conn.Call("Service.SetDataset", globPaths, nil) +} + +// GetTask gets a new task from the master server. +func (c *Client) GetTask() (Task, error) { + var t Task + err := c.conn.Call("Service.GetTask", 0, &t) + return t, err +} + +// TaskFinished tells the master server a task is finished. +func (c *Client) TaskFinished(taskID int) error { + return c.conn.Call("Service.TaskFinished", taskID, nil) +} diff --git a/go/master/client_test.go b/go/master/client_test.go new file mode 100644 index 0000000000000000000000000000000000000000..df708ad7912c07205ae0cee8d2ab1c06d65223cc --- /dev/null +++ b/go/master/client_test.go @@ -0,0 +1,120 @@ +package master_test + +import ( + "fmt" + "net" + "net/http" + "net/rpc" + "os" + "strconv" + "strings" + "testing" + "time" + + log "github.com/sirupsen/logrus" + + "github.com/PaddlePaddle/Paddle/go/master" + "github.com/PaddlePaddle/recordio" +) + +const ( + totalTask = 20 + chunkPerTask = 10 +) + +var port int + +func init() { + log.SetLevel(log.ErrorLevel) + + l, err := net.Listen("tcp", ":0") + if err != nil { + panic(err) + } + + ss := strings.Split(l.Addr().String(), ":") + p, err := strconv.Atoi(ss[len(ss)-1]) + if err != nil { + panic(err) + } + port = p + + go func(l net.Listener) { + s := master.NewService(chunkPerTask, time.Second, 1) + server := rpc.NewServer() + err := server.Register(s) + if err != nil { + panic(err) + } + + mux := http.NewServeMux() + mux.Handle(rpc.DefaultRPCPath, server) + err = http.Serve(l, mux) + if err != nil { + panic(err) + } + }(l) +} + +type addresser string + +func (a addresser) Address() string { + return string(a) +} + +func TestClientFull(t *testing.T) { + const p = "/tmp/master_client_test_0" + f, err := os.Create(p) + if err != nil { + panic(err) + } + + for i := 0; i < totalTask*chunkPerTask; i++ { + w := recordio.NewWriter(f, -1, -1) + w.Write(nil) + // call Close to force RecordIO writing a chunk. + w.Close() + } + f.Close() + + c := master.NewClient(addresser(fmt.Sprintf(":%d", port))) + c.SetDataset([]string{p}) + + checkOnePass := func(i int) { + var tasks []master.Task + for i := 0; i < totalTask; i++ { + task, err := c.GetTask() + if err != nil { + t.Fatal(i, err) + } + tasks = append(tasks, task) + } + + _, err = c.GetTask() + if err == nil { + t.Fatal(i, "should get error.") + } + + err = c.TaskFinished(tasks[0].ID) + if err != nil { + t.Fatal(err) + } + tasks = tasks[1:] + task, err := c.GetTask() + if err != nil { + t.Fatal(err) + } + tasks = append(tasks, task) + + for _, task := range tasks { + err = c.TaskFinished(task.ID) + if err != nil { + t.Fatal(i, err) + } + } + } + + for i := 0; i < 10; i++ { + checkOnePass(i) + } +} diff --git a/go/master/service.go b/go/master/service.go index ab17a62f3854c1e32d731037fcc9857260d03781..1e2a34972bb4763688d7df97d768320cd6f9e9d4 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -2,29 +2,25 @@ package master import ( "errors" - "log" + "os" + "path/filepath" "sync" "time" - "github.com/PaddlePaddle/recordio" -) + log "github.com/sirupsen/logrus" -const ( - targetTaskCount = 300 -) - -// errors -var ( - ErrNoMoreTask = errors.New("no more task for current pass") - ErrPendingTaskNotFound = errors.New("pending task not found") + "github.com/PaddlePaddle/recordio" ) // Service is the master server service. 
type Service struct { - timeoutDur time.Duration - timeoutMax int + chunksPerTask int + timeoutDur time.Duration + timeoutMax int + ready chan struct{} mu sync.Mutex + initDone bool taskQueues taskQueues } @@ -55,7 +51,6 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { if len(cur.Task.Chunks) > 0 { cur.Task.ID = id - id++ result = append(result, cur) } @@ -63,21 +58,21 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { } // NewService creates a new service. -func NewService(chunks []Chunk, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service { +func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service { s := &Service{} + s.chunksPerTask = chunksPerTask s.timeoutDur = timeoutDur s.timeoutMax = timeoutMax s.taskQueues = taskQueues{} s.taskQueues.Pending = make(map[int]taskEntry) - s.taskQueues.Todo = partition(chunks, chunksPerTask) + s.ready = make(chan struct{}) return s } // Chunk is a chunk of data consisted of several data instances. type Chunk struct { - Idx int // index of the chunk within the file Path string - Index recordio.Index // block index + Index recordio.Index // chunk index } // Task is the basic unit of data instances assigned to trainers. @@ -105,74 +100,205 @@ func (s *Service) snapshot() error { return nil } -// GetTask gets a new task from the service. -func (s *Service) GetTask(dummy int, task *Task) error { +func readChunks(globPaths []string) ([]Chunk, error) { + var chunks []Chunk + var paths []string + + for _, s := range globPaths { + match, err := filepath.Glob(s) + if err != nil { + return nil, err + } + paths = append(paths, match...) + } + + if len(paths) == 0 { + return nil, errors.New("no valid dataset specified") + } + + for _, path := range paths { + f, err := os.Open(path) + if err != nil { + return nil, err + } + + index, err := recordio.LoadIndex(f) + if err != nil { + return nil, err + } + err = f.Close() + if err != nil { + return nil, err + } + + count := index.NumChunks() + for i := 0; i < count; i++ { + chunk := Chunk{ + Path: path, + Index: *index.ChunkIndex(i), + } + chunks = append(chunks, chunk) + } + } + + return chunks, nil +} + +// SetDataset sets the dataset to dispatch for the master server. +// +// SetDataset can be called multiple times. But only the first call will +// be honored. +func (s *Service) SetDataset(globPaths []string, dummy *int) error { + if len(globPaths) == 0 { + return errors.New("no dataset specified") + } + + s.mu.Lock() + defer s.mu.Unlock() + if s.initDone { + // Already initialized. All trainers will call + // SetDataset, but we only handle the first one. Treat + // other calls as successful but do nothing. 
+ return nil + } - if len(s.taskQueues.Todo) == 0 { - return ErrNoMoreTask + chunks, err := readChunks(globPaths) + if err != nil { + return err } - t := s.taskQueues.Todo[0] - t.Epoch++ - s.taskQueues.Todo = s.taskQueues.Todo[1:] - s.taskQueues.Pending[t.Task.ID] = t - err := s.snapshot() + s.taskQueues.Todo = partition(chunks, s.chunksPerTask) + + err = s.snapshot() if err != nil { + log.Errorln(err) return err } - time.AfterFunc(s.timeoutDur, func(taskID int, epoch int) func() { - return func() { - s.mu.Lock() - defer s.mu.Unlock() + close(s.ready) + s.initDone = true + return nil +} - t, ok := s.taskQueues.Pending[taskID] - if !ok { - return - } +func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { + return func() { + s.mu.Lock() + defer s.mu.Unlock() - if t.Epoch != epoch { - // new epoch, task launched after the - // schedule of this timeout check. - return + t, ok := s.taskQueues.Pending[taskID] + if !ok { + return + } + + if t.Epoch != epoch { + // new epoch, task launched after the + // schedule of this timeout check. + return + } + + defer func() { + err := s.snapshot() + if err != nil { + log.Errorln(err) } + }() + + delete(s.taskQueues.Pending, t.Task.ID) - defer func() { - err := s.snapshot() - if err != nil { - log.Println(err) - } - }() + t.NumTimeout++ + if t.NumTimeout > s.timeoutMax { + log.Warningf("Task %v failed %d times, discard.\n", t.Task, t.NumTimeout) + s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) + return + } - delete(s.taskQueues.Pending, t.Task.ID) + log.Warningf("Task %v failed %d times, retry.\n", t.Task, t.NumTimeout) + s.taskQueues.Todo = append(s.taskQueues.Todo, t) + } +} - t.NumTimeout++ - if t.NumTimeout > s.timeoutMax { - s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) - return +// GetTask gets a new task from the service. +func (s *Service) GetTask(dummy int, task *Task) error { + select { + case <-s.ready: + } + + s.mu.Lock() + defer s.mu.Unlock() + + if len(s.taskQueues.Todo) == 0 { + if len(s.taskQueues.Done) == 0 { + if len(s.taskQueues.Pending) == 0 { + err := errors.New("all tasks failed") + log.Warningln(err) + return err } - s.taskQueues.Todo = append(s.taskQueues.Todo, t) + // TODO(helin): the client needs to retry in this + // error case. Gotcha: RPC client can't + // compare returned error with predefined + // errors like io.EOF, because the error + // instance deserialized from RPC is a + // different instance than the error defined + // in package. So we need to figure out a way + // for the client to check this error correctly. + err := errors.New("no more available task") + log.Warningln(err) + return err } - }(t.Task.ID, t.Epoch)) + s.taskQueues.Todo = s.taskQueues.Done + s.taskQueues.Done = nil + log.Infoln("No more todo tasks, but a trainer is requesting a task. Moving all done tasks to todo.") + } + + t := s.taskQueues.Todo[0] + t.Epoch++ + s.taskQueues.Todo = s.taskQueues.Todo[1:] + s.taskQueues.Pending[t.Task.ID] = t + err := s.snapshot() + if err != nil { + return err + } + + *task = t.Task + log.Infof("Task #%d dispatched\n", task.ID) + + time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.ID, t.Epoch)) return nil } // TaskFinished tell the service that a task is finished. 
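Tying back to the TODO in GetTask above: since an error deserialized from net/rpc arrives as a plain string, a client-side retry would have to match on the message for now. A sketch under that assumption (the helper name and the backoff are made up, not part of this patch):

```go
package retry

import (
	"time"

	"github.com/PaddlePaddle/Paddle/go/master"
)

// getTaskWithRetry keeps asking the master for a task while it reports
// "no more available task", backing off between attempts. Matching the
// error string is a stopgap until a proper retry protocol exists.
func getTaskWithRetry(c *master.Client, attempts int) (master.Task, error) {
	var t master.Task
	var err error
	for i := 0; i < attempts; i++ {
		t, err = c.GetTask()
		if err == nil || err.Error() != "no more available task" {
			return t, err
		}
		time.Sleep(time.Second) // back off before asking again
	}
	return t, err
}
```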
func (s *Service) TaskFinished(taskID int, dummy *int) error { + select { + case <-s.ready: + } + s.mu.Lock() defer s.mu.Unlock() + log.Infof("Task %d finished\n", taskID) + t, ok := s.taskQueues.Pending[taskID] if !ok { - return ErrPendingTaskNotFound + err := errors.New("pending task not found") + log.Warningln(err) + return err } // task finished, reset timeout t.NumTimeout = 0 s.taskQueues.Done = append(s.taskQueues.Done, t) delete(s.taskQueues.Pending, taskID) - return s.snapshot() + + if len(s.taskQueues.Pending) == 0 && len(s.taskQueues.Todo) == 0 { + log.Infoln("No more todo and pending task, start a new pass.") + s.taskQueues.Todo = append(s.taskQueues.Todo, s.taskQueues.Done...) + s.taskQueues.Done = nil + } + + err := s.snapshot() + if err != nil { + log.Errorln(err) + } + return err } diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt index c017d7465611373309c6c60141fed864f5ccfb5d..7967af51ee9a94c9e40bf6403fe819ff462d9219 100644 --- a/go/pserver/cclient/CMakeLists.txt +++ b/go/pserver/cclient/CMakeLists.txt @@ -9,5 +9,15 @@ project(cxx_go C Go) include(golang) include(flags) -go_library(client STATIC) +go_library(paddle_pserver_cclient STATIC) + +if(PROJ_ROOT) + add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.h ${PROJ_ROOT}/paddle/trainer/ + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.a ${PROJ_ROOT}/paddle/trainer/ + WORKING_DIRECTORY ${PROJ_ROOT}/paddle + DEPENDS paddle_pserver_cclient) + add_custom_target(paddle_pserver_cclient_lib ALL DEPENDS ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a) +endif(PROJ_ROOT) + add_subdirectory(test) diff --git a/go/pserver/cclient/cclient.go b/go/pserver/cclient/cclient.go index 0b4aa79806b72f4608230d2216d1741389913d95..4476e762dad04009833421056aa5a49efd44ddaa 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/cclient/cclient.go @@ -19,21 +19,9 @@ typedef struct { int content_len; } paddle_parameter, paddle_gradient; -static inline void paddle_release_param(paddle_parameter* param) { - if (param != NULL) { - if (param->name != NULL) { - free(param->name); - } - - if (param->content != NULL) { - free(param->content); - } - - free(param); - } -} - -typedef int client; +typedef int paddle_pserver_client; +#define PSERVER_ERROR -1 +#define PSERVER_OK 0 */ import "C" @@ -48,10 +36,10 @@ import ( var nullPtr = unsafe.Pointer(uintptr(0)) var mu sync.Mutex -var handleMap = make(map[C.client]*pserver.Client) -var curHandle C.client +var handleMap = make(map[C.paddle_pserver_client]*pserver.Client) +var curHandle C.paddle_pserver_client -func add(c *pserver.Client) C.client { +func add(c *pserver.Client) C.paddle_pserver_client { mu.Lock() defer mu.Unlock() client := curHandle @@ -60,13 +48,13 @@ func add(c *pserver.Client) C.client { return client } -func get(client C.client) *pserver.Client { +func get(client C.paddle_pserver_client) *pserver.Client { mu.Lock() defer mu.Unlock() return handleMap[client] } -func remove(client C.client) *pserver.Client { +func remove(client C.paddle_pserver_client) *pserver.Client { mu.Lock() defer mu.Unlock() h := handleMap[client] @@ -100,7 +88,7 @@ func (l lister) List() []pserver.Server { } //export paddle_new_pserver_client -func paddle_new_pserver_client(addrs *C.char, selected int) C.client { +func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_client { a := C.GoString(addrs) as := strings.Split(a, ",") servers := 
make([]pserver.Server, len(as)) @@ -113,27 +101,27 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.client { } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.client { +func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.paddle_pserver_client { // TODO(helin): fault tolerant pserver client using etcd. panic("not implemented.") } //export paddle_pserver_client_release -func paddle_pserver_client_release(client C.client) { +func paddle_pserver_client_release(client C.paddle_pserver_client) { remove(client) } //export paddle_begin_init_params -func paddle_begin_init_params(client C.client) C.int { +func paddle_begin_init_params(client C.paddle_pserver_client) C.int { c := get(client) if selected := c.BeginInitParams(); selected { return 1 } - return 0 + return C.PSERVER_OK } //export paddle_init_param -func paddle_init_param(client C.client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { +func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { et := pserver.ElementType(param.element_type) name := C.GoString(param.name) content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len)) @@ -143,31 +131,41 @@ func paddle_init_param(client C.client, param C.paddle_parameter, param_config u } c := get(client) err := c.InitParam(pc) + if err != nil { + if err.Error() == pserver.AlreadyInitialized { + log.Printf("parameter %s already initialized, treat paddle_init_param as successful.\n", name) + return C.PSERVER_OK + } log.Println(err) - return -1 + return C.PSERVER_ERROR } - return 0 + return C.PSERVER_OK } //export paddle_finish_init_params -func paddle_finish_init_params(client C.client) C.int { +func paddle_finish_init_params(client C.paddle_pserver_client) C.int { c := get(client) err := c.FinishInitParams() if err != nil { + if err.Error() == pserver.AlreadyInitialized { + log.Println("parameters already initialized, treat paddle_finish_init_params as successful.") + return C.PSERVER_OK + } + log.Println(err) - return -1 + return C.PSERVER_ERROR } - return 0 + return C.PSERVER_OK } //export paddle_send_grads -func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C.int { +func paddle_send_grads(client C.paddle_pserver_client, grads **C.paddle_gradient, total C.int) C.int { var gs []pserver.Gradient for i := 0; i < int(total); i++ { - grad := (*C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) + grad := *(**C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads)))) et := pserver.ElementType(grad.element_type) name := C.GoString(grad.name) content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len)) @@ -178,83 +176,81 @@ func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C err := c.SendGrads(gs) if err != nil { log.Println(err) - return -1 + return C.PSERVER_ERROR } - return 0 + return C.PSERVER_OK } //export paddle_get_params -func paddle_get_params(client C.client, names **C.char, dst **C.paddle_parameter, total C.int) C.int { +func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, total C.int) C.int { var ns []string for i := 0; i < int(total); i++ { - name := *(**C.char)(unsafe.Pointer((uintptr(unsafe.Pointer(names)) + uintptr(i)*unsafe.Sizeof(*names)))) - ns = append(ns, C.GoString(name)) + param := 
*(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) + ns = append(ns, C.GoString(param.name)) } c := get(client) ps, err := c.GetParams(ns) if err != nil { log.Println(err) - return -1 + return C.PSERVER_ERROR } - for i := 0; i < int(total); i++ { - if i >= len(ps) { - break + if len(ps) != len(ns) { + pn := make([]string, len(ps)) + for i, p := range ps { + pn[i] = p.Name } + log.Printf("pserver returned wrong number of parameters. Requested: %s, returned: %s.\n", strings.Join(ns, ", "), strings.Join(pn, ", ")) + return C.PSERVER_ERROR + } + for i := range ps { + if ns[i] != ps[i].Name { + pn := make([]string, len(ps)) + for i, p := range ps { + pn[i] = p.Name + } + log.Printf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.\n", strings.Join(ns, ", "), strings.Join(pn, ", ")) + return C.PSERVER_ERROR + } + } + + for i := 0; i < int(total); i++ { p := ps[i] param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) - nameReady := false - contentAllocated := false if unsafe.Pointer(param) == nullPtr { - param = (*C.paddle_parameter)(C.calloc(1, C.size_t(unsafe.Sizeof(*param)))) + log.Println("must pre-allocate parameter.") + return C.PSERVER_ERROR } else { - if unsafe.Pointer(param.name) != nullPtr { - if n := C.GoString(param.name); n != p.Name { - log.Println("Warning: the pre-allocated parameter name does not match the parameter name, it will be freed.", n, p.Name) - C.free(unsafe.Pointer(param.name)) - } else { - nameReady = true - } - } - if unsafe.Pointer(param.content) != nullPtr { - if int(param.content_len) == len(p.Content) { - contentAllocated = true - } else { - log.Println("Warning: the pre-allocated content len does not match parameter content len, the pre-allocated content will be freed.", param.content_len, len(p.Content)) - C.free(unsafe.Pointer(param.content)) + if int(param.content_len) != len(p.Content) { + log.Printf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content)) + return C.PSERVER_ERROR } } } - if !nameReady { - param.name = C.CString(p.Name) - } - if !contentAllocated { - param.content = (*C.uchar)(C.malloc(C.size_t(len(p.Content)))) - } C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) param.content_len = C.int(len(p.Content)) param.element_type = C.paddle_element_type(p.ElementType) } - return 0 + return C.PSERVER_OK } //export paddle_save_model -func paddle_save_model(client C.client, path *C.char) C.int { +func paddle_save_model(client C.paddle_pserver_client, path *C.char) C.int { p := C.GoString(path) c := get(client) err := c.Save(p) if err != nil { log.Println(err) - return -1 + return C.PSERVER_ERROR } - return 0 + return C.PSERVER_OK } func main() {} // Required but ignored diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/cclient/test/CMakeLists.txt index 16f84648c1de3a8fdb4595c00bdb7608a152ded2..1a3dd7e5e9e0ff3273fc2be67c48461797b4a6b3 100644 --- a/go/pserver/cclient/test/CMakeLists.txt +++ b/go/pserver/cclient/test/CMakeLists.txt @@ -1,11 +1,22 @@ cmake_minimum_required(VERSION 3.0) -include_directories(${CMAKE_BINARY_DIR}) - add_executable(main main.c) -add_dependencies(main client) +add_dependencies(main paddle_pserver_cclient) +add_executable(test_cclient test_cclient.c) +add_dependencies(test_cclient paddle_pserver_cclient) if(APPLE) set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") +else() + set(CMAKE_EXE_LINKER_FLAGS "-pthread") endif() -target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a) + +if(PROJ_ROOT) + include_directories(${CMAKE_CURRENT_BINARY_DIR}/..) + target_link_libraries(main ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread) + target_link_libraries(test_cclient ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread) +else(PROJ_ROOT) + include_directories(${CMAKE_BINARY_DIR}) + target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) + target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) +endif(PROJ_ROOT) diff --git a/go/pserver/cclient/test/main.c b/go/pserver/cclient/test/main.c index f75a2110b947520dfec1265e56eaf2ba7ac3b51b..07e1b86b43299f72e24bb83621847980eaaaf06e 100644 --- a/go/pserver/cclient/test/main.c +++ b/go/pserver/cclient/test/main.c @@ -1,68 +1,90 @@ #include <stdio.h> -#include "libclient.h" +#include "libpaddle_pserver_cclient.h" -void fail() { - // TODO(helin): fix: gtest using cmake is not working, using this - // hacky way for now. - printf("test failed.\n"); +// TODO(helin): Fix: gtest using cmake is not working, using this +// hacky way for now. +#define fail() \ + fprintf(stderr, "info: %s:%d: ", __FILE__, __LINE__); \ exit(-1); + +void sendGrads(paddle_pserver_client c) { + unsigned char grad_a[2000] = {2}; + unsigned char grad_b[3000] = {3}; + paddle_gradient grad1 = { + "param_a", PADDLE_ELEMENT_TYPE_FLOAT32, grad_a, 2000}; + paddle_gradient grad2 = { + "param_b", PADDLE_ELEMENT_TYPE_FLOAT32, grad_b, 3000}; + paddle_gradient* grads[2] = {&grad1, &grad2}; + if (paddle_send_grads(c, grads, 2)) { + fail(); + } +} + +void getParams(paddle_pserver_client c) { + paddle_parameter param_a; + paddle_parameter param_b; + char name_a[] = "param_a"; + char name_b[] = "param_b"; + // Must pre-allocate the parameter content before calling paddle_get_params. 
+ unsigned char content_a[2000] = {}; + unsigned char content_b[3000] = {}; + param_a.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + param_a.name = name_a; + param_a.content = content_a; + param_a.content_len = 2000; + param_b.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + param_b.name = name_b; + param_b.content = content_b; + param_b.content_len = 3000; + + paddle_parameter* params[2] = {&param_a, &param_b}; + if (paddle_get_params(c, params, 2)) { + fail(); + } } int main() { char addr[] = "localhost:3000"; - client c = paddle_new_pserver_client(addr, 1); + paddle_pserver_client c = paddle_new_pserver_client(addr, 1); retry: if (paddle_begin_init_params(c)) { paddle_parameter param; char name_a[] = "param_a"; char name_b[] = "param_b"; - unsigned char content[] = {0x00, 0x11, 0x22}; + unsigned char content_a[2000] = {1}; + unsigned char content_b[3000] = {0}; param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; param.name = name_a; - param.content = content; - param.content_len = 3; - if (paddle_init_param(c, param, NULL, 0) != 0) { + param.content = content_a; + param.content_len = 2000; + int error = paddle_init_param(c, param, NULL, 0); + if (error != 0) { goto retry; } - param.element_type = PADDLE_ELEMENT_TYPE_INT32; + + param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32; param.name = name_b; - param.content = content; - param.content_len = 3; - if (paddle_init_param(c, param, NULL, 0) != 0) { + param.content = content_b; + param.content_len = 3000; + error = paddle_init_param(c, param, NULL, 0); + if (error != 0) { goto retry; } - if (paddle_finish_init_params(c) != 0) { + + error = paddle_finish_init_params(c); + if (error != 0) { goto retry; } - } else { - fail(); - } - - unsigned char content[] = {0x00, 0x11, 0x22}; - paddle_gradient grads[2] = { - {"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3}, - {"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}}; - - if (!paddle_send_grads(c, grads, 2)) { - fail(); } - paddle_parameter* params[2] = {NULL, NULL}; - char* names[] = {"param_a", "param_b"}; - if (!paddle_get_params(c, names, params, 2)) { - fail(); + int i; + for (i = 0; i < 100; i++) { + sendGrads(c); + getParams(c); } - // get parameters again by reusing the allocated parameter buffers. - if (!paddle_get_params(c, names, params, 2)) { - fail(); - } - - paddle_release_param(params[0]); - paddle_release_param(params[1]); - - if (!paddle_save_model(c, "/tmp/")) { + if (paddle_save_model(c, "/tmp/")) { fail(); } diff --git a/go/pserver/cclient/test/test_cclient.c b/go/pserver/cclient/test/test_cclient.c new file mode 100644 index 0000000000000000000000000000000000000000..0f9c2ef80114d4c5cd887117952f5b7b5d9355f6 --- /dev/null +++ b/go/pserver/cclient/test/test_cclient.c @@ -0,0 +1,117 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "libpaddle_pserver_cclient.h" + +typedef float real; + +void fail() { + // TODO(helin): fix: gtest using cmake is not working, using this + // hacky way for now. 
+ printf("test failed.\n"); + exit(-1); +} + +void print_parameter(paddle_gradient* param) { + if (param == NULL) { + printf("param is NULL!!\n"); + } else { + printf("==== parameter ====\n"); + printf("name: %s\n", param->name); + printf("content_len: %d\n", param->content_len); + printf("content_type: %d\n", param->element_type); + int i; + for (i = 0; i < param->content_len / (int)sizeof(real); ++i) { + printf("%f ", ((float*)param->content)[i]); + } + printf("\n\n"); + } +} + +int main() { + char addr[] = "localhost:3000"; + paddle_pserver_client c = paddle_new_pserver_client(addr, 1); + + char* names[] = {"param_a", "param_b"}; + +retry: + printf("init parameter to pserver:\n"); + + real param_content1[] = {0.1, 0.2, 0.3}; + real param_content2[] = {0.4, 0.5, 0.6}; + paddle_parameter** params = + (paddle_parameter**)malloc(sizeof(paddle_parameter*) * 2); + params[0] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); + params[0]->name = names[0]; + params[0]->content = (unsigned char*)param_content1; + params[0]->content_len = 3 * sizeof(real); + params[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + + params[1] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); + params[1]->name = names[1]; + params[1]->content = (unsigned char*)param_content2; + params[1]->content_len = 3 * sizeof(real); + params[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; + + if (paddle_begin_init_params(c)) { + if (paddle_init_param(c, *params[0], NULL, 0) != 0) { + goto retry; + } + if (paddle_init_param(c, *params[1], NULL, 0) != 0) { + goto retry; + } + if (paddle_finish_init_params(c) != 0) { + goto retry; + } + } else { + fail(); + } + + printf("get inited parameters from pserver:\n"); + // get parameters again by reusing the allocated parameter buffers. + if (paddle_get_params(c, params, 2) != 0) { + fail(); + } + print_parameter(params[0]); + print_parameter(params[1]); + + printf("send gradient to pserver:\n"); + real gradient_content1[] = {0.01, 0.02, 0.03}; + real gradinet_content2[] = {0.04, 0.05, 0.06}; + + paddle_gradient** grads = + (paddle_gradient**)malloc(sizeof(paddle_gradient*) * 2); + grads[0] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); + grads[0]->name = names[0]; + grads[0]->content = (unsigned char*)gradient_content1; + grads[0]->content_len = 3 * sizeof(real); + grads[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + + grads[1] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); + grads[1]->name = names[1]; + grads[1]->content = (unsigned char*)gradinet_content2; + grads[1]->content_len = 3 * sizeof(real); + grads[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; + + printf("print gradient sent to pserver:\n"); + print_parameter(grads[0]); + print_parameter(grads[1]); + + if (paddle_send_grads(c, grads, 2) != 0) { + fail(); + } + + printf("get updated parameters from pserver:\n"); + // get parameters again by reusing the allocated parameter buffers. 
+ if (paddle_get_params(c, params, 2) != 0) { + fail(); + } + print_parameter(params[0]); + print_parameter(params[1]); + + if (paddle_save_model(c, "/tmp/") != 0) { + fail(); + } + + return 0; +} diff --git a/go/pserver/cclient/test/test_mnist.py b/go/pserver/cclient/test/test_mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..c3a3af55e2812fa0c965d22ddaba198f43f3c4ad --- /dev/null +++ b/go/pserver/cclient/test/test_mnist.py @@ -0,0 +1,131 @@ +import paddle.v2 as paddle +import gzip + + +def softmax_regression(img): + predict = paddle.layer.fc(input=img, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def multilayer_perceptron(img): + # The first fully-connected layer + hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu()) + # The second fully-connected layer and the corresponding activation function + hidden2 = paddle.layer.fc(input=hidden1, + size=64, + act=paddle.activation.Relu()) + # The third fully-connected layer, note that the hidden size should be 10, + # which is the number of unique digits + predict = paddle.layer.fc(input=hidden2, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def convolutional_neural_network(img): + # first conv layer + conv_pool_1 = paddle.networks.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + num_channel=1, + pool_size=2, + pool_stride=2, + act=paddle.activation.Tanh()) + # second conv layer + conv_pool_2 = paddle.networks.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + num_channel=20, + pool_size=2, + pool_stride=2, + act=paddle.activation.Tanh()) + # The first fully-connected layer + fc1 = paddle.layer.fc(input=conv_pool_2, + size=128, + act=paddle.activation.Tanh()) + # The softmax layer, note that the hidden size should be 10, + # which is the number of unique digits + predict = paddle.layer.fc(input=fc1, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + + # define network topology + images = paddle.layer.data( + name='pixel', type=paddle.data_type.dense_vector(784)) + label = paddle.layer.data( + name='label', type=paddle.data_type.integer_value(10)) + + # Here we can build the prediction network in different ways. Please + # choose one by uncommenting the corresponding line. 
+ predict = softmax_regression(images) + #predict = multilayer_perceptron(images) + #predict = convolutional_neural_network(images) + + cost = paddle.layer.classification_cost(input=predict, label=label) + parameters = paddle.parameters.create(cost) + + optimizer = paddle.optimizer.Momentum( + learning_rate=0.1 / 128.0, + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) + + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer, + is_local=False, + pserver_spec="localhost:3000") + + lists = [] + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 1000 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + elif isinstance(event, paddle.event.EndPass): + result = trainer.test(reader=paddle.batch( + paddle.dataset.mnist.test(), batch_size=128)) + print "Test with Pass %d, Cost %f, %s\n" % ( + event.pass_id, result.cost, result.metrics) + lists.append((event.pass_id, result.cost, + result.metrics['classification_error_evaluator'])) + + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=128), + event_handler=event_handler, + num_passes=100) + + # find the best pass + best = sorted(lists, key=lambda list: float(list[1]))[0] + print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) + print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) + + test_creator = paddle.dataset.mnist.test() + test_data = [] + for item in test_creator(): + test_data.append((item[0], )) + if len(test_data) == 100: + break + + # output is a softmax layer. It returns probabilities. + # Shape should be (100, 10) + probs = paddle.infer( + output_layer=predict, parameters=parameters, input=test_data) + print probs.shape + + +if __name__ == '__main__': + main() diff --git a/go/pserver/cclient/test/test_train.py b/go/pserver/cclient/test/test_train.py new file mode 100644 index 0000000000000000000000000000000000000000..3f8d5d793bdeb687c9d234005d9e2eae760cc3a7 --- /dev/null +++ b/go/pserver/cclient/test/test_train.py @@ -0,0 +1,60 @@ +import paddle.v2 as paddle +import paddle.v2.dataset.uci_housing as uci_housing + + +def main(): + # init + paddle.init(use_gpu=False, trainer_count=1) + + # network config + x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) + y_predict = paddle.layer.fc(input=x, + param_attr=paddle.attr.Param(name='w'), + size=1, + act=paddle.activation.Linear(), + bias_attr=paddle.attr.Param(name='b')) + y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) + cost = paddle.layer.mse_cost(input=y_predict, label=y) + + # create parameters + parameters = paddle.parameters.create(cost) + + # create optimizer + optimizer = paddle.optimizer.Momentum(momentum=0) + + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer, + is_local=False, + pserver_spec="localhost:3000") + + # event_handler to print training and testing info + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f" % ( + event.pass_id, event.batch_id, event.cost) + + if isinstance(event, paddle.event.EndPass): + if (event.pass_id + 1) % 10 == 0: + result = trainer.test( + reader=paddle.batch( + uci_housing.test(), batch_size=2), + feeding={'x': 0, + 'y': 1}) + print "Test %d, %.2f" % (event.pass_id, 
result.cost) + + # training + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle( + uci_housing.train(), buf_size=500), + batch_size=2), + feeding={'x': 0, + 'y': 1}, + event_handler=event_handler, + num_passes=30) + + +if __name__ == '__main__': + main() diff --git a/go/pserver/client.go b/go/pserver/client.go index f8bd0aa59f30ec7e2b2d318929af96135d3128ed..afe1eecd015b84684329c0e624f3753852d7a8ce 100644 --- a/go/pserver/client.go +++ b/go/pserver/client.go @@ -6,7 +6,7 @@ import ( "sort" "time" - "github.com/PaddlePaddle/Paddle/go/pserver/internal/connection" + "github.com/PaddlePaddle/Paddle/go/connection" ) // TODO(helin): add RPC call retry logic @@ -47,7 +47,7 @@ func NewClient(l Lister, pserverNum int, sel Selector) *Client { // monitorPservers monitors pserver addresses, and updates connection // when the address changes. func (c *Client) monitorPservers(l Lister, pserverNum int) { - knownServers := make([]Server, pserverNum) + lastServers := make([]Server, pserverNum) ticker := time.NewTicker(10 * time.Second) monitor := func() { curServers := make([]Server, pserverNum) @@ -56,25 +56,37 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) { curServers[l.Index] = l } - for i := range knownServers { - if knownServers[i].Addr != curServers[i].Addr { - err := c.pservers[i].Connect(curServers[i].Addr) + for i := range lastServers { + if lastServers[i].Addr == curServers[i].Addr { + continue + } + + if curServers[i].Addr == "" { + err := c.pservers[i].Close() if err != nil { log.Println(err) - - // connect to addr failed, set - // to last known addr in order - // to retry next time. - curServers[i].Addr = knownServers[i].Addr } + + continue } + + err := c.pservers[i].Connect(curServers[i].Addr) + if err != nil { + log.Println(err) + + // connect to addr failed, set + // to last known addr in order + // to retry next time. + curServers[i].Addr = lastServers[i].Addr + } + } - knownServers = curServers + lastServers = curServers } monitor() - for _ = range ticker.C { + for range ticker.C { monitor() } } @@ -93,16 +105,14 @@ func (c *Client) BeginInitParams() bool { // InitParam initializes the parameter on parameter servers. func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error { - var dummy int - return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, &dummy) + return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, nil) } // FinishInitParams tells parameter servers client has sent all // parameters to parameter servers as initialization. 
func (c *Client) FinishInitParams() error { for _, p := range c.pservers { - var dummy int - err := p.Call("Service.FinishInitParams", dummy, &dummy) + err := p.Call("Service.FinishInitParams", 0, nil) if err != nil { return err } @@ -116,8 +126,7 @@ func (c *Client) SendGrads(grads []Gradient) error { errCh := make(chan error, len(grads)) for _, g := range grads { go func(g Gradient) { - var dummy int - err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, &dummy) + err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, nil) errCh <- err }(g) } @@ -196,8 +205,7 @@ func (c *Client) Save(path string) error { errCh := make(chan error, len(c.pservers)) for _, p := range c.pservers { - var dummy int - err := p.Call("Service.Save", path, &dummy) + err := p.Call("Service.Save", path, nil) errCh <- err } diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index a9a0948a51a31a1c7393f716e3dfc436dbf919af..d0371a26a13fac9daecacd0b6a271caa6d830651 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -117,7 +117,7 @@ func TestClientFull(t *testing.T) { for i := range params { if names[i] != params[i].Name { - t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i]) + t.Fatalf("returned parameter order does not match the request: requested name: %s, returned name: %s", names[i], params[i].Name) } } } diff --git a/go/pserver/optimizer.c b/go/pserver/optimizer.c index b8da3ec9592053e3efe00e69d73a8ae259a30a2f..f16ba2cbf8e168a434fdcdb4f1e0ba1e98d91c6b 100644 --- a/go/pserver/optimizer.c +++ b/go/pserver/optimizer.c @@ -32,7 +32,13 @@ int update_SGD(void* optimizer, const void* gradient, int num_bytes) { SGD_optimizer* o = (SGD_optimizer*)optimizer; - // TODO + float* parameter = (float*)buffer; + float* grad = (float*)gradient; + + int i; + for (i = 0; i < num_bytes / sizeof(float); ++i) { + parameter[i] -= o->learning_rate * grad[i]; + } return 0; } diff --git a/go/pserver/service.go b/go/pserver/service.go index d5787b9708bb15629a6e6290ffc97ee9885bc8b8..78a2bfaf6347019333bf1c7ee6cdc04d93ab1370 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -9,8 +9,10 @@ import ( // ElementType is the type of elements of a Parameter. type ElementType int -var ErrAlreadyInitialized = errors.New("pserver already initialized") -var ErrUninitialized = errors.New("pserver not fully initialized") +const ( + AlreadyInitialized = "pserver already initialized" + Uninitialized = "pserver not fully initialized" +) // Supported element types const ( @@ -49,7 +51,7 @@ type Service struct { // NewService creates a new service. 
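The switch above from exported error values to exported message strings exists because an error that crosses net/rpc is deserialized into a new instance, so identity comparison against a predefined error fails; callers compare messages instead. A minimal sketch of the caller side (the helper name is illustrative, not part of this patch):

```go
package check

import "github.com/PaddlePaddle/Paddle/go/pserver"

// isAlreadyInitialized reports whether err is the pserver's "already
// initialized" condition. err may have been deserialized from net/rpc,
// so compare the message string rather than the error value.
func isAlreadyInitialized(err error) bool {
	return err != nil && err.Error() == pserver.AlreadyInitialized
}
```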
func NewService() *Service { - s := &Service{opt: newOptimizer(sgd, 0.01)} + s := &Service{opt: newOptimizer(sgd, 0.005)} s.paramMap = make(map[string]Parameter) s.initialized = make(chan struct{}) return s @@ -59,7 +61,7 @@ func NewService() *Service { func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error { select { case <-s.initialized: - return ErrAlreadyInitialized + return errors.New(AlreadyInitialized) default: } @@ -80,7 +82,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { select { case <-s.initialized: - return ErrAlreadyInitialized + return errors.New(AlreadyInitialized) default: } @@ -94,7 +96,7 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error { select { case <-s.initialized: default: - return ErrUninitialized + return errors.New(Uninitialized) } s.mu.Lock() diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index 4c9fac4536e09013916aadb26af3a86a5a775b4f..b746d13e1ca71e697c464f84d915af029d37120c 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -15,8 +15,7 @@ func TestFull(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - var dummy int - err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy) + err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) if err != nil { t.FailNow() } @@ -25,12 +24,12 @@ func TestFull(t *testing.T) { p1.Name = "param_b" p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} p1.ElementType = pserver.Float32 - err = s.InitParam(pserver.ParameterWithConfig{p1, nil}, &dummy) + err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: nil}, nil) if err != nil { t.FailNow() } - err = s.FinishInitParams(0, &dummy) + err = s.FinishInitParams(0, nil) if err != nil { t.FailNow() } @@ -46,11 +45,11 @@ func TestFull(t *testing.T) { } g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) - err = s.SendGrad(g1, &dummy) + err = s.SendGrad(g1, nil) if err != nil { t.FailNow() } - err = s.SendGrad(g2, &dummy) + err = s.SendGrad(g2, nil) if err != nil { t.FailNow() @@ -74,23 +73,21 @@ func TestFull(t *testing.T) { func TestMultipleInit(t *testing.T) { s := pserver.NewService() - var dummy int - err := s.FinishInitParams(0, &dummy) + err := s.FinishInitParams(0, nil) if err != nil { t.FailNow() } - err = s.FinishInitParams(0, &dummy) - if err != pserver.ErrAlreadyInitialized { + err = s.FinishInitParams(0, nil) + if err.Error() != pserver.AlreadyInitialized { t.FailNow() } } func TestUninitialized(t *testing.T) { s := pserver.NewService() - var dummy int - err := s.SendGrad(pserver.Gradient{}, &dummy) - if err != pserver.ErrUninitialized { + err := s.SendGrad(pserver.Gradient{}, nil) + if err.Error() != pserver.Uninitialized { t.FailNow() } } @@ -98,13 +95,14 @@ func TestUninitialized(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) { s := pserver.NewService() ch := make(chan struct{}, 2) + errCh := make(chan error, 2) var wg sync.WaitGroup wg.Add(1) go func() { var param pserver.Parameter err := s.GetParam("param_a", ¶m) if err != nil { - t.FailNow() + errCh <- err } wg.Done() ch <- struct{}{} @@ -112,10 +110,9 @@ func TestBlockUntilInitialized(t *testing.T) { wg.Add(1) go func() { - var dummy int - err := s.Save("", &dummy) + err := s.Save("", nil) if err != nil { - t.FailNow() + errCh <- err } wg.Done() ch <- struct{}{} @@ -127,6 +124,8 @@ func 
TestBlockUntilInitialized(t *testing.T) { case <-ch: // some function returned before initialization is completed. t.FailNow() + case <-errCh: + t.FailNow() default: } @@ -134,13 +133,12 @@ func TestBlockUntilInitialized(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - var dummy int - err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy) + err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) if err != nil { t.FailNow() } - err = s.FinishInitParams(0, &dummy) + err = s.FinishInitParams(0, nil) if err != nil { t.FailNow() } diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 071bc36c2ded51ba977350aeae15f6d244cea5be..c9433a38de4d005ebe229c55916401a5f82e9ef3 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -16,7 +16,7 @@ set(API_HEADER Internal.h) add_library(paddle_api STATIC ${API_SOURCES}) -add_dependencies(paddle_api gen_proto_cpp) +add_dependencies(paddle_api gen_proto_cpp paddle_pserver_cclient_lib) INCLUDE(${SWIG_USE_FILE}) INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) @@ -45,7 +45,7 @@ SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS ) IF(APPLE) - SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load") + SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load -framework CoreFoundation -framework Security") ELSE(APPLE) SET(START_GROUP "-Xlinker -start-group") SET(END_GROUP "-Xlinker -end-group") diff --git a/paddle/api/Paddle.i b/paddle/api/Paddle.i index 068ba286c07d8854a1a7c7042224a679b50b4957..3237e73745dca58bed923b20851f0f0039a3487c 100644 --- a/paddle/api/Paddle.i +++ b/paddle/api/Paddle.i @@ -179,6 +179,7 @@ namespace std { %newobject ParameterOptimizer::needSpecialTraversal; %newobject ParameterUpdater::createLocalUpdater; %newobject ParameterUpdater::createRemoteUpdater; +%newobject ParameterUpdater::createNewRemoteUpdater; %feature("director") UpdateCallback; %feature("autodoc", 1); // To generate method stub, for code hint in ide diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index da0f157abd68c73c45f498cf9ef2726aac67c95b..7565ea51fe3e71bf81a28e6e4b5a2bbdd085798c 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -841,6 +841,8 @@ public: static ParameterUpdater* createRemoteUpdater(OptimizationConfig* config, int passCount, bool useSparseUpdater); + static ParameterUpdater* createNewRemoteUpdater( + OptimizationConfig* config, const std::string pserverSpec); ~ParameterUpdater(); /** diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp index 79921ea6e787f3c0ebecaad6a9a54bac92211320..eaf8518ae2beaa93bc40ee944c984d142d2bb951 100644 --- a/paddle/api/ParameterUpdater.cpp +++ b/paddle/api/ParameterUpdater.cpp @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include "PaddleAPI.h" #include "PaddleAPIPrivate.h" +#include "paddle/trainer/NewRemoteParameterUpdater.h" #include "paddle/trainer/RemoteParameterUpdater.h" #include "paddle/trainer/ThreadParameterUpdater.h" @@ -28,6 +29,14 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater( return updater; } +ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( + OptimizationConfig *config, const std::string pserverSpec) { + auto updater = new ParameterUpdater(); + updater->m->updater.reset(new paddle::NewRemoteParameterUpdater( + config->m->getConfig(), pserverSpec)); + return updater; +} + ParameterUpdater *ParameterUpdater::createRemoteUpdater( OptimizationConfig *config, int passCount, bool useSparseUpdater) { auto updater = new ParameterUpdater(); diff --git a/paddle/gserver/layers/DetectionUtil.cpp b/paddle/gserver/layers/DetectionUtil.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3e61adc66e60c54250e4f323452aa13045310879 --- /dev/null +++ b/paddle/gserver/layers/DetectionUtil.cpp @@ -0,0 +1,576 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "DetectionUtil.h" + +namespace paddle { + +size_t appendWithPermute(const Matrix& inMatrix, + size_t height, + size_t width, + size_t outTotalSize, + size_t outOffset, + size_t batchSize, + Matrix& outMatrix, + PermMode permMode) { + CHECK_EQ(inMatrix.useGpu(), outMatrix.useGpu()); + bool useGpu = inMatrix.useGpu(); + if (permMode == kNCHWToNHWC) { + size_t inElementCnt = inMatrix.getElementCnt(); + size_t channels = inElementCnt / (height * width * batchSize); + size_t imgSize = height * width; + for (size_t i = 0; i < batchSize; ++i) { + size_t offset = i * (outTotalSize / batchSize) + outOffset; + const MatrixPtr inTmp = Matrix::create( + const_cast(inMatrix.getData()) + i * channels * imgSize, + channels, + imgSize, + false, + useGpu); + MatrixPtr outTmp = + Matrix::create(const_cast(outMatrix.getData()) + offset, + imgSize, + channels, + false, + useGpu); + inTmp->transpose(outTmp, false); + } + return channels * imgSize; + } else { + LOG(FATAL) << "Unkown permute mode"; + } +} + +size_t decomposeWithPermute(const Matrix& inMatrix, + size_t height, + size_t width, + size_t inTotalSize, + size_t inOffset, + size_t batchSize, + Matrix& outMatrix, + PermMode permMode) { + CHECK_EQ(inMatrix.useGpu(), outMatrix.useGpu()); + bool useGpu = inMatrix.useGpu(); + if (permMode == kNHWCToNCHW) { + size_t outElementCnt = outMatrix.getElementCnt(); + size_t channels = outElementCnt / (height * width * batchSize); + size_t imgSize = height * width; + for (size_t i = 0; i < batchSize; ++i) { + size_t offset = i * (inTotalSize / batchSize) + inOffset; + const MatrixPtr inTmp = + Matrix::create(const_cast(inMatrix.getData()) + offset, + imgSize, + channels, + false, + useGpu); + MatrixPtr outTmp = Matrix::create( + const_cast(outMatrix.getData()) + i * channels * imgSize, + channels, + imgSize, + false, + useGpu); + inTmp->transpose(outTmp, false); + } + return channels * 
imgSize; + } else { + LOG(FATAL) << "Unkown permute mode"; + } +} + +real jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2) { + if (bbox2.xMin > bbox1.xMax || bbox2.xMax < bbox1.xMin || + bbox2.yMin > bbox1.yMax || bbox2.yMax < bbox1.yMin) { + return 0.0; + } else { + real interXMin = std::max(bbox1.xMin, bbox2.xMin); + real interYMin = std::max(bbox1.yMin, bbox2.yMin); + real interXMax = std::min(bbox1.xMax, bbox2.xMax); + real interYMax = std::min(bbox1.yMax, bbox2.yMax); + + real interWidth = interXMax - interXMin; + real interHeight = interYMax - interYMin; + real interArea = interWidth * interHeight; + + real bboxArea1 = bbox1.getArea(); + real bboxArea2 = bbox2.getArea(); + + return interArea / (bboxArea1 + bboxArea2 - interArea); + } +} + +void encodeBBoxWithVar(const NormalizedBBox& priorBBox, + const vector& priorBBoxVar, + const NormalizedBBox& gtBBox, + vector& outVec) { + real priorBBoxWidth = priorBBox.getWidth(); + real priorBBoxHeight = priorBBox.getHeight(); + real priorBBoxCenterX = priorBBox.getCenterX(); + real priorBBoxCenterY = priorBBox.getCenterY(); + + real gtBBoxWidth = gtBBox.getWidth(); + real gtBBoxHeight = gtBBox.getHeight(); + real gtBBoxCenterX = gtBBox.getCenterX(); + real gtBBoxCenterY = gtBBox.getCenterY(); + + outVec.clear(); + outVec.push_back((gtBBoxCenterX - priorBBoxCenterX) / priorBBoxWidth / + priorBBoxVar[0]); + outVec.push_back((gtBBoxCenterY - priorBBoxCenterY) / priorBBoxHeight / + priorBBoxVar[1]); + outVec.push_back(std::log(std::fabs(gtBBoxWidth / priorBBoxWidth)) / + priorBBoxVar[2]); + outVec.push_back(std::log(std::fabs(gtBBoxHeight / priorBBoxHeight)) / + priorBBoxVar[3]); +} + +NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox, + const vector& priorBBoxVar, + const vector& locPredData) { + real priorBBoxWidth = priorBBox.getWidth(); + real priorBBoxHeight = priorBBox.getHeight(); + real priorBBoxCenterX = priorBBox.getCenterX(); + real priorBBoxCenterY = priorBBox.getCenterY(); + + real decodedBBoxCenterX = + priorBBoxVar[0] * locPredData[0] * priorBBoxWidth + priorBBoxCenterX; + real decodedBBoxCenterY = + priorBBoxVar[1] * locPredData[1] * priorBBoxHeight + priorBBoxCenterY; + real decodedBBoxWidth = + std::exp(priorBBoxVar[2] * locPredData[2]) * priorBBoxWidth; + real decodedBBoxHeight = + std::exp(priorBBoxVar[3] * locPredData[3]) * priorBBoxHeight; + + NormalizedBBox decodedBBox; + decodedBBox.xMin = decodedBBoxCenterX - decodedBBoxWidth / 2; + decodedBBox.yMin = decodedBBoxCenterY - decodedBBoxHeight / 2; + decodedBBox.xMax = decodedBBoxCenterX + decodedBBoxWidth / 2; + decodedBBox.yMax = decodedBBoxCenterY + decodedBBoxHeight / 2; + + return decodedBBox; +} + +void getBBoxFromPriorData(const real* priorData, + const size_t numBBoxes, + vector& bboxVec) { + size_t outOffset = bboxVec.size(); + bboxVec.resize(bboxVec.size() + numBBoxes); + for (size_t i = 0; i < numBBoxes; ++i) { + NormalizedBBox bbox; + bbox.xMin = *(priorData + i * 8); + bbox.yMin = *(priorData + i * 8 + 1); + bbox.xMax = *(priorData + i * 8 + 2); + bbox.yMax = *(priorData + i * 8 + 3); + bboxVec[outOffset + i] = bbox; + } +} + +void getBBoxVarFromPriorData(const real* priorData, + const size_t num, + vector>& varVec) { + size_t outOffset = varVec.size(); + varVec.resize(varVec.size() + num); + for (size_t i = 0; i < num; ++i) { + vector var; + var.push_back(*(priorData + i * 8 + 4)); + var.push_back(*(priorData + i * 8 + 5)); + var.push_back(*(priorData + i * 8 + 6)); + var.push_back(*(priorData + i * 8 + 7)); + 
varVec[outOffset + i] = var; + } +} + +void getBBoxFromLabelData(const real* labelData, + const size_t numBBoxes, + vector<NormalizedBBox>& bboxVec) { + size_t outOffset = bboxVec.size(); + bboxVec.resize(bboxVec.size() + numBBoxes); + for (size_t i = 0; i < numBBoxes; ++i) { + NormalizedBBox bbox; + bbox.xMin = *(labelData + i * 6 + 1); + bbox.yMin = *(labelData + i * 6 + 2); + bbox.xMax = *(labelData + i * 6 + 3); + bbox.yMax = *(labelData + i * 6 + 4); + real isDifficult = *(labelData + i * 6 + 5); + if (std::abs(isDifficult - 0.0) < 1e-6) + bbox.isDifficult = false; + else + bbox.isDifficult = true; + bboxVec[outOffset + i] = bbox; + } +} + +void getBBoxFromDetectData(const real* detectData, + const size_t numBBoxes, + vector<real>& labelVec, + vector<real>& scoreVec, + vector<NormalizedBBox>& bboxVec) { + size_t outOffset = bboxVec.size(); + labelVec.resize(outOffset + numBBoxes); + scoreVec.resize(outOffset + numBBoxes); + bboxVec.resize(outOffset + numBBoxes); + for (size_t i = 0; i < numBBoxes; ++i) { + labelVec[outOffset + i] = *(detectData + i * 7 + 1); + scoreVec[outOffset + i] = *(detectData + i * 7 + 2); + NormalizedBBox bbox; + bbox.xMin = *(detectData + i * 7 + 3); + bbox.yMin = *(detectData + i * 7 + 4); + bbox.xMax = *(detectData + i * 7 + 5); + bbox.yMax = *(detectData + i * 7 + 6); + bboxVec[outOffset + i] = bbox; + } +} + +void matchBBox(const vector<NormalizedBBox>& priorBBoxes, + const vector<NormalizedBBox>& gtBBoxes, + real overlapThreshold, + vector<int>* matchIndices, + vector<real>* matchOverlaps) { + map<size_t, map<size_t, real>> overlaps; + size_t numPriors = priorBBoxes.size(); + size_t numGTs = gtBBoxes.size(); + + matchIndices->clear(); + matchIndices->resize(numPriors, -1); + matchOverlaps->clear(); + matchOverlaps->resize(numPriors, 0.0); + + // Store the positive overlap between predictions and ground truth + for (size_t i = 0; i < numPriors; ++i) { + for (size_t j = 0; j < numGTs; ++j) { + real overlap = jaccardOverlap(priorBBoxes[i], gtBBoxes[j]); + if (overlap > 1e-6) { + (*matchOverlaps)[i] = std::max((*matchOverlaps)[i], overlap); + overlaps[i][j] = overlap; + } + } + } + // Bipartite matching + vector<int> gtPool; + for (size_t i = 0; i < numGTs; ++i) { + gtPool.push_back(i); + } + while (gtPool.size() > 0) { + // Find the most overlapped gt and corresponding predictions + int maxPriorIdx = -1; + int maxGTIdx = -1; + real maxOverlap = -1.0; + for (map<size_t, map<size_t, real>>::iterator it = overlaps.begin(); + it != overlaps.end(); + ++it) { + size_t i = it->first; + if ((*matchIndices)[i] != -1) { + // The prediction already has matched ground truth or is ignored + continue; + } + for (size_t p = 0; p < gtPool.size(); ++p) { + int j = gtPool[p]; + if (it->second.find(j) == it->second.end()) { + // No overlap between the i-th prediction and j-th ground truth + continue; + } + // Find the maximum overlapped pair + if (it->second[j] > maxOverlap) { + maxPriorIdx = (int)i; + maxGTIdx = (int)j; + maxOverlap = it->second[j]; + } + } + } + if (maxPriorIdx == -1) { + break; + } else { + (*matchIndices)[maxPriorIdx] = maxGTIdx; + (*matchOverlaps)[maxPriorIdx] = maxOverlap; + gtPool.erase(std::find(gtPool.begin(), gtPool.end(), maxGTIdx)); + } + } + + // Get the most overlapped ground truth for the rest of the prediction bboxes + for (map<size_t, map<size_t, real>>::iterator it = overlaps.begin(); + it != overlaps.end(); + ++it) { + size_t i = it->first; + if ((*matchIndices)[i] != -1) { + // The prediction already has matched ground truth or is ignored + continue; + } + int maxGTIdx = -1; + real maxOverlap = -1; + for (size_t j = 0; j < numGTs; ++j) { + if (it->second.find(j) == it->second.end()) { + // No overlap between the i-th prediction and j-th ground truth + continue; + } + // Find the maximum overlapped pair + real overlap = it->second[j]; + if (overlap > maxOverlap && overlap >= overlapThreshold) { + maxGTIdx = j; + maxOverlap = overlap; + } + } + if (maxGTIdx != -1) { + (*matchIndices)[i] = maxGTIdx; + (*matchOverlaps)[i] = maxOverlap; + } + } +} + +pair<size_t, size_t> generateMatchIndices( + const Matrix& priorValue, + const size_t numPriorBBoxes, + const Matrix& gtValue, + const int* gtStartPosPtr, + const size_t seqNum, + const vector<vector<real>>& maxConfScore, + const size_t batchSize, + const real overlapThreshold, + const real negOverlapThreshold, + const size_t negPosRatio, + vector<vector<int>>* matchIndicesVecPtr, + vector<vector<int>>* negIndicesVecPtr) { + vector<NormalizedBBox> priorBBoxes; // share same prior bboxes + getBBoxFromPriorData(priorValue.getData(), numPriorBBoxes, priorBBoxes); + size_t totalPos = 0; + size_t totalNeg = 0; + for (size_t n = 0; n < batchSize; ++n) { + vector<int> matchIndices; + vector<int> negIndices; + vector<real> matchOverlaps; + matchIndices.resize(numPriorBBoxes, -1); + matchOverlaps.resize(numPriorBBoxes, 0.0); + size_t numGTBBoxes = 0; + if (n < seqNum) numGTBBoxes = gtStartPosPtr[n + 1] - gtStartPosPtr[n]; + if (!numGTBBoxes) { + matchIndicesVecPtr->push_back(matchIndices); + negIndicesVecPtr->push_back(negIndices); + continue; + } + vector<NormalizedBBox> gtBBoxes; + getBBoxFromLabelData( + gtValue.getData() + gtStartPosPtr[n] * 6, numGTBBoxes, gtBBoxes); + + matchBBox( + priorBBoxes, gtBBoxes, overlapThreshold, &matchIndices, &matchOverlaps); + + size_t numPos = 0; + size_t numNeg = 0; + for (size_t i = 0; i < matchIndices.size(); ++i) + if (matchIndices[i] != -1) ++numPos; + totalPos += numPos; + vector<pair<real, size_t>> scoresIndices; + for (size_t i = 0; i < matchIndices.size(); ++i) + if (matchIndices[i] == -1 && matchOverlaps[i] < negOverlapThreshold) { + scoresIndices.push_back(std::make_pair(maxConfScore[n][i], i)); + ++numNeg; + } + numNeg = std::min(static_cast<size_t>(numPos * negPosRatio), numNeg); + std::sort(scoresIndices.begin(), + scoresIndices.end(), + sortScorePairDescend<size_t>); + for (size_t i = 0; i < numNeg; ++i) + negIndices.push_back(scoresIndices[i].second); + totalNeg += numNeg; + matchIndicesVecPtr->push_back(matchIndices); + negIndicesVecPtr->push_back(negIndices); + } + return std::make_pair(totalPos, totalNeg); +} + +void getMaxConfidenceScores(const real* confData, + const size_t batchSize, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t backgroundId, + vector<vector<real>>* maxConfScoreVecPtr) { + maxConfScoreVecPtr->clear(); + for (size_t i = 0; i < batchSize; ++i) { + vector<real> maxConfScore; + for (size_t j = 0; j < numPriorBBoxes; ++j) { + int offset = j * numClasses; + real maxVal = -FLT_MAX; + real maxPosVal = -FLT_MAX; + real maxScore = 0.0; + for (size_t c = 0; c < numClasses; ++c) { + maxVal = std::max(confData[offset + c], maxVal); + if (c != backgroundId) + maxPosVal = std::max(confData[offset + c], maxPosVal); + } + real sum = 0.0; + for (size_t c = 0; c < numClasses; ++c) + sum += std::exp(confData[offset + c] - maxVal); + maxScore = std::exp(maxPosVal - maxVal) / sum; + maxConfScore.push_back(maxScore); + } + confData += numPriorBBoxes * numClasses; + maxConfScoreVecPtr->push_back(maxConfScore); + } +} + +template <typename T> +bool sortScorePairDescend(const pair<real, T>& pair1, + const pair<real, T>& pair2) { + return pair1.first > pair2.first; +} + +template <> +bool sortScorePairDescend(const pair<real, NormalizedBBox>& pair1, + const pair<real, NormalizedBBox>& pair2) { + return pair1.first > pair2.first; +} + +void applyNMSFast(const vector<NormalizedBBox>& bboxes, + const real* confScoreData, + size_t classIdx, + size_t topK,
+ real confThreshold, + real nmsThreshold, + size_t numPriorBBoxes, + size_t numClasses, + vector<size_t>* indices) { + vector<pair<real, size_t>> scores; + for (size_t i = 0; i < numPriorBBoxes; ++i) { + size_t confOffset = i * numClasses + classIdx; + if (confScoreData[confOffset] > confThreshold) + scores.push_back(std::make_pair(confScoreData[confOffset], i)); + } + std::stable_sort(scores.begin(), scores.end(), sortScorePairDescend<size_t>); + if (topK > 0 && topK < scores.size()) scores.resize(topK); + while (scores.size() > 0) { + const size_t idx = scores.front().second; + bool keep = true; + for (size_t i = 0; i < indices->size(); ++i) { + if (keep) { + const size_t savedIdx = (*indices)[i]; + real overlap = jaccardOverlap(bboxes[idx], bboxes[savedIdx]); + keep = overlap <= nmsThreshold; + } else { + break; + } + } + if (keep) indices->push_back(idx); + scores.erase(scores.begin()); + } +} + +size_t getDetectionIndices( + const real* confData, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t backgroundId, + const size_t batchSize, + const size_t confThreshold, + const size_t nmsTopK, + const real nmsThreshold, + const size_t keepTopK, + const vector<vector<NormalizedBBox>>& allDecodedBBoxes, + vector<map<size_t, vector<size_t>>>* allDetectionIndices) { + size_t totalKeepNum = 0; + for (size_t n = 0; n < batchSize; ++n) { + const vector<NormalizedBBox>& decodedBBoxes = allDecodedBBoxes[n]; + size_t numDetected = 0; + map<size_t, vector<size_t>> indices; + size_t confOffset = n * numPriorBBoxes * numClasses; + for (size_t c = 0; c < numClasses; ++c) { + if (c == backgroundId) continue; + applyNMSFast(decodedBBoxes, + confData + confOffset, + c, + nmsTopK, + confThreshold, + nmsThreshold, + numPriorBBoxes, + numClasses, + &(indices[c])); + numDetected += indices[c].size(); + } + if (keepTopK > 0 && numDetected > keepTopK) { + vector<pair<real, pair<size_t, size_t>>> scoreIndexPairs; + for (size_t c = 0; c < numClasses; ++c) { + const vector<size_t>& labelIndices = indices[c]; + for (size_t i = 0; i < labelIndices.size(); ++i) { + size_t idx = labelIndices[i]; + scoreIndexPairs.push_back( + std::make_pair((confData + confOffset)[idx * numClasses + c], + std::make_pair(c, idx))); + } + } + std::sort(scoreIndexPairs.begin(), + scoreIndexPairs.end(), + sortScorePairDescend<pair<size_t, size_t>>); + scoreIndexPairs.resize(keepTopK); + map<size_t, vector<size_t>> newIndices; + for (size_t i = 0; i < scoreIndexPairs.size(); ++i) { + size_t label = scoreIndexPairs[i].second.first; + size_t idx = scoreIndexPairs[i].second.second; + newIndices[label].push_back(idx); + } + allDetectionIndices->push_back(newIndices); + totalKeepNum += keepTopK; + } else { + allDetectionIndices->push_back(indices); + totalKeepNum += numDetected; + } + } + return totalKeepNum; +} + +void getDetectionOutput(const real* confData, + const size_t numKept, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t batchSize, + const vector<map<size_t, vector<size_t>>>& allIndices, + const vector<vector<NormalizedBBox>>& allDecodedBBoxes, + Matrix& out) { + MatrixPtr outBuffer; + Matrix::resizeOrCreate(outBuffer, numKept, 7, false, false); + real* bufferData = outBuffer->getData(); + size_t count = 0; + for (size_t n = 0; n < batchSize; ++n) { + for (map<size_t, vector<size_t>>::const_iterator it = allIndices[n].begin(); + it != allIndices[n].end(); + ++it) { + size_t label = it->first; + const vector<size_t>& indices = it->second; + const vector<NormalizedBBox>& decodedBBoxes = allDecodedBBoxes[n]; + for (size_t i = 0; i < indices.size(); ++i) { + size_t idx = indices[i]; + size_t confOffset = n * numPriorBBoxes * numClasses + idx * numClasses; + bufferData[count * 7] = n; + bufferData[count * 7 + 1] = label; + bufferData[count * 7 + 2] = (confData + confOffset)[label]; + NormalizedBBox clippedBBox = clipBBox(decodedBBoxes[idx]); + bufferData[count * 7 + 3] = clippedBBox.xMin; + bufferData[count * 7 + 4] = clippedBBox.yMin; + bufferData[count * 7 + 5] = clippedBBox.xMax; + bufferData[count * 7 + 6] = clippedBBox.yMax; + ++count; + } + } + } + out.copyFrom(bufferData, numKept * 7); +} + +NormalizedBBox clipBBox(const NormalizedBBox& bbox) { + real realOne = static_cast<real>(1.0); + real realZero = static_cast<real>(0.0); + NormalizedBBox clippedBBox; + clippedBBox.xMin = std::max(std::min(bbox.xMin, realOne), realZero); + clippedBBox.yMin = std::max(std::min(bbox.yMin, realOne), realZero); + clippedBBox.xMax = std::max(std::min(bbox.xMax, realOne), realZero); + clippedBBox.yMax = std::max(std::min(bbox.yMax, realOne), realZero); + return clippedBBox; +} + +} // namespace paddle diff --git a/paddle/gserver/layers/DetectionUtil.h b/paddle/gserver/layers/DetectionUtil.h new file mode 100644 index 0000000000000000000000000000000000000000..fe4f9f075e4cf011c97f68f49598a828d62327b3 --- /dev/null +++ b/paddle/gserver/layers/DetectionUtil.h @@ -0,0 +1,307 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include <map> +#include <utility> +#include <vector> +#include "paddle/math/Matrix.h" + +using std::vector; +using std::pair; +using std::map; + +namespace paddle { + +template <typename T> +struct BBoxBase { + BBoxBase(T xMin, T yMin, T xMax, T yMax) + : xMin(xMin), yMin(yMin), xMax(xMax), yMax(yMax), isDifficult(false) {} + + BBoxBase() {} + + T getWidth() const { return xMax - xMin; } + + T getHeight() const { return yMax - yMin; } + + T getCenterX() const { return (xMin + xMax) / 2; } + + T getCenterY() const { return (yMin + yMax) / 2; } + + T getArea() const { return getWidth() * getHeight(); } + + // coordinate of bounding box + T xMin; + T yMin; + T xMax; + T yMax; + // whether difficult object (e.g. object with heavy occlusion is difficult) + bool isDifficult; +}; + +struct NormalizedBBox : BBoxBase<real> { + NormalizedBBox() : BBoxBase<real>() {} +}; + +enum PermMode { kNCHWToNHWC, kNHWCToNCHW }; + +/** + * @brief First permute input matrix then append to output matrix + */ +size_t appendWithPermute(const Matrix& inMatrix, + size_t height, + size_t width, + size_t outTotalSize, + size_t outOffset, + size_t batchSize, + Matrix& outMatrix, + PermMode permMode); + +/** + * @brief First permute input matrix then decompose to output + */ +size_t decomposeWithPermute(const Matrix& inMatrix, + size_t height, + size_t width, + size_t totalSize, + size_t offset, + size_t batchSize, + Matrix& outMatrix, + PermMode permMode); + +/** + * @brief Compute Jaccard overlap between two bboxes.
+ * @param bbox1 The first bbox + * @param bbox2 The second bbox + */ +real jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2); + +/** + * @brief Compute offset parameters between prior bbox and ground truth bbox, + * taking the variances of the prior bbox into account + * @param priorBBox Input prior bbox + * @param priorBBoxVar Variance parameters of prior bbox + * @param gtBBox Groundtruth bbox + * @param outVec Output vector + */ +void encodeBBoxWithVar(const NormalizedBBox& priorBBox, + const vector<real>& priorBBoxVar, + const NormalizedBBox& gtBBox, + vector<real>& outVec); + +/** + * @brief Decode prior bbox with offset parameters, + * taking the variances of the prior bbox into account + * @param priorBBox Prior bbox to be decoded + * @param priorBBoxVar Variance parameters of prior bbox + * @param locPredData Offset parameters + */ +NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox, + const vector<real>& priorBBoxVar, + const vector<real>& locPredData); + +/** + * @brief Extract bboxes from prior matrix, the layout is + * xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ... + * @param priorData Matrix of prior value + * @param numBBoxes Number of bbox to be extracted + * @param bboxVec Append to the vector + */ +void getBBoxFromPriorData(const real* priorData, + const size_t numBBoxes, + vector<NormalizedBBox>& bboxVec); + +/** + * @brief Extract labels, scores and bboxes from detection matrix, the layout is + * imageId | label | score | xmin | ymin | xmax | ymax + * @param detectData Matrix of detection value + * @param numBBoxes Number of bbox to be extracted + * @param labelVec Label of bbox + * @param scoreVec Score of bbox + * @param bboxVec Append to the vector + */ +void getBBoxFromDetectData(const real* detectData, + const size_t numBBoxes, + vector<real>& labelVec, + vector<real>& scoreVec, + vector<NormalizedBBox>& bboxVec); + +/** + * @brief Extract variances from prior matrix, the layout is + * xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ... + * @param priorData Matrix of prior value + * @param num Number to be extracted + * @param varVec Append to the vector + */ +void getBBoxVarFromPriorData(const real* priorData, + const size_t num, + vector<vector<real>>& varVec); + +/** + * @brief Extract bboxes from label matrix, the layout is + * class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ... + * @param labelData Matrix of label value + * @param numBBoxes Number to be extracted + * @param bboxVec Append to the vector + */ +void getBBoxFromLabelData(const real* labelData, + const size_t numBBoxes, + vector<NormalizedBBox>& bboxVec); + +/** +* @brief Match prior bbox to groundtruth bbox, the strategy is: +1. Find the most overlapped bbox pair (prior and groundtruth) +2. For the rest of the prior bboxes, find the most overlapped groundtruth bbox +* @param priorBBoxes prior bbox +* @param gtBBoxes groundtruth bbox +* @param overlapThreshold Low boundary of overlap (judge whether matched) +* @param matchIndices For each prior bbox, groundtruth bbox index if matched +otherwise -1 +* @param matchOverlaps For each prior bbox, overlap with all groundtruth bboxes +*/ +void matchBBox(const vector<NormalizedBBox>& priorBBoxes, + const vector<NormalizedBBox>& gtBBoxes, + real overlapThreshold, + vector<int>* matchIndices, + vector<real>* matchOverlaps); + +/** +* @brief Generate positive bboxes and negative bboxes, +|negative bboxes| / |positive bboxes| is at most negPosRatio +* @param priorValue Prior value +* @param numPriorBBoxes Number of prior bbox +* @param gtValue Groundtruth value +* @param gtStartPosPtr Since groundtruth value stored as sequence type, +this parameter indicates start position of each record +* @param seqNum Number of sequence +* @param maxConfScore Classification score for prior bbox, used to mine +negative examples +* @param batchSize Image number +* @param overlapThreshold Low boundary of overlap +* @param negOverlapThreshold Upper boundary of overlap (judge negative example) +* @param negPosRatio Control number of negative bboxes +* @param matchIndicesVecPtr Save indices of matched prior bbox +* @param negIndicesVecPtr Save indices of negative prior bbox +*/ +pair<size_t, size_t> generateMatchIndices( + const Matrix& priorValue, + const size_t numPriorBBoxes, + const Matrix& gtValue, + const int* gtStartPosPtr, + const size_t seqNum, + const vector<vector<real>>& maxConfScore, + const size_t batchSize, + const real overlapThreshold, + const real negOverlapThreshold, + const size_t negPosRatio, + vector<vector<int>>* matchIndicesVecPtr, + vector<vector<int>>* negIndicesVecPtr); + +/** + * @brief Get max confidence score for each prior bbox + * @param confData Confidence scores, layout is + * class1 score | class2 score | ... | classN score ... + * @param batchSize Image number + * @param numPriorBBoxes Prior bbox number + * @param numClasses Classes number + * @param backgroundId Background id + * @param maxConfScoreVecPtr Output + */ +void getMaxConfidenceScores(const real* confData, + const size_t batchSize, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t backgroundId, + vector<vector<real>>* maxConfScoreVecPtr); + +template <typename T> +bool sortScorePairDescend(const pair<real, T>& pair1, + const pair<real, T>& pair2); + +template <> +bool sortScorePairDescend(const pair<real, NormalizedBBox>& pair1, + const pair<real, NormalizedBBox>& pair2); + +/** + * @brief Do NMS for bboxes to remove duplicated bboxes + * @param bboxes BBoxes to apply NMS + * @param confScoreData Confidence scores + * @param classIdx Class to do NMS + * @param topK Number to keep + * @param confThreshold Low boundary of confidence score + * @param nmsThreshold Threshold of overlap + * @param numPriorBBoxes Total number of prior bboxes + * @param numClasses Total class number + * @param indices Indices of high quality bboxes + */ +void applyNMSFast(const vector<NormalizedBBox>& bboxes, + const real* confScoreData, + size_t classIdx, + size_t topK, + real confThreshold, + real nmsThreshold, + size_t numPriorBBoxes, + size_t numClasses, + vector<size_t>* indices); + +/** + * @brief Get detection results which satisfy the requirements + * @param numPriorBBoxes Prior bbox number + * @param numClasses Class number + * @param backgroundId Background class + * @param batchSize Image number + * @param confThreshold Threshold of class confidence + * @param nmsTopK Used in NMS operation to keep top k bbox + * @param nmsThreshold Used in NMS, threshold of overlap + * @param keepTopK How many bboxes are kept in an image + * @param allDecodedBBoxes Decoded bboxes for all images + * @param allDetectionIndices Save detection bbox indices + */ +size_t getDetectionIndices( + const real* confData, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t backgroundId, + const size_t batchSize, + const size_t confThreshold, + const size_t nmsTopK, + const real nmsThreshold, + const size_t keepTopK, + const vector<vector<NormalizedBBox>>& allDecodedBBoxes, + vector<map<size_t, vector<size_t>>>* allDetectionIndices); + +/** + * @brief Get detection results + * @param confData Confidence scores + * @param numPriorBBoxes Prior bbox number + * @param numClasses Class number + * @param batchSize Image number + * @param allIndices Indices of predicted bboxes + * @param allDecodedBBoxes BBoxes decoded + * @param out Output matrix + * image number | label | confidence score | xMin | yMin | xMax | yMax + */ +void getDetectionOutput(const real* confData, + const size_t numKept, + const size_t numPriorBBoxes, + const size_t numClasses, + const size_t batchSize, + const vector<map<size_t, vector<size_t>>>& allIndices, + const vector<vector<NormalizedBBox>>& allDecodedBBoxes, + Matrix& out); + +NormalizedBBox clipBBox(const NormalizedBBox& bbox); + +} // namespace paddle diff --git a/paddle/trainer/CMakeLists.txt b/paddle/trainer/CMakeLists.txt index 06c019f0a97757b658d1bc3405246d8f47632aad..9d246b6690134d96e9a262c6ac64d998536128a9 100644 --- a/paddle/trainer/CMakeLists.txt +++ b/paddle/trainer/CMakeLists.txt @@ -4,6 +4,7 @@ set(TRAINER_SOURCES ParameterUpdater.cpp ParamUtil.cpp RemoteParameterUpdater.cpp + NewRemoteParameterUpdater.cpp Tester.cpp Trainer.cpp TrainerInternal.cpp @@ -16,6 +17,7 @@ set(TRAINER_HEADERS ParameterUpdater.h ParamUtil.h RemoteParameterUpdater.h + NewRemoteParameterUpdater.h Tester.h TesterConfig.h Trainer.h @@ -32,7 +34,7 @@ add_style_check_target(paddle_trainer_lib add_style_check_target(paddle_trainer_lib ${TRAINER_HEADERS})
add_dependencies(paddle_trainer_lib - gen_proto_cpp) + gen_proto_cpp paddle_pserver_cclient_lib) macro(add_paddle_exe TARGET_NAME) add_executable(${TARGET_NAME} ${ARGN}) @@ -56,3 +58,10 @@ install(TARGETS paddle_trainer paddle_merge_model set_target_properties(paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) set_target_properties(paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) + +if(APPLE) + set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") +endif() + +target_link_libraries(paddle_trainer ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a) +target_link_libraries(paddle_trainer_lib ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a) diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f25ce2f7f06f6da0feab27da61b8e49689cbe213 --- /dev/null +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -0,0 +1,86 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "NewRemoteParameterUpdater.h" +#include "Trainer.h" +#include "paddle/utils/Stat.h" + +DECLARE_int32(trainer_id); +DECLARE_string(save_dir); + +namespace paddle { +NewRemoteParameterUpdater::NewRemoteParameterUpdater( + const OptimizationConfig &config, const std::string pserverSpec) + : parameterClient_(-1), + newParameters_(nullptr), + newGradients_(nullptr), + pserverSpec_(pserverSpec) {} + +void NewRemoteParameterUpdater::init( + const std::vector<ParameterPtr> &parameters) { + ParameterUpdater::init(parameters); + + for (auto &para : parameters_) { + para->getBuf(PARAMETER_VALUE)->zeroMem(); + para->getBuf(PARAMETER_GRADIENT)->zeroMem(); + } + + // create parameter server client. + parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), + FLAGS_trainer_id == 0); + + // init new parameter and gradient. + newParameters_ = initNewParameter(PARAMETER_VALUE); + newGradients_ = initNewParameter(PARAMETER_GRADIENT); + + // init parameters: one trainer gets the opportunity to initialize the + // parameters and send them to the parameter servers; the others get the + // initialized parameters from the parameter servers. + if (paddle_begin_init_params(parameterClient_)) { + LOG(INFO) << "paddle_begin_init_params start"; + for (int i = 0; i < parameterSize(); ++i) { + auto paramConfig = parameters_[i]->getConfig(); + std::string bytes = paramConfig.SerializeAsString(); + const char *array = bytes.data(); + int size = (int)bytes.size(); + paddle_init_param( + parameterClient_, *newParameters_[i], (void *)array, size); + } + paddle_finish_init_params(parameterClient_); + LOG(INFO) << "paddle_begin_init_params done"; + } else { + paddle_get_params(parameterClient_, newParameters_, parameterSize()); + } + + LOG(INFO) << "NewRemoteParameterUpdater initialized"; +} + +void NewRemoteParameterUpdater::updateImpl(Parameter *para) {} + +void NewRemoteParameterUpdater::finishBatch(real cost) { + // send gradients to the parameter servers. + paddle_send_grads(parameterClient_, newGradients_, parameterSize()); + // get the updated parameters from parameterClient. + paddle_get_params(parameterClient_, newParameters_, parameterSize()); + + // clear gradients after the parameter update. + for (auto &para : parameters_) { + para->getBuf(PARAMETER_GRADIENT)->zeroMem(); + } +} + +void NewRemoteParameterUpdater::startPass() {} + +bool NewRemoteParameterUpdater::finishPass() { return true; } +} diff --git a/paddle/trainer/NewRemoteParameterUpdater.h b/paddle/trainer/NewRemoteParameterUpdater.h new file mode 100644 index 0000000000000000000000000000000000000000..f735185f62b3491a63e34cfc4a2ef73dae12243e --- /dev/null +++ b/paddle/trainer/NewRemoteParameterUpdater.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include <functional> +#include <thread> +#include "ParameterUpdater.h" +#include "libpaddle_pserver_cclient.h" +#include "paddle/pserver/ParameterClient2.h" +#include "paddle/utils/Queue.h" +#include "paddle/utils/Util.h" + +namespace paddle { + +/** + * New remote parameter updater for dense parameters, which uses the Go + * pserver cclient. + */ +class NewRemoteParameterUpdater : public ParameterUpdater { +public: + NewRemoteParameterUpdater(const OptimizationConfig& config, + const std::string pserverSpec); + ~NewRemoteParameterUpdater() { + releaseNewParameter(newParameters_); + releaseNewParameter(newGradients_); + if (parameterClient_ >= 0) paddle_pserver_client_release(parameterClient_); + } + + /** + * initialize the internal parameter client and itself. + */ + virtual void init(const std::vector<ParameterPtr>& parameters); + /** + * @brief start batch + * + * @note one-batch training is stateful, which helps with performance + * tuning and sgd optimization if necessary. + */ + virtual PassType startBatch(int64_t batchSize) { return PASS_TRAIN; } + + /** + * send parameters to pservers and get returned parameters + * from all pservers if necessary. + */ + virtual void finishBatch(real cost); + virtual void startPass(); + virtual bool finishPass(); + +protected: + /** + * work that needs to be done after finishBatch + */ + virtual void updateImpl(Parameter* para); + +private: + int parameterSize() { return (int)parameters_.size(); } + + /** + * initialize the parameter array for the Go paddle pserver cclient. + * @param new_params + * @param type + */ + paddle_parameter** initNewParameter(ParameterType type) { + paddle_parameter** new_params = + (paddle_parameter**)malloc(sizeof(paddle_parameter*) * parameterSize()); + for (int i = 0; i < parameterSize(); ++i) { + new_params[i] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); + memset(new_params[i], 0, sizeof(paddle_parameter)); + } + + for (int i = 0; i < parameterSize(); ++i) { + ParameterPtr param = parameters_[i]; + new_params[i]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; + new_params[i]->name = (char*)param->getName().c_str(); + new_params[i]->content = + (unsigned char*)(param->getBuf(type).get()->getData()); + new_params[i]->content_len = + (int)param->getBuf(type).get()->getSize() * sizeof(real); + } + return new_params; + } + + void releaseNewParameter(paddle_parameter** newParams) { + if (newParams != nullptr) { + for (int i = 0; i < parameterSize(); ++i) { + free(newParams[i]); + } + free(newParams); + } + } + +protected: + /// internal parameter client object for exchanging data with pserver + paddle_pserver_client parameterClient_; + /// the parameters for new pserver client + paddle_parameter** newParameters_; + /// the gradients for new pserver client + paddle_parameter** newGradients_; + /// the specification of parameter servers, e.g. "host1:port,host2:port" + std::string pserverSpec_; +}; + +} // namespace paddle diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index e78dc4f3b4d8501b9864ed6c7e38afa98720deb2..2825ceb255dac325a5e6259e33c649587a37e88b 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -126,6 +126,7 @@ def init_config_environment( g_config=TrainerConfig(), g_layer_map={}, g_parameter_map={}, + g_parameter_initializer_map={}, g_extended_config_funcs={}, # store command args of paddle_trainer @@ -439,22 +440,22 @@ def model_type(name): @config_class class Bias(Cfg): - def __init__( - self, - parameter_name=None, - learning_rate=None, - momentum=None, - decay_rate=None, - decay_rate_l1=None, - initial_mean=None, - initial_std=None, - initial_strategy=None, - initial_smart=None, - num_batches_regularization=None, - sparse_remote_update=None, - gradient_clipping_threshold=None, - is_static=None, - is_shared=None, ): + def __init__(self, + parameter_name=None, + learning_rate=None, + momentum=None, + decay_rate=None, + decay_rate_l1=None, + initial_mean=None, + initial_std=None, + initial_strategy=None, + initial_smart=None, + num_batches_regularization=None, + sparse_remote_update=None, + gradient_clipping_threshold=None, + is_static=None, + is_shared=None, + initializer=None): self.add_keys(locals()) @@ -465,6 +466,7 @@ class Input(Cfg): def __init__( self, input_layer_name, parameter_name=None, + initializer=None, learning_rate=None, momentum=None, decay_rate=None, @@ -521,6 +523,7 @@ class Projection(Input): initial_std=None, initial_strategy=None, initial_smart=None, + initializer=None, num_batches_regularization=None, sparse_remote_update=None, sparse_update=None, @@ -1494,7 +1497,8 @@ class LayerBase(object): gradient_clipping_threshold=bias.
gradient_clipping_threshold, is_static=bias.is_static, - is_shared=bias.is_shared, ) + is_shared=bias.is_shared, + initializer=bias.initializer) if for_self: self.config.bias_parameter_name = bias.parameter_name else: @@ -1551,7 +1555,8 @@ class LayerBase(object): format=format, is_static=input_config.is_static, is_shared=input_config.is_shared, - update_hooks=input_config.update_hooks) + update_hooks=input_config.update_hooks, + initializer=input_config.initializer) def set_layer_size(self, size): if self.config.size == 0: @@ -3236,7 +3241,8 @@ def Parameter(name, need_compact=None, is_static=None, is_shared=None, - update_hooks=None): + update_hooks=None, + initializer=None): config_assert(name not in g_parameter_map, 'Duplicated parameter name: ' + name) @@ -3324,6 +3330,11 @@ def Parameter(name, para.update_hooks.extend(update_hooks) g_parameter_map[name] = para + if initializer is not None: + config_assert( + callable(initializer), + "parameter initializer should be a callable object") + g_parameter_initializer_map[name] = initializer @config_func diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py index d1167a234caed3753c6beedfc89b01054e3688e1..4100697c9c3770f1b748ea630d5f8193167fe7fc 100644 --- a/python/paddle/trainer_config_helpers/attrs.py +++ b/python/paddle/trainer_config_helpers/attrs.py @@ -95,6 +95,10 @@ class ParameterAttribute(object): :param sparse_update: Enable sparse update for this parameter. It will enable both local and remote sparse update. :type sparse_update: bool + :param initializer: If not None, it should be a callable object which accepts + a parameter name and returns a numpy array for the initial + value of the parameter + :type initializer: callable object """ def __init__(self, @@ -109,7 +113,8 @@ learning_rate=None, momentum=None, gradient_clipping_threshold=None, - sparse_update=False): + sparse_update=False, + initializer=None): self.attr = {} if is_static: @@ -161,6 +166,8 @@ is_compatible_with(gradient_clipping_threshold, float): self.attr['gradient_clipping_threshold'] = \ gradient_clipping_threshold + if initializer is not None: + self.attr['initializer'] = initializer def set_default_parameter_name(self, name): """ diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 5e99d4a241b7fe2b0f9ff4ba191db4b341c4d30e..1ef2dceca910e806bddf17c95d1c345a144d9e31 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -45,7 +45,12 @@ class Optimizer(object): return swig_api.ParameterUpdater.createRemoteUpdater( self.__opt_conf__, pass_num, use_sparse_updater) - def create_updater(self, is_local, num_passes, use_sparse_updater): + def __create_new_remote_updater__(self, pserver_spec): + return swig_api.ParameterUpdater.createNewRemoteUpdater( + self.__opt_conf__, pserver_spec) + + def create_updater(self, is_local, num_passes, use_sparse_updater, + pserver_spec): """ create proper parameter_updater by configuration.
:param is_local: create local or remote parameter updater @@ -64,8 +69,12 @@ class Optimizer(object): if is_local: parameter_updater = self.__create_local_updater__() else: - parameter_updater = self.__create_remote_updater__( - num_passes, use_sparse_updater) + if pserver_spec is None: + parameter_updater = self.__create_remote_updater__( + num_passes, use_sparse_updater) + else: + parameter_updater = self.__create_new_remote_updater__( + pserver_spec) return parameter_updater diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index 64805d0c504b876f4d1f6657fe94457534a0b278..ad20241b98302f136326ae491c6723a6c12ae284 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -1,6 +1,7 @@ import numpy as np import py_paddle.swig_paddle as api from paddle.proto.ParameterConfig_pb2 import ParameterConfig +import paddle.trainer.config_parser as cp import struct import tarfile import cStringIO @@ -18,8 +19,11 @@ def create(layers): """ topology = Topology(layers) pool = Parameters() + initializers = cp.g_parameter_initializer_map for param in topology.proto().parameters: pool.__append_config__(param) + if param.name in initializers: + pool[param.name] = initializers[param.name](param.name) return pool diff --git a/python/paddle/v2/tests/test_parameters.py b/python/paddle/v2/tests/test_parameters.py index ebb182caab6430862a8e4da2ae4ea6b1e72f726c..45372e7dd0ec7cbdd6a2eb5c0397ef7e74284cd0 100644 --- a/python/paddle/v2/tests/test_parameters.py +++ b/python/paddle/v2/tests/test_parameters.py @@ -11,6 +11,9 @@ except ImportError: sys.exit(0) import paddle.v2.parameters as parameters +import paddle.v2.data_type as data_type +import paddle.v2.layer as layer +from paddle.v2.attr import ParamAttr from paddle.proto.ParameterConfig_pb2 import ParameterConfig import random import cStringIO @@ -55,6 +58,25 @@ class TestParameters(unittest.TestCase): p1 = params_dup.get(name) self.assertTrue(numpy.isclose(p0, p1).all()) + def test_initializer(self): + def initializer(name): + assert name == "fc.w" + mat = numpy.ones((3, 2), dtype=numpy.float32) + mat[1, 1] = 2 + return mat + + x = layer.data(name="x", type=data_type.dense_vector(3)) + y = layer.fc(x, + size=2, + bias_attr=False, + param_attr=ParamAttr( + name="fc.w", initializer=initializer)) + params = parameters.create(y) + val = params["fc.w"] + assert val.shape == (3, 2) + expected = numpy.array([[1, 1], [1, 2], [1, 1]], numpy.float32) + assert numpy.logical_and.reduce(numpy.reshape(val == expected, 6)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 8fdb67cc2688a67ed815af396b214e339195c73f..f9658a8c5df9562073c8a187074a6cb3459ac5d9 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -49,7 +49,8 @@ class SGD(object): parameters, update_equation, extra_layers=None, - is_local=True): + is_local=True, + pserver_spec=None): if not isinstance(parameters, v2_parameters.Parameters): raise TypeError('parameters should be parameters') @@ -63,6 +64,7 @@ class SGD(object): self.__parameters__ = parameters self.__topology_in_proto__ = topology.proto() self.__is_local__ = is_local + self.__pserver_spec__ = pserver_spec self.__use_sparse_updater__ = self.__topology__.use_sparse_updater() # # In local mode, disable sparse_remote_update. 
@@ -126,7 +128,8 @@ class SGD(object): __check_train_args__(**locals()) self.__parameter_updater__ = self.__optimizer__.create_updater( - self.__is_local__, num_passes, self.__use_sparse_updater__) + self.__is_local__, num_passes, self.__use_sparse_updater__, + self.__pserver_spec__) self.__parameter_updater__.init(self.__gradient_machine__) self.__gradient_machine__.start()
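A quick usage sketch of the parameter `initializer` hook introduced in this change (mirroring the new `test_initializer` case in `test_parameters.py`; the layer and parameter names are illustrative only):

```python
import numpy

import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.parameters as parameters
from paddle.v2.attr import ParamAttr


def normal_initializer(name):
    # Called once per parameter when parameters.create() runs; it must
    # return a numpy array matching the parameter's shape.
    assert name == "fc.w"
    return numpy.random.normal(0, 0.01, size=(3, 2)).astype(numpy.float32)


x = layer.data(name="x", type=data_type.dense_vector(3))
y = layer.fc(x,
             size=2,
             bias_attr=False,
             param_attr=ParamAttr(name="fc.w", initializer=normal_initializer))

params = parameters.create(y)  # "fc.w" now starts from the custom values
```

`parameters.create` consults `g_parameter_initializer_map`, which `Parameter()` fills from the `initializer` field threaded through `ParameterAttribute`, `Bias`, and `Input` above.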
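Likewise, a minimal sketch of driving the new remote updater end to end (the pserver addresses and the `mse_cost` topology are placeholders; the `SGD(..., is_local=False, pserver_spec=...)` path is what the `trainer.py` and `optimizer.py` hunks above add):

```python
import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)

# A toy regression topology; any cost layer works here.
x = paddle.layer.data(name="x", type=paddle.data_type.dense_vector(3))
y_predict = paddle.layer.fc(input=x, size=1)
y = paddle.layer.data(name="y", type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)

params = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(momentum=0.9, learning_rate=1e-3)

# With pserver_spec set and is_local=False, create_updater() picks
# createNewRemoteUpdater(), i.e. the NewRemoteParameterUpdater backed by
# the Go pserver cclient, instead of the old ParameterClient2 path.
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=params,
                             update_equation=optimizer,
                             is_local=False,
                             pserver_spec="127.0.0.1:3000,127.0.0.1:3001")
```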