提交 23b1a274 编写于 作者: X xzl

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into improve_pruning

......@@ -50,6 +50,7 @@ before_install:
# protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
- pip install rarfile
- eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
- |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script:
......
......@@ -127,6 +127,7 @@ endif(WITH_GPU)
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(python)
add_subdirectory(go/pserver/cclient)
if(WITH_DOC)
add_subdirectory(doc)
......
......@@ -59,6 +59,11 @@ context_projection
.. autoclass:: paddle.v2.layer.context_projection
:noindex:
row_conv
--------
.. autoclass:: paddle.v2.layer.row_conv
:noindex:
Image Pooling Layer
===================
......@@ -346,6 +351,12 @@ sampling_id
.. autoclass:: paddle.v2.layer.sampling_id
:noindex:
multiplex
---------
.. autoclass:: paddle.v2.layer.multiplex
:noindex:
Slicing and Joining Layers
==========================
......
......@@ -74,14 +74,25 @@ typedef enum {
typedef struct {
char* name;
paddle_element_type element_type;
void* content;
unsigned char* content;
int content_len;
} paddle_parameter, paddle_gradient;
typedef struct paddle_pserver_client paddle_pserver_client;
typedef int paddle_pserver_client;
paddle_pserver_client* paddle_new_pserver_client();
void paddle_pserver_client_release(paddle_pserver_client* client);
/**
* @brief creates a pserver client that talks to etcd for coordination.
*/
paddle_pserver_client paddle_new_etcd_pserver_client(char* etcd_addr);
/**
* @brief creates a pserver client given pserver addresses.
*
* @param pserver_addrs comma-separated pserver addresses.
* @param selected if current pserver client is selected to initialize all parameter servers.
*/
paddle_pserver_client paddle_new_pserver_client(char* pserver_addrs, int selected);
void paddle_pserver_client_release(paddle_pserver_client c);
/**
* @brief paddle_begin_init_params begins to initialize parameters on
......@@ -95,7 +106,7 @@ void paddle_pserver_client_release(paddle_pserver_client* client);
* @return 1 if the trainer is selected to initialize parameter
* servers, otherwise 0.
*/
int paddle_begin_init_params(paddle_pserver_client* client);
int paddle_begin_init_params(paddle_pserver_client client);
/**
* @brief paddle_init_param initializes the parameter on parameter
......@@ -109,7 +120,7 @@ int paddle_begin_init_params(paddle_pserver_client* client);
* @paddle_begin_init_param). Or simply exit the program and wait for
* the cluster management system to restart the trainer.
*/
int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, const unsigned char* param_config_proto, int config_len);
int paddle_init_param(paddle_pserver_client client, paddle_parameter param, const unsigned char* param_config_proto, int config_len);
/**
* @brief paddle_finish_init_params tells parameter servers client has
......@@ -120,7 +131,7 @@ int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, con
* @paddle_begin_init_param). Or simply exit the program and wait for
* the cluster management system to restart the trainer.
*/
int paddle_finish_init_params(paddle_pserver_client* client);
int paddle_finish_init_params(paddle_pserver_client client);
/**
* @brief paddle_send_grads sends gradients to parameter servers for
......@@ -131,7 +142,7 @@ int paddle_finish_init_params(paddle_pserver_client* client);
* @param learning_rate the learning rate for the gradients.
* @return 0 if successful, otherwise -1.
*/
int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grads, int len);
int paddle_send_grads(paddle_pserver_client client, const paddle_gradient* grads, int len);
/**
* @brief paddle_get_params gets parameters from parameter servers.
......@@ -139,13 +150,15 @@ int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grad
* paddle_get_params will block until parameters are initialized on
* the parameter servers.
*
* @param names the array of names of the parameters to get.
* @param dst the destination array of parameters to save to.
* @param dst the destination array of parameter pointers to save to.
* The parameter pointer must be pre-popullated with required parameter name,
* and the content of parameter must be pre-allocated of the size of required
* parameter on pserver.
* @param len the length of the names array and the paddle_parameter
* array.
* @return 0 if successful, otherwise -1.
*/
int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_parameter* dst, int len);
int paddle_get_params(paddle_pserver_client client, paddle_parameter** dst, int len);
/**
* @brief paddle_save_model indicates parameters to save the parameter
......@@ -154,5 +167,5 @@ int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_
* @param path the path to save parameters.
* @return 0 if successful, otherwise -1.
*/
int paddle_save_model(paddle_pserver_client* client, const char* path);
int paddle_save_model(paddle_pserver_client client, const char* path);
```
# Design Doc: Remote Parameter Updater for Cluster Train
For an overview of distribute training, please refer to [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater that will use parameter server cclient [The Client Library of Parameter Server Design Doc](pserver_client.md) to manage and update parameters.
## Parameter Updater
Parameter Updater is used by trainer to manage and update parameter, there are mainly two kind of parameter updater: local and remote, since this design is for cluster train, we will only discuss remote parameter updater here.
### Remote Parameter Updater
Remote Parameter Updater manage parameters through remote parameter server with the client that communicate with pserver([The Client Library of Parameter Server Design Doc](pserver_client.md))
In PaddlePaddle Python V2 API, trainer is implemented in python, and the trainer will hold a instance of parameter updater and call it's functions directly. In this design, we will also expose the api of RemoteParameterUpdater to python with swig.
#### Sparse Remote Parameter Updater
Since we will only implement dense parameter management new, the mechanism for sparse parameter will be discussed in next stage.
### Interface Design
TBD
......@@ -17,7 +17,7 @@ function(GO_LIBRARY NAME BUILD_TYPE)
endif()
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
file(RELATIVE_PATH rel ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
file(RELATIVE_PATH rel ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
# find Paddle directory.
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
......@@ -32,12 +32,14 @@ function(GO_LIBRARY NAME BUILD_TYPE)
# will use the local changes in Paddle rather than checkout Paddle
# in github.
add_custom_target(copyPaddle
COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH})
COMMAND rm -rf ${PADDLE_IN_GOPATH}/Paddle
COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}/Paddle)
add_dependencies(goGet copyPaddle)
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
-gcflags=-shared -asmflags=-shared -installsuffix=_shared -a
-o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
${CMAKE_GO_FLAGS} ${GO_SOURCE}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
......
package main
import (
"fmt"
"net"
"net/http"
"net/rpc"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/namsral/flag"
"github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/recordio"
)
func main() {
port := flag.Int("port", 8080, "port of the master server.")
dataset := flag.String("training_dataset", "", "dataset: comma separated path to RecordIO paths, supports golb patterns.")
faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).")
taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.")
taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.")
chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.")
flag.Parse()
if *dataset == "" {
panic("no dataset specified.")
}
if *faultTolerance {
panic("fault tolernance not implemented.")
}
var chunks []master.Chunk
var paths []string
ss := strings.Split(*dataset, ",")
fmt.Println(ss)
for _, s := range ss {
match, err := filepath.Glob(s)
if err != nil {
panic(err)
}
paths = append(paths, match...)
}
if len(paths) == 0 {
panic("no valid datset specified.")
}
idx := 0
for _, path := range paths {
f, err := os.Open(path)
if err != nil {
panic(err)
}
index, err := recordio.LoadIndex(f)
if err != nil {
panic(err)
}
f.Close()
count := index.NumChunks()
for i := 0; i < count; i++ {
chunk := master.Chunk{
Idx: idx,
Path: path,
Index: *index.ChunkIndex(i),
}
chunks = append(chunks, chunk)
}
}
s := master.NewService(chunks, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
s := master.NewService(*chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
err := rpc.Register(s)
if err != nil {
panic(err)
......
......@@ -2,6 +2,7 @@ package connection
import (
"errors"
"log"
"net/rpc"
"sync"
)
......@@ -21,6 +22,18 @@ func New() *Conn {
return c
}
// Close closes the connection.
func (c *Conn) Close() error {
c.mu.Lock()
defer c.mu.Unlock()
if c.client == nil {
return nil
}
return c.client.Close()
}
// Connect connects the connection to a address.
func (c *Conn) Connect(addr string) error {
c.mu.Lock()
......@@ -50,12 +63,20 @@ func (c *Conn) Connect(addr string) error {
c.waitConn = nil
}
} else {
err := client.Close()
if err != nil {
log.Println(err)
}
return errors.New("client already set from a concurrent goroutine")
}
return nil
}
// TODO(helin): refactor Call to be able to perform given retry
// policy.
// Call make a RPC call.
//
// Call will be blocked until the connection to remote RPC service
......
package master
import (
"log"
"time"
"github.com/PaddlePaddle/Paddle/go/connection"
)
// Addresser provide the address of the master server.
type Addresser interface {
Address() string
}
// Client is the client of the master server.
type Client struct {
conn *connection.Conn
}
// NewClient creates a new Client.
func NewClient(addr Addresser) *Client {
c := &Client{}
c.conn = connection.New()
go c.monitorMaster(addr)
return c
}
func (c *Client) monitorMaster(addr Addresser) {
lastMaster := ""
monitor := func() {
// get the lastest address of the master server,
// connect to the new address once address changed.
curMaster := addr.Address()
if curMaster != lastMaster {
if curMaster == "" {
err := c.conn.Close()
if err != nil {
log.Println(err)
}
} else {
err := c.conn.Connect(curMaster)
if err != nil {
log.Println(err)
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curMaster = lastMaster
}
}
}
lastMaster = curMaster
}
monitor()
ticker := time.NewTicker(10 * time.Second)
for _ = range ticker.C {
monitor()
}
}
// SetDataset set dataset for the master server to dispatch.
//
// SetDataset can be call multiple times from different nodes. But
// only the first call will be honored.
func (c *Client) SetDataset(globPaths []string) error {
return c.conn.Call("Service.SetDataset", globPaths, nil)
}
// GetTask gets a new task from the master server.
func (c *Client) GetTask() (Task, error) {
var t Task
err := c.conn.Call("Service.GetTask", 0, &t)
return t, err
}
// TaskFinished tells the master server a task is finished.
func (c *Client) TaskFinished(taskID int) error {
return c.conn.Call("Service.TaskFinished", taskID, nil)
}
package master_test
import (
"fmt"
"net"
"net/http"
"net/rpc"
"os"
"strconv"
"strings"
"testing"
"time"
log "github.com/sirupsen/logrus"
"github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/recordio"
)
const (
totalTask = 20
chunkPerTask = 10
)
var port int
func init() {
log.SetLevel(log.ErrorLevel)
l, err := net.Listen("tcp", ":0")
if err != nil {
panic(err)
}
ss := strings.Split(l.Addr().String(), ":")
p, err := strconv.Atoi(ss[len(ss)-1])
if err != nil {
panic(err)
}
port = p
go func(l net.Listener) {
s := master.NewService(chunkPerTask, time.Second, 1)
server := rpc.NewServer()
err := server.Register(s)
if err != nil {
panic(err)
}
mux := http.NewServeMux()
mux.Handle(rpc.DefaultRPCPath, server)
err = http.Serve(l, mux)
if err != nil {
panic(err)
}
}(l)
}
type addresser string
func (a addresser) Address() string {
return string(a)
}
func TestClientFull(t *testing.T) {
const p = "/tmp/master_client_test_0"
f, err := os.Create(p)
if err != nil {
panic(err)
}
for i := 0; i < totalTask*chunkPerTask; i++ {
w := recordio.NewWriter(f, -1, -1)
w.Write(nil)
// call Close to force RecordIO writing a chunk.
w.Close()
}
f.Close()
c := master.NewClient(addresser(fmt.Sprintf(":%d", port)))
c.SetDataset([]string{p})
checkOnePass := func(i int) {
var tasks []master.Task
for i := 0; i < totalTask; i++ {
task, err := c.GetTask()
if err != nil {
t.Fatal(i, err)
}
tasks = append(tasks, task)
}
_, err = c.GetTask()
if err == nil {
t.Fatal(i, "should get error.")
}
err = c.TaskFinished(tasks[0].ID)
if err != nil {
t.Fatal(err)
}
tasks = tasks[1:]
task, err := c.GetTask()
if err != nil {
t.Fatal(err)
}
tasks = append(tasks, task)
for _, task := range tasks {
err = c.TaskFinished(task.ID)
if err != nil {
t.Fatal(i, err)
}
}
}
for i := 0; i < 10; i++ {
checkOnePass(i)
}
}
......@@ -2,29 +2,25 @@ package master
import (
"errors"
"log"
"os"
"path/filepath"
"sync"
"time"
"github.com/PaddlePaddle/recordio"
)
log "github.com/sirupsen/logrus"
const (
targetTaskCount = 300
)
// errors
var (
ErrNoMoreTask = errors.New("no more task for current pass")
ErrPendingTaskNotFound = errors.New("pending task not found")
"github.com/PaddlePaddle/recordio"
)
// Service is the master server service.
type Service struct {
timeoutDur time.Duration
timeoutMax int
chunksPerTask int
timeoutDur time.Duration
timeoutMax int
ready chan struct{}
mu sync.Mutex
initDone bool
taskQueues taskQueues
}
......@@ -55,7 +51,6 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
if len(cur.Task.Chunks) > 0 {
cur.Task.ID = id
id++
result = append(result, cur)
}
......@@ -63,21 +58,21 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
}
// NewService creates a new service.
func NewService(chunks []Chunk, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service {
func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service {
s := &Service{}
s.chunksPerTask = chunksPerTask
s.timeoutDur = timeoutDur
s.timeoutMax = timeoutMax
s.taskQueues = taskQueues{}
s.taskQueues.Pending = make(map[int]taskEntry)
s.taskQueues.Todo = partition(chunks, chunksPerTask)
s.ready = make(chan struct{})
return s
}
// Chunk is a chunk of data consisted of several data instances.
type Chunk struct {
Idx int // index of the chunk within the file
Path string
Index recordio.Index // block index
Index recordio.Index // chunk index
}
// Task is the basic unit of data instances assigned to trainers.
......@@ -105,74 +100,205 @@ func (s *Service) snapshot() error {
return nil
}
// GetTask gets a new task from the service.
func (s *Service) GetTask(dummy int, task *Task) error {
func readChunks(globPaths []string) ([]Chunk, error) {
var chunks []Chunk
var paths []string
for _, s := range globPaths {
match, err := filepath.Glob(s)
if err != nil {
return nil, err
}
paths = append(paths, match...)
}
if len(paths) == 0 {
return nil, errors.New("no valid dataset specified")
}
for _, path := range paths {
f, err := os.Open(path)
if err != nil {
return nil, err
}
index, err := recordio.LoadIndex(f)
if err != nil {
return nil, err
}
err = f.Close()
if err != nil {
return nil, err
}
count := index.NumChunks()
for i := 0; i < count; i++ {
chunk := Chunk{
Path: path,
Index: *index.ChunkIndex(i),
}
chunks = append(chunks, chunk)
}
}
return chunks, nil
}
// SetDataset sets dataset to dispatch for the master server.
//
// SetDataset can be call multiple times. But only the first call will
// be honored.
func (s *Service) SetDataset(globPaths []string, dummy *int) error {
if len(globPaths) == 0 {
return errors.New("no dataset specified")
}
s.mu.Lock()
defer s.mu.Unlock()
if s.initDone {
// Already initialized. All trainer will call
// SetDataset, but we only handle the first one. Treat
// other calls as successful but do nothing.
return nil
}
if len(s.taskQueues.Todo) == 0 {
return ErrNoMoreTask
chunks, err := readChunks(globPaths)
if err != nil {
return err
}
t := s.taskQueues.Todo[0]
t.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:]
s.taskQueues.Pending[t.Task.ID] = t
err := s.snapshot()
s.taskQueues.Todo = partition(chunks, s.chunksPerTask)
err = s.snapshot()
if err != nil {
log.Errorln(err)
return err
}
time.AfterFunc(s.timeoutDur, func(taskID int, epoch int) func() {
return func() {
s.mu.Lock()
defer s.mu.Unlock()
close(s.ready)
s.initDone = true
return nil
}
t, ok := s.taskQueues.Pending[taskID]
if !ok {
return
}
func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
return func() {
s.mu.Lock()
defer s.mu.Unlock()
if t.Epoch != epoch {
// new epoch, task launched after the
// schedule of this timeout check.
return
t, ok := s.taskQueues.Pending[taskID]
if !ok {
return
}
if t.Epoch != epoch {
// new epoch, task launched after the
// schedule of this timeout check.
return
}
defer func() {
err := s.snapshot()
if err != nil {
log.Errorln(err)
}
}()
delete(s.taskQueues.Pending, t.Task.ID)
defer func() {
err := s.snapshot()
if err != nil {
log.Println(err)
}
}()
t.NumTimeout++
if t.NumTimeout > s.timeoutMax {
log.Warningf("Task %v failed %d times, discard.\n", t.Task, t.NumTimeout)
s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task)
return
}
delete(s.taskQueues.Pending, t.Task.ID)
log.Warningf("Task %v failed %d times, retry.\n", t.Task, t.NumTimeout)
s.taskQueues.Todo = append(s.taskQueues.Todo, t)
}
}
t.NumTimeout++
if t.NumTimeout > s.timeoutMax {
s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task)
return
// GetTask gets a new task from the service.
func (s *Service) GetTask(dummy int, task *Task) error {
select {
case <-s.ready:
}
s.mu.Lock()
defer s.mu.Unlock()
if len(s.taskQueues.Todo) == 0 {
if len(s.taskQueues.Done) == 0 {
if len(s.taskQueues.Pending) == 0 {
err := errors.New("all task failed")
log.Warningln(err)
return err
}
s.taskQueues.Todo = append(s.taskQueues.Todo, t)
// TODO(helin): client need to retry in this
// error case. Gotcha: RPC client can't
// compare returned error with predefined
// errors like io.EOF, because the error
// instance deserialized from RPC is a
// different instance than the error defined
// in package. So we need to figure out a way
// for client to check this error correctly.
err := errors.New("no more available task")
log.Warningln(err)
return err
}
}(t.Task.ID, t.Epoch))
s.taskQueues.Todo = s.taskQueues.Done
s.taskQueues.Done = nil
log.Infoln("No more todo task, but trainer is requesting task to do. Move all done task to todo.")
}
t := s.taskQueues.Todo[0]
t.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:]
s.taskQueues.Pending[t.Task.ID] = t
err := s.snapshot()
if err != nil {
return err
}
*task = t.Task
log.Infof("Task #%d dispatched\n", task.ID)
time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.ID, t.Epoch))
return nil
}
// TaskFinished tell the service that a task is finished.
func (s *Service) TaskFinished(taskID int, dummy *int) error {
select {
case <-s.ready:
}
s.mu.Lock()
defer s.mu.Unlock()
log.Infof("Task %d finished\n", taskID)
t, ok := s.taskQueues.Pending[taskID]
if !ok {
return ErrPendingTaskNotFound
err := errors.New("pending task not found")
log.Warningln(err)
return err
}
// task finished, reset timeout
t.NumTimeout = 0
s.taskQueues.Done = append(s.taskQueues.Done, t)
delete(s.taskQueues.Pending, taskID)
return s.snapshot()
if len(s.taskQueues.Pending) == 0 && len(s.taskQueues.Todo) == 0 {
log.Infoln("No more todo and pending task, start a new pass.")
s.taskQueues.Todo = append(s.taskQueues.Todo, s.taskQueues.Done...)
s.taskQueues.Done = nil
}
err := s.snapshot()
if err != nil {
log.Errorln(err)
}
return err
}
......@@ -9,5 +9,15 @@ project(cxx_go C Go)
include(golang)
include(flags)
go_library(client STATIC)
go_library(paddle_pserver_cclient STATIC)
if(PROJ_ROOT)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a
COMMAND cp ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.h ${PROJ_ROOT}/paddle/trainer/
COMMAND cp ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a ${PROJ_ROOT}/paddle/trainer/
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS paddle_pserver_cclient)
add_custom_target(paddle_pserver_cclient_lib ALL DEPENDS ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a)
endif(PROJ_ROOT)
add_subdirectory(test)
......@@ -19,21 +19,9 @@ typedef struct {
int content_len;
} paddle_parameter, paddle_gradient;
static inline void paddle_release_param(paddle_parameter* param) {
if (param != NULL) {
if (param->name != NULL) {
free(param->name);
}
if (param->content != NULL) {
free(param->content);
}
free(param);
}
}
typedef int client;
typedef int paddle_pserver_client;
#define PSERVER_ERROR -1
#define PSERVER_OK 0
*/
import "C"
......@@ -48,10 +36,10 @@ import (
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex
var handleMap = make(map[C.client]*pserver.Client)
var curHandle C.client
var handleMap = make(map[C.paddle_pserver_client]*pserver.Client)
var curHandle C.paddle_pserver_client
func add(c *pserver.Client) C.client {
func add(c *pserver.Client) C.paddle_pserver_client {
mu.Lock()
defer mu.Unlock()
client := curHandle
......@@ -60,13 +48,13 @@ func add(c *pserver.Client) C.client {
return client
}
func get(client C.client) *pserver.Client {
func get(client C.paddle_pserver_client) *pserver.Client {
mu.Lock()
defer mu.Unlock()
return handleMap[client]
}
func remove(client C.client) *pserver.Client {
func remove(client C.paddle_pserver_client) *pserver.Client {
mu.Lock()
defer mu.Unlock()
h := handleMap[client]
......@@ -100,7 +88,7 @@ func (l lister) List() []pserver.Server {
}
//export paddle_new_pserver_client
func paddle_new_pserver_client(addrs *C.char, selected int) C.client {
func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_client {
a := C.GoString(addrs)
as := strings.Split(a, ",")
servers := make([]pserver.Server, len(as))
......@@ -113,27 +101,27 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.client {
}
//export paddle_new_etcd_pserver_client
func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.client {
func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.paddle_pserver_client {
// TODO(helin): fault tolerant pserver client using etcd.
panic("not implemented.")
}
//export paddle_pserver_client_release
func paddle_pserver_client_release(client C.client) {
func paddle_pserver_client_release(client C.paddle_pserver_client) {
remove(client)
}
//export paddle_begin_init_params
func paddle_begin_init_params(client C.client) C.int {
func paddle_begin_init_params(client C.paddle_pserver_client) C.int {
c := get(client)
if selected := c.BeginInitParams(); selected {
return 1
}
return 0
return C.PSERVER_OK
}
//export paddle_init_param
func paddle_init_param(client C.client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int {
func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int {
et := pserver.ElementType(param.element_type)
name := C.GoString(param.name)
content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len))
......@@ -143,31 +131,41 @@ func paddle_init_param(client C.client, param C.paddle_parameter, param_config u
}
c := get(client)
err := c.InitParam(pc)
if err != nil {
if err.Error() == pserver.AlreadyInitialized {
log.Printf("parameter %s already initialized, treat paddle_init_param as sucessful.\n", name)
return C.PSERVER_OK
}
log.Println(err)
return -1
return C.PSERVER_ERROR
}
return 0
return C.PSERVER_OK
}
//export paddle_finish_init_params
func paddle_finish_init_params(client C.client) C.int {
func paddle_finish_init_params(client C.paddle_pserver_client) C.int {
c := get(client)
err := c.FinishInitParams()
if err != nil {
if err.Error() == pserver.AlreadyInitialized {
log.Println("parameters already initialized, treat paddle_finish_init_params as sucessful.")
return C.PSERVER_OK
}
log.Println(err)
return -1
return C.PSERVER_ERROR
}
return 0
return C.PSERVER_OK
}
//export paddle_send_grads
func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C.int {
func paddle_send_grads(client C.paddle_pserver_client, grads **C.paddle_gradient, total C.int) C.int {
var gs []pserver.Gradient
for i := 0; i < int(total); i++ {
grad := (*C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads))))
grad := *(**C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads))))
et := pserver.ElementType(grad.element_type)
name := C.GoString(grad.name)
content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len))
......@@ -178,83 +176,81 @@ func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C
err := c.SendGrads(gs)
if err != nil {
log.Println(err)
return -1
return C.PSERVER_ERROR
}
return 0
return C.PSERVER_OK
}
//export paddle_get_params
func paddle_get_params(client C.client, names **C.char, dst **C.paddle_parameter, total C.int) C.int {
func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, total C.int) C.int {
var ns []string
for i := 0; i < int(total); i++ {
name := *(**C.char)(unsafe.Pointer((uintptr(unsafe.Pointer(names)) + uintptr(i)*unsafe.Sizeof(*names))))
ns = append(ns, C.GoString(name))
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
ns = append(ns, C.GoString(param.name))
}
c := get(client)
ps, err := c.GetParams(ns)
if err != nil {
log.Println(err)
return -1
return C.PSERVER_ERROR
}
for i := 0; i < int(total); i++ {
if i >= len(ps) {
break
if len(ps) != len(ns) {
pn := make([]string, len(ps))
for i, p := range ps {
pn[i] = p.Name
}
log.Printf("pserver returned wrong number of parameters. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", "))
return C.PSERVER_ERROR
}
for i := range ps {
if ns[i] != ps[i].Name {
pn := make([]string, len(ps))
for i, p := range ps {
pn[i] = p.Name
}
log.Printf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", "))
return C.PSERVER_ERROR
}
}
for i := 0; i < int(total); i++ {
p := ps[i]
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
nameReady := false
contentAllocated := false
if unsafe.Pointer(param) == nullPtr {
param = (*C.paddle_parameter)(C.calloc(1, C.size_t(unsafe.Sizeof(*param))))
log.Println("must pre-allocate parameter.")
return C.PSERVER_ERROR
} else {
if unsafe.Pointer(param.name) != nullPtr {
if n := C.GoString(param.name); n != p.Name {
log.Println("Warning: the pre-allocated parameter name does not match the parameter name, it will be freed.", n, p.Name)
C.free(unsafe.Pointer(param.name))
} else {
nameReady = true
}
}
if unsafe.Pointer(param.content) != nullPtr {
if int(param.content_len) == len(p.Content) {
contentAllocated = true
} else {
log.Println("Warning: the pre-allocated content len does not match parameter content len, the pre-allocated content will be freed.", param.content_len, len(p.Content))
C.free(unsafe.Pointer(param.content))
if int(param.content_len) != len(p.Content) {
log.Printf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content))
return C.PSERVER_ERROR
}
}
}
if !nameReady {
param.name = C.CString(p.Name)
}
if !contentAllocated {
param.content = (*C.uchar)(C.malloc(C.size_t(len(p.Content))))
}
C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content)))
param.content_len = C.int(len(p.Content))
param.element_type = C.paddle_element_type(p.ElementType)
}
return 0
return C.PSERVER_OK
}
//export paddle_save_model
func paddle_save_model(client C.client, path *C.char) C.int {
func paddle_save_model(client C.paddle_pserver_client, path *C.char) C.int {
p := C.GoString(path)
c := get(client)
err := c.Save(p)
if err != nil {
log.Println(err)
return -1
return C.PSERVER_ERROR
}
return 0
return C.PSERVER_OK
}
func main() {} // Required but ignored
cmake_minimum_required(VERSION 3.0)
include_directories(${CMAKE_BINARY_DIR})
add_executable(main main.c)
add_dependencies(main client)
add_dependencies(main paddle_pserver_cclient)
add_executable(test_cclient test_cclient.c)
add_dependencies(test_cclient paddle_pserver_cclient)
if(APPLE)
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
else()
set(CMAKE_EXE_LINKER_FLAGS "-pthread")
endif()
target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a)
if(PROJ_ROOT)
include_directories(${CMAKE_BINARY_DIR}/go/pserver/cclient/)
target_link_libraries(main ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a pthread)
target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a pthread)
else(PROJ_ROOT)
include_directories(${CMAKE_BINARY_DIR})
target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread)
target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread)
endif(PROJ_ROOT)
#include <stdio.h>
#include "libclient.h"
#include "libpaddle_pserver_cclient.h"
void fail() {
// TODO(helin): fix: gtest using cmake is not working, using this
// hacky way for now.
printf("test failed.\n");
// TODO(helin): Fix: gtest using cmake is not working, using this
// hacky way for now.
#define fail() \
fprintf(stderr, "info: %s:%d: ", __FILE__, __LINE__); \
exit(-1);
void sendGrads(paddle_pserver_client c) {
unsigned char grad_a[2000] = {2};
unsigned char grad_b[3000] = {3};
paddle_gradient grad1 = {
"param_a", PADDLE_ELEMENT_TYPE_FLOAT32, grad_a, 2000};
paddle_gradient grad2 = {
"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, grad_b, 3000};
paddle_gradient* grads[2] = {&grad1, &grad2};
if (paddle_send_grads(c, grads, 2)) {
fail();
}
}
void getParams(paddle_pserver_client c) {
paddle_parameter param_a;
paddle_parameter param_b;
char name_a[] = "param_a";
char name_b[] = "param_b";
// Must pre-allocate the prameter content before calling paddle_get_params.
unsigned char content_a[2000] = {};
unsigned char content_b[3000] = {};
param_a.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param_a.name = name_a;
param_a.content = content_a;
param_a.content_len = 2000;
param_b.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param_b.name = name_b;
param_b.content = content_b;
param_b.content_len = 3000;
paddle_parameter* params[2] = {&param_a, &param_b};
if (paddle_get_params(c, params, 2)) {
fail();
}
}
int main() {
char addr[] = "localhost:3000";
client c = paddle_new_pserver_client(addr, 1);
paddle_pserver_client c = paddle_new_pserver_client(addr, 1);
retry:
if (paddle_begin_init_params(c)) {
paddle_parameter param;
char name_a[] = "param_a";
char name_b[] = "param_b";
unsigned char content[] = {0x00, 0x11, 0x22};
unsigned char content_a[2000] = {1};
unsigned char content_b[3000] = {0};
param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param.name = name_a;
param.content = content;
param.content_len = 3;
if (paddle_init_param(c, param, NULL, 0) != 0) {
param.content = content_a;
param.content_len = 2000;
int error = paddle_init_param(c, param, NULL, 0);
if (error != 0) {
goto retry;
}
param.element_type = PADDLE_ELEMENT_TYPE_INT32;
param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param.name = name_b;
param.content = content;
param.content_len = 3;
if (paddle_init_param(c, param, NULL, 0) != 0) {
param.content = content_b;
param.content_len = 3000;
error = paddle_init_param(c, param, NULL, 0);
if (error != 0) {
goto retry;
}
if (paddle_finish_init_params(c) != 0) {
error = paddle_finish_init_params(c);
if (error != 0) {
goto retry;
}
} else {
fail();
}
unsigned char content[] = {0x00, 0x11, 0x22};
paddle_gradient grads[2] = {
{"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3},
{"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}};
if (!paddle_send_grads(c, grads, 2)) {
fail();
}
paddle_parameter* params[2] = {NULL, NULL};
char* names[] = {"param_a", "param_b"};
if (!paddle_get_params(c, names, params, 2)) {
fail();
int i;
for (i = 0; i < 100; i++) {
sendGrads(c);
getParams(c);
}
// get parameters again by reusing the allocated parameter buffers.
if (!paddle_get_params(c, names, params, 2)) {
fail();
}
paddle_release_param(params[0]);
paddle_release_param(params[1]);
if (!paddle_save_model(c, "/tmp/")) {
if (paddle_save_model(c, "/tmp/")) {
fail();
}
......
#include <stdio.h>
#include <stdlib.h>
#include "libpaddle_pserver_cclient.h"
typedef float real;
void fail() {
// TODO(helin): fix: gtest using cmake is not working, using this
// hacky way for now.
printf("test failed.\n");
exit(-1);
}
void print_parameter(paddle_gradient* param) {
if (param == NULL) {
printf("param is NULL!!\n");
} else {
printf("==== parameter ====\n");
printf("name: %s\n", param->name);
printf("content_len: %d\n", param->content_len);
printf("content_type: %d\n", param->element_type);
int i;
for (i = 0; i < param->content_len / (int)sizeof(real); ++i) {
printf("%f ", ((float*)param->content)[i]);
}
printf("\n\n");
}
}
int main() {
char addr[] = "localhost:3000";
paddle_pserver_client c = paddle_new_pserver_client(addr, 1);
char* names[] = {"param_a", "param_b"};
retry:
printf("init parameter to pserver:\n");
real param_content1[] = {0.1, 0.2, 0.3};
real param_content2[] = {0.4, 0.5, 0.6};
paddle_parameter** params =
(paddle_parameter**)malloc(sizeof(paddle_parameter*) * 2);
params[0] = (paddle_parameter*)malloc(sizeof(paddle_parameter));
params[0]->name = names[0];
params[0]->content = (unsigned char*)param_content1;
params[0]->content_len = 3 * sizeof(real);
params[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
params[1] = (paddle_parameter*)malloc(sizeof(paddle_parameter));
params[1]->name = names[1];
params[1]->content = (unsigned char*)param_content2;
params[1]->content_len = 3 * sizeof(real);
params[1]->element_type = PADDLE_ELEMENT_TYPE_INT32;
if (paddle_begin_init_params(c)) {
if (paddle_init_param(c, *params[0], NULL, 0) != 0) {
goto retry;
}
if (paddle_init_param(c, *params[1], NULL, 0) != 0) {
goto retry;
}
if (paddle_finish_init_params(c) != 0) {
goto retry;
}
} else {
fail();
}
printf("get inited parameters from pserver:\n");
// get parameters again by reusing the allocated parameter buffers.
if (paddle_get_params(c, params, 2) != 0) {
fail();
}
print_parameter(params[0]);
print_parameter(params[1]);
printf("send gradient to pserver:\n");
real gradient_content1[] = {0.01, 0.02, 0.03};
real gradinet_content2[] = {0.04, 0.05, 0.06};
paddle_gradient** grads =
(paddle_gradient**)malloc(sizeof(paddle_gradient*) * 2);
grads[0] = (paddle_gradient*)malloc(sizeof(paddle_gradient));
grads[0]->name = names[0];
grads[0]->content = (unsigned char*)gradient_content1;
grads[0]->content_len = 3 * sizeof(real);
grads[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
grads[1] = (paddle_gradient*)malloc(sizeof(paddle_gradient));
grads[1]->name = names[1];
grads[1]->content = (unsigned char*)gradinet_content2;
grads[1]->content_len = 3 * sizeof(real);
grads[1]->element_type = PADDLE_ELEMENT_TYPE_INT32;
printf("print gradient sent to pserver:\n");
print_parameter(grads[0]);
print_parameter(grads[1]);
if (paddle_send_grads(c, grads, 2) != 0) {
fail();
}
printf("get updated parameters from pserver:\n");
// get parameters again by reusing the allocated parameter buffers.
if (paddle_get_params(c, params, 2) != 0) {
fail();
}
print_parameter(params[0]);
print_parameter(params[1]);
if (paddle_save_model(c, "/tmp/") != 0) {
fail();
}
return 0;
}
import paddle.v2 as paddle
import gzip
def softmax_regression(img):
predict = paddle.layer.fc(input=img,
size=10,
act=paddle.activation.Softmax())
return predict
def multilayer_perceptron(img):
# The first fully-connected layer
hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
# The second fully-connected layer and the according activation function
hidden2 = paddle.layer.fc(input=hidden1,
size=64,
act=paddle.activation.Relu())
# The thrid fully-connected layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
return predict
def convolutional_neural_network(img):
# first conv layer
conv_pool_1 = paddle.networks.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# second conv layer
conv_pool_2 = paddle.networks.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# The first fully-connected layer
fc1 = paddle.layer.fc(input=conv_pool_2,
size=128,
act=paddle.activation.Tanh())
# The softmax layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=fc1,
size=10,
act=paddle.activation.Softmax())
return predict
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
images = paddle.layer.data(
name='pixel', type=paddle.data_type.dense_vector(784))
label = paddle.layer.data(
name='label', type=paddle.data_type.integer_value(10))
# Here we can build the prediction network in different ways. Please
# choose one by uncomment corresponding line.
predict = softmax_regression(images)
#predict = multilayer_perceptron(images)
#predict = convolutional_neural_network(images)
cost = paddle.layer.classification_cost(input=predict, label=label)
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(
learning_rate=0.1 / 128.0,
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer,
is_local=False,
pserver_spec="localhost:3000")
lists = []
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 1000 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics)
lists.append((event.pass_id, result.cost,
result.metrics['classification_error_evaluator']))
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=8192),
batch_size=128),
event_handler=event_handler,
num_passes=100)
# find the best pass
best = sorted(lists, key=lambda list: float(list[1]))[0]
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
test_creator = paddle.dataset.mnist.test()
test_data = []
for item in test_creator():
test_data.append((item[0], ))
if len(test_data) == 100:
break
# output is a softmax layer. It returns probabilities.
# Shape should be (100, 10)
probs = paddle.infer(
output_layer=predict, parameters=parameters, input=test_data)
print probs.shape
if __name__ == '__main__':
main()
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing
def main():
# init
paddle.init(use_gpu=False, trainer_count=1)
# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(name='w'),
size=1,
act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(name='b'))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer,
is_local=False,
pserver_spec="localhost:3000")
# event_handler to print training and testing info
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass):
if (event.pass_id + 1) % 10 == 0:
result = trainer.test(
reader=paddle.batch(
uci_housing.test(), batch_size=2),
feeding={'x': 0,
'y': 1})
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)
if __name__ == '__main__':
main()
......@@ -6,7 +6,7 @@ import (
"sort"
"time"
"github.com/PaddlePaddle/Paddle/go/pserver/internal/connection"
"github.com/PaddlePaddle/Paddle/go/connection"
)
// TODO(helin): add RPC call retry logic
......@@ -47,7 +47,7 @@ func NewClient(l Lister, pserverNum int, sel Selector) *Client {
// monitorPservers monitors pserver addresses, and updates connection
// when the address changes.
func (c *Client) monitorPservers(l Lister, pserverNum int) {
knownServers := make([]Server, pserverNum)
lastServers := make([]Server, pserverNum)
ticker := time.NewTicker(10 * time.Second)
monitor := func() {
curServers := make([]Server, pserverNum)
......@@ -56,25 +56,37 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
curServers[l.Index] = l
}
for i := range knownServers {
if knownServers[i].Addr != curServers[i].Addr {
err := c.pservers[i].Connect(curServers[i].Addr)
for i := range lastServers {
if lastServers[i].Addr == curServers[i].Addr {
continue
}
if curServers[i].Addr == "" {
err := c.pservers[i].Close()
if err != nil {
log.Println(err)
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curServers[i].Addr = knownServers[i].Addr
}
continue
}
err := c.pservers[i].Connect(curServers[i].Addr)
if err != nil {
log.Println(err)
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curServers[i].Addr = lastServers[i].Addr
}
}
knownServers = curServers
lastServers = curServers
}
monitor()
for _ = range ticker.C {
for range ticker.C {
monitor()
}
}
......@@ -93,16 +105,14 @@ func (c *Client) BeginInitParams() bool {
// InitParam initializes the parameter on parameter servers.
func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error {
var dummy int
return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, &dummy)
return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, nil)
}
// FinishInitParams tells parameter servers client has sent all
// parameters to parameter servers as initialization.
func (c *Client) FinishInitParams() error {
for _, p := range c.pservers {
var dummy int
err := p.Call("Service.FinishInitParams", dummy, &dummy)
err := p.Call("Service.FinishInitParams", 0, nil)
if err != nil {
return err
}
......@@ -116,8 +126,7 @@ func (c *Client) SendGrads(grads []Gradient) error {
errCh := make(chan error, len(grads))
for _, g := range grads {
go func(g Gradient) {
var dummy int
err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, &dummy)
err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, nil)
errCh <- err
}(g)
}
......@@ -196,8 +205,7 @@ func (c *Client) Save(path string) error {
errCh := make(chan error, len(c.pservers))
for _, p := range c.pservers {
var dummy int
err := p.Call("Service.Save", path, &dummy)
err := p.Call("Service.Save", path, nil)
errCh <- err
}
......
......@@ -117,7 +117,7 @@ func TestClientFull(t *testing.T) {
for i := range params {
if names[i] != params[i].Name {
t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i])
t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i].Name)
}
}
}
......@@ -32,7 +32,13 @@ int update_SGD(void* optimizer,
const void* gradient,
int num_bytes) {
SGD_optimizer* o = (SGD_optimizer*)optimizer;
// TODO
float* parameter = (float*)buffer;
float* grad = (float*)gradient;
int i;
for (i = 0; i < num_bytes / sizeof(float); ++i) {
parameter[i] -= o->learning_rate * grad[i];
}
return 0;
}
......
......@@ -9,8 +9,10 @@ import (
// ElementType is the type of elements of a Parameter.
type ElementType int
var ErrAlreadyInitialized = errors.New("pserver already initialized")
var ErrUninitialized = errors.New("pserver not fully initialized")
const (
AlreadyInitialized = "pserver already initialized"
Uninitialized = "pserver not fully initialized"
)
// Supported element types
const (
......@@ -49,7 +51,7 @@ type Service struct {
// NewService creates a new service.
func NewService() *Service {
s := &Service{opt: newOptimizer(sgd, 0.01)}
s := &Service{opt: newOptimizer(sgd, 0.005)}
s.paramMap = make(map[string]Parameter)
s.initialized = make(chan struct{})
return s
......@@ -59,7 +61,7 @@ func NewService() *Service {
func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error {
select {
case <-s.initialized:
return ErrAlreadyInitialized
return errors.New(AlreadyInitialized)
default:
}
......@@ -80,7 +82,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er
func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error {
select {
case <-s.initialized:
return ErrAlreadyInitialized
return errors.New(AlreadyInitialized)
default:
}
......@@ -94,7 +96,7 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error {
select {
case <-s.initialized:
default:
return ErrUninitialized
return errors.New(Uninitialized)
}
s.mu.Lock()
......
......@@ -15,8 +15,7 @@ func TestFull(t *testing.T) {
p.Name = "param_a"
p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
p.ElementType = pserver.Int32
var dummy int
err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy)
err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
if err != nil {
t.FailNow()
}
......@@ -25,12 +24,12 @@ func TestFull(t *testing.T) {
p1.Name = "param_b"
p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
p1.ElementType = pserver.Float32
err = s.InitParam(pserver.ParameterWithConfig{p1, nil}, &dummy)
err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: nil}, nil)
if err != nil {
t.FailNow()
}
err = s.FinishInitParams(0, &dummy)
err = s.FinishInitParams(0, nil)
if err != nil {
t.FailNow()
}
......@@ -46,11 +45,11 @@ func TestFull(t *testing.T) {
}
g1, g2 := pserver.Gradient(p1), pserver.Gradient(p)
err = s.SendGrad(g1, &dummy)
err = s.SendGrad(g1, nil)
if err != nil {
t.FailNow()
}
err = s.SendGrad(g2, &dummy)
err = s.SendGrad(g2, nil)
if err != nil {
t.FailNow()
......@@ -74,23 +73,21 @@ func TestFull(t *testing.T) {
func TestMultipleInit(t *testing.T) {
s := pserver.NewService()
var dummy int
err := s.FinishInitParams(0, &dummy)
err := s.FinishInitParams(0, nil)
if err != nil {
t.FailNow()
}
err = s.FinishInitParams(0, &dummy)
if err != pserver.ErrAlreadyInitialized {
err = s.FinishInitParams(0, nil)
if err.Error() != pserver.AlreadyInitialized {
t.FailNow()
}
}
func TestUninitialized(t *testing.T) {
s := pserver.NewService()
var dummy int
err := s.SendGrad(pserver.Gradient{}, &dummy)
if err != pserver.ErrUninitialized {
err := s.SendGrad(pserver.Gradient{}, nil)
if err.Error() != pserver.Uninitialized {
t.FailNow()
}
}
......@@ -98,13 +95,14 @@ func TestUninitialized(t *testing.T) {
func TestBlockUntilInitialized(t *testing.T) {
s := pserver.NewService()
ch := make(chan struct{}, 2)
errCh := make(chan error, 2)
var wg sync.WaitGroup
wg.Add(1)
go func() {
var param pserver.Parameter
err := s.GetParam("param_a", &param)
if err != nil {
t.FailNow()
errCh <- err
}
wg.Done()
ch <- struct{}{}
......@@ -112,10 +110,9 @@ func TestBlockUntilInitialized(t *testing.T) {
wg.Add(1)
go func() {
var dummy int
err := s.Save("", &dummy)
err := s.Save("", nil)
if err != nil {
t.FailNow()
errCh <- err
}
wg.Done()
ch <- struct{}{}
......@@ -127,6 +124,8 @@ func TestBlockUntilInitialized(t *testing.T) {
case <-ch:
// some function returned before initialization is completed.
t.FailNow()
case <-errCh:
t.FailNow()
default:
}
......@@ -134,13 +133,12 @@ func TestBlockUntilInitialized(t *testing.T) {
p.Name = "param_a"
p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
p.ElementType = pserver.Int32
var dummy int
err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy)
err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
if err != nil {
t.FailNow()
}
err = s.FinishInitParams(0, &dummy)
err = s.FinishInitParams(0, nil)
if err != nil {
t.FailNow()
}
......
......@@ -16,7 +16,7 @@ set(API_HEADER
Internal.h)
add_library(paddle_api STATIC ${API_SOURCES})
add_dependencies(paddle_api gen_proto_cpp)
add_dependencies(paddle_api gen_proto_cpp paddle_pserver_cclient_lib)
INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle)
......@@ -45,7 +45,7 @@ SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
)
IF(APPLE)
SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load")
SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load -framework CoreFoundation -framework Security")
ELSE(APPLE)
SET(START_GROUP "-Xlinker -start-group")
SET(END_GROUP "-Xlinker -end-group")
......
......@@ -179,6 +179,7 @@ namespace std {
%newobject ParameterOptimizer::needSpecialTraversal;
%newobject ParameterUpdater::createLocalUpdater;
%newobject ParameterUpdater::createRemoteUpdater;
%newobject ParameterUpdater::createNewRemoteUpdater;
%feature("director") UpdateCallback;
%feature("autodoc", 1); // To generate method stub, for code hint in ide
......
......@@ -841,6 +841,8 @@ public:
static ParameterUpdater* createRemoteUpdater(OptimizationConfig* config,
int passCount,
bool useSparseUpdater);
static ParameterUpdater* createNewRemoteUpdater(
OptimizationConfig* config, const std::string pserverSpec);
~ParameterUpdater();
/**
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/trainer/NewRemoteParameterUpdater.h"
#include "paddle/trainer/RemoteParameterUpdater.h"
#include "paddle/trainer/ThreadParameterUpdater.h"
......@@ -28,6 +29,14 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater(
return updater;
}
ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
OptimizationConfig *config, const std::string pserverSpec) {
auto updater = new ParameterUpdater();
updater->m->updater.reset(new paddle::NewRemoteParameterUpdater(
config->m->getConfig(), pserverSpec));
return updater;
}
ParameterUpdater *ParameterUpdater::createRemoteUpdater(
OptimizationConfig *config, int passCount, bool useSparseUpdater) {
auto updater = new ParameterUpdater();
......
......@@ -28,6 +28,7 @@ if(WITH_TESTING)
add_simple_unittest(PadOpTest)
add_simple_unittest(MulOpTest)
add_simple_unittest(CosSimOpTest)
add_simple_unittest(RowConvOpTest)
endif()
endif()
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "RowConvOp.h"
#include <iostream>
#include "paddle/math/Vector.h"
namespace paddle {
template <>
void RowConv<DEVICE_TYPE_CPU>(CpuMatrix& out,
const CpuMatrix& in,
const CpuMatrix& filter,
const CpuIVector& seq) {
const int* starts = seq.getData();
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
for (size_t j = begin; j < end; ++j) {
MatrixPtr x;
MatrixPtr w;
if ((j + contextLength) < end) {
x = (const_cast<CpuMatrix&>(in)).subMatrix(j, contextLength);
w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, contextLength);
} else {
x = (const_cast<CpuMatrix&>(in)).subMatrix(j, end - j);
w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, end - j);
}
MatrixPtr y = out.subMatrix(j, 1);
y->addDotMulVMM(*x, *w);
}
}
}
template <>
void RowConvGrad<DEVICE_TYPE_CPU>(const CpuMatrix& outG,
const CpuMatrix& in,
const CpuMatrix& filter,
CpuMatrix& inG,
CpuMatrix& filterG,
const CpuIVector& seq) {
// gradient w.r.t filter
const int* starts = seq.getData();
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
if (filterG) {
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
size_t steps = end - begin;
for (size_t j = 0; j < contextLength && (begin + j) < end; ++j) {
MatrixPtr x =
(const_cast<CpuMatrix&>(in)).subMatrix(begin + j, steps - j);
MatrixPtr dy =
(const_cast<CpuMatrix&>(outG)).subMatrix(begin, steps - j);
MatrixPtr dw = filterG.subMatrix(j, 1);
dw->addDotMulVMM(*dy, *x);
}
}
}
// gradient w.r.t input feature
if (inG) {
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
size_t steps = end - begin;
for (size_t j = 0; j < steps; ++j) {
MatrixPtr dx = inG.subMatrix(begin + j, 1);
for (size_t t = 0; t < contextLength; ++t) {
if (int(j - t) >= 0) {
MatrixPtr dy =
(const_cast<CpuMatrix&>(outG)).subMatrix(begin + j - t, 1);
MatrixPtr w = (const_cast<CpuMatrix&>(filter)).subMatrix(t, 1);
dx->addDotMul(*dy, *w, 1.0, 1.0);
}
}
}
}
}
}
/**
* \brief The row convolution is called lookahead convolution. It is firstly
* introduced in deep-speech2 system. The bidirectional RNN that learns
* representation for a sequence by performing a forward and a backward pass
* through the entire sequence. However, unlike unidirectional RNNs,
* bidirectional RNNs are challenging to deploy in an online and low-latency
* setting. The lookahead convolution incorporates information from future
* subsequences in a computationally efficient manner to improve unidirectional
* recurrent neural networks.
*
* The connection of row convolution is different form the 1D sequence
* convolution. Assumed that, the future context-length is k, that is to say,
* it can get the output at timestep t by using the the input feature from t-th
* timestep to (t+k)-th timestep. Assumed that the hidden dim of input
* activations are d, the activations r_t for the new layer at time-step t are:
*
*
* -- k + 1
* r(t,i) = > W(i,j) * h(t+j-1, i), for (1 <= i <= d)
* -- j = 1
*
*
* The weight shape is: (k + 1) x d
* Function Arguments:
*
* \param inputs[0] The input activations.
* \param inputs[0] The filter (or weight) and shape is (k+1) x d.
* \param outputs[1] The output activations.
*
* [1] Dario Amodei, etc. Deep Speech 2 : End-to-End Speech Recognition in
* English
* and Mandarin. https://arxiv.org/abs/1512.02595
*/
template <DeviceType Device>
class RowConvFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
// check
CHECK_EQ(2UL, inputs.size());
CHECK_EQ(1UL, outputs.size());
// TODO(qingqing): support ASSIGN_TO.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here.";
const auto in = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out = dynamic_cast<const SequenceArg&>(outputs[0]);
auto w = inputs[1];
CHECK(in.data() && out.data() && in.getSequenceId().data());
CHECK_EQ(in.shape().ndims(), 2UL);
CHECK(in.shape() == out.shape());
CHECK_EQ(w.shape()[1], in.shape()[1]);
auto outMat = out.matrix<Device>();
const auto inMat = in.matrix<Device>();
const auto wMat = w.matrix<Device>();
const auto seqId = in.getSequenceId().vector<int, Device>();
RowConv<Device>(outMat, inMat, wMat, seqId);
}
};
/**
* \brief The backward of row convolution function. This function calculated
* the gradient w.r.t filter and the gradient w.r.t input activations(or data).
*
* Argument in this Function:
*
* \param inputs[0] The gradient w.r.t output activations.
* \param inputs[1] The input activations.
* \param inputs[2] The filter (or weight) and shape is (k+1) x d.
* \param outputs[0] The gradient w.r.t input activations.
* \param outputs[1] The gradient w.r.r filter.
*
* Abbreviation:
* w.r.t: with respect to.
*/
template <DeviceType Device>
class RowConvGradFunc : public FunctionBase {
// TODO(qingqing): split into RowConvDataFunc and RowConvWeightFunc
public:
void init(const FuncConfig& config) override {}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
// check
CHECK_EQ(3UL, inputs.size());
CHECK_EQ(2UL, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
CHECK_EQ(outputs[1].getArgType(), ADD_TO);
CHECK(inputs[0].isSequenceArg() && inputs[1].isSequenceArg() &&
outputs[0].isSequenceArg())
<< "SequenceArg required here.";
const auto outGrad = dynamic_cast<const SequenceArg&>(inputs[0]);
const auto in = dynamic_cast<const SequenceArg&>(inputs[1]);
const auto w = inputs[2];
auto inGrad = dynamic_cast<const SequenceArg&>(outputs[0]);
auto wGrad = outputs[1];
CHECK_EQ(in.shape().ndims(), 2UL);
CHECK(in.shape() == inGrad.shape());
CHECK(in.shape() == outGrad.shape());
CHECK_EQ(wGrad.shape()[1], in.shape()[1]);
const auto outGMat = outGrad.matrix<Device>();
const auto inMat = in.matrix<Device>();
const auto wMat = w.matrix<Device>();
auto inGMat = inGrad.data()
? inGrad.matrix<Device>()
: typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
auto wGMat = wGrad.data()
? wGrad.matrix<Device>()
: typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
const auto seqId = in.getSequenceId().vector<int, Device>();
RowConvGrad<Device>(outGMat, inMat, wMat, inGMat, wGMat, seqId);
}
};
REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace paddle {
/**
* \brief The forward of row convolution.
*
* \param[out] out The output data and shape is h x d. h is the sum of
* time steps of all samples in one mini-batch.
* \param[in] in The input data and shape is h x d.
* \param[in] filter The filter and shape is k x d. The lookahead step
* number plus one equals k.
* \param[in] seq The sequence start positions.
*
*/
template <DeviceType DType>
void RowConv(typename Tensor<real, DType>::Matrix& out,
const typename Tensor<real, DType>::Matrix& in,
const typename Tensor<real, DType>::Matrix& filter,
const typename Tensor<int, DType>::Vector& seq);
/**
* \brief The backward of row convolution.
*
* \param[in] outG The gradient w.r.t output data.
* \param[in] in The input data.
* \param[in] filter The filter.
* \param[out] inG The gradient w.r.t input data.
* \param[out] filterG The gradient w.r.t filter.
* \param[in] seq The sequence start positions.
*
*/
template <DeviceType DType>
void RowConvGrad(const typename Tensor<real, DType>::Matrix& outG,
const typename Tensor<real, DType>::Matrix& in,
const typename Tensor<real, DType>::Matrix& filter,
typename Tensor<real, DType>::Matrix& inG,
typename Tensor<real, DType>::Matrix& filterG,
const typename Tensor<int, DType>::Vector& seq);
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "RowConvOp.h"
namespace paddle {
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConv(real* y, const real* x, const real* w,
const int* starts, const int height, const int width,
const int numSeq, const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sw[BLOCK_H][BLOCK_W];
for (int i = tidy; i < context; i += blky) {
sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context; ++t) {
if ((start + j + t) < end) {
int xoff = off + t * width;
real xVal = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0;
sum += sw[t][tidx] * xVal;
}
}
if (gidx + tidx < width) {
y[off + gidx + tidx] += sum;
}
}
}
}
__global__ void KeRowConv2(real* y, const real* x, const real* w,
const int* starts, const int height, const int width,
const int numSeq, const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
int off = (start + j) * width;
real sum = 0;
for (int t = 0; t < context && (start + j + t) < end; ++t) {
int xoff = off + t * width;
real xd = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0;
real wd = gidx + tidx < width ? w[t * width + gidx + tidx] : 0.0;
sum += wd * xd;
}
if (gidx + tidx < width) {
y[off + gidx + tidx] += sum;
}
}
}
}
template <>
void RowConv<DEVICE_TYPE_GPU>(GpuMatrix& out,
const GpuMatrix& in,
const GpuMatrix& filter,
const GpuIVector& seq) {
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
const size_t height = in.getHeight();
const size_t width = in.getWidth();
real* y = out.getData();
const real* x = in.getData();
const real* w = filter.getData();
const int* starts = seq.getData();
dim3 dimBlock(32, 32);
dim3 dimGrid(DIVUP(width, dimBlock.x), 1);
if (contextLength <= 32) {
KeRowConv<32, 32><<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(y, x, w, starts, height, width, numSeq, contextLength);
} else {
KeRowConv2<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(y, x, w, starts, height, width, numSeq, contextLength);
}
CHECK_SYNC("RowConv");
}
template<int BLOCK_H, int BLOCK_W, int CONTEXT>
__global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sh_x[BLOCK_W][BLOCK_H];
__shared__ real sh_dy[BLOCK_W][BLOCK_H + CONTEXT - 1];
__shared__ real sh_dw[CONTEXT][BLOCK_W];
if (tidy < context) {
sh_dw[tidy][tidx] = 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
const int size = ((steps + BLOCK_H - 1)/BLOCK_H) * BLOCK_H;
for (int j = tidy; j < size; j += BLOCK_H) {
int xoff = gidx + tidx;
int yoff = start + j;
// transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0;
__syncthreads();
if (tidy < (context - 1)) {
yoff = yoff - context + 1;
sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0;
}
__syncthreads();
for (int t = 0; t < context; t++) {
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx + context - 1 - t];
__syncthreads();
// warp size and blockDim.x is 32.
val += __shfl_down(val, 16);
val += __shfl_down(val, 8);
val += __shfl_down(val, 4);
val += __shfl_down(val, 2);
val += __shfl_down(val, 1);
__syncthreads();
if (tidx == 0) {
sh_dw[t][tidy] += val;
}
__syncthreads();
}
}
}
for (int t = tidy; (t < context) && ((gidx + tidx) < width); t += blky) {
dw[t * width + gidx + tidx] += sh_dw[t][tidx];
}
}
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sh_x[BLOCK_H][BLOCK_W];
__shared__ real sh_dy[BLOCK_H][BLOCK_W];
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
const int size = ((steps + BLOCK_H - 1)/BLOCK_H) * BLOCK_H;
for (int j = tidy; j < size; j += BLOCK_H) {
int xoff = gidx + tidx;
int yoff = start + j;
// transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
__syncthreads();
for (int t = 0; t < context; t++) {
sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
__syncthreads();
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];
__syncthreads();
// warp size and blockDim.x is 32.
val += __shfl_down(val, 16);
val += __shfl_down(val, 8);
val += __shfl_down(val, 4);
val += __shfl_down(val, 2);
val += __shfl_down(val, 1);
__syncthreads();
if (tidx == 0 && (gidx + tidy) < width) {
dw[t*width + gidx + tidy] += val;
}
}
}
}
}
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConvBwData(real* dx, const real* w, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sw[BLOCK_H][BLOCK_W];
for (int i = tidy; i < context; i += blky) {
sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context && (j - t) >= 0; ++t) {
int dyOff = off - t * width;
real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0;
sum += sw[t][tidx] * dyVal;
}
if (gidx + tidx < width) {
dx[off + gidx + tidx] += sum;
}
}
}
}
__global__ void KeRowConvBwData2(real* dx, const real* w, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context && (j - t) >= 0; ++t) {
int dyOff = off - t * width;
real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0;
real wVal = gidx + tidx < width ? w[t * width + gidx + tidx] : 0.0;
sum += wVal * dyVal;
}
if (gidx + tidx < width) {
dx[off + gidx + tidx] += sum;
}
}
}
}
template <>
void RowConvGrad<DEVICE_TYPE_GPU>(const GpuMatrix& outG,
const GpuMatrix& in,
const GpuMatrix& filter,
GpuMatrix& inG,
GpuMatrix& filterG,
const GpuIVector& seq) {
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
const size_t height = in.getHeight();
const size_t width = in.getWidth();
const real* dy = outG.getData();
const real* x = in.getData();
const real* w = filter.getData();
const int* starts = seq.getData();
if (filterG) {
dim3 dimBlock(32, 32);
dim3 dimGrid(DIVUP(width, dimBlock.x), 1);
real* dw = filterG.getData();
if (contextLength <= 32) {
KeRowConvBwWeight<32, 32, 32>
<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(dw, x, dy, starts, height, width, numSeq, contextLength);
} else {
KeRowConvBwWeight2<32, 32>
<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(dw, x, dy, starts, height, width, numSeq, contextLength);
}
}
if (inG) {
real* dx = inG.getData();
dim3 dimBlock2(32, 32);
dim3 dimGrid2(DIVUP(width, dimBlock2.x), 1);
if (contextLength <= 64) {
KeRowConvBwData<32, 64>
<<<dimGrid2, dimBlock2, 0, STREAM_DEFAULT>>>
(dx, w, dy, starts, height, width, numSeq, contextLength);
} else {
KeRowConvBwData2
<<<dimGrid2, dimBlock2, 0, STREAM_DEFAULT>>>
(dx, w, dy, starts, height, width, numSeq, contextLength);
}
}
CHECK_SYNC("RowConvGrad");
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "FunctionTest.h"
namespace paddle {
void testRowConvFw(size_t batchSize, size_t dim, size_t contextLength) {
FunctionCompare test("RowConv", FuncConfig());
test.addSequence(SequenceIdArg(TensorShape{batchSize}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}));
test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}),
ADD_TO);
test.run();
}
void testRowConvBw(size_t batchSize, size_t dim, size_t contextLength) {
FunctionCompare test("RowConvGrad", FuncConfig());
test.addSequence(SequenceIdArg(TensorShape{batchSize}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}));
test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}),
ADD_TO);
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}),
ADD_TO);
test.run();
}
TEST(RowConv, real) {
for (size_t numSamples : {17, 129, 2020}) {
for (size_t dim : {16, 512, 2560}) {
for (size_t context : {3, 19, 65}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
<< " context length=" << context;
testRowConvFw(numSamples, dim, context);
testRowConvBw(numSamples, dim, context);
}
}
}
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "RowConvLayer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
REGISTER_LAYER(row_conv, RowConvLayer);
bool RowConvLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
contexLength_ = config_.inputs(0).row_conv_conf().context_length();
CHECK_EQ(inputLayers_.size(), 1UL);
weight_.reset(new Weight(contexLength_, getSize(), parameters_[0]));
createFunction(forward_, "RowConv", FuncConfig());
createFunction(backward_, "RowConvGrad", FuncConfig());
return true;
}
void RowConvLayer::forward(PassType passType) {
Layer::forward(passType);
MatrixPtr input = getInputValue(0);
size_t height = input->getHeight();
size_t width = input->getWidth();
CHECK_EQ(width, getSize());
resetOutput(height, width);
const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
MatrixPtr w = weight_->getW();
wDims_ = TensorShape({w->getHeight(), w->getWidth()});
MatrixPtr outV = getOutputValue();
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getInputValue(0), *startPos);
inputs.addArg(*w, wDims_);
outputs.addArg(*getOutputValue(), *startPos, ADD_TO);
{
REGISTER_TIMER_INFO("RowConvForward", getName().c_str());
forward_[0]->calc(inputs, outputs);
}
/* activation */ {
REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
forwardActivation();
}
}
void RowConvLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getOutputGrad(), *startPos);
inputs.addArg(*getInputValue(0), *startPos);
inputs.addArg(*weight_->getW(), wDims_);
MatrixPtr inGrad = getInputGrad(0);
MatrixPtr wGrad = weight_->getWGrad();
size_t h = getInputValue(0)->getHeight();
size_t w = getInputValue(0)->getWidth();
outputs.addArg(
inGrad ? (*inGrad) : *(Matrix::create(nullptr, h, w, false, useGpu_)),
*startPos,
ADD_TO);
outputs.addArg(
wGrad ? (*wGrad)
: *(Matrix::create(nullptr, contexLength_, w, false, useGpu_)),
wDims_,
ADD_TO);
{
REGISTER_TIMER_INFO("RowConvBackward", getName().c_str());
backward_[0]->calc(inputs, outputs);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
weight_->getParameterPtr()->incUpdate(callback);
}
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
namespace paddle {
/**
* \brief Row Convolution Layer.
*/
class RowConvLayer : public Layer {
public:
explicit RowConvLayer(const LayerConfig& config) : Layer(config) {}
~RowConvLayer() {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
protected:
// Row convolution weight, context_lenght_ * fan_out.
// fan_out is the size of output feature.
std::unique_ptr<Weight> weight_;
// The step number to look ahead plus one equals contexLength_.
size_t contexLength_;
TensorShape wDims_;
};
} // namespace paddle
......@@ -1705,6 +1705,26 @@ TEST(Layer, TransLayer) {
}
}
TEST(Layer, RowConvLayer) {
const int context = 3;
const int size = 512;
TestConfig config;
config.layerConfig.set_type("row_conv");
config.layerConfig.set_size(size);
config.layerConfig.set_active_type("sigmoid");
config.inputDefs.push_back(
{INPUT_SEQUENCE_DATA, "layer_0", size, context * size});
LayerInputConfig* input = config.layerConfig.add_inputs();
RowConvConfig* conv = input->mutable_row_conv_conf();
conv->set_context_length(context);
for (auto useGpu : {false, true}) {
testLayerGrad(config, "row_conv", 100, false, useGpu, false);
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......
......@@ -42,7 +42,7 @@ TEST(Argument, poolSequenceWithStride) {
CHECK_EQ(outStart[3], 4);
CHECK_EQ(outStart[4], 7);
CHECK_EQ(stridePositions->getSize(), 8);
CHECK_EQ(stridePositions->getSize(), 8UL);
auto result = reversed ? strideResultReversed : strideResult;
for (int i = 0; i < 8; i++) {
CHECK_EQ(stridePositions->getData()[i], result[i]);
......
......@@ -4,6 +4,7 @@ set(TRAINER_SOURCES
ParameterUpdater.cpp
ParamUtil.cpp
RemoteParameterUpdater.cpp
NewRemoteParameterUpdater.cpp
Tester.cpp
Trainer.cpp
TrainerInternal.cpp
......@@ -16,6 +17,7 @@ set(TRAINER_HEADERS
ParameterUpdater.h
ParamUtil.h
RemoteParameterUpdater.h
NewRemoteParameterUpdater.h
Tester.h
TesterConfig.h
Trainer.h
......@@ -32,7 +34,7 @@ add_style_check_target(paddle_trainer_lib
add_style_check_target(paddle_trainer_lib
${TRAINER_HEADERS})
add_dependencies(paddle_trainer_lib
gen_proto_cpp)
gen_proto_cpp paddle_pserver_cclient_lib)
macro(add_paddle_exe TARGET_NAME)
add_executable(${TARGET_NAME} ${ARGN})
......@@ -56,3 +58,10 @@ install(TARGETS paddle_trainer paddle_merge_model
set_target_properties(paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
set_target_properties(paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
if(APPLE)
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
endif()
target_link_libraries(paddle_trainer ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a)
target_link_libraries(paddle_trainer_lib ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "NewRemoteParameterUpdater.h"
#include "Trainer.h"
#include "paddle/utils/Stat.h"
DECLARE_int32(trainer_id);
DECLARE_string(save_dir);
namespace paddle {
NewRemoteParameterUpdater::NewRemoteParameterUpdater(
const OptimizationConfig &config, const std::string pserverSpec)
: parameterClient_(-1),
newParameters_(nullptr),
newGradients_(nullptr),
pserverSpec_(pserverSpec) {}
void NewRemoteParameterUpdater::init(
const std::vector<ParameterPtr> &parameters) {
ParameterUpdater::init(parameters);
for (auto &para : parameters_) {
para->getBuf(PARAMETER_VALUE)->zeroMem();
para->getBuf(PARAMETER_GRADIENT)->zeroMem();
}
// create parameter server client.
parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(),
FLAGS_trainer_id == 0);
// init new parameter and gradient.
newParameters_ = initNewParameter(PARAMETER_VALUE);
newGradients_ = initNewParameter(PARAMETER_GRADIENT);
// init parameter, one trainer will get the opportunity to int parameter and
// send them to parameter server. Others will get the initialized parameter
// from parameter server
if (paddle_begin_init_params(parameterClient_)) {
LOG(INFO) << "paddle_begin_init_params start";
for (int i = 0; i < parameterSize(); ++i) {
auto paramConfig = parameters_[i]->getConfig();
std::string bytes = paramConfig.SerializeAsString();
const char *array = bytes.data();
int size = (int)bytes.size();
paddle_init_param(
parameterClient_, *newParameters_[i], (void *)array, size);
}
paddle_finish_init_params(parameterClient_);
LOG(INFO) << "paddle_begin_init_params done";
} else {
paddle_get_params(parameterClient_, newParameters_, parameterSize());
}
LOG(INFO) << "NewRemoteParameterUpdater initialized";
}
void NewRemoteParameterUpdater::updateImpl(Parameter *para) {}
void NewRemoteParameterUpdater::finishBatch(real cost) {
// send gradient to parameter server.
paddle_send_grads(parameterClient_, newGradients_, parameterSize());
// get the updated parameter from parameterClient.
paddle_get_params(parameterClient_, newParameters_, parameterSize());
// clear gradient after update parameter.
for (auto &para : parameters_) {
para->getBuf(PARAMETER_GRADIENT)->zeroMem();
}
}
void NewRemoteParameterUpdater::startPass() {}
bool NewRemoteParameterUpdater::finishPass() { return true; }
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <thread>
#include "ParameterUpdater.h"
#include "libpaddle_pserver_cclient.h"
#include "paddle/pserver/ParameterClient2.h"
#include "paddle/utils/Queue.h"
#include "paddle/utils/Util.h"
namespace paddle {
/**
* New remote parameter updater for dense parameters that use cclient of go.
*/
class NewRemoteParameterUpdater : public ParameterUpdater {
public:
NewRemoteParameterUpdater(const OptimizationConfig& config,
const std::string pserverSpec);
~NewRemoteParameterUpdater() {
releaseNewParameter(newParameters_);
releaseNewParameter(newGradients_);
if (parameterClient_ >= 0) paddle_pserver_client_release(parameterClient_);
}
/**
* initialize the internal parameter client and itself.
*/
virtual void init(const std::vector<ParameterPtr>& parameters);
/**
* @brief start batch
*
* @note one batch training exhibits stateful feature to help
* to do performance tuning, sgd optimization if necessary.
*/
virtual PassType startBatch(int64_t batchSize) { return PASS_TRAIN; }
/**
* send parameters to pservers and get returned parameters
* from all pservers if necessary.
*/
virtual void finishBatch(real cost);
virtual void startPass();
virtual bool finishPass();
protected:
/**
* work need to do after finishBatch
*/
virtual void updateImpl(Parameter* para);
private:
int parameterSize() { return (int)parameters_.size(); }
/**
* init parameter of go paddle pserver cclient.
* @param new_params
* @param type
*/
paddle_parameter** initNewParameter(ParameterType type) {
paddle_parameter** new_params =
(paddle_parameter**)malloc(sizeof(paddle_parameter*) * parameterSize());
for (int i = 0; i < parameterSize(); ++i) {
new_params[i] = (paddle_parameter*)malloc(sizeof(paddle_parameter));
memset(new_params[i], 0, sizeof(paddle_parameter));
}
for (int i = 0; i < parameterSize(); ++i) {
ParameterPtr param = parameters_[i];
new_params[i]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
new_params[i]->name = (char*)param->getName().c_str();
new_params[i]->content =
(unsigned char*)(param->getBuf(type).get()->getData());
new_params[i]->content_len =
(int)param->getBuf(type).get()->getSize() * sizeof(real);
}
return new_params;
}
void releaseNewParameter(paddle_parameter** newParams) {
if (newParams != nullptr) {
for (int i = 0; i < parameterSize(); ++i) {
free(newParams[i]);
}
free(newParams);
}
}
protected:
/// internal parameter client object for exchanging data with pserver
paddle_pserver_client parameterClient_;
/// the parameters for new pserver client
paddle_parameter** newParameters_;
/// the gradinets for new pserver client
paddle_parameter** newGradients_;
/// the specification of parameter server "host1:port,host1:port"
std::string pserverSpec_;
};
} // namespace paddle
......@@ -194,6 +194,10 @@ message MaxOutConfig {
required uint32 groups = 2;
}
message RowConvConfig {
required uint32 context_length = 1;
}
message ProjectionConfig {
required string type = 1;
required string name = 2;
......@@ -279,6 +283,7 @@ message LayerInputConfig {
optional SppConfig spp_conf = 12;
optional PriorBoxConfig priorbox_conf = 13;
optional PadConfig pad_conf = 14;
optional RowConvConfig row_conv_conf = 15;
}
message LayerConfig {
......
......@@ -2081,6 +2081,23 @@ class MaxOutLayer(LayerBase):
g_layer_map[input_layer.name].width, out_channels)
@config_layer('row_conv')
class RowConvLayer(LayerBase):
def __init__(self, name, inputs, context_length, **xargs):
super(RowConvLayer, self).__init__(
name, 'maxout', 0, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1,
'TransLayer must have one and only one input')
input_layer = self.get_input_layer(0)
row_conv_conf = self.config.inputs[0].row_conv_conf
row_conv_conf.context_length = context_length
self.set_layer_size(input_layer.size)
psize = context_length * input_layer.size
dims = [context_length, input_layer.size]
self.create_input_parameter(0, psize, dims)
# key: cost type
# value: cost class
g_cost_map = {}
......
......@@ -121,6 +121,7 @@ __all__ = [
'smooth_l1_cost',
'layer_support',
'multiplex_layer',
'row_conv_layer',
'dropout_layer',
'prelu_layer',
]
......@@ -179,17 +180,18 @@ class LayerType(object):
EOSID_LAYER = 'eos_id'
RECURRENT_LAYER = 'recurrent'
CONV_SHIFT_LAYER = 'conv_shift'
TENSOR_LAYER = 'tensor'
SEL_FC_LAYER = 'selective_fc'
SAMPLING_ID_LAYER = 'sampling_id'
SLOPE_INTERCEPT_LAYER = 'slope_intercept'
LINEAR_COMBINATION_LAYER = 'convex_comb'
BLOCK_EXPAND = 'blockexpand'
MAXOUT = 'maxout'
SPP_LAYER = 'spp'
PAD_LAYER = 'pad'
MULTIPLEX_LAYER = 'multiplex'
CONV_SHIFT_LAYER = "conv_shift"
TENSOR_LAYER = "tensor"
SEL_FC_LAYER = "selective_fc"
SAMPLING_ID_LAYER = "sampling_id"
SLOPE_INTERCEPT_LAYER = "slope_intercept"
LINEAR_COMBINATION_LAYER = "convex_comb"
BLOCK_EXPAND = "blockexpand"
MAXOUT = "maxout"
SPP_LAYER = "spp"
PAD_LAYER = "pad"
MULTIPLEX_LAYER = "multiplex"
ROW_CONV_LAYER = "row_conv"
PRINT_LAYER = 'print'
PRIORBOX_LAYER = 'priorbox'
......@@ -5585,6 +5587,79 @@ def dropout_layer(input, dropout_rate, name=None):
@wrap_name_default()
@wrap_act_default(act=LinearActivation())
@wrap_param_attr_default()
@layer_support(DROPOUT)
def row_conv_layer(input,
context_len,
act=None,
name=None,
param_attr=None,
layer_attr=None):
"""
The row convolution is called lookahead convolution. It is firstly
introduced in paper of `Deep Speech 2: End-toEnd Speech Recognition
in English and Mandarin <https://arxiv.org/pdf/1512.02595v1.pdf>`_ .
The bidirectional RNN that learns representation for a sequence by
performing a forward and a backward pass through the entire sequence.
However, unlike unidirectional RNNs, bidirectional RNNs are challenging
to deploy in an online and low-latency setting. The lookahead convolution
incorporates information from future subsequences in a computationally
efficient manner to improve unidirectional recurrent neural networks.
The connection of row convolution is different form the 1D sequence
convolution. Assumed that, the future context-length is k, that is to say,
it can get the output at timestep t by using the the input feature from t-th
timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
activations are d, the activations r_t for the new layer at time-step t are:
.. math::
r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
\quad \text{for} \quad (1 \leq i \leq d)
Note:
The `context_len` is `k + 1`. That is to say, the lookahead step
number plus one equals context_len.
.. code-block:: python
row_conv = row_conv_layer(input=input_layer, context_len=3)
:param input: The input layer.
:type input: LayerOutput
:param context_len: The context length equals the lookahead step number
plus one.
:type context_len: int
:param act: Activation Type. Default is linear activation.
:type act: BaseActivation
:param param_attr: The Parameter Attribute. If None, the parameter will be
initialized smartly. It's better set it by yourself.
:type param_attr: ParameterAttribute
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput)
assert context_len > 0, "the context_len must be greatet than 0."
Layer(
inputs=[Input(input.name, **param_attr.attr)],
name=name,
context_length=context_len,
type=LayerType.ROW_CONV_LAYER,
active_type=act.name,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.ROW_CONV_LAYER, input, activation=act, size=input.size)
@layer_support()
@wrap_name_default()
@wrap_param_attr_default()
......
......@@ -6,6 +6,6 @@ img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cos
test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer)
test_prelu_layer test_row_conv)
export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "data"
type: "data"
size: 2560
active_type: ""
}
layers {
name: "__row_conv_layer_0__"
type: "maxout"
size: 2560
active_type: "relu"
inputs {
input_layer_name: "data"
input_parameter_name: "___row_conv_layer_0__.w0"
row_conv_conf {
context_length: 19
}
}
}
parameters {
name: "___row_conv_layer_0__.w0"
size: 48640
initial_mean: 0.0
initial_std: 0.229415733871
dims: 19
dims: 2560
initial_strategy: 0
initial_smart: true
}
input_layer_names: "data"
output_layer_names: "__row_conv_layer_0__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__row_conv_layer_0__"
input_layer_names: "data"
output_layer_names: "__row_conv_layer_0__"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
data = data_layer(name='data', size=2560)
row_conv = row_conv_layer(input=data, context_len=19, act=ReluActivation())
outputs(row_conv)
......@@ -149,3 +149,57 @@ def cluster_files_reader(files_pattern,
yield line
return reader
def convert(output_path,
reader,
num_shards,
name_prefix,
max_lines_to_shuffle=1000):
import recordio
import cPickle as pickle
import random
"""
Convert data from reader to recordio format files.
:param output_path: directory in which output files will be saved.
:param reader: a data reader, from which the convert program will read data instances.
:param num_shards: the number of shards that the dataset will be partitioned into.
:param name_prefix: the name prefix of generated files.
:param max_lines_to_shuffle: the max lines numbers to shuffle before writing.
"""
assert num_shards >= 1
assert max_lines_to_shuffle >= 1
def open_writers():
w = []
for i in range(0, num_shards):
n = "%s/%s-%05d-of-%05d" % (output_path, name_prefix, i,
num_shards - 1)
w.append(recordio.writer(n))
return w
def close_writers(w):
for i in range(0, num_shards):
w[i].close()
def write_data(w, lines):
random.shuffle(lines)
for i, d in enumerate(lines):
d = pickle.dumps(d, pickle.HIGHEST_PROTOCOL)
w[i % num_shards].write(d)
w = open_writers()
lines = []
for i, d in enumerate(reader()):
lines.append(d)
if i % max_lines_to_shuffle == 0 and i >= max_lines_to_shuffle:
write_data(w, lines)
lines = []
continue
write_data(w, lines)
close_writers(w)
......@@ -57,6 +57,38 @@ class TestCommon(unittest.TestCase):
for idx, e in enumerate(reader()):
self.assertEqual(e, str("0"))
def test_convert(self):
record_num = 10
num_shards = 4
def test_reader():
def reader():
for x in xrange(record_num):
yield x
return reader
path = tempfile.mkdtemp()
paddle.v2.dataset.common.convert(path,
test_reader(), num_shards,
'random_images')
files = glob.glob(path + '/random_images-*')
self.assertEqual(len(files), num_shards)
recs = []
for i in range(0, num_shards):
n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1)
r = recordio.reader(n)
while True:
d = r.read()
if d is None:
break
recs.append(d)
recs.sort()
self.assertEqual(total, record_num)
if __name__ == '__main__':
unittest.main()
......@@ -45,7 +45,12 @@ class Optimizer(object):
return swig_api.ParameterUpdater.createRemoteUpdater(
self.__opt_conf__, pass_num, use_sparse_updater)
def create_updater(self, is_local, num_passes, use_sparse_updater):
def __create_new_remote_updater__(self, pserver_spec):
return swig_api.ParameterUpdater.createNewRemoteUpdater(
self.__opt_conf__, pserver_spec)
def create_updater(self, is_local, num_passes, use_sparse_updater,
pserver_spec):
"""
create proper parameter_updater by configuration.
:param is_local: create local or remote parameter updater
......@@ -64,8 +69,12 @@ class Optimizer(object):
if is_local:
parameter_updater = self.__create_local_updater__()
else:
parameter_updater = self.__create_remote_updater__(
num_passes, use_sparse_updater)
if pserver_spec is None:
parameter_updater = self.__create_remote_updater__(
num_passes, use_sparse_updater)
else:
parameter_updater = self.__create_new_remote_updater__(
pserver_spec)
return parameter_updater
......
......@@ -49,7 +49,8 @@ class SGD(object):
parameters,
update_equation,
extra_layers=None,
is_local=True):
is_local=True,
pserver_spec=None):
if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters')
......@@ -63,6 +64,7 @@ class SGD(object):
self.__parameters__ = parameters
self.__topology_in_proto__ = topology.proto()
self.__is_local__ = is_local
self.__pserver_spec__ = pserver_spec
self.__use_sparse_updater__ = self.__topology__.use_sparse_updater()
# # In local mode, disable sparse_remote_update.
......@@ -126,7 +128,8 @@ class SGD(object):
__check_train_args__(**locals())
self.__parameter_updater__ = self.__optimizer__.create_updater(
self.__is_local__, num_passes, self.__use_sparse_updater__)
self.__is_local__, num_passes, self.__use_sparse_updater__,
self.__pserver_spec__)
self.__parameter_updater__.init(self.__gradient_machine__)
self.__gradient_machine__.start()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册