提交 3fe9e48f 编写于 作者: G guosheng

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into add-L2NormLayer

......@@ -17,10 +17,14 @@
- id: detect-private-key
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
- repo: local
hooks:
- id: clang-formater
- id: clang-format
name: clang-format
description: Format files with ClangFormat.
entry: clang-format -i
language: system
files: \.(c|cc|cxx|cpp|h|hpp|hxx)$
- repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
hooks:
......
......@@ -7,17 +7,8 @@ INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/extern_eigen3)
ExternalProject_Add(
extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS}
# for latest version, please get from official website
# URL "https://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz"
# URL_MD5 "1a47e78efe365a97de0c022d127607c3"
# for no-ssl http support, please get from bazel's mirror
# URL "http://mirror.bazel.build/bitbucket.org/eigen/eigen/get/f3a22f35b044.tar.gz"
# URL_MD5 "4645c66075982da6fa0bcf6b20f3e8f7"
# get from github mirror
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG "a46d2e7337c4656f00abe54a8115f6d76153a048"
GIT_TAG "master"
PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
......
......@@ -153,7 +153,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
LIST(APPEND CUDA_NVCC_FLAGS -std=c++11 --default-stream per-thread)
LIST(APPEND CUDA_NVCC_FLAGS --use_fast_math)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
......
......@@ -203,6 +203,10 @@ identity_projection
.. autoclass:: paddle.v2.layer.identity_projection
:noindex:
slice_projection
-------------------
.. autoclass:: paddle.v2.layer.slice_projection
:noindex:
table_projection
----------------
......
......@@ -37,8 +37,8 @@ Scope is an association of a name to variable. All variables belong to `Scope`.
```cpp
class Scope {
public:
Variable* CreateVariable(const std::string& name);
const Variable* GetVariable(const std::string& name) const;
Variable* NewVar(const std::string& name);
const Variable* FindVar(const std::string& name) const;
private:
std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
......@@ -58,12 +58,12 @@ class Scope {
public:
Scope(const std::shared_ptr<Scope>& scope): parent_(scope) {}
Variable* GetVariable(const std::string& name) const {
Variable* FindVar(const std::string& name) const {
auto it = vars_.find(name);
if (it != vars_.end()) {
return it->second.get();
} else if (parent_ != nullptr) {
return parent_->GetVariable(name);
return parent_->FindVar(name);
} else {
return nullptr;
}
......@@ -95,10 +95,10 @@ class Scope {
static std::shared_ptr<Scope> Create(const std::shared_ptr<Scope>& parent = nullptr);
// return nullptr if not found.
Variable* GetVariable(const std::string& name) const;
Variable* FindVar(const std::string& name) const;
// return if already contains same name variable.
Variable* CreateVariable(const std::string& name);
Variable* NewVar(const std::string& name);
private:
std::shared_ptr<Scope> parent_;
......@@ -107,11 +107,11 @@ class Scope {
```
## Only scope can create a variable
To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `CreateVariable` can construct `Variable`.
To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `NewVar` can construct `Variable`.
## When scope destroyed, all variables inside this scope should be destroyed together
The scope hold unique pointers for all variables. User can `GetVariable` from scope, but he should not hold this pointer as a member variable. Because when scope is destroyed, all variables inside this scope will be destroyed together.
The scope hold unique pointers for all variables. User can `FindVar` from scope, but he should not hold this pointer as a member variable. Because when scope is destroyed, all variables inside this scope will be destroyed together.
## Sharing a parent scope
......@@ -121,4 +121,4 @@ Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shar
## Orthogonal interface
`GetVariable` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `CreateVariable` will return a `Error` when there is a name conflict locally. Combine `GetVariable` and `CreateVariable`, we can implement `CreateOrGetVariable` easily.
`FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `NewVar` will return a `Error` when there is a name conflict locally. Combine `FindVar` and `NewVar`, we can implement `NewVar` easily.
......@@ -19,6 +19,8 @@ import (
"net"
"net/http"
"net/rpc"
"os"
"os/signal"
"strconv"
"strings"
"time"
......@@ -68,6 +70,20 @@ func main() {
store = &master.InMemStore{}
}
shutdown := func() {
log.Infoln("shutting down gracefully")
err := store.Shutdown()
if err != nil {
log.Errorln(err)
}
}
// Guaranteed to run even panic happens.
defer shutdown()
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt)
s, err := master.NewService(store, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
if err != nil {
log.Fatal(err)
......@@ -84,8 +100,12 @@ func main() {
log.Fatal(err)
}
err = http.Serve(l, nil)
if err != nil {
log.Fatal(err)
}
go func() {
err = http.Serve(l, nil)
if err != nil {
log.Fatal(err)
}
}()
<-c
}
......@@ -18,6 +18,8 @@ import (
"net"
"net/http"
"net/rpc"
"os"
"os/signal"
"strconv"
"time"
......@@ -33,7 +35,8 @@ func main() {
index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0")
etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
"comma separated endpoint string for pserver to connect to etcd")
etcdTimeout := flag.Duration("etcd-timeout", 5*time.Second, "timeout for etcd calls")
dialTimeout := flag.Duration("dial-timeout", 5*time.Second, "dial timeout")
etcdTTL := flag.Int("etcd-ttl", 5, "etcd time to live in seconds")
numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job")
checkpointPath := flag.String("checkpoint-path", "/checkpoints/", "save checkpoint path")
checkpointInterval := flag.Duration("checkpoint-interval", 600*time.Second, "save checkpoint per interval seconds")
......@@ -53,7 +56,7 @@ func main() {
if *index >= 0 {
idx = *index
} else {
e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, *etcdTimeout)
e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, *dialTimeout, *etcdTTL)
idx, err = e.Register(*port)
candy.Must(err)
......@@ -67,6 +70,20 @@ func main() {
}
}
shutdown := func() {
log.Infoln("shutting down gracefully")
sErr := e.Shutdown()
if sErr != nil {
log.Errorln(sErr)
}
}
// Guaranteed to run even panic happens.
defer shutdown()
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt)
s, err := pserver.NewService(idx, *checkpointInterval, *checkpointPath, e, cp)
candy.Must(err)
......@@ -77,7 +94,11 @@ func main() {
l, err := net.Listen("tcp", ":"+strconv.Itoa(*port))
candy.Must(err)
log.Infof("start pserver at port %d", *port)
err = http.Serve(l, nil)
candy.Must(err)
go func() {
log.Infof("start pserver at port %d", *port)
err = http.Serve(l, nil)
candy.Must(err)
}()
<-c
}
hash: a8faea3a363468a88917ddeb3b1c9ea36886fb2c622acbad42604fa9cb4d3855
updated: 2017-07-11T10:04:40.786745417+08:00
hash: 2a1c0eca5c07a130e3d224f9821f96cfa37a39bf6bce141c855bbc57ef569f1c
updated: 2017-07-29T07:34:48.722757905+08:00
imports:
- name: github.com/beorn7/perks
version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9
subpackages:
- quantile
- name: github.com/boltdb/bolt
version: 583e8937c61f1af6513608ccc75c97b6abdf4ff9
- name: github.com/cockroachdb/cmux
version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92
- name: github.com/coreos/etcd
version: cb2a496c4ddd1c87a9f280e116649b599999ec79
version: c31bec0f29facff13f7c3e3d948e55dd6689ed42
subpackages:
- alarm
- auth
- auth/authpb
- client
- clientv3
- clientv3/concurrency
- compactor
- discovery
- embed
- error
- etcdserver
- etcdserver/api
- etcdserver/api/v2http
- etcdserver/api/v2http/httptypes
- etcdserver/api/v3client
- etcdserver/api/v3election
- etcdserver/api/v3election/v3electionpb
- etcdserver/api/v3election/v3electionpb/gw
- etcdserver/api/v3lock
- etcdserver/api/v3lock/v3lockpb
- etcdserver/api/v3lock/v3lockpb/gw
- etcdserver/api/v3rpc
- etcdserver/api/v3rpc/rpctypes
- etcdserver/auth
- etcdserver/etcdserverpb
- etcdserver/etcdserverpb/gw
- etcdserver/membership
- etcdserver/stats
- lease
- lease/leasehttp
- lease/leasepb
- mvcc
- mvcc/backend
- mvcc/mvccpb
- pkg/adt
- pkg/contention
- pkg/cors
- pkg/cpuutil
- pkg/crc
- pkg/debugutil
- pkg/fileutil
- pkg/httputil
- pkg/idutil
- pkg/ioutil
- pkg/logutil
- pkg/monotime
- pkg/netutil
- pkg/pathutil
- pkg/pbutil
- pkg/runtime
- pkg/schedule
- pkg/srv
- pkg/tlsutil
- pkg/transport
- pkg/types
- pkg/wait
- proxy/grpcproxy/adapter
- raft
- raft/raftpb
- rafthttp
- snap
- snap/snappb
- store
- version
- wal
- wal/walpb
- name: github.com/coreos/go-semver
version: 8ab6407b697782a06568d4b7f1db25550ec2e4c6
subpackages:
- semver
- name: github.com/coreos/go-systemd
version: 48702e0da86bd25e76cfef347e2adeb434a0d0a6
subpackages:
- daemon
- journal
- util
- name: github.com/coreos/pkg
version: 3ac0863d7acf3bc44daf49afef8919af12f704ef
subpackages:
- capnslog
- name: github.com/dgrijalva/jwt-go
version: d2709f9f1f31ebcda9651b03077758c1f3a0018c
- name: github.com/ghodss/yaml
version: 0ca9ea5df5451ffdf184b4428c902747c2c11cd7
- name: github.com/gogo/protobuf
version: 909568be09de550ed094403c2bf8a261b5bb730a
subpackages:
- proto
- name: github.com/golang/protobuf
version: 4bd1920723d7b7c925de087aa32e2187708897f7
subpackages:
......@@ -17,14 +107,61 @@ imports:
- proto
- name: github.com/golang/snappy
version: 553a641470496b2327abcac10b36396bd98e45c9
- name: github.com/google/btree
version: 925471ac9e2131377a91e1595defec898166fe49
- name: github.com/grpc-ecosystem/go-grpc-prometheus
version: 6b7015e65d366bf3f19b2b2a000a831940f0f7e0
- name: github.com/grpc-ecosystem/grpc-gateway
version: 18d159699f2e83fc5bb9ef2f79465ca3f3122676
subpackages:
- runtime
- runtime/internal
- utilities
- name: github.com/jonboulle/clockwork
version: 2eee05ed794112d45db504eb05aa693efd2b8b09
- name: github.com/matttproud/golang_protobuf_extensions
version: c12348ce28de40eed0136aa2b644d0ee0650e56c
subpackages:
- pbutil
- name: github.com/namsral/flag
version: 71ceffbeb0ba60fccc853971bb3ed4d7d90bfd04
- name: github.com/PaddlePaddle/recordio
version: edfb82af0739c84f241c87390ec5649c7b28c129
version: 0432dee9fd4b24fb6840fb20a8c055b0c933fb81
- name: github.com/prometheus/client_golang
version: c5b7fccd204277076155f10851dad72b76a49317
subpackages:
- prometheus
- name: github.com/prometheus/client_model
version: 6f3806018612930941127f2a7c6c453ba2c527d2
subpackages:
- go
- name: github.com/prometheus/common
version: 49fee292b27bfff7f354ee0f64e1bc4850462edf
subpackages:
- expfmt
- internal/bitbucket.org/ww/goautoneg
- model
- name: github.com/prometheus/procfs
version: a1dba9ce8baed984a2495b658c82687f8157b98f
subpackages:
- xfs
- name: github.com/sirupsen/logrus
version: 7f976d3a76720c4c27af2ba716b85d2e0a7e38b1
version: a3f95b5c423586578a4e099b11a46c2479628cac
- name: github.com/topicai/candy
version: 1b9030d056fa9f8c4b1f9c91b52fe4b8ab4cd8cc
- name: github.com/ugorji/go
version: ded73eae5db7e7a0ef6f55aace87a2873c5d2b74
subpackages:
- codec
- name: github.com/xiang90/probing
version: 07dd2e8dfe18522e9c447ba95f2fe95262f63bb2
- name: golang.org/x/crypto
version: 1351f936d976c60a0a48d728281922cf63eafb8d
repo: https://github.com/golang/crypto.git
vcs: git
subpackages:
- bcrypt
- blowfish
- name: golang.org/x/net
version: c8c74377599bd978aee1cf3b9b63a8634051cec2
subpackages:
......@@ -36,11 +173,15 @@ imports:
- lex/httplex
- trace
- name: golang.org/x/sys
version: abf9c25f54453410d0c6668e519582a9e1115027
version: 0f826bdd13b500be0f1d4004938ad978fcc6031e
repo: https://github.com/golang/sys.git
vcs: git
subpackages:
- unix
- name: golang.org/x/text
version: cfdf022e86b4ecfb646e1efbd7db175dd623a8fa
version: 836efe42bb4aa16aaa17b9c155d8813d336ed720
repo: https://github.com/golang/text.git
vcs: git
subpackages:
- secure/bidirule
- transform
......@@ -60,4 +201,23 @@ imports:
- stats
- tap
- transport
testImports: []
- name: gopkg.in/yaml.v2
version: cd8b52f8269e0feb286dfeef29f8fe4d5b397e0b
testImports:
- name: github.com/davecgh/go-spew
version: 04cdfd42973bb9c8589fd6a731800cf222fde1a9
subpackages:
- spew
- name: github.com/docker/docker
version: b6d164e6c46d8115b146e4c3ac93784e9ef8b49e
subpackages:
- pkg/ioutils
- pkg/longpath
- name: github.com/pmezard/go-difflib
version: d8ed2627bdf02c080bf22230dbb337003b7aba2d
subpackages:
- difflib
- name: github.com/stretchr/testify
version: 05e8a0eda380579888eb53c394909df027f06991
subpackages:
- assert
......@@ -6,8 +6,19 @@ import:
subpackages:
- clientv3
- clientv3/concurrency
- embed
- etcdserver
- package: github.com/namsral/flag
version: ^1.7.4-pre
- package: github.com/sirupsen/logrus
version: ^1.0.0
- package: github.com/topicai/candy
- package: golang.org/x/crypto
vcs: git
repo: https://github.com/golang/crypto.git
- package: golang.org/x/sys
vcs: git
repo: https://github.com/golang/sys.git
- package: golang.org/x/text
vcs: git
repo: https://github.com/golang/text.git
......@@ -39,15 +39,12 @@ type EtcdClient struct {
statePath string
client *clientv3.Client
lock *concurrency.Mutex
sess *concurrency.Session
}
// NewEtcdClient creates a new EtcdClient.
func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) {
log.Debugf("Connecting to etcd at %v", endpoints)
// TODO(helin): gracefully shutdown etcd store. Because etcd
// store holds a etcd lock, even though the lock will expire
// when the lease timeout, we need to implement graceful
// shutdown to release the lock.
cli, err := clientv3.New(clientv3.Config{
Endpoints: endpoints,
DialTimeout: dialTimeout,
......@@ -67,12 +64,12 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
// one master running, but split-brain problem may cause
// multiple master servers running), and the cluster management
// software will kill one of them.
log.Debugf("Trying to acquire lock at %s.", lockPath)
log.Infof("Trying to acquire lock at %s.", lockPath)
err = lock.Lock(context.TODO())
if err != nil {
return nil, err
}
log.Debugf("Successfully acquired lock at %s.", lockPath)
log.Infof("Successfully acquired lock at %s.", lockPath)
put := clientv3.OpPut(addrPath, addr)
resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit()
......@@ -89,6 +86,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
statePath: statePath,
client: cli,
lock: lock,
sess: sess,
}
return e, nil
......@@ -157,6 +155,21 @@ func (e *EtcdClient) Load() ([]byte, error) {
return state, nil
}
// Shutdown shuts down the etcd client gracefully.
func (e *EtcdClient) Shutdown() error {
err := e.sess.Close()
newErr := e.client.Close()
if newErr != nil {
if err == nil {
err = newErr
} else {
log.Errorln(newErr)
}
}
return err
}
// GetKey gets the value by the specify key.
func GetKey(c *clientv3.Client, key string, timeout time.Duration) (string, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
......
......@@ -40,3 +40,8 @@ func (m *InMemStore) Load() ([]byte, error) {
return m.buf, nil
}
// Shutdown shuts down the in mem store.
func (m *InMemStore) Shutdown() error {
return nil
}
......@@ -50,6 +50,7 @@ var ErrPassAfter = errors.New("pass number larger than master")
type Store interface {
Save([]byte) error
Load() ([]byte, error)
Shutdown() error
}
// Chunk is a chunk of data consisted of several data instances.
......
package master_test
import (
"os"
"testing"
"time"
"github.com/PaddlePaddle/Paddle/go/master"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/embed"
"github.com/docker/docker/pkg/ioutils"
"github.com/stretchr/testify/assert"
)
func TestNewServiceWithEtcd(t *testing.T) {
// setup an embed etcd server
etcdDir, err := ioutils.TempDir("", "")
if err != nil {
t.Fatal(err)
}
cfg := embed.NewConfig()
cfg.Dir = etcdDir
e, err := embed.StartEtcd(cfg)
if err != nil {
t.Fatal(err)
}
defer func() {
e.Close()
if err := os.RemoveAll(etcdDir); err != nil {
t.Fatal(err)
}
}()
select {
case <-e.Server.ReadyNotify():
t.Log("Server is ready!")
case <-time.After(60 * time.Second):
e.Server.Stop() // trigger a shutdown
t.Fatal("Server took too long to start!")
}
ep := []string{"127.0.0.1:2379"}
masterAddr := "127.0.0.1:3306"
store, err := master.NewEtcdClient(ep, masterAddr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, 30)
if err != nil {
t.Fatal(err)
}
_, err = master.NewService(store, 10, 10, 3)
if err != nil {
t.Fatal(err)
}
cli, err := clientv3.New(clientv3.Config{
Endpoints: ep,
DialTimeout: 3 * time.Second,
})
if err != nil {
t.Fatal(err)
}
v, err := master.GetKey(cli, master.DefaultAddrPath, 3*time.Second)
if err != nil {
t.Fatal(err)
}
if err := cli.Close(); err != nil {
t.Fatal(err)
}
// test master process registry itself into etcd server.
assert.Equal(t, masterAddr, v, "master process should registry itself into etcd server.")
}
......@@ -55,10 +55,10 @@ var curHandle C.paddle_pserver_client
func add(c *client.Client) C.paddle_pserver_client {
mu.Lock()
defer mu.Unlock()
client := curHandle
cli := curHandle
curHandle++
handleMap[client] = c
return client
handleMap[cli] = c
return cli
}
func get(client C.paddle_pserver_client) *client.Client {
......
......@@ -3,24 +3,11 @@ import paddle.v2.dataset.uci_housing as uci_housing
import paddle.v2.master as master
import os
import cPickle as pickle
from paddle.v2.reader.creator import cloud_reader
etcd_ip = os.getenv("MASTER_IP", "127.0.0.1")
etcd_endpoint = "http://" + etcd_ip + ":2379"
print "connecting to master, etcd endpoints: ", etcd_endpoint
master_client = master.client(etcd_endpoint, 5, 64)
def cloud_reader():
global master_client
master_client.set_dataset(
["/pfs/dlnel/public/dataset/uci_housing/uci_housing-*"], passes=30)
while 1:
r, e = master_client.next_record()
if not r:
if e != -2: # other errors
print "get record error:", e
break
yield pickle.loads(r)
etcd_endpoints = "http://" + etcd_ip + ":2379"
print "etcd endpoints: ", etcd_endpoints
def main():
......@@ -49,7 +36,7 @@ def main():
parameters=parameters,
update_equation=optimizer,
is_local=False,
pserver_spec=etcd_endpoint,
pserver_spec=etcd_endpoints,
use_etcd=True)
# event_handler to print training and testing info
......@@ -75,7 +62,11 @@ def main():
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
cloud_reader, buf_size=500), batch_size=2),
cloud_reader(
["/pfs/dlnel/public/dataset/uci_housing/uci_housing*"],
etcd_endpoints),
buf_size=500),
batch_size=2),
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
......
......@@ -34,16 +34,19 @@ const (
PsPath = "/ps/"
// PsCheckpoint is the etcd path for store checkpoints information
PsCheckpoint = "/checkpoints/"
retryTimeout = 5 * time.Second
)
// EtcdClient is the etcd client that the pserver uses for fault
// tolerance, service registry and coordination.
type EtcdClient struct {
numPservers int
etcdEndpoints string
etcdClient *clientv3.Client
// etcdTimeout is also used as retry intervals.
etcdTimeout time.Duration
numPservers int
endpoints string
client *clientv3.Client
sess *concurrency.Session
dialTimeout time.Duration
ttlSec int
// FIXME: ensure GetExternalIP gets the correct ip for trainers to connect.
externalIP string
// desired number of pservers in the job.
......@@ -52,11 +55,12 @@ type EtcdClient struct {
}
// NewEtcdClient creates an EtcdClient
func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *EtcdClient {
func NewEtcdClient(endpoints string, numPservers int, dialtimeout time.Duration, ttlSec int) *EtcdClient {
return &EtcdClient{
etcdTimeout: timeout,
numPservers: numPservers,
etcdEndpoints: endpoints,
dialTimeout: dialtimeout,
ttlSec: ttlSec,
numPservers: numPservers,
endpoints: endpoints,
}
}
......@@ -64,7 +68,6 @@ func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *Et
//
// Register returns the index of the current pserver.
func (e *EtcdClient) Register(port int) (int, error) {
var err error
e.externalIP, err = networkhelper.GetExternalIP()
if err != nil {
......@@ -72,19 +75,26 @@ func (e *EtcdClient) Register(port int) (int, error) {
}
// initialize connection to etcd.
ep := strings.Split(e.etcdEndpoints, ",")
ep := strings.Split(e.endpoints, ",")
for {
cli, err := clientv3.New(clientv3.Config{
Endpoints: ep,
DialTimeout: e.etcdTimeout,
DialTimeout: e.dialTimeout,
})
if err != nil {
log.Errorf("connect to etcd error: %v", err)
time.Sleep(e.etcdTimeout)
time.Sleep(retryTimeout)
continue
}
e.client = cli
sess, err := concurrency.NewSession(cli, concurrency.WithTTL(e.ttlSec))
if err != nil {
log.Errorf("create etcd session error: %v", err)
time.Sleep(retryTimeout)
continue
}
e.etcdClient = cli
log.Debugf("inited client to %s", e.etcdEndpoints)
e.sess = sess
log.Debugf("inited client to %s", e.endpoints)
break
}
// init /ps_desired using transaction, for multiple pservers may want to write
......@@ -95,7 +105,7 @@ func (e *EtcdClient) Register(port int) (int, error) {
cancel()
if err != nil {
log.Warn(err)
time.Sleep(e.etcdTimeout)
time.Sleep(retryTimeout)
continue
}
break
......@@ -106,18 +116,18 @@ func (e *EtcdClient) Register(port int) (int, error) {
// wait and set s.desired init value
for {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
resp, err := e.etcdClient.Get(ctx, PsDesired)
resp, err := e.client.Get(ctx, PsDesired)
cancel()
if err != nil {
log.Errorf("getting %s error: %v", PsDesired, err)
time.Sleep(e.etcdTimeout)
time.Sleep(retryTimeout)
continue
}
if len(resp.Kvs) != 0 {
e.desired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil {
log.Errorf("value of %s invalid %v\n", PsDesired, err)
time.Sleep(e.etcdTimeout)
time.Sleep(retryTimeout)
// NOTE: wait util ps_desired value change
continue
}
......@@ -134,7 +144,7 @@ func (e *EtcdClient) Register(port int) (int, error) {
cancel()
if err != nil {
log.Warn(err)
time.Sleep(e.etcdTimeout)
time.Sleep(retryTimeout)
continue
}
break
......@@ -144,10 +154,10 @@ func (e *EtcdClient) Register(port int) (int, error) {
}
func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) {
return concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error {
return concurrency.NewSTM(e.client, func(c concurrency.STM) error {
dsStr := c.Get(PsDesired)
if dsStr == "" {
c.Put(PsDesired, strconv.Itoa(numPservers))
c.Put(PsDesired, strconv.Itoa(numPservers), clientv3.WithLease(e.sess.Lease()))
}
return nil
}, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads))
......@@ -156,7 +166,7 @@ func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (
// registerPserverEtcd registers pserver node on etcd using transaction.
func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, error) {
var idx int
_, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error {
_, err := concurrency.NewSTM(e.client, func(c concurrency.STM) error {
registered := false
for i := 0; i < e.desired; i++ {
psKey := PsPath + strconv.Itoa(i)
......@@ -165,26 +175,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
log.Debugf("got value (%s) for key: %s", ps, psKey)
if ps == "" {
resp, err := e.etcdClient.Grant(context.TODO(), 5)
if err != nil {
log.Fatal(err)
}
// find the first id and write info
pserverAddr := e.externalIP + ":" + strconv.Itoa(port)
c.Put(psKey, pserverAddr, clientv3.WithLease(resp.ID))
c.Put(psKey, pserverAddr, clientv3.WithLease(e.sess.Lease()))
log.Debugf("set pserver node %s with value %s", psKey, pserverAddr)
ch, kaerr := e.etcdClient.KeepAlive(context.TODO(), resp.ID)
if kaerr != nil {
log.Errorf("keepalive etcd node error: %v", kaerr)
return kaerr
}
// Eat the keep alive message so etcd
// will not expire the lease.
go func(ch <-chan *clientv3.LeaseKeepAliveResponse) {
ka := <-ch
log.Debugf("keepalive: %d\n", ka.TTL)
}(ch)
log.Debug("register finished")
idx = i
registered = true
......@@ -207,7 +201,7 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
// GetKey gets the value by the specified key
func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
resp, err := e.etcdClient.Get(ctx, key)
resp, err := e.client.Get(ctx, key)
cancel()
if err != nil {
return []byte{}, err
......@@ -223,7 +217,27 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
// PutKey put into etcd with value by key specified
func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) error {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
_, err := e.etcdClient.Put(ctx, key, string(value))
_, err := e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease()))
cancel()
return err
}
// Shutdown shuts down the etcd client gracefully.
func (e *EtcdClient) Shutdown() error {
var err error
if e.sess != nil {
err = e.sess.Close()
}
if e.client != nil {
newErr := e.client.Close()
if newErr != nil {
if err != nil {
log.Errorln(newErr)
} else {
err = newErr
}
}
}
return err
}
......@@ -269,8 +269,7 @@ void hl_sequence2batch_copy_padding(real* batch,
int blockDimY = CUDA_BLOCK_SIZE / blockDimX;
dim3 threads(blockDimX, blockDimY);
int gridDimX = (maxSequenceLength * blockDimX + CUDA_BLOCK_SIZE - 1) /
CUDA_BLOCK_SIZE;
int gridDimX = (maxSequenceLength + blockDimY - 1) / blockDimY;
int gridDimY = numSequences;
dim3 grid(gridDimX, gridDimY);
......
......@@ -8,7 +8,9 @@ cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
cc_test(variable_test SRCS variable_test.cc)
cc_test(scope_test SRCS scope_test.cc)
cc_library(scope SRCS scope.cc)
cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(attr_type SRCS attr_type.proto)
proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
......@@ -16,7 +18,7 @@ proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor)
cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS op_proto operator)
......@@ -30,4 +32,7 @@ add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch
add_dependencies(framework_py_proto framework_py_proto_init)
cc_library(net SRCS net.cc DEPS op_registry)
cc_test(net_op_test SRCS net_op_test.cc DEPS net add_op mul_op sigmoid_op softmax_op fc_op)
cc_test(net_op_test SRCS net_op_test.cc DEPS net)
cc_library(backward SRCS backward.cc DEPS net)
cc_test(backward_test SRCS backward_test.cc DEPS backward)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/backward.h"
#include <list>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace framework {
static bool AllInSet(const std::vector<std::string>& names,
const std::string& suffix,
const std::unordered_set<std::string>& set) {
for (auto& name : names) {
if (set.find(name + suffix) == set.end()) {
return false;
}
}
return true;
}
static std::shared_ptr<OperatorBase> NOP() {
auto net_op = std::make_shared<NetOp>();
net_op->type_ = "@NOP@";
net_op->CompleteAddOp();
return net_op;
}
// Get backward operator from a forward operator, recursively implementation.
//
// no_grad_names the gradient variable names without gradient calculating.
//
// uniq_id is a unique index used inside recursively calling BackwardRecursive.
// use `uid = uniq_id++;` to get the unique index, and pass `uniq_id` through
// recursive calling.
//
// returns The backward operator. For simple situation, it is a simple
// operator. For complex situation, it is a NetOp.
//
// See Backward.h for details
static std::shared_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id);
std::shared_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) {
// If all input gradients of forwarding operator do not need to calculate,
// just return an NOP. Not return null ptr because NOP does not take
// too much time for calculation, but it is useful for simplifying logic.
if (AllInSet(forwardOp.inputs_, OperatorBase::GRAD_VAR_SUFFIX(),
no_grad_names)) {
return NOP();
}
// All output gradients of forwarding operator do not need to calculate. Then
// all input gradients cannot be computed at all, and we put them into
// `no_grad_names` set. Return an NOP.
if (AllInSet(forwardOp.outputs_, OperatorBase::GRAD_VAR_SUFFIX(),
no_grad_names)) {
for (auto& name : forwardOp.inputs_) {
// Mark all input is not need
no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
}
return NOP();
}
// Returned gradient network
auto net = std::make_shared<NetOp>();
if (forwardOp.IsNetOp()) {
// Because forwardOp is a net op, it can static_cast.
auto& forwardNet = static_cast<const NetOp&>(forwardOp);
// Map from output gradient variable name to operator's indices in backward
// net. That operator generates that variable.
std::unordered_map<std::string, std::vector<size_t>> dup_output_ops;
size_t local_op_id = 0;
// reversely travel forwardNet
for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
++it, ++local_op_id) {
auto fwd = *it;
auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id);
net->AddOp(bwd);
for (auto& out : bwd->outputs_) {
dup_output_ops[out].emplace_back(local_op_id);
}
}
// Get unique ID for this method.
auto uid = uniq_id++;
// TODO(dzh): more comment
using Pos = std::pair<size_t, std::shared_ptr<OperatorBase>>;
std::list<Pos> insert_position;
for (auto& dup_output_op : dup_output_ops) {
const std::string& name = dup_output_op.first;
auto& dup_op = dup_output_op.second;
if (dup_op.size() == 1) continue;
std::vector<std::string> dup_outputs;
for (size_t i = 0; i < dup_op.size(); ++i) {
auto op_offset = dup_op[i];
dup_outputs.push_back(name + "@RENAME@" + std::to_string(uid) + "@" +
std::to_string(i));
net->ops_[op_offset]->Rename(name, dup_outputs.back());
}
insert_position.push_back(
{dup_op.back(),
OpRegistry::CreateOp(
"add", {dup_outputs}, {name},
{{"input_format",
std::vector<int>{0, static_cast<int>(dup_outputs.size())}}})});
}
insert_position.sort(
[](const Pos& l, const Pos& r) { return l.first > r.first; });
for (auto& pos : insert_position) {
net->InsertOp(pos.first + 1, pos.second);
}
} else {
std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp);
for (std::string& grad_input : grad_op->inputs_) {
if (no_grad_names.count(grad_input)) {
std::string prefix = grad_input.substr(
0, grad_input.size() - OperatorBase::GRAD_VAR_SUFFIX().size());
grad_input = prefix + OperatorBase::ZERO_VAR_SUFFIX();
// If part of input gradient of that operator is not calculated, fill
// zero variables to that input gradient.
net->AddOp(OpRegistry::CreateOp("fill_zeros_like", {prefix},
{grad_input}, {}));
}
}
for (std::string& grad_output : grad_op->outputs_) {
if (no_grad_names.count(grad_output)) {
grad_output = OperatorBase::EMPTY_VAR_NAME();
}
}
if (net->ops_.empty()) { // Current no aux op is added to network
return grad_op;
}
net->AddOp(grad_op);
}
net->type_ = "@GENERATED_BACKWARD@";
net->CompleteAddOp();
return net;
}
// See header for comments
std::shared_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars) {
std::unordered_set<std::string> no_grad_names;
no_grad_names.reserve(no_grad_vars.size());
for (auto& name : no_grad_vars) {
no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
}
size_t uid = 0;
return BackwardRecursive(forwardOp, no_grad_names, uid);
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <unordered_set>
#include "operator.h"
namespace paddle {
namespace framework {
// Create the backward operator from a forward operator.
// TODO(yuyang18): Add more API reference comment.
extern std::shared_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework
} // namespace paddle
## Operator/expression 's Backward
### Motivation
In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation lineage, the operator/ expression's Backward feature will generate the backward pass respect to forward pass.
### Implement : gradient operator registry
| | forward operator | backward operator |
| ---------------------- | ---------------- | -------------------------------- |
| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients |
| **Operator::outputs_** | Outputs | InputGradients |
Inputs/Outputs means the input/output of the operator, InputGradients/OutputGradients is the gradient respect to forward opeartor. Forward operator and Backward operator are isomorphic, save their corresponding needs into member attribute.
We use a global hash map record the gradient operators available, follow the philosophy of minimum core, make operator pluggable unit. Each gradient is an operator and it needs to regist itself.
grad_op_builder(fengjiayi)
### Implement : Backward network
given a forward network, it generates the backward network. We only care about the Gradients—`OutputGradients`,`InputGradients`.
1. bla bla bla (yuyang)
2. NetOp
when the input forward network is a NetOp, it need to call the sub NetOp/Operators backward function recursively and ensure them done. During the process, we need to collect the `OutputGradients` name.
We share variable in the same scope, as a result, duplicate operator `OutputGradients` will overwirte then duplicate variable.
![./images/duplicate_op]()
Share variable between operators or same input variable used in multiple operators lead to a duplicate gradient variable. As demo show above, we need to rename gradient name recursively, and add a generic add operator instead.
![./images/duplicate_op2]()
​ Then collect the sub graph OutputGradients/InputGradients as the NetOp's and return it.
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/backward.h"
#include <gtest/gtest.h>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace framework {
class EmptyOp : public OperatorBase {
public:
void InferShape(const Scope &scope) const override {}
void Run(const Scope &scope,
const platform::DeviceContext &dev_ctx) const override {}
};
class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
public:
RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input X of Add").IgnoreGradient();
AddInput("b", "Bias of Add").IgnoreGradient();
AddOutput("Out", "Out of Add").IgnoreGradient();
AddComment("Add Op");
}
};
class MulOpMaker : public OpProtoAndCheckerMaker {
public:
MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("A", "A");
AddInput("B", "B");
AddOutput("Out", "Out");
AddComment("Mul");
}
};
class SigmoidOpMaker : public OpProtoAndCheckerMaker {
public:
SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "X");
AddOutput("Y", "Y");
AddComment("Sigmoid");
}
};
class NoGradOpMaker : public OpProtoAndCheckerMaker {
public:
NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "X input");
AddOutput("Y", "Y output");
AddComment("NoGradOp, same input output. no Grad");
}
};
class FcOp : public NetOp {
public:
void Init() override {
AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")},
{Output("mul_result")}, {}));
auto b_name = Input("b");
std::string before_act = "mul_result";
if (b_name != EMPTY_VAR_NAME()) {
AddOp(OpRegistry::CreateOp("rowwise_add", {Output("mul_result"), b_name},
{Output("add_result")}, {}));
before_act = "add_result";
} else {
auto out_varname = Output("add_result");
if (out_varname != EMPTY_VAR_NAME()) {
this->Rename(out_varname, EMPTY_VAR_NAME());
}
}
AddOp(OpRegistry::CreateOp("sigmoid", {Output(before_act)}, {Output("Out")},
{}));
CompleteAddOp(false);
}
};
class FcOpMaker : public OpProtoAndCheckerMaker {
public:
FcOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "x");
AddInput("W", "w");
AddInput("b", "b");
AddOutput("mul_result", "").SetTemporary();
AddOutput("add_result", "").SetTemporary();
AddOutput("Out", "");
AddComment("");
}
};
class ManyOutputOpMaker : public OpProtoAndCheckerMaker {
public:
ManyOutputOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("x", "x");
AddOutput("y", "y");
AddOutput("z", "z");
AddComment("");
}
};
class FillZeroOpMaker : public OpProtoAndCheckerMaker {
public:
FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("x", "x");
AddOutput("out", "out");
AddComment("");
}
};
class AddOpMaker : public OpProtoAndCheckerMaker {
public:
AddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "x").SetMultiple();
AddOutput("Y", "y");
AddComment("");
}
};
} // namespace framework
} // namespace paddle
namespace f = paddle::framework;
using EnforceNotMet = paddle::platform::EnforceNotMet;
REGISTER_OP(rowwise_add, f::EmptyOp, f::RowWiseAddOpMaker);
REGISTER_GRADIENT_OP(rowwise_add, rowwise_add_grad, f::EmptyOp);
REGISTER_OP(mul, f::EmptyOp, f::MulOpMaker);
REGISTER_GRADIENT_OP(mul, mul_grad, f::EmptyOp);
REGISTER_OP(sigmoid, f::EmptyOp, f::SigmoidOpMaker);
REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, f::EmptyOp);
REGISTER_OP(nograd, f::EmptyOp, f::NoGradOpMaker);
REGISTER_OP(fill_zeros_like, f::EmptyOp, f::FillZeroOpMaker);
REGISTER_OP(add, f::EmptyOp, f::AddOpMaker);
REGISTER_GRADIENT_OP(add, add_grad, f::EmptyOp);
REGISTER_OP(fc, f::FcOp, f::FcOpMaker);
REGISTER_OP(many_output_op, f::EmptyOp, f::ManyOutputOpMaker);
REGISTER_GRADIENT_OP(many_output_op, many_output_op_grad, f::EmptyOp);
TEST(Backward, simple_op_grad) {
auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {});
ASSERT_NE(fwd, nullptr);
auto gop = f::OpRegistry::CreateGradOp(*fwd);
ASSERT_EQ(1UL, gop->inputs_.size());
ASSERT_EQ("Out" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->inputs_[0]);
ASSERT_EQ("rowwise_add_grad", gop->type_);
ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[0]);
ASSERT_EQ("b" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[1]);
ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(),
gop->Output("X" + f::OperatorBase::GRAD_VAR_SUFFIX()));
}
TEST(Backward, simple_op_not_need_grad) {
auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {});
ASSERT_NE(fwd, nullptr);
auto gop = f::Backward(*fwd, {"X"});
ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(),
"X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
gop->outputs_.end());
auto no_input_gop = f::Backward(*fwd, {"X", "b"});
ASSERT_NE(no_input_gop, nullptr);
ASSERT_TRUE(no_input_gop->IsNetOp());
ASSERT_EQ(0UL, std::static_pointer_cast<f::NetOp>(no_input_gop)->ops_.size());
}
TEST(Backward, net_fc_backward_normal) {
std::shared_ptr<f::OperatorBase> fwd = f::OpRegistry::CreateOp(
"fc", {"X", "w", "b"}, {"mul_result", "add_result", "out"}, {});
ASSERT_NE(fwd, nullptr);
std::shared_ptr<f::OperatorBase> gop = f::Backward(*fwd, {});
ASSERT_TRUE(gop->IsNetOp());
auto net = static_cast<f::NetOp *>(gop.get());
ASSERT_NO_THROW(net->DebugString());
ASSERT_EQ(3UL, net->ops_.size());
f::OperatorBase &d_sigmoid = *net->ops_[0];
ASSERT_EQ("sigmoid_grad", d_sigmoid.type_);
f::OperatorBase &d_add = *net->ops_[1];
ASSERT_EQ("rowwise_add_grad", d_add.type_);
f::OperatorBase &d_mul = *net->ops_[2];
ASSERT_EQ("mul_grad", d_mul.type_);
}
TEST(Backward, net_fc_backward_not_have_b) {
std::shared_ptr<f::OperatorBase> fwd = f::OpRegistry::CreateOp(
"fc", {"X", "w", f::OperatorBase::EMPTY_VAR_NAME()},
{"mul_result", "add_result", "tmp"}, {});
ASSERT_NE(fwd, nullptr);
std::shared_ptr<f::OperatorBase> gop = f::Backward(*fwd, {});
ASSERT_TRUE(gop->IsNetOp());
auto net = static_cast<f::NetOp *>(gop.get());
ASSERT_NO_THROW(net->DebugString());
ASSERT_EQ(2UL, net->ops_.size());
f::OperatorBase &d_sigmoid = *net->ops_[0];
ASSERT_EQ("sigmoid_grad", d_sigmoid.type_);
f::OperatorBase &d_mul = *net->ops_[1];
ASSERT_EQ("mul_grad", d_mul.type_);
}
TEST(Backward, net_input_of_network_not_need_grad) {
f::NetOp net;
net.AddOp(f::OpRegistry::CreateOp("fc", {"X", "W1", "b1"},
{"mul_tmp_0", "add_tmp_0", "hidden0"}, {}));
net.AddOp(f::OpRegistry::CreateOp("fc", {"hidden0", "W2", "b2"},
{"mul_tmp_1", "add_tmp_1", "hidden1"}, {}));
net.CompleteAddOp();
auto bwd = Backward(net, {"X"}); // X@GRAD is not need.
ASSERT_TRUE(bwd->IsNetOp());
auto bwd_net = static_cast<f::NetOp *>(bwd.get());
std::unordered_set<std::string> all_output = std::unordered_set<std::string>(
bwd_net->outputs_.begin(), bwd_net->outputs_.end());
all_output.erase(f::OperatorBase::EMPTY_VAR_NAME());
for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
ASSERT_NE(all_output.find(out + f::OperatorBase::GRAD_VAR_SUFFIX()),
all_output.end());
}
// Not Generated X
ASSERT_EQ(all_output.find("X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
all_output.end());
ASSERT_EQ(2UL, bwd_net->ops_.size());
ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
auto first_fc_grad = static_cast<f::NetOp *>(bwd_net->ops_[1].get());
ASSERT_EQ(3UL, first_fc_grad->ops_.size());
ASSERT_EQ(
f::OperatorBase::EMPTY_VAR_NAME(),
first_fc_grad->ops_[2]->Output("A" + f::OperatorBase::GRAD_VAR_SUFFIX()));
}
TEST(Backward, net_shared_weight) {
f::NetOp net;
net.AddOp(f::OpRegistry::CreateOp("mul", {"X", "W"}, {"Out"}, {}));
net.AddOp(f::OpRegistry::CreateOp("mul", {"Out", "W"}, {"FinalOut"}, {}));
net.CompleteAddOp();
auto bwd = f::Backward(net, {});
ASSERT_TRUE(bwd->IsNetOp());
auto bwd_net = static_cast<f::NetOp *>(bwd.get());
ASSERT_EQ(3UL, bwd_net->ops_.size());
ASSERT_EQ("add", bwd_net->ops_[2]->type_);
}
TEST(Backward, op_register_grad_not_for_network) {
auto fwd = f::OpRegistry::CreateOp(
"fc", {"X", "W", "b"}, {"mul_out", "add_out", "out1"},
{{"temporary_index", std::vector<int>{0, 1}}});
ASSERT_THROW(f::OpRegistry::CreateGradOp(*fwd), EnforceNotMet);
}
TEST(Backward, op_all_input_are_not_need) {
auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {});
auto backward = f::Backward(*fwd, {"X", "b"});
ASSERT_TRUE(backward->IsNetOp());
auto net = static_cast<f::NetOp *>(backward.get());
ASSERT_TRUE(net->ops_.empty());
}
TEST(Backward, op_all_output_are_not_need) {
auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {});
auto backward = f::Backward(*fwd, {"Out"});
ASSERT_TRUE(backward->IsNetOp());
auto net = static_cast<f::NetOp *>(backward.get());
ASSERT_TRUE(net->ops_.empty());
}
TEST(Backward, op_part_of_output_are_not_need) {
auto fwd = f::OpRegistry::CreateOp("many_output_op", {"X"}, {"Y", "Z"}, {});
auto backward = f::Backward(*fwd, {"Z"});
ASSERT_TRUE(backward->IsNetOp());
auto net = static_cast<f::NetOp *>(backward.get());
ASSERT_EQ(net->ops_.size(), 2UL);
auto &fill_zero = *net->ops_[0];
ASSERT_EQ("fill_zeros_like", fill_zero.type_);
ASSERT_EQ(1UL, fill_zero.inputs_.size());
ASSERT_EQ("Z", fill_zero.inputs_[0]);
ASSERT_EQ(1UL, fill_zero.outputs_.size());
ASSERT_EQ("Z" + f::OperatorBase::ZERO_VAR_SUFFIX(), fill_zero.outputs_[0]);
auto &d_many_out = *net->ops_[1];
ASSERT_EQ("many_output_op_grad", d_many_out.type_);
ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG
ASSERT_EQ("Z" + f::OperatorBase::ZERO_VAR_SUFFIX(),
d_many_out.Input("z" + f::OperatorBase::GRAD_VAR_SUFFIX()));
ASSERT_EQ("Y" + f::OperatorBase::GRAD_VAR_SUFFIX(),
d_many_out.Input("y" + f::OperatorBase::GRAD_VAR_SUFFIX()));
ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(),
d_many_out.Output("x" + f::OperatorBase::GRAD_VAR_SUFFIX()));
}
TEST(Backward, op_part_of_input_are_not_need) {
auto fwd = f::OpRegistry::CreateOp("mul", {"a", "b"}, {"out"}, {});
auto backward = f::Backward(*fwd, {"a"});
auto &grad_mul = *backward;
ASSERT_EQ(grad_mul.type_, "mul_grad");
ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL);
ASSERT_EQ(grad_mul.outputs_.size(), 2UL);
ASSERT_EQ(grad_mul.Output("A" + f::OperatorBase::GRAD_VAR_SUFFIX()),
f::OperatorBase::EMPTY_VAR_NAME());
ASSERT_EQ(grad_mul.Output("B" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"b" + f::OperatorBase::GRAD_VAR_SUFFIX());
ASSERT_EQ(grad_mul.Input("Out" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"out" + f::OperatorBase::GRAD_VAR_SUFFIX());
ASSERT_EQ(grad_mul.Input("A"), "a");
ASSERT_EQ(grad_mul.Input("B"), "b");
ASSERT_EQ(grad_mul.Input("Out"), "out");
}
TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
f::NetOp net;
net.AddOp(f::OpRegistry::CreateOp("fc", {"x1", "w1", "b1"},
{"mul_out1", "add_out1", "out1"}, {}));
net.AddOp(f::OpRegistry::CreateOp("fc", {"out1", "w2", "b2"},
{"mul_out2", "tmp_out2", "out2"}, {}));
net.AddOp(f::OpRegistry::CreateOp("fc", {"out2", "w3", "b3"},
{"mul_out3", "tmp_out3", "out3"}, {}));
net.CompleteAddOp();
auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"});
ASSERT_TRUE(backward->IsNetOp());
auto bwd_net = static_cast<f::NetOp *>(backward.get());
ASSERT_EQ(bwd_net->ops_.size(), 3UL);
auto &grad_fc = *bwd_net->ops_[0];
EXPECT_EQ(grad_fc.inputs_.size(),
3UL /* external input number */
+ 1UL /* external output number*/
+ 1UL /* number of gradient of external output*/
- 1UL /*ignoreGradient varable number*/
+ 2U /* internal variable number*/);
EXPECT_EQ(grad_fc.outputs_.size(), 2UL /* input number of mul*/
+ 2UL /* input number of rowwise_add */
+ 1UL /* input number of sigmod */);
EXPECT_EQ(bwd_net->ops_[1]->inputs_.size(), 0UL);
EXPECT_EQ(bwd_net->ops_[1]->outputs_.size(), 0UL);
EXPECT_EQ(bwd_net->ops_[2]->inputs_.size(), 0UL);
EXPECT_EQ(bwd_net->ops_[2]->outputs_.size(), 0UL);
/*
EXPECT_EQ(grad_fc.Output("X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
f::OperatorBase::EMPTY_VAR_NAME());
EXPECT_EQ(grad_fc.Output("W" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"w3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_fc.Output("b" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"b3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_fc.Output("mul_result" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"mul_out3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_fc.Input("Out" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"out3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_fc.Input("X"), "out2");
EXPECT_EQ(grad_fc.Input("W"), "w3");
EXPECT_EQ(grad_fc.Input("mul_result"), "mul_out3");
EXPECT_EQ(grad_fc.Input("add_result"), "tmp_out3");
EXPECT_EQ(grad_fc.Input("Out"), "out3");
*/
}
......@@ -83,56 +83,38 @@ inline void Tensor::ShareDataWith(const Tensor& src) {
template <typename T>
inline void Tensor::CopyFrom(const Tensor& src,
const platform::CPUDeviceContext& ctx) {
const platform::Place& dst_place) {
src.check_memory_size<T>();
Resize(src.dims());
auto src_place = src.holder_->place();
auto src_ptr = static_cast<const void*>(src.data<T>());
auto dst_place = ctx.GetPlace();
auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
auto size = product(src.dims_) * sizeof(T);
if (platform::is_cpu_place(src_place)) {
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
#ifndef PADDLE_ONLY_CPU
else if (platform::is_gpu_place(src_place)) {
else if (platform::is_gpu_place(src_place) &&
platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::GPUPlace>(src_place), src_ptr, size, 0);
}
#endif
}
#ifndef PADDLE_ONLY_CPU
template <typename T>
inline void Tensor::CopyFrom(const Tensor& src,
const platform::CUDADeviceContext& ctx) {
src.check_memory_size<T>();
Resize(src.dims());
auto src_place = src.holder_->place();
auto src_ptr = static_cast<const void*>(src.data<T>());
auto dst_place = ctx.GetPlace();
auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
auto size = product(src.dims_) * sizeof(T);
if (platform::is_cpu_place(src_place)) {
} else if (platform::is_cpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size,
ctx.stream());
} else if (platform::is_gpu_place(src_place)) {
boost::get<platform::CPUPlace>(src_place), src_ptr, size, 0);
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr,
boost::get<platform::GPUPlace>(src_place), src_ptr, size,
ctx.stream());
boost::get<platform::GPUPlace>(src_place), src_ptr, size, 0);
}
}
#endif
}
template <typename T>
inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
......
......@@ -80,5 +80,21 @@ struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
}
};
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenScalar {
// Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
using Type = Eigen::TensorMap<
Eigen::TensorFixedSize<T, Eigen::Sizes<>, MajorType, IndexType>>;
using ConstType = Eigen::TensorMap<
Eigen::TensorFixedSize<const T, Eigen::Sizes<>, MajorType, IndexType>>;
static Type From(Tensor& tensor) { return Type(tensor.data<T>()); }
static ConstType From(const Tensor& tensor) {
return ConstType(tensor.data<T>());
}
};
} // namespace framework
} // namespace paddle
......@@ -46,6 +46,17 @@ TEST(Eigen, Tensor) {
}
}
TEST(Eigen, ScalarFrom) {
Tensor t;
int* p = t.mutable_data<int>(make_ddim({1}), platform::CPUPlace());
*p = static_cast<int>(100);
EigenScalar<int>::Type es = EigenScalar<int>::From(t);
ASSERT_EQ(0, es.dimension(0));
ASSERT_EQ(100, es(0));
}
TEST(Eigen, VectorFrom) {
Tensor t;
float* p = t.mutable_data<float>(make_ddim({6}), platform::CPUPlace());
......
......@@ -20,7 +20,7 @@ namespace framework {
OperatorBase* GradOpBuilder::Build() {
BuildOpInOutArgList();
std::string grad_op_type = OpRegistry::grad_ops().at(op_->type_);
std::string grad_op_type = OpRegistry::grad_ops().at(op_.type_);
OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)();
grad_op->type_ = grad_op_type;
CompleteGradOp(grad_op);
......@@ -39,15 +39,15 @@ OpInOutArg* GradOpBuilder::BuildArg(const VarProto& var,
}
void GradOpBuilder::BuildOpInOutArgList() {
const OpProto& op_proto = OpRegistry::protos().at(op_->type_);
const auto& var_map = *(OpRegistry::VarIndexMaps().at(op_->type_));
const OpProto& op_proto = OpRegistry::protos().at(op_.type_);
const auto& var_map = *(OpRegistry::VarIndexMaps().at(op_.type_));
const std::vector<int>& in_format =
op_->attrs_.count("input_format")
? op_->GetAttr<std::vector<int>>("input_format")
op_.attrs_.count("input_format")
? op_.GetAttr<std::vector<int>>("input_format")
: std::vector<int>();
const std::vector<int>& out_format =
op_->attrs_.count("output_format")
? op_->GetAttr<std::vector<int>>("output_format")
op_.attrs_.count("output_format")
? op_.GetAttr<std::vector<int>>("output_format")
: std::vector<int>();
for (const auto& var : op_proto.inputs()) {
arg_list_.emplace_back(
......@@ -70,8 +70,7 @@ void GradOpBuilder::AddArgIntoGradOp(const OpInOutArg* arg,
}
(*varmap)[var_name] = idx++;
size_t pre_sz = in_out.size();
auto base_it =
arg->type_ == IN ? op_->inputs_.begin() : op_->outputs_.begin();
auto base_it = arg->type_ == IN ? op_.inputs_.begin() : op_.outputs_.begin();
std::copy(base_it + arg->begin_idx_, base_it + arg->end_idx_,
std::back_inserter(in_out));
if (is_grad) {
......@@ -83,7 +82,7 @@ void GradOpBuilder::AddArgIntoGradOp(const OpInOutArg* arg,
}
void GradOpBuilder::CompleteGradOp(OperatorBase* grad_op) const {
grad_op->attrs_ = op_->attrs_;
grad_op->attrs_ = op_.attrs_;
grad_op->attrs_.erase("input_format");
grad_op->attrs_.erase("output_format");
VarIndexMap* grad_varmap = new VarIndexMap();
......
......@@ -29,7 +29,7 @@ class GradOpBuilder {
using VarIndexMap = std::unordered_map<std::string, int>;
public:
GradOpBuilder(const OperatorBase* op) : op_(op) {}
GradOpBuilder(const OperatorBase& op) : op_(op) {}
OperatorBase* Build();
private:
......@@ -40,7 +40,7 @@ class GradOpBuilder {
std::vector<int>& format, VarIndexMap* varmap, int& idx,
bool is_grad) const;
void CompleteGradOp(OperatorBase* grad_op) const;
const OperatorBase* op_;
const OperatorBase& op_;
std::vector<std::shared_ptr<OpInOutArg>> arg_list_;
};
......
......@@ -11,7 +11,7 @@ namespace framework {
TEST(GradOpBuilder, AddTwo) {
std::shared_ptr<OperatorBase> add_op(
OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {}));
std::shared_ptr<OperatorBase> grad_add_op = OpRegistry::CreateGradOp(add_op);
std::shared_ptr<OperatorBase> grad_add_op = OpRegistry::CreateGradOp(*add_op);
EXPECT_EQ(static_cast<int>(grad_add_op->inputs_.size()), 4);
EXPECT_EQ(static_cast<int>(grad_add_op->outputs_.size()), 2);
EXPECT_EQ(grad_add_op->Input("X"), "x");
......
......@@ -43,7 +43,7 @@ class NetOp : public OperatorBase {
* Infer all the operators' input and output variables' shapes, will be called
* before every mini-batch
*/
void InferShape(const std::shared_ptr<Scope>& scope) const override {
void InferShape(const Scope& scope) const override {
for (auto& op : ops_) {
op->InferShape(scope);
}
......@@ -56,7 +56,7 @@ class NetOp : public OperatorBase {
* scope will be used instead. If no OpContext is provicded, default context
* will be used.
*/
void Run(const std::shared_ptr<Scope>& scope,
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
for (auto& op : ops_) {
op->Run(scope, dev_ctx);
......@@ -68,9 +68,18 @@ class NetOp : public OperatorBase {
*/
void AddOp(const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed");
PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op");
ops_.push_back(op);
}
void InsertOp(size_t pos, const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_,
"Cannot InsertOp when this network is sealed");
PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op");
PADDLE_ENFORCE(pos <= ops_.size(), "Out of range");
ops_.insert(ops_.begin() + pos, op);
}
void CompleteAddOp(bool calculate = true);
std::string DebugString() const override;
......
......@@ -3,11 +3,6 @@
#include <paddle/framework/op_registry.h>
#include <paddle/framework/operator.h>
USE_OP(add_two);
USE_OP(mul);
USE_OP(sigmoid);
USE_OP(softmax);
namespace paddle {
namespace framework {
......@@ -16,16 +11,22 @@ static int run_cnt = 0;
class TestOp : public OperatorBase {
public:
void InferShape(
const std::shared_ptr<framework::Scope>& scope) const override {
void InferShape(const framework::Scope& scope) const override {
++infer_shape_cnt;
}
void Run(const std::shared_ptr<framework::Scope>& scope,
void Run(const framework::Scope& scope,
const paddle::platform::DeviceContext& dev_ctx) const override {
++run_cnt;
}
};
class EmptyOp : public OperatorBase {
public:
void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {}
};
template <typename T>
void AssertSameVectorWithoutOrder(const std::vector<T>& expected,
const std::vector<T>& actual) {
......@@ -62,7 +63,7 @@ TEST(OpKernel, all) {
ASSERT_EQ(1UL, tmp_idx.size());
ASSERT_EQ("y", net->outputs_[tmp_idx[0]]);
auto scope = std::make_shared<Scope>();
Scope scope;
platform::CPUDeviceContext dev_ctx;
net->InferShape(scope);
......@@ -72,20 +73,17 @@ TEST(OpKernel, all) {
ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet);
}
//! TODO(yuyang18): Refine Backward Op.
// TEST(AddBackwardOp, TestGradOp) {
// auto net = std::make_shared<NetOp>();
// ASSERT_NE(net, nullptr);
// net->AddOp(framework::OpRegistry::CreateOp("mul", {"X", "Y"}, {"Out"}, {}));
// net->AddOp(
// framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {"Out"}, {}));
// net->AddOp(framework::OpRegistry::CreateOp("add_two", {"X", "Y"}, {""},
// {}));
// auto grad_ops = AddBackwardOp(net);
// for (auto& op : grad_ops->ops_) {
// op->DebugString();
// }
//}
TEST(Net, insert_op) {
NetOp net;
auto op1 = std::make_shared<EmptyOp>();
op1->inputs_ = {"x", "w1", "b1"};
op1->outputs_ = {"y"};
net.AddOp(op1);
net.InsertOp(0, op1);
ASSERT_EQ(2UL, net.ops_.size());
net.InsertOp(2, op1);
ASSERT_EQ(3UL, net.ops_.size());
}
} // namespace framework
} // namespace paddle
......@@ -86,43 +86,46 @@ class OpProtoAndCheckerMaker {
}
protected:
void AddInput(const std::string& name, const std::string& comment,
bool multiple = false, bool ignore_gradient = false) {
struct VariableBuilder {
VarProto* var_;
std::function<void()> on_multiple_;
std::function<void()> on_temporary_;
VariableBuilder& SetMultiple() {
var_->set_multiple(true);
on_multiple_();
return *this;
}
VariableBuilder& SetTemporary() {
PADDLE_ENFORCE(bool(on_temporary_), "Cannot set temporary");
var_->set_temporary(true);
on_temporary_();
return *this;
}
VariableBuilder& IgnoreGradient() {
var_->set_ignore_gradient(true);
return *this;
}
};
VariableBuilder AddInput(const std::string& name,
const std::string& comment) {
auto input = proto_->mutable_inputs()->Add();
*input->mutable_name() = name;
*input->mutable_comment() = comment;
input->set_ignore_gradient(ignore_gradient);
input->set_multiple(multiple);
if (multiple) {
SetHasMultipleInput();
}
}
void AddInputs(const std::string& name, const std::string& comment,
bool ignore_gradient = false) {
AddInput(name, comment, true, ignore_gradient);
return VariableBuilder{input, [=] { this->SetHasMultipleInput(); },
nullptr};
}
void AddOutput(const std::string& name, const std::string& comment,
bool temporary = false, bool multiple = false,
bool ignore_gradient = false) {
VariableBuilder AddOutput(const std::string& name,
const std::string& comment) {
auto output = proto_->mutable_outputs()->Add();
*output->mutable_name() = name;
*output->mutable_comment() = comment;
output->set_ignore_gradient(ignore_gradient);
output->set_multiple(multiple);
if (multiple) {
SetHasMultipleOutput();
}
output->set_temporary(temporary);
if (temporary) {
SetHasTemporaryOutput();
}
}
void AddOutputs(const std::string& name, const std::string& comment,
bool temporary = false, bool ignore_gradient = false) {
AddOutput(name, comment, temporary, true, ignore_gradient);
return VariableBuilder{output, [=] { this->SetHasMultipleOutput(); },
[=] { this->SetHasTemporaryOutput(); }};
}
template <typename T>
......@@ -300,9 +303,10 @@ class OpRegistry {
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
static std::shared_ptr<OperatorBase> CreateGradOp(
std::shared_ptr<OperatorBase> op) {
GradOpBuilder builder(op.get());
static std::shared_ptr<OperatorBase> CreateGradOp(const OperatorBase& op) {
PADDLE_ENFORCE(!op.IsNetOp(),
"Use framework::Backward to get backward ops");
GradOpBuilder builder(op);
std::shared_ptr<OperatorBase> grad_op(builder.Build());
grad_op->Init();
return grad_op;
......
......@@ -7,9 +7,9 @@ namespace paddle {
namespace framework {
class CosineOp : public OperatorBase {
public:
void Run(const std::shared_ptr<Scope>& scope,
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {}
void InferShape(const std::shared_ptr<Scope>& scope) const override {}
void InferShape(const Scope& scope) const override {}
};
class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
......@@ -27,8 +27,8 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
class MyTestOp : public OperatorBase {
public:
void InferShape(const std::shared_ptr<Scope>& scope) const override {}
void Run(const std::shared_ptr<Scope>& scope,
void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {}
};
......@@ -36,9 +36,8 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
MyTestOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInputs("input", "input of cosine op");
AddOutput("output", "output of cosine op",
/*temporary*/ true);
AddInput("input", "input of cosine op").SetMultiple();
AddOutput("output", "output of cosine op").SetTemporary();
auto my_checker = [](int i) {
PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
};
......@@ -69,7 +68,7 @@ TEST(OpRegistry, CreateOp) {
std::shared_ptr<paddle::framework::OperatorBase> op =
paddle::framework::OpRegistry::CreateOp(op_desc);
auto scope = std::make_shared<paddle::framework::Scope>();
paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx);
float scale_get = op->GetAttr<float>("scale");
......@@ -111,7 +110,7 @@ TEST(OpRegistry, DefaultValue) {
std::shared_ptr<paddle::framework::OperatorBase> op =
paddle::framework::OpRegistry::CreateOp(op_desc);
auto scope = std::make_shared<paddle::framework::Scope>();
paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx);
ASSERT_EQ(op->GetAttr<float>("scale"), 1.0);
......@@ -173,7 +172,7 @@ TEST(OpRegistry, CustomChecker) {
SetInputFormat(&op_desc);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::platform::CPUDeviceContext dev_ctx;
auto scope = std::make_shared<paddle::framework::Scope>();
paddle::framework::Scope scope;
op->Run(scope, dev_ctx);
int test_attr = op->GetAttr<int>("test_attr");
ASSERT_EQ(test_attr, 4);
......
......@@ -20,7 +20,7 @@ namespace paddle {
namespace framework {
template <>
Eigen::DefaultDevice* KernelContext::GetEigenDevice<
Eigen::DefaultDevice* ExecutionContext::GetEigenDevice<
platform::CPUPlace, Eigen::DefaultDevice>() const {
return device_context_.get_eigen_device<Eigen::DefaultDevice>();
}
......@@ -28,28 +28,33 @@ Eigen::DefaultDevice* KernelContext::GetEigenDevice<
#ifndef PADDLE_ONLY_CPU
template <>
Eigen::GpuDevice*
KernelContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
return device_context_.get_eigen_device<Eigen::GpuDevice>();
}
#endif
const std::string& OperatorBase::Input(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr,
"Input Output Indices could not be nullptr");
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
if (attrs_.count("input_format") == 0) {
return inputs_[it->second];
return inputs_.at((size_t)it->second);
} else {
const auto& input_format = GetAttr<std::vector<int>>("input_format");
int idx = input_format[it->second];
return inputs_.at(idx);
return inputs_.at((size_t)idx);
}
}
std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "IO Idx could not be nullptr");
auto input_format = GetAttr<std::vector<int>>("input_format");
auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(input_format.at(static_cast<size_t>(offset) + 1) <=
static_cast<int>(inputs_.size()),
"Input Out Of Range");
return std::vector<std::string>{
inputs_.begin() + input_format.at(offset),
......@@ -57,23 +62,26 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
}
const std::string& OperatorBase::Output(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr");
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
if (attrs_.count("output_format") == 0) {
return outputs_[it->second];
return outputs_.at((size_t)it->second);
} else {
const auto& output_format = GetAttr<std::vector<int>>("output_format");
int idx = output_format[it->second];
return outputs_.at(idx);
return outputs_.at((size_t)idx);
}
}
std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr");
auto output_format = GetAttr<std::vector<int>>("output_format");
auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(output_format.at(static_cast<size_t>(offset) + 1) <=
static_cast<int>(outputs_.size()),
"Output Out of Range");
return std::vector<std::string>{
outputs_.begin() + output_format.at(offset),
outputs_.begin() + output_format.at(offset + 1)};
......@@ -99,5 +107,11 @@ std::string OperatorBase::DebugString() const {
return ss.str();
}
void OperatorBase::Rename(const std::string& old_name,
const std::string& new_name) {
std::replace(inputs_.begin(), inputs_.end(), old_name, new_name);
std::replace(outputs_.begin(), outputs_.end(), old_name, new_name);
}
} // namespace framework
} // namespace paddle
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <algorithm>
#include <boost/variant.hpp>
#include <string>
#include <unordered_map>
......@@ -31,22 +32,9 @@ limitations under the License. */
namespace paddle {
namespace framework {
template <typename T>
struct EigenDeviceConverter;
template <>
struct EigenDeviceConverter<platform::CPUPlace> {
using EigenDeviceType = Eigen::DefaultDevice;
};
#ifndef PADDLE_ONLY_CPU
template <>
struct EigenDeviceConverter<platform::GPUPlace> {
using EigenDeviceType = Eigen::GpuDevice;
};
#endif
class OperatorBase;
class InferShapeContext;
class ExecutionContext;
/**
* OperatorBase has the basic element that Net will call to do computation.
* Only CreateOperator from OpRegistry will new Operator directly. User
......@@ -67,6 +55,9 @@ class OperatorBase {
/// e.g. Variable "x@GRAD" is the gradient of varibale "x".
static std::string GRAD_VAR_SUFFIX() { return "@GRAD"; }
/// Variables with this suffix are supposed to be filled up with zeros.
static std::string ZERO_VAR_SUFFIX() { return "@ZERO"; }
virtual ~OperatorBase() {}
template <typename T>
......@@ -84,16 +75,20 @@ class OperatorBase {
/// InferShape infer the size of Variables used by this Operator with
/// information inside scope
virtual void InferShape(const std::shared_ptr<Scope>& scope) const = 0;
virtual void InferShape(const Scope& scope) const = 0;
/// Net will call this function to Run an op.
virtual void Run(const std::shared_ptr<Scope>& scope,
virtual void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const = 0;
virtual bool IsNetOp() const { return false; }
/// rename inputs outputs name
void Rename(const std::string& old_name, const std::string& new_name);
//! Get a input with argument's name described in `op_proto`
const std::string& Input(const std::string& name) const;
//! Get a input which has multiple variables.
//! TODO add a vector_view to prevent memory copy.
std::vector<std::string> Inputs(const std::string& name) const;
......@@ -105,53 +100,156 @@ class OperatorBase {
public:
std::string type_;
// NOTE: in case of OpGrad, inputs_ contains:
// I (Inputs)
// O (Outputs)
// OG (Output Gradients)
std::vector<std::string> inputs_;
// NOTE: in case of OpGrad, outputs_ contains
// IG (Inputs Gradients)
std::vector<std::string> outputs_;
AttributeMap attrs_;
// store the arguments' offset described in op_desc.
std::shared_ptr<std::unordered_map<std::string, int>> in_out_idxs_;
};
class KernelContext {
class OperatorContext {
public:
KernelContext(const OperatorBase* op, const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& device_context)
: op_(*op), scope_(scope), device_context_(device_context) {}
OperatorContext(const OperatorBase* op, const Scope& scope)
: op_(*op), scope_(scope) {}
size_t InputSize() const { return op_.inputs_.size(); }
const Variable* Input(int index) const {
return scope_->GetVariable(op_.inputs_[index]);
size_t OutputSize() const { return op_.outputs_.size(); }
const Variable* InputVar(const size_t index) const {
return scope_.FindVar(op_.inputs_.at(index));
}
Variable* Output(int index) const {
return scope_->GetVariable(op_.outputs_[index]);
Variable* OutputVar(const size_t index) const {
return scope_.FindVar(op_.outputs_.at(index));
}
const Variable* Input(const std::string& name) const {
return scope_->GetVariable(op_.Input(name));
const Variable* InputVar(const std::string& name) const {
return scope_.FindVar(op_.Input(name));
}
const Variable* Output(const std::string& name) const {
return scope_->GetVariable(op_.Output(name));
Variable* OutputVar(const std::string& name) const {
return scope_.FindVar(op_.Output(name));
}
const std::vector<const Variable*> Inputs(const std::string& name) const {
const std::vector<const Variable*> MultiInputVar(
const std::string& name) const {
auto names = op_.Inputs(name);
std::vector<const Variable*> res;
res.reserve(names.size());
std::transform(
names.begin(), names.end(), res.begin(),
[this](const std::string& name) { return scope_->GetVariable(name); });
names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) { return scope_.FindVar(name); });
return res;
}
const std::vector<const Variable*> Outputs(const std::string& name) const {
std::vector<const Variable*> MultiOutputVar(const std::string& name) const {
auto names = op_.Outputs(name);
std::vector<const Variable*> res;
res.reserve(names.size());
std::transform(
names.begin(), names.end(), res.begin(),
[this](const std::string& name) { return scope_->GetVariable(name); });
names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) { return scope_.FindVar(name); });
return res;
}
template <typename T>
const T* Input(const size_t index) const {
auto var = InputVar(index);
PADDLE_ENFORCE(var != nullptr, "Input(%d) should not be nullptr", index);
return &var->Get<T>();
}
template <typename T>
T* Output(const size_t index) const {
auto var = OutputVar(index);
PADDLE_ENFORCE(var != nullptr, "Output(%d) should not be nullptr", index);
return var->GetMutable<T>();
}
template <typename T>
const T* Input(const std::string& name) const {
auto var = InputVar(name);
PADDLE_ENFORCE(var != nullptr, "Input(%s) should not be nullptr", name);
return &var->Get<T>();
}
template <typename T>
T* Output(const std::string& name) const {
auto var = OutputVar(name);
PADDLE_ENFORCE(var != nullptr, "Output(%s) should not be nullptr", name);
return var->GetMutable<T>();
}
template <typename T>
const std::vector<const T*> MultiInput(const std::string& name) const {
auto names = op_.Inputs(name);
std::vector<const T*> res;
res.reserve(names.size());
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr,
"MultiInput(%s:%s) should not be nullptr",
name, sub_name);
return &var->Get<T>();
});
return res;
}
template <typename T>
std::vector<const T*> MultiOutput(const std::string& name) const {
auto names = op_.Outputs(name);
std::vector<const T*> res;
res.reserve(names.size());
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr,
"MultiOutput(%s:%s) should not be nullptr",
name, sub_name);
return var->GetMutable<T>();
});
return res;
}
const OperatorBase& op_;
const Scope& scope_;
};
class InferShapeContext : public OperatorContext {
public:
InferShapeContext(const OperatorBase* op, const Scope& scope)
: OperatorContext(op, scope) {}
};
template <typename T>
struct EigenDeviceConverter;
template <>
struct EigenDeviceConverter<platform::CPUPlace> {
using EigenDeviceType = Eigen::DefaultDevice;
};
#ifndef PADDLE_ONLY_CPU
template <>
struct EigenDeviceConverter<platform::GPUPlace> {
using EigenDeviceType = Eigen::GpuDevice;
};
#endif
class ExecutionContext : public OperatorContext {
public:
ExecutionContext(const OperatorBase* op, const Scope& scope,
const platform::DeviceContext& device_context)
: OperatorContext(op, scope), device_context_(device_context) {}
template <typename PlaceType,
typename DeviceType =
typename EigenDeviceConverter<PlaceType>::EigenDeviceType>
......@@ -159,38 +257,23 @@ class KernelContext {
platform::Place GetPlace() const { return device_context_.GetPlace(); }
const OperatorBase& op_;
const std::shared_ptr<Scope>& scope_;
const platform::DeviceContext& device_context_;
};
class OpKernel {
public:
/**
* KernelContext is the only parameter of Kernel Run function.
* ExecutionContext is the only parameter of Kernel Run function.
* Run will get input/output variables, state such as momentum and
* device resource such as CUDA stream, cublas handle, etc. from
* KernelContext. User should construct it before run the Operator.
* ExecutionContext. User should construct it before run the Operator.
*/
virtual void Compute(const KernelContext& context) const = 0;
virtual void Compute(const ExecutionContext& context) const = 0;
virtual ~OpKernel() {}
};
template <typename T>
struct VarToTensor {};
template <>
struct VarToTensor<Tensor*> {
Tensor* operator()(Variable* var) { return var->GetMutable<Tensor>(); }
};
template <>
struct VarToTensor<const Tensor*> {
const Tensor* operator()(Variable* var) { return &var->Get<Tensor>(); }
};
class OperatorWithKernel : public OperatorBase {
public:
struct OpKernelKey {
......@@ -216,10 +299,14 @@ class OperatorWithKernel : public OperatorBase {
using OpKernelMap =
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
void Run(const std::shared_ptr<Scope>& scope,
void InferShape(const Scope& scope) const {
InferShape(InferShapeContext(this, scope));
}
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const final {
auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx));
opKernel->Compute(KernelContext(this, scope, dev_ctx));
opKernel->Compute(ExecutionContext(this, scope, dev_ctx));
}
static std::unordered_map<std::string /* op_type */, OpKernelMap>&
......@@ -228,34 +315,8 @@ class OperatorWithKernel : public OperatorBase {
return g_all_op_kernels;
}
void InferShape(const std::shared_ptr<Scope>& scope) const final {
std::vector<const Tensor*> ins;
VarNamesToTensors(scope, inputs_, &ins);
std::vector<Tensor*> outs;
VarNamesToTensors(scope, outputs_, &outs);
InferShape(ins, outs);
};
private:
template <typename T>
void VarNamesToTensors(const std::shared_ptr<Scope>& scope,
const std::vector<std::string>& var_names,
std::vector<T>* container) const {
container->reserve(var_names.size());
VarToTensor<T> convert;
for (auto& name : var_names) {
auto var = scope->GetVariable(name);
if (var != nullptr) {
container->push_back(convert(var));
} else {
container->push_back(nullptr);
}
}
}
protected:
virtual void InferShape(const std::vector<const Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const = 0;
virtual void InferShape(const InferShapeContext& ctx) const = 0;
};
} // namespace framework
......
......@@ -24,15 +24,15 @@ static int op_run_num = 0;
class OpWithoutKernelTest : public OperatorBase {
public:
void Init() override { x = 1; }
void InferShape(const std::shared_ptr<Scope>& scope) const override {}
void Run(const std::shared_ptr<Scope>& scope,
void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
op_run_num++;
ASSERT_EQ((int)inputs_.size(), 1);
ASSERT_EQ((int)outputs_.size(), 1);
ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
ASSERT_EQ(scope.FindVar(inputs_[0]), nullptr);
ASSERT_EQ(x, 1);
ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);
ASSERT_NE(scope.FindVar(outputs_[0]), nullptr);
}
public:
......@@ -68,11 +68,12 @@ TEST(OperatorBase, all) {
attr->set_f(3.14);
paddle::platform::CPUDeviceContext device_context;
auto scope = std::make_shared<paddle::framework::Scope>();
paddle::framework::Scope scope;
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
scope->CreateVariable("OUT1");
scope.NewVar("OUT1");
ASSERT_EQ(paddle::framework::op_run_num, 0);
op->InferShape(scope);
op->Run(scope, device_context);
ASSERT_EQ(paddle::framework::op_run_num, 1);
}
......@@ -97,14 +98,13 @@ static int cpu_kernel_run_num = 0;
class OpWithKernelTest : public OperatorWithKernel {
protected:
void InferShape(const std::vector<const Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {}
void InferShape(const framework::InferShapeContext& ctx) const override {}
};
template <typename T1, typename T2>
class CPUKernelTest : public OpKernel {
public:
void Compute(const KernelContext& ctx) const {
void Compute(const ExecutionContext& ctx) const {
std::cout << "this is cpu kernel" << std::endl;
std::cout << ctx.op_.DebugString() << std::endl;
cpu_kernel_run_num++;
......@@ -117,12 +117,12 @@ class CPUKernelTest : public OpKernel {
class OperatorMultiInputsTest : public OperatorBase {
public:
void Init() override { x = 1; }
void InferShape(const std::shared_ptr<Scope>& scope) const override {}
void Run(const std::shared_ptr<Scope>& scope,
void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
ASSERT_EQ(scope.FindVar(inputs_[0]), nullptr);
ASSERT_EQ(x, 1);
ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);
ASSERT_NE(scope.FindVar(outputs_[0]), nullptr);
ASSERT_EQ(Input("x"), "IN1");
ASSERT_EQ(Input("y"), "OUT1");
}
......@@ -137,9 +137,9 @@ class OpKernelTestMultiInputsProtoAndCheckerMaker
OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto,
OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInputs("xs", "inputs of test op");
AddInput("xs", "inputs of test op").SetMultiple();
AddInput("k", "input of test op");
AddOutputs("ys", "outputs of test op");
AddOutput("ys", "outputs of test op").SetMultiple();
AddAttr<float>("scale", "scale of cosine op")
.SetDefault(1.0)
.LargerThan(0.0);
......@@ -149,13 +149,31 @@ class OpKernelTestMultiInputsProtoAndCheckerMaker
class CPUKernalMultiInputsTest : public OpKernel {
public:
void Compute(const KernelContext& ctx) const {
void Compute(const ExecutionContext& ctx) const {
auto xs = ctx.op_.Inputs("xs");
ASSERT_EQ(xs.size(), 3UL);
ASSERT_EQ(xs[0], "x0");
ASSERT_EQ(xs[1], "x1");
ASSERT_EQ(xs[2], "x2");
auto inVar0 = ctx.MultiInputVar("xs");
ASSERT_EQ(inVar0.size(), 3);
auto intVar1 = ctx.InputVar("k");
ASSERT_NE(intVar1, nullptr);
auto outVar0 = ctx.MultiOutputVar("ys");
ASSERT_EQ(outVar0.size(), 2);
auto inTensor0 = ctx.MultiInput<Tensor>("xs");
ASSERT_EQ(inTensor0.size(), 3);
auto intTensor1 = ctx.Input<Tensor>("k");
ASSERT_NE(intTensor1, nullptr);
auto outTensor0 = ctx.MultiOutput<Tensor>("ys");
ASSERT_EQ(outTensor0.size(), 2);
auto k = ctx.op_.Input("k");
ASSERT_EQ(k, "k0");
......@@ -186,7 +204,7 @@ TEST(OpKernel, all) {
attr->set_f(3.14);
paddle::platform::CPUDeviceContext cpu_device_context;
auto scope = std::make_shared<paddle::framework::Scope>();
paddle::framework::Scope scope;
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 0);
......@@ -232,7 +250,13 @@ TEST(OpKernel, multi_inputs) {
output_format->Add(2); // y1
paddle::platform::CPUDeviceContext cpu_device_context;
auto scope = std::make_shared<Scope>();
paddle::framework::Scope scope;
scope.NewVar("x0")->GetMutable<Tensor>();
scope.NewVar("x1")->GetMutable<Tensor>();
scope.NewVar("x2")->GetMutable<Tensor>();
scope.NewVar("k0")->GetMutable<Tensor>();
scope.NewVar("y0")->GetMutable<Tensor>();
scope.NewVar("y1")->GetMutable<Tensor>();
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cpu_device_context);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/scope.h"
#include "paddle/string/printf.h"
namespace paddle {
namespace framework {
Scope::~Scope() {
DropKids();
for (auto& kv : vars_) delete kv.second;
}
Scope& Scope::NewScope() const {
kids_.push_back(new Scope(this));
return *kids_.back();
}
Variable* Scope::NewVar(const std::string& name) {
auto iter = vars_.find(name);
if (iter != vars_.end()) {
return iter->second;
}
Variable* v = new Variable();
vars_[name] = v;
v->name_ = &(vars_.find(name)->first);
return v;
}
Variable* Scope::NewVar() {
return NewVar(string::Sprintf("%p.%d", this, vars_.size()));
}
Variable* Scope::FindVar(const std::string& name) const {
auto it = vars_.find(name);
if (it != vars_.end()) return it->second;
return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
}
const Scope* Scope::FindScope(const Variable* var) const {
for (auto& kv : vars_) {
if (kv.second == var) {
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
}
void Scope::DropKids() {
for (Scope* s : kids_) delete s;
kids_.clear();
}
} // namespace framework
} // namespace paddle
......@@ -14,9 +14,9 @@ limitations under the License. */
#pragma once
#include <list>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/framework/variable.h"
......@@ -35,73 +35,42 @@ class Scope;
*/
class Scope {
public:
/**
* @brief Initialize s Scope without parent.
*/
Scope() {}
~Scope();
/**
* @brief Initialize a Scope with parent.
*/
explicit Scope(const std::shared_ptr<Scope>& parent) : parent_(parent) {}
/**
* @brief Create Variable
*
* Create Variable in this Scope. Return the exist one if Variable already
* been created.
*/
Variable* CreateVariable(const std::string& name) {
auto var = GetVariable(name);
if (var) {
return var;
} else {
auto ptr = new Variable();
name_to_var_[name] = std::unique_ptr<Variable>(ptr);
var_to_name_[ptr] = name;
return GetVariable(name);
}
}
/**
* @brief Get Variable.
*
* Get Variable from this Scope, this function will recursive find Variable
* from it's parent scope. Return nullptr if not found.
*/
Variable* GetVariable(const std::string& name) const {
auto it = name_to_var_.find(name);
if (it != name_to_var_.end()) {
return it->second.get();
} else if (parent_ != nullptr) {
return parent_->GetVariable(name);
} else {
return nullptr;
}
}
/**
* @brief If this scope has a Var named name.
*
* Find if there is a Variable in this scope and it's parent scope
*/
bool HasVariable(const std::string& name) const {
return (name_to_var_.find(name) != name_to_var_.end() ||
(parent_ && parent_->HasVariable(name)));
}
std::string GetVariableName(Variable* const var) const {
try {
return var_to_name_.at(var);
} catch (...) {
return "";
}
}
// Disable Copy, Assign, Move.
Scope(const Scope& other) = delete;
Scope& operator=(const Scope& other) = delete;
Scope(Scope&& other) = delete;
/// Create a sub-scope. Returns a reference other than a pointer so
/// to prevent from manual deletion.
/// Mark it to const because that new kid scope cannot change parent scope.
Scope& NewScope() const;
/// Create a variable with given name if it doesn't exist.
Variable* NewVar(const std::string& name);
/// Create a variable with a scope-unique name.
Variable* NewVar();
/// Find a variable in the scope or any of its ancestors. Returns
/// nullptr if cannot find.
Variable* FindVar(const std::string& name) const;
/// Find the scope or an ancestor scope that contains the given variable.
const Scope* FindScope(const Variable* var) const;
/// Drop all kids scopes belonged to this scope.
void DropKids();
private:
std::unordered_map<Variable*, std::string> var_to_name_;
std::unordered_map<std::string, std::unique_ptr<Variable>> name_to_var_;
std::shared_ptr<Scope> parent_{nullptr};
// Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const* parent) : parent_(parent) {}
std::unordered_map<std::string, Variable*> vars_;
mutable std::list<Scope*> kids_;
Scope const* parent_{nullptr};
};
} // namespace framework
......
......@@ -15,49 +15,42 @@ limitations under the License. */
#include "paddle/framework/scope.h"
#include "gtest/gtest.h"
TEST(Scope, Create) {
using paddle::framework::Scope;
using paddle::framework::Variable;
using paddle::framework::Scope;
using paddle::framework::Variable;
auto scope = std::make_shared<Scope>();
TEST(Scope, VarsShadowing) {
Scope s;
Scope& ss1 = s.NewScope();
Scope& ss2 = s.NewScope();
Variable* var0 = scope->CreateVariable("");
EXPECT_NE(var0, nullptr);
Variable* v0 = s.NewVar("a");
Variable* v1 = ss1.NewVar("a");
/// GetVariable will return nullptr if not exist.
Variable* var1 = scope->GetVariable("a");
EXPECT_EQ(var1, nullptr);
EXPECT_NE(v0, v1);
/// CreateVariable will return one.
Variable* var2 = scope->CreateVariable("a");
EXPECT_NE(var2, nullptr);
/// Get the created variable.
Variable* var3 = scope->GetVariable("a");
EXPECT_EQ(var2, var3);
EXPECT_EQ(v0, s.FindVar("a"));
EXPECT_EQ(v1, ss1.FindVar("a"));
EXPECT_EQ(v0, ss2.FindVar("a"));
}
/// CreateVariable will just return the variable if it's
/// already exist.
Variable* var4 = scope->CreateVariable("a");
EXPECT_EQ(var4, var2);
TEST(Scope, FindVar) {
Scope s;
Scope& ss = s.NewScope();
EXPECT_EQ("a", scope->GetVariableName(var4));
Scope scope2;
auto var = scope2.CreateVariable("tmp");
EXPECT_EQ("", scope->GetVariableName(var));
}
EXPECT_EQ(nullptr, s.FindVar("a"));
EXPECT_EQ(nullptr, ss.FindVar("a"));
TEST(Scope, Parent) {
using paddle::framework::Scope;
using paddle::framework::Variable;
ss.NewVar("a");
auto parent_scope = std::make_shared<Scope>();
auto scope = std::make_shared<Scope>(parent_scope);
EXPECT_EQ(nullptr, s.FindVar("a"));
EXPECT_NE(nullptr, ss.FindVar("a"));
}
Variable* var0 = parent_scope->CreateVariable("a");
EXPECT_NE(var0, nullptr);
TEST(Scope, FindScope) {
Scope s;
Scope& ss = s.NewScope();
Variable* v = s.NewVar("a");
/// GetVariable will get Variable from parent scope if exist.
Variable* var1 = scope->GetVariable("a");
EXPECT_EQ(var0, var1);
EXPECT_EQ(&s, s.FindScope(v));
EXPECT_EQ(&s, ss.FindScope(v));
}
......@@ -94,14 +94,7 @@ class Tensor {
* @note CopyFrom supports CPU <-> GPU, GPU <-> GPU.
*/
template <typename T>
inline void CopyFrom(const Tensor& src,
const platform::CPUDeviceContext& ctx);
#ifndef PADDLE_ONLY_CPU
template <typename T>
inline void CopyFrom(const Tensor& src,
const platform::CUDADeviceContext& ctx);
#endif
inline void CopyFrom(const Tensor& src, const platform::Place& dst_place);
/**
* @brief Return the slice of the tensor.
......@@ -129,13 +122,16 @@ class Tensor {
virtual platform::Place place() const = 0;
};
template <typename T, typename PlaceType>
template <typename T, typename Place>
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(PlaceType place, size_t size)
PlaceholderImpl(Place place, size_t size)
: ptr_(static_cast<T*>(memory::Alloc(place, size)),
memory::PODDeleter<T, PlaceType>(place)),
memory::PODDeleter<T, Place>(place)),
place_(place),
size_(size) {}
size_(size) {
PADDLE_ENFORCE(ptr_ != nullptr, "Insufficient %s memory to allocation.",
is_cpu_place(place_) ? "CPU" : "GPU");
}
virtual size_t size() const { return size_; }
virtual platform::Place place() const { return place_; }
......@@ -143,7 +139,7 @@ class Tensor {
virtual std::type_index type() const { return std::type_index(typeid(T)); }
/*! the pointer of memory block. */
std::unique_ptr<T, memory::PODDeleter<T, PlaceType>> ptr_;
std::unique_ptr<T, memory::PODDeleter<T, Place>> ptr_;
/*! the place of memory block. */
platform::Place place_;
......
......@@ -198,8 +198,8 @@ TEST(Tensor, CopyFrom) {
int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int));
auto* cpu_ctx = new paddle::platform::CPUDeviceContext();
dst_tensor.CopyFrom<int>(src_tensor, *cpu_ctx);
auto cpu_place = new paddle::platform::CPUPlace();
dst_tensor.CopyFrom<int>(src_tensor, *cpu_place);
const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr);
......@@ -208,7 +208,7 @@ TEST(Tensor, CopyFrom) {
}
Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
dst_tensor.CopyFrom<int>(slice_tensor, *cpu_ctx);
dst_tensor.CopyFrom<int>(slice_tensor, *cpu_place);
const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr);
......@@ -228,12 +228,12 @@ TEST(Tensor, CopyFrom) {
memcpy(src_ptr, arr, 9 * sizeof(int));
// CPU Tensor to GPU Tensor
auto gpu_ctx = new paddle::platform::CUDADeviceContext(0);
gpu_tensor.CopyFrom<int>(src_tensor, *gpu_ctx);
auto gpu_place = new paddle::platform::GPUPlace(0);
gpu_tensor.CopyFrom<int>(src_tensor, *gpu_place);
// GPU Tensor to CPU Tensor
auto cpu_ctx = new paddle::platform::CPUDeviceContext();
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_ctx);
auto cpu_place = new paddle::platform::CPUPlace();
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place);
// Compare Tensors
const int* dst_ptr = dst_tensor.data<int>();
......@@ -245,10 +245,10 @@ TEST(Tensor, CopyFrom) {
Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
// CPU Slice Tensor to GPU Tensor
gpu_tensor.CopyFrom<int>(slice_tensor, *gpu_ctx);
gpu_tensor.CopyFrom<int>(slice_tensor, *gpu_place);
// GPU Tensor to CPU Tensor
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_ctx);
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place);
// Compare Slice Tensors
const int* slice_ptr = slice_tensor.data<int>();
......
......@@ -16,7 +16,7 @@
#include <typeindex>
#include <typeinfo>
#include "paddle/platform/assert.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace framework {
......@@ -25,7 +25,7 @@ class Variable {
public:
template <typename T>
const T& Get() const {
PADDLE_ASSERT(IsType<T>());
PADDLE_ENFORCE(IsType<T>(), "Variable must be type %s", typeid(T).name());
return *static_cast<const T*>(holder_->Ptr());
}
......@@ -65,6 +65,17 @@ class Variable {
std::unique_ptr<Placeholder>
holder_; // pointers to a PlaceholderImpl object indeed.
// name_ is only meaningful with a Scope and accessible by it.
//
// NOTE: Please don't expose name_ by adding methods like
// Variable::Name or Scope::VarName! A variable could have a human
// readable name or an auto-generated scope-unique name. In the
// former case, the caller knows the name and doesn't need to access
// the name; in the latter case, the variable should be identified
// by its address but not the unreadable name.
friend class Scope;
const std::string* name_;
};
} // namespace framework
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Projection.h"
namespace paddle {
/**
* SliceProjection can slice the input value into multiple parts,
* and then select some of them to merge into a new output.
*
* First, calculate the slices that need to be merged into the output.
* slices = input.slices().for_output()
*
* Second, merge each slice into the output.
* for(auto slice: slices) {
* out.addAtOffset(slice, offset);
* }
*
* Input slices as output: s0, s1, ...:
* -----------------------
* |///| |//////| |
* |/s0| |//s1//| |
* |///| |//////| |
* -----------------------
* Output, merge s0, s1, ... into one output:
* ----------------
* |///|//////| |
* |/s0|//s1//|...|
* |///|//////| |
* ----------------
*
* The config file api is slice_projection.
*/
class SliceProjection : public Projection {
public:
SliceProjection(const ProjectionConfig& config,
const ParameterPtr& parameter,
bool useGpu);
virtual void forward();
virtual void backward(const UpdateCallback& callback);
protected:
std::vector<std::pair<size_t, size_t>> slices_;
};
REGISTER_PROJECTION(slice, SliceProjection);
/**
* Constructed function.
* @note SliceProjection should not have any parameter.
*/
SliceProjection::SliceProjection(const ProjectionConfig& config,
const ParameterPtr& parameter,
bool useGpu)
: Projection(config, parameter, useGpu) {
CHECK(!parameter) << "'slice' projection should not have any parameter";
slices_.reserve(config.slices_size());
for (const auto& slice : config.slices()) {
slices_.push_back(std::make_pair(slice.start(), slice.end()));
}
}
void SliceProjection::forward() {
size_t offset = 0;
for (auto& slice : slices_) {
auto slice_out = in_->value->subColMatrix(slice.first, slice.second);
out_->value->addAtOffset(*slice_out, offset);
offset += slice_out->getWidth();
}
}
void SliceProjection::backward(const UpdateCallback& callback) {
if (in_->grad) {
size_t offset = 0;
for (auto& slice : slices_) {
auto slice_out = in_->grad->subColMatrix(slice.first, slice.second);
slice_out->addAtOffset(*out_->grad, offset);
offset += slice_out->getWidth();
}
}
}
} // namespace paddle
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name ="input", size=8*16*16)
conv1 = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=16, stride=1,
bias_attr=False,
act=ReluActivation())
conv2 = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=16, stride=1,
bias_attr=False,
act=ReluActivation())
proj1 = slice_projection(input=conv1, slices=[(0, 4), (4, 12)])
proj2 = slice_projection(input=conv2, slices=[(1, 5), (5, 15)])
concat = concat_layer(input=[proj1, proj2])
outputs(concat)
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name ="input", size=8*16*16)
conv1 = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=16, stride=1,
bias_attr=False,
act=ReluActivation())
conv2 = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=16, stride=1,
bias_attr=False,
act=ReluActivation())
proj1 = slice_projection(input=conv1, slices=[(0, 12)])
proj2 = slice_projection(input=conv2, slices=[(1, 15)])
concat = concat_layer(input=[proj1, proj2])
outputs(concat)
......@@ -152,6 +152,26 @@ TEST(Projection, identity) {
}
}
TEST(Projection, slice) {
ProjectionConfig conf;
conf.set_type("slice");
conf.set_input_size(100);
SliceConfig& slice1 = *conf.add_slices();
slice1.set_start(10);
slice1.set_end(20);
SliceConfig& slice2 = *conf.add_slices();
slice2.set_start(50);
slice2.set_end(70);
conf.set_output_size(30);
for (auto useGpu : {false, true}) {
testProjectionGrad(conf,
INPUT_DATA,
/* parameterSize */ 0,
/* batchSize */ 10,
useGpu);
}
}
TEST(Projection, scaling) {
ProjectionConfig conf;
conf.set_type("scaling");
......
......@@ -237,6 +237,12 @@ TEST(Compare, concat_table) {
compareNetwork(config_file_a, config_file_b);
}
TEST(Compare, concat_slice) {
std::string config_file_a = "./gserver/tests/concat_slice_a.conf";
std::string config_file_b = "./gserver/tests/concat_slice_b.conf";
compareNetwork(config_file_a, config_file_b);
}
#ifndef PADDLE_ONLY_CPU
TEST(Compare, img_pool) {
std::string config_file_a = "./gserver/tests/img_pool_a.conf";
......
......@@ -1141,4 +1141,64 @@ TEST(CpuMatrix, copyFrom) {
TensorCheckEqual(cpu, copy);
}
void testBatch2seqPadding(int batchSize, int inputDim) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
IVectorPtr cpuSequence;
generateSequenceStartPositions(batchSize, cpuSequence);
IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
gpuSequence->copyFrom(*cpuSequence);
size_t numSeq = cpuSequence->getSize() - 1;
size_t maxSeqLen = *std::max_element(cpuSequence->getData(),
cpuSequence->getData() + numSeq);
MatrixPtr cBatch = std::make_shared<CpuMatrix>(numSeq * maxSeqLen, inputDim);
MatrixPtr gBatch = std::make_shared<GpuMatrix>(numSeq * maxSeqLen, inputDim);
MatrixPtr cCheck = std::make_shared<CpuMatrix>(numSeq * maxSeqLen, inputDim);
hl_sequence2batch_copy_padding(gBatch->getData(),
gpuInput->getData(),
cpuSequence->getData(),
inputDim,
maxSeqLen,
numSeq,
false,
true);
cCheck->copyFrom(*gBatch);
int* seqStart = cpuSequence->getData();
float* batchData = cBatch->getData();
float* seqData = cpuInput->getData();
for (size_t i = 0; i < maxSeqLen; i++) {
for (size_t j = 0; j < numSeq; j++) {
size_t sequenceStart = seqStart[j];
size_t sequenceLength = seqStart[j + 1] - seqStart[j];
if (i < sequenceLength) {
memcpy(batchData + (i * numSeq + j) * inputDim,
seqData + (sequenceStart + i) * inputDim,
inputDim * sizeof(real));
} else {
memset(batchData + (i * numSeq + j) * inputDim,
0,
inputDim * sizeof(real));
}
}
}
TensorCheckErr(*cBatch, *cCheck);
}
TEST(Matrix, warpCTC) {
for (auto batchSize : {51, 526, 2884}) {
for (auto inputDim : {32, 512, 2026}) {
VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim;
testBatch2seqPadding(batchSize, inputDim);
}
}
}
#endif
......@@ -35,7 +35,7 @@ void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
platform::GPUPlace src_place,
const void* src, size_t num,
cudaStream_t stream) {
platform::GPUPlaceGuard g(src_place.device);
platform::SetDeviceId(src_place.device);
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
}
......@@ -45,7 +45,7 @@ void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace dst_place,
platform::CPUPlace src_place,
const void* src, size_t num,
cudaStream_t stream) {
platform::GPUPlaceGuard g(dst_place.device);
platform::SetDeviceId(dst_place.device);
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
}
......@@ -56,7 +56,7 @@ void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
const void* src, size_t num,
cudaStream_t stream) {
if (dst_place == src_place) {
platform::GPUPlaceGuard g(src_place.device);
platform::SetDeviceId(src_place.device);
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream);
} else {
platform::GpuMemcpyPeer(dst, dst_place.device, src, src_place.device, num,
......
......@@ -20,13 +20,39 @@ limitations under the License. */
namespace paddle {
namespace memory {
/**
* \brief Copy memory from one place to another place.
*
* \param[in] DstPlace Destination allocation place (CPU).
* \param[in] dst Destination memory address.
* \param[in] SrcPlace Source allocation place (CPU).
* \param[in] src Source memory address.
* \param[in] num memory size in bytes to copy.
*
*/
template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
#ifndef PADDLE_ONLY_CPU
/**
* \brief Copy memory from one place to another place.
*
* \param[in] DstPlace Destination allocation place (CPU or GPU).
* \param[in] dst Destination memory address.
* \param[in] SrcPlace Source allocation place (CPU or GPU).
* \param[in] src Source memory address.
* \param[in] num memory size in bytes to copy.
* \param[in] stream CUDA stream.
*
* \note For GPU memory copy, CUDA stream need to be specified
* for asynchronously memory copy.
*
*/
template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
cudaStream_t stream);
#endif // PADDLE_ONLY_CPU
} // namespace memory
......
......@@ -60,6 +60,7 @@ detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
platform::GpuMaxChunkSize());
}
}
platform::SetDeviceId(gpu_id);
return as[gpu_id];
}
......
......@@ -20,15 +20,49 @@ limitations under the License. */
namespace paddle {
namespace memory {
/**
* \brief Allocate memory block in one place.
*
* \param[in] place Allocation place (CPU or GPU).
* \param[in] size Allocation size.
*
* \return Allocated memory block address.
*
* \note If return nullptr, it indicates memory allocation failed
* because insufficient memory in current system. When Alloc
* function is invoked, you must check the returned memory
* address is valid or not.
*/
template <typename Place>
void* Alloc(Place, size_t);
void* Alloc(Place place, size_t size);
/**
* \brief Free memory block in one place.
*
* \param[in] place Allocation place (CPU or GPU).
* \param[in] ptr Memory block address to free.
*
*/
template <typename Place>
void Free(Place, void*);
void Free(Place place, void* ptr);
/**
* \brief Total size of used memory in one place.
*
* \param[in] place Allocation place (CPU or GPU).
*
*/
template <typename Place>
size_t Used(Place);
size_t Used(Place place);
/**
* \brief Free memory block in one place.
*
* \note In some cases, custom deleter is used to
* deallocate the memory automatically for
* std::unique_ptr<T> in tensor.h.
*
*/
template <typename T, typename Place>
class PODDeleter {
static_assert(std::is_pod<T>::value, "T must be POD");
......
......@@ -44,13 +44,26 @@ endfunction()
op_library(add_op SRCS add_op.cc add_op.cu)
cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
op_library(mean_op SRCS mean_op.cc mean_op.cu)
cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op)
op_library(mul_op SRCS mul_op.cc mul_op.cu)
op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu)
op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu)
op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op
softmax_op net)
op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu)
op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
op_library(fc_op
SRCS fc_op.cc
DEPS mul_op rowwise_add_op sigmoid_op softmax_op net)
op_library(recurrent_network_op
SRCS recurrent_network_op.cc
DEPS op_desc tensor net)
cc_test(recurrent_network_op_test
SRCS recurrent_network_op_test.cc
DEPS recurrent_network_op mul_op add_op)
......@@ -19,16 +19,16 @@ namespace operators {
class AddOp : public OperatorWithKernel {
protected:
void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "Input size of AddOp must be two");
PADDLE_ENFORCE(outputs.size() == 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(
inputs[0] != nullptr && inputs[1] != nullptr && outputs[0] != nullptr,
"Inputs/Outputs of AddOp must all be set");
PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, "Input size of AddOp must be two");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr,
"Inputs of AddOp must all be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr,
"Outputs of AddOp must all be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(),
"Two input of Add Op's dimension must be same.");
outputs[0]->Resize(inputs[0]->dims());
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
}
};
......@@ -49,8 +49,7 @@ The equation is: Out = X + Y
class AddOpGrad : public OperatorWithKernel {
protected:
void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs) const override {}
void InferShape(const InferShapeContext &ctx) const override {}
std::string DebugString() const override {
LOG(INFO) << "AddOpGrad";
return "";
......
......@@ -21,16 +21,17 @@ namespace operators {
template <typename Place, typename T>
class AddKernel : public OpKernel {
public:
void Compute(const KernelContext& context) const override {
auto input0 = context.Input(0)->Get<Tensor>();
auto input1 = context.Input(1)->Get<Tensor>();
auto output = context.Output(0)->GetMutable<Tensor>();
void Compute(const ExecutionContext& context) const override {
auto input0 = context.Input<Tensor>(0);
auto input1 = context.Input<Tensor>(1);
auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace());
EigenVector<T>::Flatten(*output).device(
*(context.GetEigenDevice<Place>())) =
EigenVector<T>::Flatten(input0) + EigenVector<T>::Flatten(input1);
framework::EigenVector<T>::Flatten(*input0) +
framework::EigenVector<T>::Flatten(*input1);
}
};
......
......@@ -19,20 +19,20 @@ namespace operators {
class OnehotCrossEntropyOp : public OperatorWithKernel {
protected:
void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2,
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2,
"Input size of OnehotCrossEntropyOp must be two");
PADDLE_ENFORCE(outputs.size() == 1,
PADDLE_ENFORCE(ctx.OutputSize() == 1,
"Output size of OnehotCrossEntropyOp must be one");
PADDLE_ENFORCE(inputs[0] != nullptr && inputs[1] != nullptr,
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr,
"Inputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE(outputs[0] != nullptr,
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr,
"Outputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE(inputs[0]->dims().size() == 2, "X's dimension must be 2.");
PADDLE_ENFORCE(outputs[0]->dims().size() == 1,
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims().size() == 2,
"X's dimension must be 2.");
PADDLE_ENFORCE(ctx.Output<Tensor>(0)->dims().size() == 1,
"label's dimension must be 1.");
outputs[0]->Resize({inputs[0]->dims()[0]});
ctx.Output<Tensor>(0)->Resize({ctx.Input<Tensor>(0)->dims()[0]});
}
};
......
......@@ -23,18 +23,18 @@ class OnehotCrossEntropyOpKernel : public OpKernel {
public:
constexpr T LOG_THRESHOLD() const { return static_cast<T>(1e-20); }
void Compute(const KernelContext& context) const override {
auto X = context.Input(0)->Get<Tensor>();
const T* X_data = X.data<T>();
const int* label_data = context.Input(1)->Get<Tensor>().data<int>();
auto* Y = context.Output(0)->GetMutable<Tensor>();
void Compute(const ExecutionContext& ctx) const override {
auto X = ctx.Input<Tensor>(0);
const T* X_data = X->data<T>();
const int* label_data = ctx.Input<Tensor>(1)->data<int>();
auto Y = ctx.Output<Tensor>(0);
Y->mutable_data<T>(context.GetPlace());
Y->mutable_data<T>(ctx.GetPlace());
T* Y_data = Y->data<T>();
int batch_size = X.dims()[0];
int class_num = X.dims()[1];
int batch_size = X->dims()[0];
int class_num = X->dims()[1];
// Y[i] = -log(X[i][j])
for (int i = 0; i < batch_size; ++i) {
......
......@@ -50,8 +50,8 @@ public:
AddInput("b", "the bias of fc operator");
AddOutput("Y", "the output of fc operator");
AddOutput(
"before_act", "the before activation output of fc operator", true);
AddOutput("before_act", "the before activation output of fc operator")
.SetTemporary();
AddAttr<std::string>("activation", "The activation key for fc layer")
.SetDefault("sigmoid")
.InEnum({"sigmoid", "softmax"});
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/fill_zeros_like_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle {
namespace operators {
class FillZerosLikeOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1UL,
"Input size of FillZerosLikeOp must be one.");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL,
"Output size of AddOp must be one.");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr,
"Input of FillZerosLikeOp must be set.");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr,
"Output of FillZerosLikeOp must be set.");
ctx.Output<framework::Tensor>(0)->Resize(
ctx.Input<framework::Tensor>(0)->dims());
}
};
class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
FillZerosLikeOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Src", "The input of fill-zeros-like op.");
AddOutput("Dst", "The varibale will be filled up with zeros.");
AddComment(R"DOC(
Fill up a vriable with zeros.
The output will have the same size with input.
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(fill_zeros_like,
paddle::operators::FillZerosLikeOp,
paddle::operators::FillZerosLikeOpMaker);
REGISTER_OP_CPU_KERNEL(
fill_zeros_like,
paddle::operators::FillZerosLikeKernel<paddle::platform::CPUPlace, float>);
#include "paddle/framework/op_registry.h"
#include "paddle/operators/fill_zeros_like_op.h"
REGISTER_OP_GPU_KERNEL(
fill_zeros_like,
paddle::operators::FillZerosLikeKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "glog/logging.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class FillZerosLikeKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* output = context.Output<framework::Tensor>(0);
output->mutable_data<T>(context.GetPlace());
framework::EigenVector<T>::Flatten(*output).setZero();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/mean_op.h"
namespace paddle {
namespace operators {
class MeanOp : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1, "Input size of AddOp must be one");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.OutputVar(0) != nullptr,
"Input/Output of MeanOp must be initialized.");
ctx.Output<Tensor>(0)->Resize(framework::make_ddim({1}));
}
};
class MeanOpMaker : public OpProtoAndCheckerMaker {
public:
MeanOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of mean op");
AddOutput("Out", "The output of mean op");
AddComment("Mean Operator");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker);
REGISTER_OP_CPU_KERNEL(mean, ops::MeanKernel<ops::CPUPlace, float>);
#define EIGEN_USE_GPU
#include "paddle/operators/mean_op.h"
REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel<ops::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/type_alias.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class MeanKernel : public OpKernel {
public:
void Compute(const ExecutionContext& context) const override {
auto input = context.Input<Tensor>(0);
auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace());
EigenScalar<T>::From(*output).device(*(context.GetEigenDevice<Place>())) =
EigenVector<T>::Flatten(*input).mean();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <paddle/framework/op_registry.h>
USE_OP(mean);
TEST(MeanOp, GetOpProto) {
auto& protos = paddle::framework::OpRegistry::protos();
auto it = protos.find("mean");
ASSERT_NE(it, protos.end());
}
......@@ -19,18 +19,17 @@ namespace operators {
class MulOp : public OperatorWithKernel {
protected:
void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs");
auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims();
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs");
auto dim0 = ctx.Input<Tensor>(0)->dims();
auto dim1 = ctx.Input<Tensor>(1)->dims();
PADDLE_ENFORCE(dim0.size() == 2 && dim1.size() == 2,
"The input of mul op must be matrix");
PADDLE_ENFORCE(
dim0[1] == dim1[0],
"First matrix's width must be equal with second matrix's height.");
PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output");
outputs[0]->Resize({dim0[0], dim1[1]});
PADDLE_ENFORCE(ctx.OutputSize() == 1, "The mul op must take one output");
ctx.Output<Tensor>(0)->Resize({dim0[0], dim1[1]});
}
};
......@@ -51,8 +50,7 @@ The equation is: Out = X * Y
class MulOpGrad : public OperatorWithKernel {
protected:
void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs) const override {}
void InferShape(const InferShapeContext &ctx) const override {}
std::string DebugString() const override {
LOG(INFO) << "MulGrad";
return "";
......
......@@ -22,19 +22,17 @@ namespace operators {
template <typename Place, typename T>
class MulKernel : public OpKernel {
public:
void Compute(const KernelContext& context) const override {
void Compute(const ExecutionContext& context) const override {
Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = {
{Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}};
auto input0 = context.Input(0)->Get<Tensor>();
auto input1 = context.Input(1)->Get<Tensor>();
auto* output = context.Output(0)->GetMutable<Tensor>();
auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace());
EigenMatrix<T>::From(*output).device(*(context.GetEigenDevice<Place>())) =
EigenMatrix<T>::From(input0).contract(EigenMatrix<T>::From(input1),
dim_pair);
EigenMatrix<T>::From(*context.Input<Tensor>("X"))
.contract(EigenMatrix<T>::From(*context.Input<Tensor>("Y")),
dim_pair);
}
};
} // namespace operators
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/recurrent_network_op.h"
#include <glog/logging.h>
#include <cstring>
#include <sstream>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace operators {
namespace rnn {
void SegmentInputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& inlinks,
const size_t seq_len) {
PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided.");
for (size_t i = 0; i < inlinks.size(); ++i) {
Tensor* input =
step_scopes[0]->FindVar(inlinks[i].external)->GetMutable<Tensor>();
DDim dims = input->dims();
PADDLE_ENFORCE(static_cast<size_t>(dims[0]) == seq_len,
"all the inlinks must have same length");
DDim step_dims = slice_ddim(dims, 1, dims.size());
for (size_t j = 0; j < seq_len; j++) {
Tensor* step_input =
step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable<Tensor>();
*step_input = input->Slice<float>(j, j + 1);
step_input->Resize(step_dims);
}
}
}
void ConcatOutputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& outlinks,
const size_t seq_len) {
for (size_t i = 0; i < outlinks.size(); i++) {
Tensor* output =
step_scopes[0]->FindVar(outlinks[i].external)->GetMutable<Tensor>();
// TODO(qingiqng) remove following code after adding
// InferShape in RecurrentGradientOp
DDim step_dims = step_scopes[0]
->FindVar(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
dims_vec.insert(dims_vec.begin(), seq_len);
output->mutable_data<float>(make_ddim(dims_vec), platform::CPUPlace());
for (size_t j = 0; j < seq_len; j++) {
Tensor* step_output =
step_scopes[j]->FindVar(outlinks[i].internal)->GetMutable<Tensor>();
// TODO(luotao02) data type and platform::DeviceContext() should set
// correctly
(output->Slice<float>(j, j + 1))
.CopyFrom<float>(*step_output, platform::CPUPlace());
}
}
}
void LinkMemories(const std::vector<Scope*>& scopes,
const std::vector<rnn::MemoryAttr>& memories,
size_t step_id,
int offset) {
PADDLE_ENFORCE(step_id < scopes.size(),
"step [%d] is out of range of step scopes' size [%d]",
step_id,
scopes.size());
PADDLE_ENFORCE(static_cast<int>(step_id) + offset >= 0,
"offset [%d] must be large than -[%d]",
offset,
step_id);
PADDLE_ENFORCE(step_id + offset < scopes.size(),
"offset [%d] is out of range, it must be less than (%d - %d)",
offset,
scopes.size(),
step_id);
auto scope = scopes[step_id];
auto linked_scope = scopes[step_id + offset];
for (auto& attr : memories) {
auto mem = scope->NewVar(attr.pre_var)->GetMutable<Tensor>();
// maybe share variable is better?
auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable<Tensor>();
mem->ShareDataWith<float>(*linked_mem);
// TODO(qingqing) remove following code
// the memory of current step should be allocated in step net
auto m = scope->NewVar(attr.var)->GetMutable<Tensor>();
// for unit test, as addOp and mulOp are null currently, if not
// mutable_data, mem.data() in output will be error. We will
// remove this line after merge the correct addOp and mulOp.
m->mutable_data<float>(mem->dims(), platform::CPUPlace());
}
}
void InitArgument(const ArgumentName& name,
Argument* arg,
const OperatorBase& op) {
arg->step_net = op.Input(name.step_net);
arg->step_scopes = op.Output(name.step_scopes);
auto inlinks = op.Inputs(name.inlinks);
auto inlink_alias = op.GetAttr<std::vector<std::string>>(name.inlink_alias);
PADDLE_ENFORCE(inlinks.size() == inlink_alias.size(),
"the size of inlinks and inlink_alias don't match:%d,%d",
inlinks.size(),
inlink_alias.size());
for (size_t i = 0; i < inlinks.size(); ++i) {
rnn::Link link;
link.external = inlinks[i];
link.internal = inlink_alias[i];
(arg->inlinks).push_back(link);
}
auto outlinks = op.Outputs(name.outlinks);
auto outlink_alias = op.GetAttr<std::vector<std::string>>(name.outlink_alias);
PADDLE_ENFORCE(outlinks.size() == outlink_alias.size(),
"the size of outlinks and outlink_alias don't match:%d,%d",
outlinks.size(),
outlink_alias.size());
for (size_t i = 0; i < outlinks.size(); ++i) {
rnn::Link link;
link.external = outlinks[i];
link.internal = outlink_alias[i];
(arg->outlinks).push_back(link);
}
auto boot_memories = op.Inputs(name.boot_memories);
// attributes
auto memories = op.GetAttr<std::vector<std::string>>(name.memories);
auto pre_memories = op.GetAttr<std::vector<std::string>>(name.pre_memories);
PADDLE_ENFORCE(memories.size() == boot_memories.size(),
"the size of memories, boot_memories don't match:%d,%d",
memories.size(),
boot_memories.size());
PADDLE_ENFORCE(pre_memories.size() == boot_memories.size(),
"the size of pre_memories, boot_memories don't match:%d,%d",
pre_memories.size(),
boot_memories.size());
PADDLE_ENFORCE(memories.size() > 0, "more than 1 memories should be set");
for (size_t i = 0; i < memories.size(); ++i) {
rnn::MemoryAttr mem_attr;
mem_attr.var = memories[i];
mem_attr.pre_var = pre_memories[i];
mem_attr.boot_var = boot_memories[i];
(arg->memories).push_back(mem_attr);
}
}
} // namespace rnn
void RecurrentAlgorithm::InferShape(const Scope& scope) const {
seq_len_ = scope.FindVar((arg_->inlinks[0]).external)
->GetMutable<Tensor>()
->dims()[0];
CreateScopes(scope);
auto step_scopes = GetStepScopes(scope);
// SegmentInputs is called in InferShape. The input must hold memory in
// SegmentInputs. But the other op only set dimension for the output in
// InferShape. That's a problem. Wether the RNN op needs InferShape or not?
// Wether the following functions (SegmentInputs, InitMemories, ...) need
// to rewrite for RNN op?
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_);
InitMemories(step_scopes[0]);
PADDLE_ENFORCE(scope.FindVar(arg_->step_net) != nullptr,
"stepnet [%s] is not in scope.",
arg_->step_net);
Variable* net = scope.FindVar(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net");
// If the InferShape is called in OperatorBase's run function,
// the rnn op only needs to do InferShape for the first time step
for (size_t i = 0; i < seq_len_; i++) {
if (i > 0) {
rnn::LinkMemories(step_scopes, arg_->memories, i, -1);
}
net->GetMutable<NetOp>()->InferShape(*step_scopes[i]);
}
auto outlinks = arg_->outlinks;
for (size_t i = 0; i < outlinks.size(); i++) {
DDim step_dims = step_scopes[0]
->FindVar(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
// now only support fixed length
dims_vec.insert(dims_vec.begin(), seq_len_);
Tensor* output =
step_scopes[0]->FindVar(outlinks[i].external)->GetMutable<Tensor>();
output->Resize(make_ddim(dims_vec));
}
}
void RecurrentAlgorithm::Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const {
auto step_scopes = GetStepScopes(scope);
Variable* net = scope.FindVar(arg_->step_net);
for (size_t step_id = 0; step_id < seq_len_; step_id++) {
// the link memory is done in InferShape
// maybe remove following code after testing
if (step_id > 0) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1);
}
net->GetMutable<NetOp>()->Run(*step_scopes[step_id], dev_ctx);
}
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_);
}
void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
// TODO(xxx) Only two scopes are needed for inference, this case will be
// supported later.
auto step_scopes =
scope.FindVar(arg_->step_scopes)->GetMutable<std::vector<Scope*>>();
if (seq_len_ > step_scopes->size()) {
for (size_t i = step_scopes->size(); i < seq_len_; ++i) {
auto& step_scope = scope.NewScope();
// Now all variables in scope must be created outside of op.
auto net_op = scope.FindVar(arg_->step_net)->GetMutable<NetOp>();
for (auto& input : net_op->inputs_) {
if (!step_scope.FindVar(input)) step_scope.NewVar(input);
}
for (auto& output : net_op->outputs_) {
step_scope.NewVar(output);
}
step_scopes->emplace_back(&step_scope);
}
}
}
void RecurrentAlgorithm::InitMemories(Scope* step_scope) const {
for (auto& attr : arg_->memories) {
Tensor* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable<Tensor>();
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"memory [%s]'s boot variable [%s] not exists",
attr.var,
attr.boot_var);
Tensor* boot_mem = step_scope->FindVar(attr.boot_var)->GetMutable<Tensor>();
pre_mem->ShareDataWith<float>(*boot_mem);
// TODO(qingqing) remove following code
// the memory of current step should be allocated in step net
// here for unit test
auto cur_step_mem = step_scope->NewVar(attr.var)->GetMutable<Tensor>();
cur_step_mem->mutable_data<float>(boot_mem->dims(), platform::CPUPlace());
}
}
const rnn::ArgumentName RecurrentOp::kArgName{"step_net",
"step_scopes",
"inlinks",
"outlinks",
"inlink_alias",
"outlink_alias",
"memories",
"pre_memories",
"boot_memories"};
const rnn::ArgumentName RecurrentGradientOp::kArgName{"step_net",
"step_scopes",
"outlink@grad",
"inlink@grad",
"inlink_alias",
"outlink_alias",
"memories",
"pre_memories",
"boot_memories@grad"};
void RecurrentOp::Init() {
OperatorBase::Init();
std::unique_ptr<rnn::Argument> arg(new rnn::Argument());
rnn::InitArgument(kArgName, arg.get(), *this);
alg_.Init(std::move(arg));
}
class RecurrentAlgorithmProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
RecurrentAlgorithmProtoAndCheckerMaker(OpProto* proto,
OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
const auto& name = RecurrentOp::kArgName;
// inputs and outputs stored in proto
AddInput(name.inlinks, "the input that need to be segmented for each step.")
.SetMultiple();
AddInput(name.boot_memories, "variables to initialize memories.")
.SetMultiple();
AddInput(name.step_net, "network shared by all steps.");
AddOutput(name.outlinks, "the output that need to concated for all steps.")
.SetMultiple();
AddOutput(name.step_scopes, "step scopes");
// Attributes stored in AttributeMap
AddAttr<std::vector<std::string>>(name.inlink_alias, "alias of inlinks");
AddAttr<std::vector<std::string>>(name.outlink_alias, "alias of outlinks");
AddAttr<std::vector<std::string>>(name.pre_memories,
"names of pre-memories");
AddAttr<std::vector<std::string>>(name.memories, "names of memories");
AddComment("This is a recurrent group operator.");
}
};
void RecurrentGradientAlgorithm::Run(
const Scope& scope, const platform::DeviceContext& dev_ctx) const {
auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_);
PADDLE_ENFORCE(scope.FindVar(arg_->step_net) != nullptr,
"step net is not in scope.");
Variable* net = scope.FindVar(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net");
for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) {
if (static_cast<size_t>(step_id) != seq_len_ - 1) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1);
}
net->GetMutable<NetOp>()->Run(*step_scopes[step_id], dev_ctx);
}
LinkBootMemoryGradients(step_scopes[0]);
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_);
}
void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
Scope* step_scope) const {
for (auto& attr : arg_->memories) {
Tensor* mem_grad = step_scope->NewVar(attr.var)->GetMutable<Tensor>();
PADDLE_ENFORCE(mem_grad != nullptr,
"boot_tensor should be retrieved before");
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"memory [%s]'s boot variable [%s] not exists",
attr.var,
attr.boot_var);
Tensor* boot_mem_grad =
step_scope->NewVar(attr.boot_var)->GetMutable<Tensor>();
boot_mem_grad->ShareDataWith<float>(*mem_grad);
}
}
void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const {
seq_len_ = scope.FindVar((arg_->inlinks[0]).external)
->GetMutable<Tensor>()
->dims()[0];
auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_);
PADDLE_ENFORCE(scope.FindVar(arg_->step_net) != nullptr,
"step net is not in scope.");
Variable* net = scope.FindVar(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net");
for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) {
if (static_cast<size_t>(step_id) != seq_len_ - 1) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1);
}
net->GetMutable<NetOp>()->InferShape(*step_scopes[step_id]);
}
auto outlinks = arg_->outlinks;
for (size_t i = 0; i < outlinks.size(); i++) {
DDim step_dims = step_scopes[0]
->FindVar(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
// now only support fixed length
dims_vec.insert(dims_vec.begin(), seq_len_);
Tensor* output =
step_scopes[0]->FindVar(outlinks[i].external)->GetMutable<Tensor>();
output->Resize(make_ddim(dims_vec));
}
LinkBootMemoryGradients(step_scopes[0]);
}
void RecurrentGradientOp::Init() {
OperatorBase::Init();
std::unique_ptr<rnn::Argument> arg(new rnn::Argument());
rnn::InitArgument(kArgName, arg.get(), *this);
alg_.Init(std::move(arg));
}
} // namespace operators
} // namespace paddle
REGISTER_OP(recurrent_op,
paddle::operators::RecurrentOp,
paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/operator.h"
namespace paddle {
namespace operators {
using namespace paddle::framework;
namespace rnn {
/**
* Memory of a RNN (same as the role of `Momory` in PaddlePaddle).
*
* Memory attributes cached by this op, dims will be infered from
* boot memories in father scope. Other attributes are copied from Op's proto
* attributes.
*/
struct MemoryAttr {
// name of current state variable
std::string var;
// name of previous step's state variable
std::string pre_var;
// name of the variables to init this memory (same role of `boot_layer` in
// PaddlePaddle), which is store in father's scope.
std::string boot_var;
};
struct Link {
// input or output links name.
std::string internal;
// alias to avoid duplicate keys in scopes.
std::string external;
};
struct Argument {
std::string step_net;
std::string step_scopes;
std::vector<Link> inlinks;
std::vector<Link> outlinks;
std::vector<rnn::MemoryAttr> memories;
};
struct ArgumentName {
std::string step_net;
std::string step_scopes;
std::string inlinks;
std::string outlinks;
std::string inlink_alias; // the alias of inlinks in step net.
std::string outlink_alias; // the alias of outlinks in step net.
std::string memories; // the memory name
std::string pre_memories; // the previous memory name
std::string boot_memories; // the boot memory name
};
/**
* Prepare inputs for each step net.
*/
void SegmentInputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& inlinks,
const size_t seq_len);
/**
* Process outputs of step nets and merge to variables.
*/
void ConcatOutputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& outlinks,
const size_t seq_len);
void LinkMemories(const std::vector<Scope*>& step_scopes,
const std::vector<MemoryAttr>& memories,
size_t step_id,
int offset);
void InitArgument(const ArgumentName& name, Argument* arg);
}; // namespace rnn
// The sequence format in RecurrentOp is Tensor<seq_len, batch_size, dim> now.
// TODO:
// 1. No-padding computing for sequences with indifinite length in one batch.
// 2. Hierarchical RNN for sequence with sub-sequence.
// 3. Internal Memory.
// 4. More Complex RNN architecture, such as Gated Feedback RNN.
// Refer to: https://arxiv.org/pdf/1502.02367.pdf
class RecurrentAlgorithm {
public:
void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const;
void Init(std::unique_ptr<rnn::Argument> arg) { arg_ = std::move(arg); }
/**
* InferShape must be called before Run.
*/
void InferShape(const Scope& scope) const;
protected:
/*
* The step scopes will be stored in the father scope as a variable.
*
* NOTE the scopes are reused in both the forward and backward, so just
* create once and expand its size if more steps need.
*/
void CreateScopes(const Scope& scope) const;
const std::vector<Scope*>& GetStepScopes(const Scope& scope) const {
return *scope.FindVar(arg_->step_scopes)->GetMutable<std::vector<Scope*>>();
}
void InitMemories(Scope* step_scopes) const;
private:
std::unique_ptr<rnn::Argument> arg_;
mutable size_t seq_len_;
};
class RecurrentGradientAlgorithm {
/**
* RNN's backward alogorithm.
*
* To accelerate the development of RecurrentGradientOp, we decouple RNN's
* algorithm and `OperatorBase`'s implementation, the former contains the core
* implementation of a RNN, and will keep stable even if the framework changes
* a
* lot, and the latter is a wrapper acts like an dapter for it to make RNN an
* operator.
*/
public:
void Init(std::unique_ptr<rnn::Argument> arg) { arg_ = std::move(arg); }
void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const;
void LinkBootMemoryGradients(Scope* step_scopes) const;
/**
* InferShape must be called before Run.
*/
void InferShape(const Scope& scope) const;
protected:
inline const std::vector<Scope*>& GetStepScopes(const Scope& scope) const {
return *scope.FindVar(arg_->step_scopes)->GetMutable<std::vector<Scope*>>();
}
private:
std::unique_ptr<rnn::Argument> arg_;
mutable size_t seq_len_;
};
class RecurrentOp final : public OperatorBase {
public:
void Init() override;
/**
* InferShape must be called before Run.
*/
virtual void InferShape(const Scope& scope) const override {
alg_.InferShape(scope);
}
virtual void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
alg_.Run(scope, dev_ctx);
}
static const rnn::ArgumentName kArgName;
private:
RecurrentAlgorithm alg_;
};
class RecurrentGradientOp final : public OperatorBase {
public:
void Init() override;
/**
* InferShape must be called before Run.
*/
virtual void InferShape(const Scope& scope) const override {
alg_.InferShape(scope);
}
virtual void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
alg_.Run(scope, dev_ctx);
}
static const rnn::ArgumentName kArgName;
private:
RecurrentGradientAlgorithm alg_;
};
} // namespace operators
} // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/tensor.h"
#include "paddle/operators/recurrent_network_op.h"
namespace paddle {
namespace operators {
class RecurrentOpTest : public ::testing::Test {
protected:
virtual void SetUp() override {
CreateGlobalVariables();
CreateStepNet();
CreateRNNOp();
}
virtual void TearDown() override {}
void CreateGlobalVariables() {
// create input, and init content
LOG(INFO) << "create global variable x";
for (auto inlink : std::vector<std::string>{"x", "x0", "x1", "h"}) {
Variable* x = scope_.NewVar(inlink);
DDim dims = make_ddim(std::vector<int>{
10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/});
x->GetMutable<Tensor>()->mutable_data<float>(dims, platform::CPUPlace());
}
// create output alias just for test
for (auto inlink : std::vector<std::string>{"h@alias"}) {
Variable* x = scope_.NewVar(inlink);
DDim dims =
make_ddim(std::vector<int>{20 /*batch size*/, 30 /*input dim*/});
x->GetMutable<Tensor>()->mutable_data<float>(dims, platform::CPUPlace());
}
LOG(INFO) << "create global variable w";
Variable* w = scope_.NewVar("rnn/w");
w->GetMutable<Tensor>()->mutable_data<float>(
make_ddim(std::vector<int>{30, 30}), platform::CPUPlace());
for (auto boot : std::vector<std::string>{"x_boot", "h_boot"}) {
LOG(INFO) << "create global variable " << boot;
Variable* h_boot = scope_.NewVar(boot);
h_boot->GetMutable<Tensor>()->mutable_data<float>(
make_ddim(std::vector<int>{20 /*batch size*/, 30 /*input dim*/}),
platform::CPUPlace());
}
LOG(INFO) << "create variable step_scopes";
scope_.NewVar("step_scopes");
LOG(INFO) << "create variable h";
scope_.NewVar("h");
}
void CreateRNNOp() {
OpDesc op_desc;
op_desc.set_type("recurrent_op");
// inlinks 0
op_desc.add_inputs("x");
op_desc.add_inputs("x0");
op_desc.add_inputs("x1");
// boot_memories 3
op_desc.add_inputs("x_boot");
op_desc.add_inputs("h_boot");
// step net 5
op_desc.add_inputs("step_net");
// outlinks 6
op_desc.add_outputs("h");
// step scopes 7
op_desc.add_outputs("step_scopes");
auto _input_format = std::vector<int>{
0, // in_link
3, // memories
5 // step_net
};
auto input_format = op_desc.add_attrs();
input_format->set_name("input_format");
input_format->set_type(paddle::framework::AttrType::INTS);
for (auto i : _input_format) {
input_format->add_ints(i);
}
auto output_format = op_desc.add_attrs();
output_format->set_name("output_format");
output_format->set_type(paddle::framework::AttrType::INTS);
for (auto i : std::vector<int>{0, 1, 2}) {
output_format->add_ints(i);
}
auto inlink_alias = op_desc.add_attrs();
inlink_alias->set_name("inlink_alias");
inlink_alias->set_type(paddle::framework::AttrType::STRINGS);
auto outlink_alias = op_desc.add_attrs();
outlink_alias->set_name("outlink_alias");
outlink_alias->set_type(paddle::framework::AttrType::STRINGS);
auto pre_memories = op_desc.add_attrs();
pre_memories->set_name("pre_memories");
pre_memories->set_type(paddle::framework::AttrType::STRINGS);
auto memories = op_desc.add_attrs();
memories->set_name("memories");
memories->set_type(paddle::framework::AttrType::STRINGS);
// create inlink_alias
for (const auto& item :
std::vector<std::string>{"x@alias", "x0@alias", "x1@alias"}) {
inlink_alias->add_strings(item);
}
// pre memories
for (const auto& item :
std::vector<std::string>{"rnn/x@pre", "rnn/h@pre"}) {
pre_memories->add_strings(item);
}
// memories
for (const auto& item : std::vector<std::string>{"rnn/x", "rnn/h"}) {
memories->add_strings(item);
}
// output alias
for (const auto& item : std::vector<std::string>{"h@alias"}) {
outlink_alias->add_strings(item);
}
rnn_op_ = OpRegistry::CreateOp(op_desc);
LOG(INFO) << "rnn_op finish init";
}
void CreateStepNet() {
LOG(INFO) << "create variable step_net";
Variable* var = scope_.NewVar("step_net");
auto net = var->GetMutable<NetOp>();
// rnn/s is net's input or output?
net->inputs_ = {"rnn/h@pre", "rnn/w", "rnn/x"};
net->inputs_ = {"rnn/s", "rnn/h"};
net->AddOp(
OpRegistry::CreateOp("mul", {"rnn/h@pre", "rnn/w"}, {"rnn/s"}, {}));
net->AddOp(
OpRegistry::CreateOp("add_two", {"rnn/x", "rnn/s"}, {"rnn/h"}, {}));
net->CompleteAddOp();
}
// father scope
Scope scope_;
std::shared_ptr<OperatorBase> rnn_op_;
};
TEST_F(RecurrentOpTest, Run) {
platform::CPUDeviceContext ctx;
rnn_op_->InferShape(scope_);
rnn_op_->Run(scope_, ctx);
}
class RecurrentGradientAlgorithmTest : public ::testing::Test {
protected:
virtual void SetUp() override {
CreateGlobalVariables();
CreateStepScopes();
CreateStepNet();
CreateRNNGradientAlgorithm();
// segment inputs
SegmentInputs();
// link forward memories
LinkeMemories();
}
virtual void TearDown() override {}
void CreateGlobalVariables() {
// inputs: x
LOG(INFO) << "create global variable x";
Variable* x = scope_.NewVar("x");
DDim dims =
make_ddim({10 /*sent size*/, 20 /*batch size*/, 30 /*input dim*/});
x->GetMutable<Tensor>()->mutable_data<float>(dims, platform::CPUPlace());
// inputs: h_boot
LOG(INFO) << "create global variable h_boot";
Variable* h_boot = scope_.NewVar("h_boot");
h_boot->GetMutable<Tensor>()->mutable_data<float>(
make_ddim({20 /*batch size*/, 30 /*input dim*/}), platform::CPUPlace());
// inputs: w
LOG(INFO) << "create global variable w";
Variable* w = scope_.NewVar("rnn/w");
w->GetMutable<Tensor>()->mutable_data<float>(make_ddim({30, 30}),
platform::CPUPlace());
// inputs: h_grad
LOG(INFO) << "create variable h_grad";
Variable* dh = scope_.NewVar("h_grad");
dh->GetMutable<Tensor>()->mutable_data<float>(make_ddim({10, 20, 30}),
platform::CPUPlace());
// inputs: step_scopes
LOG(INFO) << "create variable step_scopes";
scope_.NewVar("step_scopes");
// inputs: step_net
LOG(INFO) << "create variable step_net";
scope_.NewVar("step_net");
// outputs: w_grad
LOG(INFO) << "create global variable w_grad";
scope_.NewVar("rnn/w_grad");
// outputs: x_grad
LOG(INFO) << "create global variable x_grad";
scope_.NewVar("x_grad");
// outputs: h_boot_grad
LOG(INFO) << "create global variable h_boot_grad";
scope_.NewVar("h_boot_grad");
}
void CreateStepScopes() {
auto step_scopes =
scope_.FindVar("step_scopes")->GetMutable<std::vector<Scope*>>();
for (int i = 0; i < 10; ++i) {
auto& scope = scope_.NewScope();
auto pre_t = scope.NewVar("rnn/pre_h")->GetMutable<Tensor>();
pre_t->mutable_data<float>({20, 30}, platform::CPUPlace());
auto tensor = scope.NewVar("rnn/h")->GetMutable<Tensor>();
tensor->mutable_data<float>({20, 30}, platform::CPUPlace());
// for unit test of ConcatOutputs
auto xg = scope.NewVar("rnn/x_grad")->GetMutable<Tensor>();
xg->mutable_data<float>({20, 30}, platform::CPUPlace());
step_scopes->emplace_back(&scope);
}
// last time step
auto g = (*step_scopes)[9]->NewVar("rnn/h_pre_grad")->GetMutable<Tensor>();
g->mutable_data<float>({20, 30}, platform::CPUPlace());
}
void CreateRNNGradientAlgorithm() {
std::unique_ptr<rnn::Argument> arg(new rnn::Argument());
arg->step_net = "step_net";
arg->step_scopes = "step_scopes";
rnn::Link inlink;
inlink.external = "h_grad";
inlink.internal = "rnn/h_grad";
arg->inlinks = std::vector<rnn::Link>{inlink};
rnn::Link outlink;
outlink.external = "x_grad";
outlink.internal = "rnn/x_grad";
arg->outlinks = std::vector<rnn::Link>{outlink};
rnn::MemoryAttr mem_attr;
mem_attr.pre_var = "rnn/h_pre_grad";
mem_attr.var = "rnn/h_grad";
mem_attr.boot_var = "h_boot_grad";
arg->memories = std::vector<rnn::MemoryAttr>{mem_attr};
rnn_grad_algo_.Init(std::move(arg));
}
void CreateStepNet() {
LOG(INFO) << "create variable step_net";
Variable* var = scope_.NewVar("step_net");
auto net = var->GetMutable<NetOp>();
net->AddOp(OpRegistry::CreateOp("mul",
{"rnn/h_pre", "rnn/w", "rnn/s_grad"},
{"rnn/h_pre_grad", "rnn/w_grad"},
{}));
net->AddOp(OpRegistry::CreateOp(
"add_two", {"rnn/h_grad"}, {"rnn/x_grad", "rnn/s_grad"}, {}));
net->CompleteAddOp();
}
void SegmentInputs() {
LOG(INFO) << "segment inputs";
std::vector<std::string> inlinks = {"x"};
std::vector<std::string> inlinks_alias = {"rnn/x"};
rnn::Link inlink;
inlink.external = "x";
inlink.internal = "rnn/x";
auto step_scopes =
scope_.FindVar("step_scopes")->GetMutable<std::vector<Scope*>>();
rnn::SegmentInputs(*step_scopes, std::vector<rnn::Link>{inlink}, 10);
}
void LinkeMemories() {
LOG(INFO) << "link memories";
rnn::MemoryAttr mem_attr;
mem_attr.pre_var = "rnn/h_pre";
mem_attr.var = "rnn/h";
mem_attr.boot_var = "boot_h";
std::vector<rnn::MemoryAttr> memories;
memories.push_back(mem_attr);
auto step_scopes =
scope_.FindVar("step_scopes")->GetMutable<std::vector<Scope*>>();
for (int i = 1; i < 10; ++i) {
rnn::LinkMemories(*step_scopes, memories, i, -1);
}
}
Scope scope_;
RecurrentGradientAlgorithm rnn_grad_algo_;
};
// TEST_F(RecurrentGradientAlgorithmTest, Run) {
// platform::CPUDeviceContext ctx;
// rnn_grad_algo_.Run(scope_, ctx);
// }
} // namespace operators
} // namespace paddle
TEST(RecurrentOp, LinkMemories) {
using namespace paddle::framework;
using namespace paddle::platform;
using namespace paddle::operators;
// create and init step scopes
int len = 10;
std::vector<Scope*> step_scopes;
for (int i = 0; i < len; ++i) {
auto scope = new Scope();
scope->NewVar("pre_h");
auto tensor = scope->NewVar("h")->GetMutable<Tensor>();
float* data = tensor->mutable_data<float>({15, 20}, CPUPlace());
for (int j = 0; j < 15 * 20; ++j) {
data[j] = rand() * (1. / (double)RAND_MAX);
}
step_scopes.push_back(scope);
}
// create MemoryAttr
rnn::MemoryAttr mem_attr;
mem_attr.pre_var = "pre_h";
mem_attr.var = "h";
mem_attr.boot_var = "boot_h";
std::vector<rnn::MemoryAttr> memories;
memories.push_back(mem_attr);
for (int i = 1; i < len; ++i) {
rnn::LinkMemories(step_scopes, memories, i, -1);
}
// check
for (int i = 0; i < len - 1; ++i) {
const float* a =
step_scopes[i]->FindVar("h")->GetMutable<Tensor>()->data<float>();
const float* b = step_scopes[i + 1]
->FindVar("pre_h")
->GetMutable<Tensor>()
->data<float>();
for (size_t i = 0; i < 15 * 20; ++i) {
ASSERT_FLOAT_EQ(a[i], b[i]);
}
}
for (int i = len - 2; i >= 0; --i) {
rnn::LinkMemories(step_scopes, memories, i, 1);
}
// check
for (int i = len - 2; i >= 0; --i) {
const float* a =
step_scopes[i]->FindVar("pre_h")->GetMutable<Tensor>()->data<float>();
const float* b =
step_scopes[i + 1]->FindVar("h")->GetMutable<Tensor>()->data<float>();
for (size_t i = 0; i < 15 * 20; ++i) {
ASSERT_FLOAT_EQ(a[i], b[i]);
}
}
for (auto s : step_scopes) {
delete s;
}
}
USE_OP(add_two);
USE_OP(mul);
// int main() {
// //! TODO(yuyang18): Temporary disable this unit-test because implementation
// //! error.
// return 0;
//}
\ No newline at end of file
# RNN 变长输入设计
对变长序列的学习,现有主流框架比如 tensorflow, pytorch, caffe2, mxnet 等均使用了padding的方式,
即将一个mini-batch内不同长度的序列补0到固定长度参与计算。
现有Paddle包括 `RecurrentLayerGroup` 在内的RNN均实现了无padding的变长序列支持,本文也将基于该模块的思路,设计重构后的变长序列支持。
## 背景介绍
由于tensor必须有明确的shape,因此基于tensor 的主流框架在存储变长序列时,
必须用zero-padding的方式将变长序列补全为固定shape的tensor。
由于padding是一种框架实现变长序列的妥协, 从用户角度,在使用RNN类模型时自然会比较介意padding的存在,
因此会有pytorch中对非padding方式变长序列支持长篇的讨论[3]。
由于padding对内存和计算会有额外的消耗,tensorflow和mxnet均使用了bucketing来进行优化[1][2]
但不管是padding还是bucket,对于用户都是额外的使用负担。
因此,**paddle原生支持变长序列的方式,能直接满足用户对变长序列的最直接的需求,在当前主流平台中可以算是一大优势**
但对变长序列的支持,需要对目前框架做一些修改,下面讨论如何在最小修改下支持变长序列。
## 多层序列数据格式 `LODTensor`
目前 Paddle 会将一个mini-batch内的数据存储在一维的内存上,
额外使用 `Argument.sequenceStartPositions` 来存储每个句子的信息。
Paddle里使用 `Argument.subSequenceStartPositions` 来存储2层的序列信息,更高维度的序列则无法直接支持;
为了支持 `N-level` 序列的存储,本文将序列信息定义成如下数据结构:
```c++
std::shared_ptr<std::vector<std::vector<int>>> lod_start_pos_;
```
或者更明确的定义
```c++
typedef std::vector<int> level_t;
std::vector<level_t> lod_start_pos;
```
这里的每一个 `level_t` 存储一个粒度(level)的偏移信息,和paddle目前做法一致。
为了更透明地传递序列信息,我们引入了一种新的tensor 称为 `LODTensor`[4],
其关于tensor相关的接口都直接继承自 `Tensor`,但另外添加了序列相关接口。
如此,在操作一个 `LODTensor` 时,普通 `Op` 直接当成 `Tensor` 使用,
而操作序列的 `Op` 会额外操作 `LODTensor` 的变长序列操作的相关接口。
`LODTensor` 具体定义如下:
```c++
class LODTensor : public Tensor {
public:
size_t Levels() const { return seq_start_positions_.size(); }
size_t Elements(int level = 0) const {
return seq_start_positions_[level].size();
}
// slice of level[elem_begin: elem_end]
// NOTE low performance in slice seq_start_positions_.
// TODO should call Tensor's Slice.
LODTensor LODSlice(int level, int elem_begin, int elem_end) const;
// slice with tensor's data shared with this.
LODTensor LODSliceShared(int level, int elem_begin, int elem_end) const;
// copy other's lod_start_pos_, to share LOD info.
// NOTE the LOD info sould not be changed.
void ShareConstLODFrom(const LODTensor &other) {
lod_start_pos_ = other.lod_start_pos_;
}
// copy other's lod_start_pos_'s content, free to mutate.
void ShareMutableLODFrom(const LODTensor &other) {
lod_start_pos_ = std::make_shared <
std::vector<std::vector<int>>(other.lod_start_pos_.begin(),
other.lod_start_pos_.end());
}
private:
std::shared_ptr<std::vector<std::vector<int>>> lod_start_pos_;
};
```
其中, `lod_start_pos_` 使用了 `shared_ptr` 来减少存储和复制的代价,
可以认为 `LODTensor``Tensor` 的扩展,几乎完全兼容原始 `Tensor` 的使用。
## 框架支持
### 框架现有的 `Tensor` 调用替换为 `LODTensor`
为了实现 `LODTensor` 的传递,框架里很多 `Tensor` 都需要变成 `LODTensor`
简单实现,直接 **把之前所有的`Tensor` 全部替换成 `LODTensor`,这里可以直接修改 `pybind.cc` 里面创建`Tensor`的接口**
此外,用户有可能需要感知序列的存在(比如序列的可视化需要解析模型中输出的序列),因此一些序列操作的API也需要暴露到 python 层。
### `lod_start_pos` 随着Op调用链传递
框架需要支持下列特性,以实现`lod_start_pos`的传递:
1.`shared_ptr` 的方式实现传递
- 不修改 `lod_start_pos` 内容的作为 consumer
- 修改 `lod_start_pos` 的作为 producer
- 约定 consumer 只需要复制传递过来的 `shared_ptr`
- producer 需要创建自己的独立的内存,以存储自己独立的修改,并暴露 `shared_ptr` 给后续 consumer
- 由于传递过程是以复制`shared_ptr`的方式实现,因此框架只需要传递一次 `lod_start_pos`
2. 对于不感知 `lod_start_pos` 的Op足够透明
3. 需要修改 `lod_start_pos` 的producer Op可以在 `Run` 时更新自己的 `lod_start_pos` 数据
具体的设计分为以下3小节
#### `load_start_pos` 的传递
- 对于不需要修改 `lod_start_pos` 的情况,调用 LODTensor的 `ShareConstLODFrom` 接口实现复制
- 需要修改的,调用`ShareMutableLODFrom` 接口自己分配内存以存储修改
#### 框架透明
传递这一步需要加入到网络跑之前的初始化操作中,并且只需要初始化一次,基于当前框架设计的初步方案如下
- 在 Op 的 `attrs` 中添加一项 `do_mutate_lod_info` 的属性,默认为 `false`
- 有需要修改 `lod_start_pos` 的Op需要在定义 `OpProto` 时设置为 `true`
- `OperatorBase``InferShape` 中会读取 `do_mutate_lod_info` ,并且调用 `LODTensor` 相关的方法实现 `lod_start_pos` 的复制。
- `OperatorBase` 中添加一个 member `is_lod_inited{false}` 来保证传递只进行一次
一些逻辑如下
```c++
class OperatorBase {
public:
// ...
void InferShape() {
if (!is_load_inited) {
bool do_mutate_lod_info = GetAttr<bool>("do_mutate_load_info");
// find a input having LOD to copy
auto lod_input = ValidLODInput();
for (auto &output : outputs) {
if (do_mutate_load_info) {
output.ShareMutableLODFrom(lod_input);
} else {
output.ShareConstLODFrom(load_input);
}
}
is_pod_inited = true;
}
// call op's InferShape
// ...
}
private:
// ...
bool is_lod_inited{false};
};
```
如此,`lod_start_pos` 的信息的传递对非OLD的Op的实现是完全透明的。
#### `lod_start_pos` 的更新
上一小节介绍到,对于需要修改 `load_start_pos` 的Op,`OperatorBase` 会分配一块自己的内存以存储修改,
Op在 `Run` 的实现中,操作更新自己的 `load_start_pos`
而所有依赖其 outputs 的 op 会通过共享的指针自动获取到其更新。
## 根据长度排序
按照长度排序后,从前往后的时间步的batch size会自然地递减,可以直接塞入 Net 做batch计算
比如原始的输入:
```
origin:
xxxx
xx
xxx
-> sorted:
xxxx
xxx
xx
```
经过 `SegmentInputs` 之后,每个会有4个时间步,每个时间步的输入如下(纵向排列)
```
0 1 2 3
x x x x
x x x
x x
```
为了追踪排序前后序列的变化,这里用
```c++
struct SortedSeqItem {
void *start{nullptr};
void *end{nullptr};
};
std::vector<SortedSeqItem> sorted_seqs;
```
来追踪序列排序后的位置,并添加一个新的接口
```c++
std::vector<SortedSeqItem> SortBySeqLen(const LODTensor& tensor);
```
由于输入序列的顺序变化,以下现有的接口需要针对性地修改:
- InitMemories, memory需要根据 `sorted_seqs` 重新排列
- SetmentInputs
- ConcatOutputs
此外,由于 `sorted_seqs` 需要被 `RecurrentGradientOp` 复用,因此会变成 `RecurrentOp` 一个新的output输出,
之后作为 `RecurrentGradientOp` 的一个输入传入。
## InitMemories
由于序列顺序的变化,`boot_memories` 的batch上的element的顺序也需要对应重新排列。
## SegmentInputs
`SegmentInputs` 会依赖 `sorted_seqs` 的信息,将原始的序列按照排序后的序列顺序,从横向切割,转为每个step中的inputs。
即下面的转变:
```
origin:
xxxx
xx
xxx
|
|
\ /
!
0 1 2 3
x x x x
x x x
x x
```
## ConcatOutputs
`ConcatOutputs` 需要
- 将每个时间步的输出重新还原为原始输入的序列顺序(以防止Infer阶段顺序打乱)
- 将每个序列concat 为规则的mini-batch表示
## 参考文献
1. [Tensorflow Bucketing](https://www.tensorflow.org/versions/r0.12/api_docs/python/contrib.training/bucketing)
2. [mxnet Bucketing](http://mxnet.io/how_to/bucketing.html)
3. [variable length input in RNN scenario](https://discuss.pytorch.org/t/about-the-variable-length-input-in-rnn-scenario/345/5)
4. [Level of details](https://en.wikipedia.org/wiki/Level_of_detail)
......@@ -18,17 +18,17 @@ namespace operators {
class RowWiseAddOp : public OperatorWithKernel {
protected:
void InferShape(const std::vector<const Tensor *> &inputs,
const std::vector<Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs is needed by rowwise add");
auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims();
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2UL,
"Two inputs is needed by rowwise add");
auto dim0 = ctx.Input<Tensor>(0)->dims();
auto dim1 = ctx.Input<Tensor>(1)->dims();
PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix");
PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector");
PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same");
PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1");
outputs[0]->Resize(inputs[0]->dims());
PADDLE_ENFORCE(ctx.OutputSize() == 1, "The output size must be 1");
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
}
};
......
......@@ -21,14 +21,12 @@ namespace operators {
template <typename Place, typename T>
class RowWiseAddKernel : public OpKernel {
public:
void Compute(const KernelContext& context) const override {
auto in0 = context.Input(0)->Get<Tensor>();
auto in1 = context.Input(1)->Get<Tensor>();
auto* out = context.Output(0)->GetMutable<Tensor>();
void Compute(const ExecutionContext& context) const override {
auto out = context.Output<Tensor>(0);
out->mutable_data<T>(context.GetPlace());
auto input = EigenMatrix<T>::From(in0);
auto bias = EigenVector<T>::From(in1);
auto input = EigenMatrix<T>::From(*context.Input<Tensor>(0));
auto bias = EigenVector<T>::From(*context.Input<Tensor>(1));
auto output = EigenMatrix<T>::From(*out);
const int bias_size = bias.dimension(0);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python
add_op fc_op sgd_op cross_entropy_op)
cc_library(paddle_pybind SHARED
SRCS pybind.cc
DEPS pybind python
fc_op
sgd_op
add_op
mean_op
cross_entropy_op
recurrent_network_op)
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册