提交 107b3395 编写于 作者: Y Yu Yang

Merge branch 'develop' into feature/better_network_debug_str

......@@ -21,10 +21,10 @@
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
hooks:
- id: clang-formater
- repo: https://github.com/dnephin/pre-commit-golang
sha: e4693a4c282b4fc878eda172a929f7a6508e7d16
- repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: 16398aeccf263adaf53b2495eed0406347d76281
hooks:
- id: go-fmt
files: (.*\.go)
- id: go-lint
files: (.*\.go)
types: [go]
- id: gometalinter
types: [go]
......@@ -41,6 +41,8 @@ before_install:
- pip install rarfile
- curl https://glide.sh/get | bash
- eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
- go get -u github.com/alecthomas/gometalinter
- gometalinter --install
- |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script:
......
......@@ -137,7 +137,8 @@ if(WITH_GPU)
endif(WITH_GPU)
if(USE_NNPACK)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt")
include(external/nnpack)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
endif(USE_NNPACK)
add_subdirectory(proto)
......
......@@ -14,6 +14,17 @@ RUN apt-get update && \
wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \
apt-get clean -y
# Install Go and glide
RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \
tar -C /usr/local -xzf go.tgz && \
mkdir /root/gopath && \
mkdir /root/gopath/bin && \
mkdir /root/gopath/src && \
rm go.tgz
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
# git credential to skip password typing
RUN git config --global credential.helper store
......
......@@ -108,6 +108,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0")
ENDIF()
IF(ANDROID_ABI STREQUAL "arm64-v8a")
SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android)
SET(CMAKE_SYSTEM_PROCESSOR aarch64)
ENDIF()
SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-")
ENDIF()
......@@ -166,7 +167,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0")
ENDIF()
IF(ANDROID_ABI STREQUAL "arm64-v8a")
LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a)
LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a)
ENDIF()
STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}")
......@@ -193,6 +194,10 @@ ELSE()
SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN})
ENDIF()
SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI})
SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE})
SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON})
IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$")
SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE})
IF(ANDROID_ABI STREQUAL "armeabi-v7a")
SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON})
ENDIF()
ENDIF()
ENDIF()
......@@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)
if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
set(NNPACK_FOUND ON)
INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})
set(NNPACK_LIBS)
list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
if (NNPACK_UKERNELS_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
endif()
if (NNPACK_CPUFEATURES_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
endif()
if(NOT ANDROID)
list(APPEND NNPACK_LIBS "rt")
endif()
else()
message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
endif()
......@@ -23,7 +23,6 @@ import (
log "github.com/sirupsen/logrus"
)
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex
var handleMap = make(map[C.paddle_master_client]*master.Client)
var curHandle C.paddle_master_client
......@@ -114,13 +113,13 @@ func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int {
if err != nil {
// Error
// TODO: return the type of error?
*record = (*C.uchar)(nullPtr)
*record = (*C.uchar)(nil)
return -1
}
if len(r) == 0 {
// Empty record
*record = (*C.uchar)(nullPtr)
*record = (*C.uchar)(nil)
return 0
}
......
......@@ -69,7 +69,10 @@ func (c *Client) getRecords() {
// We treat a task as finished whenever the last data
// instance of the task is read. This is not exactly
// correct, but a reasonable approximation.
c.taskFinished(t.Meta.ID)
err = c.taskFinished(t.Meta.ID)
if err != nil {
log.Errorln(err)
}
}
}
......
......@@ -66,11 +66,21 @@ func TestGetFinishTask(t *testing.T) {
for i := 0; i < totalTask*chunkPerTask; i++ {
w := recordio.NewWriter(f, -1, -1)
w.Write(nil)
_, err = w.Write(nil)
if err != nil {
panic(err)
}
// call Close to force RecordIO writing a chunk.
w.Close()
err = w.Close()
if err != nil {
panic(err)
}
}
err = f.Close()
if err != nil {
panic(err)
}
f.Close()
// Manually intialize client to avoid calling c.getRecords()
c := &Client{}
......@@ -79,7 +89,11 @@ func TestGetFinishTask(t *testing.T) {
ch := make(chan string, 1)
ch <- addr
go c.monitorMaster(ch)
c.SetDataset([]string{path})
err = c.SetDataset([]string{path})
if err != nil {
panic(err)
}
checkOnePass := func(i int) {
var tasks []Task
for idx := 0; idx < totalTask; idx++ {
......
......@@ -57,14 +57,30 @@ func TestNextRecord(t *testing.T) {
w := recordio.NewWriter(f, -1, -1)
for i := 0; i < total; i++ {
w.Write([]byte{byte(i)})
_, err = w.Write([]byte{byte(i)})
if err != nil {
panic(err)
}
}
err = w.Close()
if err != nil {
panic(err)
}
err = f.Close()
if err != nil {
panic(err)
}
w.Close()
f.Close()
curAddr := make(chan string, 1)
curAddr <- fmt.Sprintf(":%d", p)
c := master.NewClient(curAddr, 10)
c.SetDataset([]string{path})
err = c.SetDataset([]string{path})
if err != nil {
panic(err)
}
for pass := 0; pass < 50; pass++ {
received := make(map[byte]bool)
for i := 0; i < total; i++ {
......
......@@ -30,7 +30,7 @@ type EtcdClient struct {
// NewEtcdClient creates a new EtcdClient.
func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) {
log.Debugf("Connecting to etcd at %v", endpoints)
// TODO(helin): gracefully shutdown etcd store. Becuase etcd
// TODO(helin): gracefully shutdown etcd store. Because etcd
// store holds a etcd lock, even though the lock will expire
// when the lease timeout, we need to implement graceful
// shutdown to release the lock.
......@@ -60,7 +60,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
}
log.Debugf("Successfully acquired lock at %s.", lockPath)
put := clientv3.OpPut(addrPath, string(addr))
put := clientv3.OpPut(addrPath, addr)
resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit()
if err != nil {
return nil, err
......
......@@ -4,7 +4,7 @@ import "sync"
// InMemStore is an in memory implementation of Store interface.
//
// It does not tolerate the fault that casues the program to crash.
// It does not tolerate the fault that causes the program to crash.
type InMemStore struct {
mu sync.Mutex
buf []byte
......
......@@ -160,7 +160,7 @@ func (s *Service) recover() (bool, error) {
// snapshot *must* be called with s.mu being held.
func (s *Service) snapshot() error {
// TOOD(helin): etcd request has a size limit, so the snapshot
// TODO(helin): etcd request has a size limit, so the snapshot
// size is limited by the max request size. We should either
// divide the snapshot into smaller chunks and save under
// different keys, or configure the request size to be big
......@@ -289,7 +289,6 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) {
log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure)
s.taskQueues.Todo = append(s.taskQueues.Todo, t)
return
}
func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
......
......@@ -34,7 +34,6 @@ import (
log "github.com/sirupsen/logrus"
)
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex
var handleMap = make(map[C.paddle_pserver_client]*client.Client)
var curHandle C.paddle_pserver_client
......@@ -63,7 +62,7 @@ func remove(client C.paddle_pserver_client) *client.Client {
}
func cArrayToSlice(p unsafe.Pointer, len int) []byte {
if p == nullPtr {
if p == nil {
return nil
}
......@@ -101,11 +100,11 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli
}
//export paddle_new_etcd_pserver_client
func paddle_new_etcd_pserver_client(etcd_endpoints *C.char, selected int) C.paddle_pserver_client {
func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client {
// TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters)
addr := C.GoString(etcd_endpoints)
etcd_client := client.NewEtcd(addr)
c := client.NewClient(etcd_client, etcd_client.Desired(), selector(selected != 0))
addr := C.GoString(etcdEndpoints)
etcdClient := client.NewEtcd(addr)
c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0))
return add(c)
}
......@@ -124,20 +123,20 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int {
}
//export paddle_init_param
func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int {
func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, paramConfig unsafe.Pointer, configLen C.int) C.int {
et := pserver.ElementType(param.element_type)
name := C.GoString(param.name)
content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len))
pc := pserver.ParameterWithConfig{
Param: pserver.Parameter{Name: name, ElementType: et, Content: content},
Config: cArrayToSlice(param_config, int(config_len)),
Config: cArrayToSlice(paramConfig, int(configLen)),
}
c := get(client)
err := c.InitParam(pc)
if err != nil {
if err.Error() == pserver.AlreadyInitialized {
log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.", name)
log.Warningf("parameter %s already initialized, treat paddle_init_param as successful.", name)
return C.PSERVER_OK
}
log.Errorln(err)
......@@ -153,7 +152,7 @@ func paddle_finish_init_params(client C.paddle_pserver_client) C.int {
err := c.FinishInitParams()
if err != nil {
if err.Error() == pserver.AlreadyInitialized {
log.Warningln("parameters already initialized, treat paddle_finish_init_params as sucessful.")
log.Warningln("parameters already initialized, treat paddle_finish_init_params as successful.")
return C.PSERVER_OK
}
......@@ -223,12 +222,12 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
p := ps[i]
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
if unsafe.Pointer(param) == nullPtr {
if unsafe.Pointer(param) == nil {
log.Errorln("must pre-allocate parameter.")
return C.PSERVER_ERROR
}
if unsafe.Pointer(param.content) != nullPtr {
if unsafe.Pointer(param.content) != nil {
if int(param.content_len) != len(p.Content) {
log.Errorf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content))
return C.PSERVER_ERROR
......
......@@ -233,7 +233,7 @@ func (c *Client) Save(path string) error {
func strHash(s string) uint32 {
h := fnv.New32a()
h.Write([]byte(s))
_, _ = h.Write([]byte(s))
return h.Sum32()
}
......
......@@ -79,15 +79,33 @@ func initEtcdClient() {
log.Errorf("err %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
client.Delete(ctx, pserver.PsDesired)
client.Delete(ctx, pserver.PsPath)
client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver))
_, err = client.Delete(ctx, pserver.PsDesired)
if err != nil {
panic(err)
}
_, err = client.Delete(ctx, pserver.PsPath)
if err != nil {
panic(err)
}
_, err = client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver))
if err != nil {
panic(err)
}
ports := initClient()
for i := 0; i < numPserver; i++ {
client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i]))
_, err = client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i]))
if err != nil {
panic(err)
}
}
cancel()
client.Close()
err = client.Close()
if err != nil {
panic(err)
}
}
type selector bool
......
......@@ -12,8 +12,7 @@ import (
)
const (
// DefaultEtcdTimeout is the default etcd timeout
DefaultEtcdTimeout time.Duration = 5 * time.Second
defaultEtcdTimeout time.Duration = 5 * time.Second
)
// EtcdClient is used by pserver client that is a part of trainer process.
......@@ -48,7 +47,7 @@ func (p *EtcdClient) Desired() int {
psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil {
log.Errorf("psDesired %s invalid %v", psDesired, err)
log.Errorf("psDesired %d invalid %v", psDesired, err)
time.Sleep(p.timeout)
continue
}
......@@ -67,12 +66,12 @@ func (p *EtcdClient) List() []Server {
for {
for i := 0; i < psDesired; i++ {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
cancel()
psKey := pserver.PsPath + strconv.Itoa(i)
log.Debugf("checking %s", psKey)
resp, err := p.client.Get(ctx, psKey)
cancel()
if err != nil {
log.Infof("Get psKey=%s error, %v", psKey, err)
log.Infof("Get psKey= %s error, %v", psKey, err)
time.Sleep(p.timeout)
continue
}
......@@ -107,11 +106,11 @@ func NewEtcd(endpoints string) *EtcdClient {
for {
cli, err = clientv3.New(clientv3.Config{
Endpoints: ep,
DialTimeout: DefaultEtcdTimeout,
DialTimeout: defaultEtcdTimeout,
})
if err != nil {
log.Errorf("Init etcd connection failed: %v", err)
time.Sleep(DefaultEtcdTimeout)
time.Sleep(defaultEtcdTimeout)
continue
}
break
......@@ -119,7 +118,7 @@ func NewEtcd(endpoints string) *EtcdClient {
log.Infof("Connected to etcd: %s\n", endpoints)
client := &EtcdClient{
client: cli,
timeout: DefaultEtcdTimeout,
timeout: defaultEtcdTimeout,
endpoints: ep,
}
return client
......
......@@ -177,10 +177,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
break
}
}
if registered == true {
if registered {
return nil
}
return errors.New("not registerd, may due to already have enough pservers")
return errors.New("not registered, may due to already have enough pservers")
}, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads))
if err != nil {
......@@ -211,8 +211,5 @@ func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) err
ctx, cancel := context.WithTimeout(context.Background(), timeout)
_, err := e.etcdClient.Put(ctx, key, string(value))
cancel()
if err != nil {
return err
}
return nil
return err
}
......@@ -14,8 +14,6 @@ import (
log "github.com/sirupsen/logrus"
)
var nullPtr = unsafe.Pointer(uintptr(0))
type optimizer struct {
opt *C.struct_paddle_optimizer
elementType ElementType
......@@ -23,7 +21,7 @@ type optimizer struct {
}
func cArrayToSlice(p unsafe.Pointer, len int) []byte {
if p == nullPtr {
if p == nil {
return nil
}
......@@ -92,8 +90,8 @@ func (o *optimizer) UpdateParameter(g Gradient) error {
}
func (o *optimizer) Cleanup() {
if unsafe.Pointer(o.opt) != nullPtr {
if unsafe.Pointer(o.opt) != nil {
C.paddle_release_optimizer(o.opt)
o.opt = (*C.struct_paddle_optimizer)(nullPtr)
o.opt = (*C.struct_paddle_optimizer)(nil)
}
}
......@@ -211,7 +211,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
// learning optimization methods are stochastic in
// nature. This race condition is allowed deliberately
// to save the program from making a copy of the
// paramter content.
// parameter content.
parameter.Name = name
parameter.ElementType = opt.elementType
parameter.Content = opt.GetWeights()
......@@ -219,7 +219,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
}
// pserver save checkpoint
func (s *Service) doCheckpoint() error {
func (s *Service) doCheckpoint() (err error) {
<-s.initialized
s.mu.Lock()
defer s.mu.Unlock()
......@@ -237,9 +237,9 @@ func (s *Service) doCheckpoint() error {
}
var buf bytes.Buffer
encoder := gob.NewEncoder(&buf)
err := encoder.Encode(cp)
err = encoder.Encode(cp)
if err != nil {
return err
return
}
cpMeta := checkpointMeta{}
......@@ -248,10 +248,14 @@ func (s *Service) doCheckpoint() error {
h := md5.New()
cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes()))
cpMetajson, _ := json.Marshal(cpMeta)
cpMetajson, err := json.Marshal(cpMeta)
if err != nil {
return
}
err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second)
if err != nil {
return err
return
}
if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) {
log.Info("checkpoint does not exists.")
......@@ -264,15 +268,32 @@ func (s *Service) doCheckpoint() error {
}
}
f, err := os.Create(cpMeta.UUID)
defer f.Close()
if err != nil {
return err
return
}
defer func() {
closeErr := f.Close()
if closeErr != nil {
if err != nil {
log.Errorln(closeErr)
} else {
// Set closeErr as return value.
err = closeErr
}
}
}()
writer := bufio.NewWriter(f)
_, err = writer.Write(buf.Bytes())
writer.Flush()
if err != nil {
return err
return
}
return nil
err = writer.Flush()
if err != nil {
return
}
return
}
# ddim lib
cc_library(enforce SRCS enforce.cc DEPS glog)
cc_test(enforce_test SRCS enforce_test.cc DEPS enforce)
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_test(tensor_test SRCS tensor_test.cc DEPS ddim glog gflags)
cc_library(tensor SRCS tensor.cc DEPS ddim place enforce paddle_memory)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(variable_test SRCS variable_test.cc)
cc_test(scope_test SRCS scope_test.cc)
cc_library(enforce SRCS enforce.cc DEPS glog gflags)
cc_test(enforce_test SRCS enforce_test.cc DEPS enforce)
proto_library(attr_type SRCS attr_type.proto)
proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
cc_library(operator SRCS operator.cc DEPS op_desc device_context enforce)
cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc enforce)
......
......@@ -117,6 +117,8 @@ int DDim::operator[](int idx) const {
return boost::apply_visitor(DynamicConstIndexer(idx), var);
}
ssize_t DDim::size() const { return arity(*this); }
bool DDim::operator==(DDim d) const {
if (var.which() != d.getVar().which()) {
return false;
......@@ -278,5 +280,9 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
return os;
}
DDim::DDim(std::initializer_list<int> init_list) {
*this = make_ddim(init_list);
}
} // namespace framework
} // namespace paddle
......@@ -29,6 +29,8 @@ struct DDim {
template <int D>
explicit DDim(const Dim<D>& in) : var(in) {}
/*implicit*/ DDim(std::initializer_list<int> init_list);
template <int D>
DDim& operator=(const Dim<D>& in) {
var = in;
......@@ -57,6 +59,8 @@ struct DDim {
DDim operator+(DDim d) const;
DDim operator*(DDim d) const;
ssize_t size() const;
};
/**
......
......@@ -49,6 +49,7 @@ TEST(DDim, Equality) {
// arity of a DDim
EXPECT_EQ(paddle::framework::arity(ddim), 3);
EXPECT_EQ(ddim.size(), 3);
// product of a DDim
EXPECT_EQ(paddle::framework::product(vddim), 45);
......
#pragma once
#include <algorithm>
#include <atomic>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
......@@ -197,6 +198,8 @@ Add a mark to which output is temporary is helpful for future optimization.
class OpRegistry {
using OpCreator = std::function<OperatorBase*()>;
using VarIndexMap = std::unordered_map<std::string, int>;
using VarNameList = std::vector<std::string>;
public:
template <typename OpType, typename ProtoMakerType>
......@@ -211,24 +214,64 @@ class OpRegistry {
op_proto.IsInitialized(),
"Fail to initialize %s's OpProto, because %s is not initialized",
op_type, op_proto.InitializationErrorString());
VarIndexMaps()[op_type].reset(new VarIndexMap());
auto& varmap = *VarIndexMaps()[op_type];
int idx = 0;
for (auto& var : op_proto.inputs()) {
varmap[var.name()] = idx++;
}
idx = 0;
for (auto& var : op_proto.outputs()) {
varmap[var.name()] = idx++;
}
}
static OperatorPtr CreateOp(const std::string& type,
const VarNameList& inputs,
const VarNameList& outputs,
const AttributeMap& attrs) {
auto op_create_it = creators().find(type);
PADDLE_ENFORCE(op_create_it != creators().end(),
"Operator %s cannot be found", type);
auto op = op_create_it->second();
op->type_ = type;
op->inputs_ = inputs;
op->outputs_ = outputs;
op->attrs_ = attrs;
op_checkers().at(type).Check(op->attrs_);
GenerateTempVariableName(op);
{
auto var_index_it = VarIndexMaps().find(type);
if (var_index_it != VarIndexMaps().end()) {
op->in_out_idxs_ = var_index_it->second;
}
}
op->Init();
return OperatorPtr(op);
}
static OperatorPtr CreateOp(const OpDesc& op_desc) {
std::string op_type = op_desc.type();
OperatorPtr op(creators().at(op_type)());
op->type_ = op_desc.type();
op->inputs_.reserve((size_t)op_desc.inputs_size());
std::vector<std::string> inputs;
inputs.reserve((size_t)op_desc.inputs_size());
std::copy(op_desc.inputs().begin(), op_desc.inputs().end(),
std::back_inserter(op->inputs_));
op->outputs_.reserve((size_t)op_desc.outputs_size());
std::back_inserter(inputs));
std::vector<std::string> outputs;
outputs.reserve((size_t)op_desc.outputs_size());
std::copy(op_desc.outputs().begin(), op_desc.outputs().end(),
std::back_inserter(op->outputs_));
std::back_inserter(outputs));
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
attrs[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
}
op_checkers().at(op_type).Check(op->attrs_);
op->Init();
return op;
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
static std::unordered_map<std::string, OpProto>& protos() {
......@@ -237,6 +280,23 @@ class OpRegistry {
};
private:
static std::unordered_map<std::string, std::shared_ptr<VarIndexMap>>&
VarIndexMaps() {
static std::unordered_map<std::string, std::shared_ptr<VarIndexMap>> maps_;
return maps_;
}
static void GenerateTempVariableName(OperatorBase* op) {
static std::atomic<size_t> gUniqId(0UL);
for (auto& outname : op->outputs_) {
if (outname == OperatorBase::TMP_VAR_NAME()) {
outname += op->type_;
outname += "@";
outname += std::to_string(gUniqId.fetch_add(1));
}
}
}
static std::unordered_map<std::string, OpCreator>& creators() {
static std::unordered_map<std::string, OpCreator> creators_;
return creators_;
......@@ -278,7 +338,7 @@ class OpRegisterHelper {
/**
* Macro to Register OperatorKernel.
*/
#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, KernelType) \
#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op_kernel_##type##_##DEVICE_TYPE##__, \
"REGISTER_OP_KERNEL must be in global namespace"); \
......@@ -287,17 +347,19 @@ class OpRegisterHelper {
::paddle::framework::OperatorWithKernel::OpKernelKey key; \
key.place_ = PlaceType(); \
::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \
.reset(new KernelType()); \
.reset(new __VA_ARGS__()); \
} \
}; \
static __op_kernel_register__##type##__ __reg_kernel_##type##__; \
int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; }
#define REGISTER_OP_GPU_KERNEL(type, KernelType) \
REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, KernelType)
// (type, KernelType)
#define REGISTER_OP_GPU_KERNEL(type, ...) \
REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__)
#define REGISTER_OP_CPU_KERNEL(type, KernelType) \
REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, KernelType)
// (type, KernelType)
#define REGISTER_OP_CPU_KERNEL(type, ...) \
REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
/**
* Macro to mark what Operator and Kernel we will use and tell the compiler to
......
......@@ -12,30 +12,76 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/framework/operator.h"
namespace paddle {
namespace framework {
const std::string& OperatorBase::Input(const std::string& name) const {
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
if (attrs_.count("input_format") == 0) {
return inputs_[it->second];
} else {
const auto& input_format = GetAttr<std::vector<int>>("input_format");
int idx = input_format[it->second];
return inputs_.at(idx);
}
}
std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
auto input_format = GetAttr<std::vector<int>>("input_format");
auto offset = in_out_idxs_->at(name);
return std::vector<std::string>{
inputs_.begin() + input_format.at(offset),
inputs_.begin() + input_format.at(offset + 1)};
}
const std::string& OperatorBase::Output(const std::string& name) const {
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
if (attrs_.count("output_format") == 0) {
return outputs_[it->second];
} else {
const auto& output_format = GetAttr<std::vector<int>>("output_format");
int idx = output_format[it->second];
return outputs_.at(idx);
}
}
std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
auto output_format = GetAttr<std::vector<int>>("output_format");
auto offset = in_out_idxs_->at(name);
return std::vector<std::string>{
outputs_.begin() + output_format.at(offset),
outputs_.begin() + output_format.at(offset + 1)};
}
std::string OperatorBase::DebugString() const {
std::stringstream ss;
ss << "=================\n";
ss << "type = " << type_ << "\n";
ss << "inputs = [";
for (auto& ipt : inputs_) {
ss << ipt << ", ";
}
ss << "]\n";
ss << "outputs = [";
for (auto& opt : outputs_) {
ss << opt << ", ";
ss << "Op(" << type_ << "), inputs:(";
for (size_t i = 0; i < inputs_.size(); ++i) {
ss << inputs_[i];
if (i != inputs_.size() - 1) {
ss << ", ";
}
}
ss << "]\n";
ss << "attr_keys = [";
for (auto& attr : attrs_) {
ss << attr.first << ", ";
ss << "), outputs:(";
for (size_t i = 0; i < outputs_.size(); ++i) {
ss << outputs_[i];
if (i != outputs_.size() - 1) {
ss << ", ";
}
}
ss << "]\n";
ss << ").";
return ss.str();
}
......
......@@ -14,18 +14,20 @@ limitations under the License. */
#pragma once
#include <paddle/framework/attr_checker.h>
#include <paddle/framework/op_desc.pb.h>
#include <paddle/framework/scope.h>
#include <paddle/framework/tensor.h>
#include <paddle/platform/device_context.h>
#include <paddle/platform/place.h>
#include <paddle/utils/Error.h>
#include <boost/variant.hpp>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/framework/attr_checker.h"
#include "paddle/framework/op_desc.pb.h"
#include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/place.h"
#include "paddle/utils/Error.h"
namespace paddle {
namespace framework {
......@@ -39,6 +41,13 @@ using OperatorPtr = std::shared_ptr<OperatorBase>;
*/
class OperatorBase {
public:
/// If a variable is a empty variable, that name will be used.
static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
/// If a variable is a temporary variable, that name will be set in Python,
/// but it will be convert to a unique name in scope after OpCreator.
static std::string TMP_VAR_NAME() { return "@TEMP@"; }
virtual ~OperatorBase() {}
template <typename T>
......@@ -62,11 +71,69 @@ class OperatorBase {
virtual void Run(const ScopePtr& scope,
const platform::DeviceContext& dev_ctx) const = 0;
// Get a input with argument's name described in `op_proto`
const std::string& Input(const std::string& name) const;
// Get a input which has multiple variables.
// TODO add a vector_view to prevent memory copy.
std::vector<std::string> Inputs(const std::string& name) const;
// Get a output with argument's name described in `op_proto`
const std::string& Output(const std::string& name) const;
// Get an output which has multiple variables.
// TODO add a vector_view to prevent memory copy.
std::vector<std::string> Outputs(const std::string& name) const;
public:
std::string type_;
std::vector<std::string> inputs_;
std::vector<std::string> outputs_;
AttributeMap attrs_;
// store the arguments' offset described in op_desc.
std::shared_ptr<std::unordered_map<std::string, int>> in_out_idxs_;
};
class KernelContext {
public:
KernelContext(const OperatorBase* op, const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& device_context)
: op_(*op), scope_(scope), device_context_(device_context) {}
const Variable* Input(int index) const {
return scope_->GetVariable(op_.inputs_[index]);
}
Variable* Output(int index) const {
return scope_->GetVariable(op_.outputs_[index]);
}
const Variable* Input(const std::string& name) const {
return scope_->GetVariable(op_.Input(name));
}
const Variable* Output(const std::string& name) const {
return scope_->GetVariable(op_.Output(name));
}
const std::vector<const Variable*> Inputs(const std::string& name) const {
auto names = op_.Inputs(name);
std::vector<const Variable*> res;
std::transform(
names.begin(), names.end(), res.begin(),
[this](const std::string& name) { return scope_->GetVariable(name); });
return res;
}
const std::vector<const Variable*> Outputs(const std::string& name) const {
auto names = op_.Outputs(name);
std::vector<const Variable*> res;
std::transform(
names.begin(), names.end(), res.begin(),
[this](const std::string& name) { return scope_->GetVariable(name); });
return res;
}
const OperatorBase& op_;
const std::shared_ptr<Scope>& scope_;
const platform::DeviceContext& device_context_;
};
class OpKernel {
......@@ -77,25 +144,6 @@ class OpKernel {
* device resource such as CUDA stream, cublas handle, etc. from
* KernelContext. User should construct it before run the Operator.
*/
class KernelContext {
public:
KernelContext(const OperatorBase* op, const ScopePtr& scope,
const platform::DeviceContext& device_context)
: op_(*op), scope_(scope), device_context_(device_context) {}
const Variable* Input(int index) const {
return scope_->GetVariable(op_.inputs_[index]);
}
Variable* Output(int index) const {
return scope_->GetVariable(op_.outputs_[index]);
}
const OperatorBase& op_;
const ScopePtr& scope_;
const platform::DeviceContext& device_context_;
};
virtual void Compute(const KernelContext& context) const = 0;
virtual ~OpKernel() {}
......@@ -140,7 +188,7 @@ class OperatorWithKernel : public OperatorBase {
void Run(const ScopePtr& scope,
const platform::DeviceContext& dev_ctx) const final {
auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx));
opKernel->Compute(OpKernel::KernelContext(this, scope, dev_ctx));
opKernel->Compute(KernelContext(this, scope, dev_ctx));
}
static std::unordered_map<std::string /* op_type */, OpKernelMap>&
......@@ -148,6 +196,7 @@ class OperatorWithKernel : public OperatorBase {
static std::unordered_map<std::string, OpKernelMap> g_all_op_kernels;
return g_all_op_kernels;
}
void InferShape(const std::shared_ptr<Scope>& scope) const final {
std::vector<const Tensor*> ins;
VarNamesToTensors(scope, inputs_, &ins);
......
......@@ -30,7 +30,6 @@ class OpWithoutKernelTest : public OperatorBase {
op_run_num++;
ASSERT_EQ((int)inputs_.size(), 1);
ASSERT_EQ((int)outputs_.size(), 1);
ASSERT_NEAR(GetAttr<float>("scale"), 3.14, 1e-5);
ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
ASSERT_EQ(x, 1);
ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);
......@@ -86,9 +85,11 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of test op");
AddOutput("output", "output of test op");
AddAttr<float>("scale", "scale of cosine op");
AddInput("x", "input of test op");
AddOutput("y", "output of test op");
AddAttr<float>("scale", "scale of cosine op")
.SetDefault(1.0)
.LargerThan(0.0);
AddComment("This is test op");
}
};
......@@ -101,13 +102,68 @@ class OpWithKernelTest : public OperatorWithKernel {
const std::vector<Tensor*>& outputs) const override {}
};
template <typename T1, typename T2>
class CPUKernelTest : public OpKernel {
public:
void Compute(const KernelContext& context) const {
void Compute(const KernelContext& ctx) const {
std::cout << "this is cpu kernel" << std::endl;
std::cout << ctx.op_.DebugString() << std::endl;
cpu_kernel_run_num++;
ASSERT_EQ((int)context.op_.inputs_.size(), 1);
ASSERT_EQ((int)context.op_.outputs_.size(), 1);
ASSERT_NEAR(context.op_.GetAttr<float>("scale"), 3.14, 1e-5);
ASSERT_EQ(ctx.op_.Input("x"), "IN1");
ASSERT_EQ(ctx.op_.Output("y"), "OUT1");
}
};
// multiple inputs test
class OperatorMultiInputsTest : public OperatorBase {
public:
void Init() override { x = 1; }
void InferShape(const std::shared_ptr<Scope>& scope) const override {}
void Run(const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& dev_ctx) const override {
ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
ASSERT_EQ(x, 1);
ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);
ASSERT_EQ(Input("x"), "IN1");
ASSERT_EQ(Input("y"), "OUT1");
}
public:
float x = 0;
};
class OpKernelTestMultiInputsProtoAndCheckerMaker
: public OpProtoAndCheckerMaker {
public:
OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto,
OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInputs("xs", "inputs of test op");
AddInput("k", "input of test op");
AddOutputs("ys", "outputs of test op");
AddAttr<float>("scale", "scale of cosine op")
.SetDefault(1.0)
.LargerThan(0.0);
AddComment("This is test op");
}
};
class CPUKernalMultiInputsTest : public OpKernel {
public:
void Compute(const KernelContext& ctx) const {
auto xs = ctx.op_.Inputs("xs");
ASSERT_EQ(xs.size(), 3UL);
ASSERT_EQ(xs[0], "x0");
ASSERT_EQ(xs[1], "x1");
ASSERT_EQ(xs[2], "x2");
auto k = ctx.op_.Input("k");
ASSERT_EQ(k, "k0");
auto ys = ctx.op_.Outputs("ys");
ASSERT_EQ(ys.size(), 2UL);
ASSERT_EQ(ys[0], "y0");
ASSERT_EQ(ys[1], "y1");
}
};
......@@ -116,8 +172,10 @@ class CPUKernelTest : public OpKernel {
REGISTER_OP(op_with_kernel, paddle::framework::OpWithKernelTest,
paddle::framework::OpKernelTestProtoAndCheckerMaker);
REGISTER_OP_CPU_KERNEL(op_with_kernel, paddle::framework::CPUKernelTest);
REGISTER_OP_CPU_KERNEL(op_with_kernel,
paddle::framework::CPUKernelTest<float, float>);
// test with single input
TEST(OpKernel, all) {
paddle::framework::OpDesc op_desc;
op_desc.set_type("op_with_kernel");
......@@ -137,3 +195,47 @@ TEST(OpKernel, all) {
op->Run(scope, cpu_device_context);
ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 1);
}
REGISTER_OP(op_multi_inputs_with_kernel, paddle::framework::OpWithKernelTest,
paddle::framework::OpKernelTestMultiInputsProtoAndCheckerMaker);
REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
paddle::framework::CPUKernalMultiInputsTest);
// test with multi inputs
TEST(OpKernel, multi_inputs) {
using namespace paddle::framework;
OpDesc op_desc;
op_desc.set_type("op_multi_inputs_with_kernel");
*op_desc.mutable_inputs()->Add() = "x0";
*op_desc.mutable_inputs()->Add() = "x1";
*op_desc.mutable_inputs()->Add() = "x2";
*op_desc.mutable_inputs()->Add() = "k0";
*op_desc.mutable_outputs()->Add() = "y0";
*op_desc.mutable_outputs()->Add() = "y1";
auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(3.14);
auto attr0 = op_desc.mutable_attrs()->Add();
attr0->set_name("input_format");
attr0->set_type(paddle::framework::AttrType::INTS);
auto input_format = attr0->mutable_ints();
input_format->Add(0); // x0
input_format->Add(3); // k
input_format->Add(4); // end
auto attr1 = op_desc.mutable_attrs()->Add();
attr1->set_name("output_format");
attr1->set_type(paddle::framework::AttrType::INTS);
auto output_format = attr1->mutable_ints();
output_format->Add(0); // y0
output_format->Add(2); // y1
paddle::platform::CPUDeviceContext cpu_device_context;
auto scope = std::make_shared<Scope>();
OperatorPtr op(paddle::framework::OpRegistry::CreateOp(op_desc));
op->Run(scope, cpu_device_context);
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/tensor.h>
namespace paddle {
namespace framework {}
} // namespace paddle
......@@ -17,19 +17,24 @@ limitations under the License. */
#include <cstdint>
#include <cstring>
#include <memory>
#include <typeindex>
#include "paddle/framework/ddim.h"
#include "paddle/framework/enforce.h"
#include "paddle/memory/memory.h"
#include "paddle/platform/place.h"
namespace paddle {
namespace pybind {
namespace details { // forward declare
template <bool less, size_t i, typename... args>
struct CastToPyBufferImpl;
} // namespace details
} // namespace pybind
namespace framework {
class Tensor {
public:
Tensor() : numel_(0), offset_(0) {}
Tensor& operator=(const Tensor& src) = delete;
Tensor() : offset_(0) {}
template <typename T>
const T* data() const {
......@@ -39,21 +44,33 @@ class Tensor {
}
template <typename T>
T* mutable_data(DDim dims, paddle::platform::Place place) {
T* mutable_data(DDim dims, platform::Place place) {
set_dims(dims);
return mutable_data<T>(place);
}
template <typename T>
T* mutable_data(paddle::platform::Place place) {
PADDLE_ENFORCE(numel_ > 0,
"Tensor::numel_ must be larger than zero to call "
T* mutable_data(platform::Place place) {
PADDLE_ENFORCE(product(dims_) > 0,
"Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
if (holder_ == nullptr ||
!(holder_->place() ==
place) /* some versions of boost::variant don't have operator!= */
|| holder_->size() < numel_ * sizeof(T) + offset_) {
holder_.reset(new PlaceholderImpl<T>(place, numel_ * sizeof(T)));
|| holder_->size() < product(dims_) * sizeof(T) + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
} else if (platform::is_gpu_place(place)) {
#ifdef __CUDACC__
holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
#else
PADDLE_ENFORCE(true, "'GPUPlace' is not supported in CPU only device.");
#endif
} else {
PADDLE_ENFORCE(true, "Unknown 'place'.");
}
offset_ = 0;
}
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
......@@ -69,12 +86,12 @@ class Tensor {
}
template <typename T>
void CopyFrom(const Tensor& src, paddle::platform::Place dst_place) {
void CopyFrom(const Tensor& src, platform::Place dst_place) {
PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) &&
platform::is_cpu_place(dst_place),
"Tensor::CopyFrom only support CPU now.");
src.CheckDims<T>();
size_t size = src.numel_ * sizeof(T);
size_t size = product(src.dims_) * sizeof(T);
set_dims(src.dims());
const void* src_ptr = static_cast<const void*>(src.data<T>());
void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
......@@ -108,7 +125,6 @@ class Tensor {
return;
}
dims_ = dims;
numel_ = product(dims_);
}
DDim dims() const { return dims_; }
......@@ -119,53 +135,55 @@ class Tensor {
struct Placeholder {
virtual ~Placeholder() {}
virtual void* ptr() const = 0;
virtual paddle::platform::Place place() const = 0;
virtual platform::Place place() const = 0;
virtual size_t size() const = 0;
virtual std::type_index type() const = 0;
};
template <typename T>
template <typename T, typename PlaceType>
struct PlaceholderImpl : public Placeholder {
private:
template <typename PType>
class Deleter {
public:
Deleter(platform::Place place) : place_(place) {}
void operator()(T* ptr) {
paddle::memory::Free(place_, static_cast<void*>(ptr));
}
Deleter(PType place) : place_(place) {}
void operator()(T* ptr) { memory::Free(place_, static_cast<void*>(ptr)); }
private:
paddle::platform::Place place_;
PType place_;
};
public:
PlaceholderImpl(paddle::platform::Place place, size_t size)
: ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)),
Deleter(place)),
PlaceholderImpl(PlaceType place, size_t size)
: ptr_(static_cast<T*>(memory::Alloc(place, size)),
Deleter<PlaceType>(place)),
place_(place),
size_(size) {}
virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
virtual size_t size() const { return size_; }
virtual paddle::platform::Place place() const { return place_; }
virtual std::type_index type() const { return std::type_index(typeid(T)); }
std::unique_ptr<T, Deleter> ptr_;
paddle::platform::Place place_; // record the place of ptr_.
size_t size_; // size of the memory block.
std::unique_ptr<T, Deleter<PlaceType>> ptr_;
platform::Place place_; // record the place of ptr_.
size_t size_; // size of the memory block.
};
template <typename T>
inline void CheckDims() const {
PADDLE_ENFORCE(holder_ != nullptr,
"Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_,
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.");
}
std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated.
DDim dims_;
size_t numel_; // cache of `product(dims_)`
size_t offset_; // marks the begin of tensor data area.
template <bool less, size_t i, typename... args>
friend struct paddle::pybind::details::CastToPyBufferImpl;
};
} // namespace framework
......
......@@ -47,7 +47,7 @@ TEST(Tensor, DataAssert) {
/* following tests are not available at present
because Memory::Alloc() and Memory::Free() have not been ready.
*/
TEST(Tensor, MutableData) {
using namespace paddle::framework;
using namespace paddle::platform;
......@@ -72,7 +72,7 @@ TEST(Tensor, MutableData) {
p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), CPUPlace());
EXPECT_EQ(p1, p2);
}
#ifdef __CUDACC__
{
Tensor src_tensor;
float* p1 = nullptr;
......@@ -94,6 +94,7 @@ TEST(Tensor, MutableData) {
p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), GPUPlace());
EXPECT_EQ(p1, p2);
}
#endif
}
TEST(Tensor, ShareDataFrom) {
......@@ -108,9 +109,11 @@ TEST(Tensor, ShareDataFrom) {
dst_tensor.ShareDataFrom<float>(src_tensor);
} catch (EnforceNotMet err) {
caught = true;
std::string msg = "Tenosr holds no memory. Call Tensor::mutable_data
first."; const char* what = err.what(); for (size_t i = 0; i < msg.length();
++i) { ASSERT_EQ(what[i], msg[i]);
std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first.";
const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
}
ASSERT_TRUE(caught);
......@@ -120,6 +123,7 @@ first."; const char* what = err.what(); for (size_t i = 0; i < msg.length();
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
}
#ifdef __CUDACC__
{
Tensor src_tensor;
Tensor dst_tensor;
......@@ -127,6 +131,7 @@ first."; const char* what = err.what(); for (size_t i = 0; i < msg.length();
dst_tensor.ShareDataFrom<int>(src_tensor);
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
}
#endif
}
TEST(Tensor, Slice) {
......@@ -155,6 +160,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
}
#ifdef __CUDACC__
{
Tensor src_tensor;
src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());
......@@ -176,6 +182,7 @@ TEST(Tensor, Slice) {
EXPECT_EQ(slice_data_address, slice_mutable_data_address);
EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address);
}
#endif
}
TEST(Tensor, CopyFrom) {
......@@ -203,4 +210,3 @@ TEST(Tensor, CopyFrom) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
}
*/
\ No newline at end of file
......@@ -11,7 +11,6 @@ if(WITH_GPU)
endif()
if(USE_NNPACK)
include(nnpack/nnpack.cmake)
list(APPEND cpp_files nnpack/NNPACKConvOp.cpp)
if(WITH_TESTING)
add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp)
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/function/ConvOp.h"
DEFINE_bool(nnpack_allocate_outside,
false,
true,
"Allocate and free workspace memory outside the NNPACK interface.");
DEFINE_int32(nnpack_num_threads,
0,
......@@ -58,18 +58,10 @@ public:
workspaceBuffer_ = nullptr;
workspaceSize_ = 0;
threadpool_ = nullptr;
if (FLAGS_nnpack_num_threads) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
create_nnpack_threadpool();
}
~NNPACKConvFunction() {
if (threadpool_) {
pthreadpool_destroy(threadpool_);
}
if (workspaceBuffer_) {
free(workspaceBuffer_);
}
......@@ -225,14 +217,25 @@ public:
}
}
static void create_nnpack_threadpool() {
if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
}
private:
nnp_convolution_algorithm algorithm_;
nnp_convolution_transform_strategy transform_strategy_;
void* workspaceBuffer_;
size_t workspaceSize_;
pthreadpool_t threadpool_;
static pthreadpool_t threadpool_;
};
template <DeviceType Device>
pthreadpool_t NNPACKConvFunction<Device>::threadpool_ = nullptr;
REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction);
} // namespace paddle
if(WITH_GPU)
nv_library(add_op SRCS add_op.cc add_op.cu DEPS operator op_registry glog ddim)
else()
cc_library(add_op SRCS add_op.cc DEPS operator op_registry glog ddim)
endif()
function(op_library TARGET)
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
# for ops.
set(cc_srcs)
set(cu_srcs)
set(op_common_deps operator op_registry)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(op_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
foreach(src ${op_library_SRCS})
if (${src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${src})
elseif(${src} MATCHES ".*\\.cc$")
list(APPEND cc_srcs ${src})
else()
message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu")
endif()
endforeach()
list(LENGTH cc_srcs cc_srcs_len)
if (${cc_srcs_len} EQUAL 0)
message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file")
endif()
list(LENGTH cu_srcs cu_srcs_len)
if (${cu_srcs_len} EQUAL 0)
message(WARNING "The op library ${TARGET} not support GPU!")
endif()
if (WITH_GPU)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
else()
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
endif()
endfunction()
op_library(add_op SRCS add_op.cc add_op.cu)
cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
op_library(mul_op SRCS mul_op.cc mul_op.cu)
op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
......@@ -31,8 +31,7 @@ protected:
"Inputs/Outputs of AddOp must all be set");
PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
"Two input of Add Op's dimension must be same.");
// Need set dims in Tensor
// outputs[0]->set_dims(inputs[0]->dims())
outputs[0]->set_dims(inputs[0]->dims());
}
};
......
......@@ -8,10 +8,10 @@ namespace operators {
template <typename Place>
class AddKernel : public framework::OpKernel {
public:
void Compute(const KernelContext &context) const override {
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Add kernel in " << typeid(Place).name();
}
};
} // namespace op
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/framework/tensor.h>
#include <paddle/operators/mul_op.h>
namespace paddle {
namespace operators {
class MulOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs");
auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims();
PADDLE_ENFORCE(dim0.size() == 2 && dim1.size() == 2,
"The input of mul op must be matrix");
PADDLE_ENFORCE(
dim0[1] == dim1[0],
"First matrix's width must be equal with second matrix's height.");
PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output");
outputs[0]->set_dims({dim0[0], dim1[1]});
}
};
class MulOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of mul op");
AddInput("Y", "The second input of mul op");
AddOutput("Out", "The output of mul op");
AddComment(R"DOC(
Two Element Mul Operator.
The equation is: Out = X * Y
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
REGISTER_OP_CPU_KERNEL(
mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/operators/mul_op.h>
#include <paddle/framework/op_registry.h>
REGISTER_OP_GPU_KERNEL(mul,
paddle::operators::MulKernel<paddle::platform
::GPUPlace>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class MulKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Mul kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/rowwise_add_op.h>
namespace paddle {
namespace operators {
class RowWiseAddOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs is needed by rowwise add");
auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims();
PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix");
PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector");
PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same");
PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RowWiseAddOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The left input of row-wise add op, must be matrix");
AddInput("b", "The right input of row-wise add op, must be vector");
AddOutput("Out", "The output of row-wise add op");
AddComment(R"DOC(Row-wise Add operator
for i in xrange(X.shape[0]):
Out = X[i] + b
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(rowwise_add,
paddle::operators::RowWiseAddOp,
paddle::operators::RowWiseAddOpMaker);
REGISTER_OP_CPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
#include <paddle/framework/op_registry.h>
#include <paddle/operators/rowwise_add_op.h>
REGISTER_OP_GPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class RowWiseAddKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/sigmoid_op.h>
namespace paddle {
namespace operators {
class SigmoidOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input");
PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SigmoidOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "sigmoid input");
AddInput("Y", "sigmoid output");
AddComment("Sigmoid function");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(sigmoid,
paddle::operators::SigmoidOp,
paddle::operators::SigmoidOpMaker);
REGISTER_OP_CPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
#include <paddle/operators/sigmoid_op.h>
#include <paddle/framework/op_registry.h>
REGISTER_OP_GPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class SigmoidKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/softmax_op.h>
namespace paddle {
namespace operators {
class SoftmaxOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftmaxOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "input of softmax");
AddOutput("Y", "output of softmax");
AddComment("Softmax Op");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
#include <paddle/framework/op_registry.h>
#include <paddle/operators/softmax_op.h>
REGISTER_OP_GPU_KERNEL(
softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class SoftmaxKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python add_op)
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python
add_op mul_op rowwise_add_op sigmoid_op softmax_op)
......@@ -15,6 +15,8 @@ limitations under the License. */
#include <Python.h>
#include <paddle/framework/op_registry.h>
#include <paddle/framework/scope.h>
#include <paddle/pybind/tensor_bind.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <fstream>
......@@ -24,10 +26,35 @@ namespace py = pybind11;
namespace pd = paddle::framework;
USE_OP(add_two);
USE_OP(softmax);
USE_OP(mul);
USE_OP(rowwise_add);
USE_OP(sigmoid);
PYBIND11_PLUGIN(core) {
py::module m("core", "C++ core of Paddle Paddle");
py::class_<pd::Tensor>(m, "Tensor", py::buffer_protocol())
.def_buffer([](pd::Tensor& self) -> py::buffer_info {
return paddle::pybind::CastToPyBuffer(self);
})
.def("get_dims",
[](const pd::Tensor& self) { return pd::vectorize(self.dims()); })
.def("set_dims",
[](pd::Tensor& self, const std::vector<int>& dim) {
self.set_dims(pd::make_ddim(dim));
})
.def("alloc_float",
[](pd::Tensor& self) {
self.mutable_data<float>(paddle::platform::CPUPlace());
})
.def("alloc_int",
[](pd::Tensor& self) {
self.mutable_data<int>(paddle::platform::CPUPlace());
})
.def("set", paddle::pybind::PyTensorSetFromArray<float>)
.def("set", paddle::pybind::PyTensorSetFromArray<int>);
py::class_<pd::Variable>(m, "Variable", R"DOC(Variable Class.
All parameter, weight, gradient are variables in Paddle.
......@@ -38,7 +65,12 @@ All parameter, weight, gradient are variables in Paddle.
*var.GetMutable<int>() = val;
})
.def("get_int",
[](const pd::Variable& var) -> int { return var.Get<int>(); });
[](const pd::Variable& var) -> int { return var.Get<int>(); })
.def("get_tensor",
[](pd::Variable& self) -> pd::Tensor* {
return self.GetMutable<pd::Tensor>();
},
py::return_value_policy::reference);
py::class_<pd::Scope, std::shared_ptr<pd::Scope>>(m, "Scope")
.def(py::init<const std::shared_ptr<pd::Scope>&>())
......@@ -63,6 +95,23 @@ All parameter, weight, gradient are variables in Paddle.
}
return ret_values;
});
m.def_submodule(
"var_names",
"The module will return special predefined variable name in Paddle")
.def("empty", pd::OperatorBase::EMPTY_VAR_NAME)
.def("temp", pd::OperatorBase::TMP_VAR_NAME);
py::class_<pd::OperatorBase, pd::OperatorPtr>(m, "Operator")
.def("__str__", &pd::OperatorBase::DebugString)
.def_static("create", [](const std::string& protobin) {
pd::OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(),
"User OpDesc is not initialized, reason %s",
desc.InitializationErrorString());
return pd::OpRegistry::CreateOp(desc);
});
return m.ptr();
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <paddle/framework/tensor.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
namespace py = pybind11;
namespace paddle {
namespace pybind {
namespace details {
template <bool less, size_t I, typename... ARGS>
struct CastToPyBufferImpl;
template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<false, I, ARGS...> {
py::buffer_info operator()(framework::Tensor &tensor) {
PADDLE_THROW("This type of tensor cannot be expose to Python");
return py::buffer_info();
}
};
template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<true, I, ARGS...> {
using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
py::buffer_info operator()(framework::Tensor &tensor) {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(tensor.holder_->place()),
"Only CPU tensor can cast to numpy array");
if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) {
auto dim_vec = framework::vectorize(tensor.dims());
std::vector<size_t> dims_outside;
std::vector<size_t> strides;
dims_outside.resize(dim_vec.size());
strides.resize(dim_vec.size());
size_t prod = 1;
for (size_t i = dim_vec.size(); i != 0; --i) {
dims_outside[i - 1] = (size_t)dim_vec[i - 1];
strides[i - 1] = sizeof(CUR_TYPE) * prod;
prod *= dims_outside[i - 1];
}
return py::buffer_info(
tensor.mutable_data<CUR_TYPE>(tensor.holder_->place()),
sizeof(CUR_TYPE),
py::format_descriptor<CUR_TYPE>::format(),
(size_t)framework::arity(tensor.dims()),
dims_outside,
strides);
} else {
constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
}
}
};
} // namespace details
inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
auto buffer_info = details::CastToPyBufferImpl<true, 0, float, int>()(tensor);
return buffer_info;
}
template <typename T>
void PyTensorSetFromArray(
framework::Tensor &self,
py::array_t<T, py::array::c_style | py::array::forcecast> array) {
std::vector<int> dims;
dims.reserve(array.ndim());
for (size_t i = 0; i < array.ndim(); ++i) {
dims.push_back((int)array.shape()[i]);
}
self.set_dims(framework::make_ddim(dims));
auto *dst = self.mutable_data<T>(paddle::platform::CPUPlace());
std::memcpy(dst, array.data(), sizeof(T) * array.size());
}
} // namespace pybind
} // namespace paddle
......@@ -2,9 +2,9 @@
set -xe
mkdir -p /paddle/build
cd /paddle/build
rm -f /paddle/install 2>/dev/null || true
mkdir -p /paddle/build_android
cd /paddle/build_android
rm -rf /paddle/install 2>/dev/null || true
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
-DANDROID_ABI=armeabi-v7a \
......@@ -21,6 +21,3 @@ cmake -DCMAKE_SYSTEM_NAME=Android \
..
make -j `nproc`
make install
export PATH=/paddle/install/bin:/paddle/install/opt/paddle/bin:$PATH
paddle version
......@@ -13,6 +13,11 @@ export PATH=/usr/bin:$PATH
pre-commit install
clang-format --version
# set up go environment for running gometalinter
mkdir -p $GOPATH/src/github.com/PaddlePaddle/
ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle
cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd -
if ! pre-commit run -a ; then
git diff --exit-code
fi
......
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2
import cStringIO
def get_all_op_protos():
"""
Get all registered op proto from Paddle C++
:return: list of OpProto
"""
protostrs = core.get_all_op_protos()
ret_values = []
for pbstr in protostrs:
op_proto = op_proto_pb2.OpProto.FromString(str(pbstr))
ret_values.append(op_proto)
return ret_values
class OpDescCreationMethod(object):
"""
A Functor object to convert user input(use key word args) to OpDesc based on
OpProto.
:param op_proto: The OpProto object.
:type op_proto: op_proto_pb2.OpProto
"""
def __init__(self, op_proto):
if not isinstance(op_proto, op_proto_pb2.OpProto):
raise TypeError("Argument should be OpProto")
self.__op_proto__ = op_proto
def __call__(self, *args, **kwargs):
"""
Convert user input to OpDesc. Only key-word args are supported.
:return: OpDesc based on user input
:rtype: op_desc_pb2.OpDesc
"""
if len(args) != 0:
raise ValueError("Only keyword arguments is supported by Paddle")
op_desc = op_desc_pb2.OpDesc()
# Inputs
ipts, ipt_format, _ = OpDescCreationMethod.extract_input_or_output(
"input", kwargs, self.__op_proto__.inputs)
op_desc.inputs.extend(ipts)
if ipt_format is not None:
op_desc.attrs.extend([ipt_format])
# Outputs
outs, out_format, tmp_index = OpDescCreationMethod.extract_input_or_output(
"output", kwargs, self.__op_proto__.outputs)
op_desc.outputs.extend(outs)
if out_format is not None:
op_desc.attrs.extend([out_format])
if len(tmp_index) != 0:
tmp_index_attr = op_desc.attrs.add()
tmp_index_attr.type = attr_type_pb2.INTS
tmp_index_attr.name = "temporary_index"
tmp_index_attr.ints.extend(tmp_index)
# Types
op_desc.type = self.__op_proto__.type
# Attrs
for attr in self.__op_proto__.attrs:
if attr.generated:
continue
user_defined_attr = kwargs.get(attr.name, None)
if user_defined_attr is not None:
new_attr = op_desc.attrs.add()
new_attr.name = attr.name
new_attr.type = attr.type
if attr.type == attr_type_pb2.INT:
new_attr.i = user_defined_attr
elif attr.type == attr_type_pb2.FLOAT:
new_attr.f = user_defined_attr
elif attr.type == attr_type_pb2.STRING:
new_attr.s = user_defined_attr
elif attr.type == attr_type_pb2.INTS:
new_attr.ints.extend(user_defined_attr)
elif attr.type == attr_type_pb2.FLOATS:
new_attr.floats.extend(user_defined_attr)
elif attr.type == attr_type_pb2.STRINGS:
new_attr.strings.extend(user_defined_attr)
else:
raise NotImplementedError("Not support attribute type " +
attr.type)
return op_desc
@staticmethod
def extract_input_or_output(in_out, kwargs, meta):
"""
Extract input variable names or output variable names from key-word
arguments, which base on VarProtos.
:param in_out: "input" or "output"
:param kwargs: key-word arguments that user inputted.
:param meta: a list of VarProto
:return: The three object will be return. The variable names. The
input_format or output_format attribute(None if the input or output is
not multiple). The temporary variable index list.
"""
multiple = OpDescCreationMethod.any_is_true((m.multiple for m in meta))
tmp_index = []
retv = []
if multiple:
var_format = op_desc_pb2.AttrDesc()
var_format.type = attr_type_pb2.INTS
var_format.name = "%s_format" % in_out
var_format.ints.append(0)
for var in meta:
var_name = var.name
if var.temporary:
var_name = [core.var_names.temp()]
tmp_index.append(len(retv))
else:
var_name = kwargs.get(var_name, [])
if not isinstance(var_name, list):
var_name = [var_name]
retv.extend(var_name)
var_format.ints.append(len(var_name) + var_format.ints[-1])
return retv, var_format, tmp_index
else:
for var in meta:
if var.temporary:
retv.append(kwargs.get(var.name, core.var_names.temp()))
tmp_index.append(len(retv))
else:
retv.append(kwargs.get(var.name, core.var_names.empty()))
return retv, None, tmp_index
@staticmethod
def any_is_true(generator):
"""
Reduce a bool array to one. If any of them is True, then return True.
"""
for flag in generator:
if flag:
return True
return False
def get_docstring_from_op_proto(op_proto):
"""
Generate docstring from a OpProto
:param op_proto: a OpProto instance.
:type op_proto: op_proto_pb2.OpProto
:return: docstring
"""
if not isinstance(op_proto, op_proto_pb2.OpProto):
raise TypeError("Input must be OpProto")
f = cStringIO.StringIO()
f.write(op_proto.comment)
f.write("\n")
def __append_param__(name, comment, type):
# Maybe replace the following line with template engine is better.
f.write(":param ")
f.write(name)
f.write(": ")
f.write(comment)
f.write("\n")
f.write(":type ")
f.write(name)
f.write(": ")
f.write(type)
f.write("\n")
for ipt in op_proto.inputs:
__append_param__(ipt.name, ipt.comment, "list | basestr"
if ipt.multiple else "basestr")
temp_var_prefix = \
"This is a temporary variable. It does not have to set by user. "
for opt in op_proto.outputs:
__append_param__(opt.name, opt.comment if not opt.temporary else
temp_var_prefix + opt.comment, "list | basestr"
if opt.multiple else "basestr")
for attr in op_proto.attrs:
attr_type = None
if attr.type == attr_type_pb2.INT:
attr_type = "int"
elif attr.type == attr_type_pb2.FLOAT:
attr_type = "float"
elif attr.type == attr_type_pb2.STRING:
attr_type = "basestr"
elif attr.type == attr_type_pb2.INTS:
attr_type = "list of int"
elif attr.type == attr_type_pb2.FLOATS:
attr_type = "list of float"
elif attr.type == attr_type_pb2.STRINGS:
attr_type = "list of basestr"
if attr_type is None:
raise RuntimeError("Not supported attribute type " + attr.type)
__append_param__(attr.name, attr.comment, attr_type)
return f.getvalue()
def create_op_creation_method(op_proto):
"""
Generate op creation method for an OpProto
"""
method = OpDescCreationMethod(op_proto)
def __impl__(*args, **kwargs):
opdesc = method(*args, **kwargs)
return core.Operator.create(opdesc.SerializeToString())
__impl__.__doc__ = get_docstring_from_op_proto(op_proto)
return __impl__
class OpCreationsHolder(object):
"""
A object will holds all op creation methods.
Use `op_creations.xxx_op` to access them.
"""
pass
op_creations = OpCreationsHolder()
def __bootstrap__():
"""
Bootstrap function for this module. It will dynamic create all op creation
methods in runtime.
"""
for op_proto in get_all_op_protos():
func = create_op_creation_method(op_proto)
func.__name__ = str(op_proto.type)
setattr(op_creations, func.__name__, func)
__bootstrap__()
add_python_test(test_framework test_protobuf.py test_scope.py
test_default_scope_funcs.py test_op_creation_methods.py)
test_default_scope_funcs.py test_op_creation_methods.py
test_tensor.py)
import unittest
import paddle.v2.framework.create_op_creation_methods as creation
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2
class TestOpCreationsMethods(unittest.TestCase):
def test_all_protos(self):
class TestGetAllProtos(unittest.TestCase):
def test_all(self):
all_protos = creation.get_all_op_protos()
self.assertNotEqual(0, len(all_protos))
......@@ -11,5 +15,240 @@ class TestOpCreationsMethods(unittest.TestCase):
self.assertTrue(each.IsInitialized())
class TestOpDescCreationMethod(unittest.TestCase):
def test_plain_input_output(self):
op = op_proto_pb2.OpProto()
op.type = "test"
ipt = op.inputs.add()
ipt.name = "X"
ipt.comment = "not matter"
ipt = op.inputs.add()
ipt.name = "Y"
ipt.comment = "not matter"
opt = op.outputs.add()
opt.name = "Z"
opt.comment = "not matter"
op.comment = "not matter"
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
output = method(X="a", Y="b", Z="c")
expected = op_desc_pb2.OpDesc()
expected.type = "test"
expected.inputs.extend(["a", "b"])
expected.outputs.append("c")
self.assertEqual(expected, output)
def test_multiple_input_plain_output(self):
op = op_proto_pb2.OpProto()
op.type = "fc"
ipt = op.inputs.add()
ipt.name = "X"
ipt.comment = ""
ipt.multiple = True
ipt = op.inputs.add()
ipt.name = "W"
ipt.comment = ""
ipt.multiple = True
ipt = op.inputs.add()
ipt.name = "b"
ipt.comment = ""
out = op.outputs.add()
out.name = "Y"
out.comment = ""
op.comment = ""
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
generated1 = method(X="x", W="w", b="b", Y="y")
expected1 = op_desc_pb2.OpDesc()
expected1.inputs.extend(['x', 'w', 'b'])
expected1.outputs.extend(['y'])
expected1.type = 'fc'
attr = expected1.attrs.add()
attr.name = 'input_format'
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 1, 2, 3])
self.assertEqual(expected1, generated1)
generated2 = method(
X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y')
expected2 = op_desc_pb2.OpDesc()
expected2.inputs.extend(['x1', 'x2', 'x3', 'w1', 'w2', 'w3', 'b'])
expected2.outputs.extend(['y'])
expected2.type = 'fc'
attr = expected2.attrs.add()
attr.name = 'input_format'
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 3, 6, 7])
self.assertEqual(expected2, generated2)
def test_attrs(self):
op = op_proto_pb2.OpProto()
op.type = "test"
ipt = op.inputs.add()
ipt.name = 'X'
ipt.comment = ""
def __add_attr__(name, type):
attr = op.attrs.add()
attr.name = name
attr.comment = ""
attr.type = type
__add_attr__("int_attr", attr_type_pb2.INT)
__add_attr__("float_attr", attr_type_pb2.FLOAT)
__add_attr__("string_attr", attr_type_pb2.STRING)
__add_attr__("ints_attr", attr_type_pb2.INTS)
__add_attr__("floats_attr", attr_type_pb2.FLOATS)
__add_attr__("strings_attr", attr_type_pb2.STRINGS)
op.comment = ""
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
generated = method(
X="a",
int_attr=10,
float_attr=3.2,
string_attr="test_str",
ints_attr=[0, 1, 2, 3, 4],
floats_attr=[0.2, 3.2, 4.5],
strings_attr=["a", "b", "c"])
expected = op_desc_pb2.OpDesc()
expected.type = "test"
expected.inputs.extend(['a'])
attr = expected.attrs.add()
attr.name = "int_attr"
attr.type = attr_type_pb2.INT
attr.i = 10
attr = expected.attrs.add()
attr.name = "float_attr"
attr.type = attr_type_pb2.FLOAT
attr.f = 3.2
attr = expected.attrs.add()
attr.name = "string_attr"
attr.type = attr_type_pb2.STRING
attr.s = "test_str"
attr = expected.attrs.add()
attr.name = "ints_attr"
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 1, 2, 3, 4])
attr = expected.attrs.add()
attr.name = 'floats_attr'
attr.type = attr_type_pb2.FLOATS
attr.floats.extend([0.2, 3.2, 4.5])
attr = expected.attrs.add()
attr.name = 'strings_attr'
attr.type = attr_type_pb2.STRINGS
attr.strings.extend(['a', 'b', 'c'])
self.assertEqual(expected, generated)
def test_input_temporary_output(self):
op = op_proto_pb2.OpProto()
op.type = "test"
out = op.outputs.add()
out.name = "OUT"
out.comment = ""
out = op.outputs.add()
out.name = "TMP"
out.comment = ""
out.temporary = True
out = op.outputs.add()
out.name = "OUT2"
out.comment = ""
op.comment = ""
method = creation.OpDescCreationMethod(op)
generated = method(OUT="a", OUT2="b")
desc = op_desc_pb2.OpDesc()
desc.outputs.extend(["a", core.var_names.temp(), "b"])
desc.type = "test"
attr = desc.attrs.add()
attr.name = "temporary_index"
attr.type = attr_type_pb2.INTS
attr.ints.append(2)
self.assertEqual(generated, desc)
class TestOpCreationDocStr(unittest.TestCase):
def test_all(self):
op = op_proto_pb2.OpProto()
op.type = "test"
op.comment = """Test Op.
This op is used for unit test, not a real op.
"""
a = op.inputs.add()
a.name = "a"
a.comment = "Input a for test op"
a.multiple = True
b = op.inputs.add()
b.name = "b"
b.comment = "Input b for test op"
self.assertTrue(op.IsInitialized())
o1 = op.outputs.add()
o1.name = "output"
o1.comment = "The output of test op"
o2 = op.outputs.add()
o2.name = "temp output"
o2.comment = "The temporary output of test op"
o2.temporary = True
test_str = op.attrs.add()
test_str.name = "str_attr"
test_str.type = attr_type_pb2.STRING
test_str.comment = "A string attribute for test op"
actual = creation.get_docstring_from_op_proto(op)
expected_docstring = '''Test Op.
This op is used for unit test, not a real op.
:param a: Input a for test op
:type a: list | basestr
:param b: Input b for test op
:type b: basestr
:param output: The output of test op
:type output: basestr
:param temp output: This is a temporary variable. It does not have to set by user. The temporary output of test op
:type temp output: basestr
:param str_attr: A string attribute for test op
:type str_attr: basestr
'''
self.assertEqual(expected_docstring, actual)
class TestOpCreations(unittest.TestCase):
def test_all(self):
add_op = creation.op_creations.add_two(X="a", Y="b", Out="z")
self.assertIsNotNone(add_op)
# Invoke C++ DebugString()
self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).',
str(add_op))
if __name__ == "__main__":
unittest.main()
import paddle.v2.framework.core as core
import unittest
import numpy
class TestScope(unittest.TestCase):
def test_int_tensor(self):
scope = core.Scope(None)
var = scope.create_var("test_tensor")
tensor = var.get_tensor()
tensor.set_dims([1000, 784])
tensor.alloc_int()
tensor_array = numpy.array(tensor)
self.assertEqual((1000, 784), tensor_array.shape)
tensor_array[3, 9] = 1
tensor_array[19, 11] = 2
tensor.set(tensor_array)
tensor_array_2 = numpy.array(tensor)
self.assertEqual(1.0, tensor_array_2[3, 9])
self.assertEqual(2.0, tensor_array_2[19, 11])
def test_float_tensor(self):
scope = core.Scope(None)
var = scope.create_var("test_tensor")
tensor = var.get_tensor()
tensor.set_dims([1000, 784])
tensor.alloc_float()
tensor_array = numpy.array(tensor)
self.assertEqual((1000, 784), tensor_array.shape)
tensor_array[3, 9] = 1.0
tensor_array[19, 11] = 2.0
tensor.set(tensor_array)
tensor_array_2 = numpy.array(tensor)
self.assertAlmostEqual(1.0, tensor_array_2[3, 9])
self.assertAlmostEqual(2.0, tensor_array_2[19, 11])
if __name__ == '__main__':
unittest.main()
import py_paddle.swig_paddle as swig_api
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.trainer_config_helpers.optimizers as v1_optimizers
"""
......@@ -17,6 +16,7 @@ __all__ = [
class Optimizer(object):
def __init__(self, **kwargs):
import py_paddle.swig_paddle as swig_api
if 'batch_size' in kwargs:
del kwargs['batch_size'] # not important for python library.
......@@ -35,18 +35,22 @@ class Optimizer(object):
For each optimizer(SGD, Adam), GradientMachine should enable different
buffers.
"""
import py_paddle.swig_paddle as swig_api
tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__)
assert isinstance(tmp, swig_api.ParameterOptimizer)
return tmp.getParameterTypes()
def __create_local_updater__(self):
import py_paddle.swig_paddle as swig_api
return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__)
def __create_remote_updater__(self, pass_num, use_sparse_updater):
import py_paddle.swig_paddle as swig_api
return swig_api.ParameterUpdater.createRemoteUpdater(
self.__opt_conf__, pass_num, use_sparse_updater)
def __create_new_remote_updater__(self, pserver_spec, use_etcd):
import py_paddle.swig_paddle as swig_api
return swig_api.ParameterUpdater.createNewRemoteUpdater(
self.__opt_conf__, pserver_spec, use_etcd)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册