提交 8539222a 编写于 作者: Y Yu Yang 提交者: GitHub

Merge branch 'develop' into feature/is_in_gpu

......@@ -21,10 +21,10 @@
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
hooks:
- id: clang-formater
- repo: https://github.com/dnephin/pre-commit-golang
sha: e4693a4c282b4fc878eda172a929f7a6508e7d16
- repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: 16398aeccf263adaf53b2495eed0406347d76281
hooks:
- id: go-fmt
files: (.*\.go)
- id: go-lint
files: (.*\.go)
types: [go]
- id: gometalinter
types: [go]
......@@ -41,6 +41,8 @@ before_install:
- pip install rarfile
- curl https://glide.sh/get | bash
- eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
- go get -u github.com/alecthomas/gometalinter
- gometalinter --install
- |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script:
......
......@@ -137,7 +137,8 @@ if(WITH_GPU)
endif(WITH_GPU)
if(USE_NNPACK)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt")
include(external/nnpack)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
endif(USE_NNPACK)
add_subdirectory(proto)
......
......@@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)
if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
set(NNPACK_FOUND ON)
INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})
set(NNPACK_LIBS)
list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
if (NNPACK_UKERNELS_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
endif()
if (NNPACK_CPUFEATURES_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
endif()
if(NOT ANDROID)
list(APPEND NNPACK_LIBS "rt")
endif()
else()
message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
endif()
......@@ -23,7 +23,6 @@ import (
log "github.com/sirupsen/logrus"
)
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex
var handleMap = make(map[C.paddle_master_client]*master.Client)
var curHandle C.paddle_master_client
......@@ -114,13 +113,13 @@ func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int {
if err != nil {
// Error
// TODO: return the type of error?
*record = (*C.uchar)(nullPtr)
*record = (*C.uchar)(nil)
return -1
}
if len(r) == 0 {
// Empty record
*record = (*C.uchar)(nullPtr)
*record = (*C.uchar)(nil)
return 0
}
......
......@@ -69,7 +69,10 @@ func (c *Client) getRecords() {
// We treat a task as finished whenever the last data
// instance of the task is read. This is not exactly
// correct, but a reasonable approximation.
c.taskFinished(t.Meta.ID)
err = c.taskFinished(t.Meta.ID)
if err != nil {
log.Errorln(err)
}
}
}
......
......@@ -66,11 +66,21 @@ func TestGetFinishTask(t *testing.T) {
for i := 0; i < totalTask*chunkPerTask; i++ {
w := recordio.NewWriter(f, -1, -1)
w.Write(nil)
_, err = w.Write(nil)
if err != nil {
panic(err)
}
// call Close to force RecordIO writing a chunk.
w.Close()
err = w.Close()
if err != nil {
panic(err)
}
}
err = f.Close()
if err != nil {
panic(err)
}
f.Close()
// Manually intialize client to avoid calling c.getRecords()
c := &Client{}
......@@ -79,7 +89,11 @@ func TestGetFinishTask(t *testing.T) {
ch := make(chan string, 1)
ch <- addr
go c.monitorMaster(ch)
c.SetDataset([]string{path})
err = c.SetDataset([]string{path})
if err != nil {
panic(err)
}
checkOnePass := func(i int) {
var tasks []Task
for idx := 0; idx < totalTask; idx++ {
......
......@@ -57,14 +57,30 @@ func TestNextRecord(t *testing.T) {
w := recordio.NewWriter(f, -1, -1)
for i := 0; i < total; i++ {
w.Write([]byte{byte(i)})
_, err = w.Write([]byte{byte(i)})
if err != nil {
panic(err)
}
}
err = w.Close()
if err != nil {
panic(err)
}
err = f.Close()
if err != nil {
panic(err)
}
w.Close()
f.Close()
curAddr := make(chan string, 1)
curAddr <- fmt.Sprintf(":%d", p)
c := master.NewClient(curAddr, 10)
c.SetDataset([]string{path})
err = c.SetDataset([]string{path})
if err != nil {
panic(err)
}
for pass := 0; pass < 50; pass++ {
received := make(map[byte]bool)
for i := 0; i < total; i++ {
......
......@@ -30,7 +30,7 @@ type EtcdClient struct {
// NewEtcdClient creates a new EtcdClient.
func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) {
log.Debugf("Connecting to etcd at %v", endpoints)
// TODO(helin): gracefully shutdown etcd store. Becuase etcd
// TODO(helin): gracefully shutdown etcd store. Because etcd
// store holds a etcd lock, even though the lock will expire
// when the lease timeout, we need to implement graceful
// shutdown to release the lock.
......@@ -60,7 +60,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
}
log.Debugf("Successfully acquired lock at %s.", lockPath)
put := clientv3.OpPut(addrPath, string(addr))
put := clientv3.OpPut(addrPath, addr)
resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit()
if err != nil {
return nil, err
......
......@@ -4,7 +4,7 @@ import "sync"
// InMemStore is an in memory implementation of Store interface.
//
// It does not tolerate the fault that casues the program to crash.
// It does not tolerate the fault that causes the program to crash.
type InMemStore struct {
mu sync.Mutex
buf []byte
......
......@@ -160,7 +160,7 @@ func (s *Service) recover() (bool, error) {
// snapshot *must* be called with s.mu being held.
func (s *Service) snapshot() error {
// TOOD(helin): etcd request has a size limit, so the snapshot
// TODO(helin): etcd request has a size limit, so the snapshot
// size is limited by the max request size. We should either
// divide the snapshot into smaller chunks and save under
// different keys, or configure the request size to be big
......@@ -289,7 +289,6 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) {
log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure)
s.taskQueues.Todo = append(s.taskQueues.Todo, t)
return
}
func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
......
......@@ -34,7 +34,6 @@ import (
log "github.com/sirupsen/logrus"
)
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex
var handleMap = make(map[C.paddle_pserver_client]*client.Client)
var curHandle C.paddle_pserver_client
......@@ -63,7 +62,7 @@ func remove(client C.paddle_pserver_client) *client.Client {
}
func cArrayToSlice(p unsafe.Pointer, len int) []byte {
if p == nullPtr {
if p == nil {
return nil
}
......@@ -101,11 +100,11 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli
}
//export paddle_new_etcd_pserver_client
func paddle_new_etcd_pserver_client(etcd_endpoints *C.char, selected int) C.paddle_pserver_client {
func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client {
// TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters)
addr := C.GoString(etcd_endpoints)
etcd_client := client.NewEtcd(addr)
c := client.NewClient(etcd_client, etcd_client.Desired(), selector(selected != 0))
addr := C.GoString(etcdEndpoints)
etcdClient := client.NewEtcd(addr)
c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0))
return add(c)
}
......@@ -124,20 +123,20 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int {
}
//export paddle_init_param
func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int {
func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, paramConfig unsafe.Pointer, configLen C.int) C.int {
et := pserver.ElementType(param.element_type)
name := C.GoString(param.name)
content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len))
pc := pserver.ParameterWithConfig{
Param: pserver.Parameter{Name: name, ElementType: et, Content: content},
Config: cArrayToSlice(param_config, int(config_len)),
Config: cArrayToSlice(paramConfig, int(configLen)),
}
c := get(client)
err := c.InitParam(pc)
if err != nil {
if err.Error() == pserver.AlreadyInitialized {
log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.", name)
log.Warningf("parameter %s already initialized, treat paddle_init_param as successful.", name)
return C.PSERVER_OK
}
log.Errorln(err)
......@@ -153,7 +152,7 @@ func paddle_finish_init_params(client C.paddle_pserver_client) C.int {
err := c.FinishInitParams()
if err != nil {
if err.Error() == pserver.AlreadyInitialized {
log.Warningln("parameters already initialized, treat paddle_finish_init_params as sucessful.")
log.Warningln("parameters already initialized, treat paddle_finish_init_params as successful.")
return C.PSERVER_OK
}
......@@ -223,12 +222,12 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
p := ps[i]
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
if unsafe.Pointer(param) == nullPtr {
if unsafe.Pointer(param) == nil {
log.Errorln("must pre-allocate parameter.")
return C.PSERVER_ERROR
}
if unsafe.Pointer(param.content) != nullPtr {
if unsafe.Pointer(param.content) != nil {
if int(param.content_len) != len(p.Content) {
log.Errorf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content))
return C.PSERVER_ERROR
......
......@@ -233,7 +233,7 @@ func (c *Client) Save(path string) error {
func strHash(s string) uint32 {
h := fnv.New32a()
h.Write([]byte(s))
_, _ = h.Write([]byte(s))
return h.Sum32()
}
......
......@@ -79,15 +79,33 @@ func initEtcdClient() {
log.Errorf("err %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
client.Delete(ctx, pserver.PsDesired)
client.Delete(ctx, pserver.PsPath)
client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver))
_, err = client.Delete(ctx, pserver.PsDesired)
if err != nil {
panic(err)
}
_, err = client.Delete(ctx, pserver.PsPath)
if err != nil {
panic(err)
}
_, err = client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver))
if err != nil {
panic(err)
}
ports := initClient()
for i := 0; i < numPserver; i++ {
client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i]))
_, err = client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i]))
if err != nil {
panic(err)
}
}
cancel()
client.Close()
err = client.Close()
if err != nil {
panic(err)
}
}
type selector bool
......
......@@ -12,8 +12,7 @@ import (
)
const (
// DefaultEtcdTimeout is the default etcd timeout
DefaultEtcdTimeout time.Duration = 5 * time.Second
defaultEtcdTimeout time.Duration = 5 * time.Second
)
// EtcdClient is used by pserver client that is a part of trainer process.
......@@ -48,7 +47,7 @@ func (p *EtcdClient) Desired() int {
psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil {
log.Errorf("psDesired %s invalid %v", psDesired, err)
log.Errorf("psDesired %d invalid %v", psDesired, err)
time.Sleep(p.timeout)
continue
}
......@@ -67,12 +66,12 @@ func (p *EtcdClient) List() []Server {
for {
for i := 0; i < psDesired; i++ {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
cancel()
psKey := pserver.PsPath + strconv.Itoa(i)
log.Debugf("checking %s", psKey)
resp, err := p.client.Get(ctx, psKey)
cancel()
if err != nil {
log.Infof("Get psKey=%s error, %v", psKey, err)
log.Infof("Get psKey= %s error, %v", psKey, err)
time.Sleep(p.timeout)
continue
}
......@@ -107,11 +106,11 @@ func NewEtcd(endpoints string) *EtcdClient {
for {
cli, err = clientv3.New(clientv3.Config{
Endpoints: ep,
DialTimeout: DefaultEtcdTimeout,
DialTimeout: defaultEtcdTimeout,
})
if err != nil {
log.Errorf("Init etcd connection failed: %v", err)
time.Sleep(DefaultEtcdTimeout)
time.Sleep(defaultEtcdTimeout)
continue
}
break
......@@ -119,7 +118,7 @@ func NewEtcd(endpoints string) *EtcdClient {
log.Infof("Connected to etcd: %s\n", endpoints)
client := &EtcdClient{
client: cli,
timeout: DefaultEtcdTimeout,
timeout: defaultEtcdTimeout,
endpoints: ep,
}
return client
......
......@@ -177,10 +177,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
break
}
}
if registered == true {
if registered {
return nil
}
return errors.New("not registerd, may due to already have enough pservers")
return errors.New("not registered, may due to already have enough pservers")
}, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads))
if err != nil {
......@@ -211,8 +211,5 @@ func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) err
ctx, cancel := context.WithTimeout(context.Background(), timeout)
_, err := e.etcdClient.Put(ctx, key, string(value))
cancel()
if err != nil {
return err
}
return nil
}
......@@ -14,8 +14,6 @@ import (
log "github.com/sirupsen/logrus"
)
var nullPtr = unsafe.Pointer(uintptr(0))
type optimizer struct {
opt *C.struct_paddle_optimizer
elementType ElementType
......@@ -23,7 +21,7 @@ type optimizer struct {
}
func cArrayToSlice(p unsafe.Pointer, len int) []byte {
if p == nullPtr {
if p == nil {
return nil
}
......@@ -92,8 +90,8 @@ func (o *optimizer) UpdateParameter(g Gradient) error {
}
func (o *optimizer) Cleanup() {
if unsafe.Pointer(o.opt) != nullPtr {
if unsafe.Pointer(o.opt) != nil {
C.paddle_release_optimizer(o.opt)
o.opt = (*C.struct_paddle_optimizer)(nullPtr)
o.opt = (*C.struct_paddle_optimizer)(nil)
}
}
......@@ -211,7 +211,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
// learning optimization methods are stochastic in
// nature. This race condition is allowed deliberately
// to save the program from making a copy of the
// paramter content.
// parameter content.
parameter.Name = name
parameter.ElementType = opt.elementType
parameter.Content = opt.GetWeights()
......@@ -219,7 +219,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
}
// pserver save checkpoint
func (s *Service) doCheckpoint() error {
func (s *Service) doCheckpoint() (err error) {
<-s.initialized
s.mu.Lock()
defer s.mu.Unlock()
......@@ -237,9 +237,9 @@ func (s *Service) doCheckpoint() error {
}
var buf bytes.Buffer
encoder := gob.NewEncoder(&buf)
err := encoder.Encode(cp)
err = encoder.Encode(cp)
if err != nil {
return err
return
}
cpMeta := checkpointMeta{}
......@@ -248,10 +248,14 @@ func (s *Service) doCheckpoint() error {
h := md5.New()
cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes()))
cpMetajson, _ := json.Marshal(cpMeta)
cpMetajson, err := json.Marshal(cpMeta)
if err != nil {
return
}
err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second)
if err != nil {
return err
return
}
if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) {
log.Info("checkpoint does not exists.")
......@@ -264,15 +268,32 @@ func (s *Service) doCheckpoint() error {
}
}
f, err := os.Create(cpMeta.UUID)
defer f.Close()
if err != nil {
return err
return
}
defer func() {
closeErr := f.Close()
if closeErr != nil {
if err != nil {
log.Errorln(closeErr)
} else {
// Set closeErr as return value.
err = closeErr
}
}
}()
writer := bufio.NewWriter(f)
_, err = writer.Write(buf.Bytes())
writer.Flush()
if err != nil {
return err
return
}
return nil
err = writer.Flush()
if err != nil {
return
}
return
}
......@@ -117,6 +117,8 @@ int DDim::operator[](int idx) const {
return boost::apply_visitor(DynamicConstIndexer(idx), var);
}
ssize_t DDim::size() const { return arity(*this); }
bool DDim::operator==(DDim d) const {
if (var.which() != d.getVar().which()) {
return false;
......@@ -278,5 +280,9 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
return os;
}
DDim::DDim(std::initializer_list<int> init_list) {
*this = make_ddim(init_list);
}
} // namespace framework
} // namespace paddle
......@@ -29,6 +29,8 @@ struct DDim {
template <int D>
explicit DDim(const Dim<D>& in) : var(in) {}
/*implicit*/ DDim(std::initializer_list<int> init_list);
template <int D>
DDim& operator=(const Dim<D>& in) {
var = in;
......@@ -57,6 +59,8 @@ struct DDim {
DDim operator+(DDim d) const;
DDim operator*(DDim d) const;
ssize_t size() const;
};
/**
......
......@@ -49,6 +49,7 @@ TEST(DDim, Equality) {
// arity of a DDim
EXPECT_EQ(paddle::framework::arity(ddim), 3);
EXPECT_EQ(ddim.size(), 3);
// product of a DDim
EXPECT_EQ(paddle::framework::product(vddim), 45);
......
......@@ -198,6 +198,7 @@ Add a mark to which output is temporary is helpful for future optimization.
class OpRegistry {
using OpCreator = std::function<OperatorBase*()>;
using VarIndexMap = std::unordered_map<std::string, int>;
public:
template <typename OpType, typename ProtoMakerType>
......@@ -212,6 +213,17 @@ class OpRegistry {
op_proto.IsInitialized(),
"Fail to initialize %s's OpProto, because %s is not initialized",
op_type, op_proto.InitializationErrorString());
VarIndexMaps()[op_type].reset(new VarIndexMap());
auto& varmap = *VarIndexMaps()[op_type];
int idx = 0;
for (auto& var : op_proto.inputs()) {
varmap[var.name()] = idx++;
}
idx = 0;
for (auto& var : op_proto.outputs()) {
varmap[var.name()] = idx++;
}
}
static OperatorPtr CreateOp(const OpDesc& op_desc) {
......@@ -220,7 +232,6 @@ class OpRegistry {
OperatorPtr op(creators().at(op_type)());
//! Fill op's data member. Not use constructor because it will be noising
//! for Op developer.
const OpProto& op_proto = protos().at(op_type);
op->type_ = op_desc.type();
// set op's inputs_ from desc.
op->inputs_.reserve((size_t)op_desc.inputs_size());
......@@ -240,25 +251,31 @@ class OpRegistry {
//! Convert Temporary variable name to an unique variable name.
GenerateTempVariableName(op.get());
// set argument offsets stored in op.
CreateInOutOffsetMap(op, op_proto);
//! set argument offsets stored in op.
{
auto var_index_it = VarIndexMaps().find(op_type);
if (var_index_it != VarIndexMaps().end()) {
op->in_out_idxs_ = var_index_it->second;
}
}
//! Other op's custom Init for a complex Op. For simple Op, the Init
//! method do nothing.
op->Init();
return op;
}
// init op.in_out_idxs_ to accelerate argument's offset lookup.
static void CreateInOutOffsetMap(OperatorPtr op, const OpProto& proto) {
op->CreateInOutOffsetMap(proto);
}
static std::unordered_map<std::string, OpProto>& protos() {
static std::unordered_map<std::string, OpProto> protos_;
return protos_;
};
private:
static std::unordered_map<std::string, std::shared_ptr<VarIndexMap>>&
VarIndexMaps() {
static std::unordered_map<std::string, std::shared_ptr<VarIndexMap>> maps_;
return maps_;
}
static void GenerateTempVariableName(OperatorBase* op) {
static std::atomic<size_t> gUniqId(0UL);
for (auto& outname : op->outputs_) {
......@@ -311,7 +328,7 @@ class OpRegisterHelper {
/**
* Macro to Register OperatorKernel.
*/
#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, KernelType) \
#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op_kernel_##type##_##DEVICE_TYPE##__, \
"REGISTER_OP_KERNEL must be in global namespace"); \
......@@ -320,17 +337,19 @@ class OpRegisterHelper {
::paddle::framework::OperatorWithKernel::OpKernelKey key; \
key.place_ = PlaceType(); \
::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \
.reset(new KernelType()); \
.reset(new __VA_ARGS__()); \
} \
}; \
static __op_kernel_register__##type##__ __reg_kernel_##type##__; \
int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; }
#define REGISTER_OP_GPU_KERNEL(type, KernelType) \
REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, KernelType)
// (type, KernelType)
#define REGISTER_OP_GPU_KERNEL(type, ...) \
REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__)
#define REGISTER_OP_CPU_KERNEL(type, KernelType) \
REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, KernelType)
// (type, KernelType)
#define REGISTER_OP_CPU_KERNEL(type, ...) \
REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
/**
* Macro to mark what Operator and Kernel we will use and tell the compiler to
......
......@@ -19,21 +19,10 @@ limitations under the License. */
namespace paddle {
namespace framework {
void OperatorBase::CreateInOutOffsetMap(const OpProto& proto) {
PADDLE_ENFORCE(in_out_idxs_.empty(), "duplicate call CreateInOutOffsetMap");
for (int i = 0; i < proto.inputs_size(); i++) {
const auto& name = proto.inputs()[i].name();
in_out_idxs_[name] = i;
}
for (int i = 0; i < proto.outputs_size(); i++) {
const auto& name = proto.outputs()[i].name();
in_out_idxs_[name] = i;
}
}
const std::string& OperatorBase::Input(const std::string& name) const {
auto it = in_out_idxs_.find(name);
PADDLE_ENFORCE(it != in_out_idxs_.end(), "no key [%s] in in_out_idxs_", name);
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
if (attrs_.count("input_format") == 0) {
return inputs_[it->second];
......@@ -46,7 +35,7 @@ const std::string& OperatorBase::Input(const std::string& name) const {
std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
auto input_format = GetAttr<std::vector<int>>("input_format");
auto offset = in_out_idxs_.at(name);
auto offset = in_out_idxs_->at(name);
return std::vector<std::string>{
inputs_.begin() + input_format.at(offset),
......@@ -54,8 +43,9 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
}
const std::string& OperatorBase::Output(const std::string& name) const {
auto it = in_out_idxs_.find(name);
PADDLE_ENFORCE(it != in_out_idxs_.end(), "no key [%s] in in_out_idxs_", name);
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
if (attrs_.count("output_format") == 0) {
return outputs_[it->second];
......@@ -68,7 +58,7 @@ const std::string& OperatorBase::Output(const std::string& name) const {
std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
auto output_format = GetAttr<std::vector<int>>("output_format");
auto offset = in_out_idxs_.at(name);
auto offset = in_out_idxs_->at(name);
return std::vector<std::string>{
outputs_.begin() + output_format.at(offset),
......
......@@ -82,16 +82,13 @@ class OperatorBase {
// TODO add a vector_view to prevent memory copy.
std::vector<std::string> Outputs(const std::string& name) const;
// init in_out_idxs_ to accelerate argument's offset lookup.
void CreateInOutOffsetMap(const OpProto& proto);
public:
std::string type_;
std::vector<std::string> inputs_;
std::vector<std::string> outputs_;
AttributeMap attrs_;
// store the arguments' offset described in op_desc.
std::unordered_map<std::string, int> in_out_idxs_;
std::shared_ptr<std::unordered_map<std::string, int>> in_out_idxs_;
};
class KernelContext {
......
......@@ -102,6 +102,7 @@ class OpWithKernelTest : public OperatorWithKernel {
const std::vector<Tensor*>& outputs) const override {}
};
template <typename T1, typename T2>
class CPUKernelTest : public OpKernel {
public:
void Compute(const KernelContext& ctx) const {
......@@ -171,7 +172,8 @@ class CPUKernalMultiInputsTest : public OpKernel {
REGISTER_OP(op_with_kernel, paddle::framework::OpWithKernelTest,
paddle::framework::OpKernelTestProtoAndCheckerMaker);
REGISTER_OP_CPU_KERNEL(op_with_kernel, paddle::framework::CPUKernelTest);
REGISTER_OP_CPU_KERNEL(op_with_kernel,
paddle::framework::CPUKernelTest<float, float>);
// test with single input
TEST(OpKernel, all) {
......
......@@ -34,7 +34,7 @@ namespace framework {
class Tensor {
public:
Tensor() : numel_(0), offset_(0) {}
Tensor() : offset_(0) {}
template <typename T>
const T* data() const {
......@@ -51,30 +51,26 @@ class Tensor {
template <typename T>
T* mutable_data(platform::Place place) {
PADDLE_ENFORCE(numel_ > 0,
"Tensor::numel_ must be larger than zero to call "
PADDLE_ENFORCE(product(dims_) > 0,
"Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
if (holder_ == nullptr ||
!(holder_->place() ==
place) /* some versions of boost::variant don't have operator!= */
|| holder_->size() < numel_ * sizeof(T) + offset_) {
|| holder_->size() < product(dims_) * sizeof(T) + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
} else if (platform::is_gpu_place(place)) {
#ifdef __CUDACC__
switch (place.which()) {
case 0:
holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
boost::get<platform::GPUPlace>(place), numel_ * sizeof(T)));
break;
case 1:
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), numel_ * sizeof(T)));
break;
}
boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
#else
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), numel_ * sizeof(T)));
PADDLE_ENFORCE(true, "'GPUPlace' is not supported in CPU only device.");
#endif
} else {
PADDLE_ENFORCE(true, "Unknown 'place'.");
}
offset_ = 0;
}
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
......@@ -95,7 +91,7 @@ class Tensor {
platform::is_cpu_place(dst_place),
"Tensor::CopyFrom only support CPU now.");
src.CheckDims<T>();
size_t size = src.numel_ * sizeof(T);
size_t size = product(src.dims_) * sizeof(T);
set_dims(src.dims());
const void* src_ptr = static_cast<const void*>(src.data<T>());
void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
......@@ -129,7 +125,6 @@ class Tensor {
return;
}
dims_ = dims;
numel_ = product(dims_);
}
DDim dims() const { return dims_; }
......@@ -179,18 +174,17 @@ class Tensor {
inline void CheckDims() const {
PADDLE_ENFORCE(holder_ != nullptr,
"Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_,
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.");
}
std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated.
DDim dims_;
size_t numel_; // cache of `product(dims_)`
size_t offset_; // marks the begin of tensor data area.
template <bool less, size_t i, typename... args>
friend struct paddle::pybind::details::CastToPyBufferImpl;
}; // namespace framework
};
} // namespace framework
} // namespace paddle
......@@ -11,7 +11,6 @@ if(WITH_GPU)
endif()
if(USE_NNPACK)
include(nnpack/nnpack.cmake)
list(APPEND cpp_files nnpack/NNPACKConvOp.cpp)
if(WITH_TESTING)
add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp)
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/function/ConvOp.h"
DEFINE_bool(nnpack_allocate_outside,
false,
true,
"Allocate and free workspace memory outside the NNPACK interface.");
DEFINE_int32(nnpack_num_threads,
0,
......@@ -58,18 +58,10 @@ public:
workspaceBuffer_ = nullptr;
workspaceSize_ = 0;
threadpool_ = nullptr;
if (FLAGS_nnpack_num_threads) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
create_nnpack_threadpool();
}
~NNPACKConvFunction() {
if (threadpool_) {
pthreadpool_destroy(threadpool_);
}
if (workspaceBuffer_) {
free(workspaceBuffer_);
}
......@@ -225,14 +217,25 @@ public:
}
}
static void create_nnpack_threadpool() {
if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
}
private:
nnp_convolution_algorithm algorithm_;
nnp_convolution_transform_strategy transform_strategy_;
void* workspaceBuffer_;
size_t workspaceSize_;
pthreadpool_t threadpool_;
static pthreadpool_t threadpool_;
};
template <DeviceType Device>
pthreadpool_t NNPACKConvFunction<Device>::threadpool_ = nullptr;
REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction);
} // namespace paddle
......@@ -42,3 +42,8 @@ endfunction()
op_library(add_op SRCS add_op.cc add_op.cu)
cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
op_library(mul_op SRCS mul_op.cc mul_op.cu)
op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
......@@ -31,8 +31,7 @@ protected:
"Inputs/Outputs of AddOp must all be set");
PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
"Two input of Add Op's dimension must be same.");
// Need set dims in Tensor
// outputs[0]->set_dims(inputs[0]->dims())
outputs[0]->set_dims(inputs[0]->dims());
}
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/framework/tensor.h>
#include <paddle/operators/mul_op.h>
namespace paddle {
namespace operators {
class MulOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs");
auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims();
PADDLE_ENFORCE(dim0.size() == 2 && dim1.size() == 2,
"The input of mul op must be matrix");
PADDLE_ENFORCE(
dim0[1] == dim1[0],
"First matrix's width must be equal with second matrix's height.");
PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output");
outputs[0]->set_dims({dim0[0], dim1[1]});
}
};
class MulOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of mul op");
AddInput("Y", "The second input of mul op");
AddOutput("Out", "The output of mul op");
AddComment(R"DOC(
Two Element Mul Operator.
The equation is: Out = X * Y
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
REGISTER_OP_CPU_KERNEL(
mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/operators/mul_op.h>
#include <paddle/framework/op_registry.h>
REGISTER_OP_GPU_KERNEL(mul,
paddle::operators::MulKernel<paddle::platform
::GPUPlace>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class MulKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Mul kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/rowwise_add_op.h>
namespace paddle {
namespace operators {
class RowWiseAddOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs is needed by rowwise add");
auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims();
PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix");
PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector");
PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same");
PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RowWiseAddOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The left input of row-wise add op, must be matrix");
AddInput("b", "The right input of row-wise add op, must be vector");
AddOutput("Out", "The output of row-wise add op");
AddComment(R"DOC(Row-wise Add operator
for i in xrange(X.shape[0]):
Out = X[i] + b
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(rowwise_add,
paddle::operators::RowWiseAddOp,
paddle::operators::RowWiseAddOpMaker);
REGISTER_OP_CPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
#include <paddle/framework/op_registry.h>
#include <paddle/operators/rowwise_add_op.h>
REGISTER_OP_GPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class RowWiseAddKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/sigmoid_op.h>
namespace paddle {
namespace operators {
class SigmoidOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input");
PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SigmoidOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "sigmoid input");
AddInput("Y", "sigmoid output");
AddComment("Sigmoid function");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(sigmoid,
paddle::operators::SigmoidOp,
paddle::operators::SigmoidOpMaker);
REGISTER_OP_CPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
#include <paddle/operators/sigmoid_op.h>
#include <paddle/framework/op_registry.h>
REGISTER_OP_GPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class SigmoidKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/softmax_op.h>
namespace paddle {
namespace operators {
class SoftmaxOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftmaxOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "input of softmax");
AddOutput("Y", "output of softmax");
AddComment("Softmax Op");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
#include <paddle/framework/op_registry.h>
#include <paddle/operators/softmax_op.h>
REGISTER_OP_GPU_KERNEL(
softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class SoftmaxKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python add_op)
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python
add_op mul_op rowwise_add_op sigmoid_op softmax_op)
......@@ -26,6 +26,10 @@ namespace py = pybind11;
namespace pd = paddle::framework;
USE_OP(add_two);
USE_OP(softmax);
USE_OP(mul);
USE_OP(rowwise_add);
USE_OP(sigmoid);
PYBIND11_PLUGIN(core) {
py::module m("core", "C++ core of Paddle Paddle");
......
......@@ -13,6 +13,11 @@ export PATH=/usr/bin:$PATH
pre-commit install
clang-format --version
# set up go environment for running gometalinter
mkdir -p $GOPATH/src/github.com/PaddlePaddle/
ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle
cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd -
if ! pre-commit run -a ; then
git diff --exit-code
fi
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册