Commit dfa46509 authored by fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_add_tensor_copy

@@ -22,3 +22,6 @@ cmake-build-*
 # generated while compiling
 python/paddle/v2/framework/core.so
+CMakeFiles
+cmake_install.cmake
@@ -28,7 +28,9 @@ if(NOT CMAKE_CROSSCOMPILING)
 endif(NOT CMAKE_CROSSCOMPILING)
 find_package(Git REQUIRED)
 find_package(Threads REQUIRED)
-find_package(Boost QUIET)
+if(NOT ANDROID)
+    find_package(Boost QUIET)
+endif()
 include(simd)
@@ -140,6 +142,10 @@ endif(USE_NNPACK)
 add_subdirectory(proto)
+# "add_subdirectory(go)" should be placed after the following line,
+# because it depends on paddle/optimizer.
+add_subdirectory(paddle/optimizer)
 # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
 # placed after this block, because they depend on it.
 if(WITH_GOLANG)
@@ -147,7 +153,9 @@ if(WITH_GOLANG)
 endif(WITH_GOLANG)
 add_subdirectory(paddle)
-add_subdirectory(python)
+if(WITH_PYTHON)
+    add_subdirectory(python)
+endif()
 if(WITH_DOC)
     add_subdirectory(doc)
 endif()
@@ -106,6 +106,9 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0")
         SET(CMAKE_SYSTEM_PROCESSOR armv7-a)
     ENDIF()
 ENDIF()
+IF(ANDROID_ABI STREQUAL "arm64-v8a")
+    SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android)
+ENDIF()
 SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-")
 ENDIF()
@@ -162,6 +165,10 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0")
     ENDIF()
 ENDIF()
+IF(ANDROID_ABI STREQUAL "arm64-v8a")
+    LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a)
+ENDIF()
 STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}")
 STRING(REPLACE ";" " " ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}")
......
@@ -52,6 +52,7 @@ ExternalProject_Add(
 ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
-ADD_DEPENDENCIES(glog extern_glog)
+ADD_DEPENDENCIES(glog extern_glog gflags)
+LINK_LIBRARIES(glog gflags)
 LIST(APPEND external_project_dependencies glog)
@@ -32,7 +32,12 @@ IF(NOT ${CBLAS_FOUND})
     # arm_soft_fp_abi branch of OpenBLAS to support softfp
     # https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi
     SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
-    SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0)
+    IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$")
+        SET(TARGET "ARMV7")
+    ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a")
+        SET(TARGET "ARMV8")
+    ENDIF()
+    SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=${TARGET} ARM_SOFTFP_ABI=1 USE_THREAD=0)
 ELSEIF(RPI)
     # use hardfp
     SET(OPENBLAS_COMMIT "v0.2.19")
......
@@ -90,11 +90,11 @@
 # including binary directory for generated headers.
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
-if(NOT APPLE)
+if(NOT APPLE AND NOT ANDROID)
     find_package(Threads REQUIRED)
     link_libraries(${CMAKE_THREAD_LIBS_INIT})
-    set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -ldl")
+    set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -ldl -lrt")
-endif(NOT APPLE)
+endif(NOT APPLE AND NOT ANDROID)
 function(merge_static_libs TARGET_NAME)
   set(libs ${ARGN})
@@ -301,7 +301,7 @@ function(go_library TARGET_NAME)
   file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
   string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-  # FIXME: link path
   add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
     COMMAND rm "${${TARGET_NAME}_LIB_PATH}"
     # Golang build source code
@@ -309,7 +309,7 @@ function(go_library TARGET_NAME)
     -o "${${TARGET_NAME}_LIB_PATH}"
     "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}"
     # must run under GOPATH
     WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
   add_dependencies(${TARGET_NAME} go_vendor)
 endfunction(go_library)
@@ -320,14 +320,11 @@ function(go_binary TARGET_NAME)
   cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
   string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-  # FIXME: link path
   add_custom_command(OUTPUT ${TARGET_NAME}_timestamp
-    COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH}
-    GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
+    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
     -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
     "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}"
     WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
-  # TODO: don't know what ${TARGET_NAME}_link does
   add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS})
   install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)
 endfunction(go_binary)
@@ -335,15 +332,18 @@ endfunction(go_binary)
 function(go_test TARGET_NAME)
   set(options OPTIONAL)
   set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS)
+  set(multiValueArgs DEPS)
   cmake_parse_arguments(go_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  add_custom_command(OUTPUT ${TARGET_NAME}_timestamp
+  string(REPLACE "${PADDLE_GO_PATH}" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+  add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${go_test_DEPS})
+  add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
     COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test
     -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
-    ${go_test_SRCS}
+    ".${CMAKE_CURRENT_SOURCE_REL_DIR}"
+    WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
+  add_test(NAME ${TARGET_NAME}
+    COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-  add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS})
-  add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME})
 endfunction(go_test)
 function(proto_library TARGET_NAME)
......
@@ -17,3 +17,7 @@ add_subdirectory(pserver/client/c)
 add_subdirectory(cmd/pserver)
 add_subdirectory(cmd/master)
 add_subdirectory(master/c)
+add_subdirectory(master)
+add_subdirectory(pserver)
+add_subdirectory(pserver/client)
+add_subdirectory(utils/networkhelper)
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-go_binary(master SRC master.go DEPS paddle_go_optimizer)
+go_binary(master SRC master.go)
@@ -8,6 +8,7 @@ import (
 	"time"
 	"github.com/namsral/flag"
+	"github.com/topicai/candy"
 	"github.com/PaddlePaddle/Paddle/go/pserver"
 	log "github.com/sirupsen/logrus"
@@ -18,53 +19,47 @@ func main() {
 	index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0")
 	etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
 		"comma separated endpoint string for pserver to connect to etcd")
-	etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls")
+	etcdTimeout := flag.Duration("etcd-timeout", 5*time.Second, "timeout for etcd calls")
 	numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job")
 	checkpointPath := flag.String("checkpoint-path", "/checkpoints/", "save checkpoint path")
-	checkpointInterval := flag.Int("checkpoint-interval", 600, "save checkpoint per interval seconds")
+	checkpointInterval := flag.Duration("checkpoint-interval", 600*time.Second, "save checkpoint per interval seconds")
 	logLevel := flag.String("log-level", "info",
 		"log level, possible values: debug, info, warning, error, fatal, panic")
 	flag.Parse()
 	level, err := log.ParseLevel(*logLevel)
-	if err != nil {
-		panic(err)
-	}
+	candy.Must(err)
 	log.SetLevel(level)
 	var idx int
 	var cp pserver.Checkpoint
 	var e *pserver.EtcdClient
 	if *index >= 0 {
 		idx = *index
 	} else {
-		timeout := time.Second * time.Duration((*etcdTimeout))
-		e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout)
+		e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, *etcdTimeout)
 		idx, err = e.Register()
+		candy.Must(err)
+		cp, err = pserver.NewCheckpointFromFile(*checkpointPath, idx, e)
 		if err != nil {
-			panic(err)
+			log.Errorf("Fetch checkpoint failed, %s", err)
 		}
 	}
 	s, err := pserver.NewService(idx, *checkpointInterval, *checkpointPath, e, cp)
-	if err != nil {
-		panic(err)
-	}
+	candy.Must(err)
 	err = rpc.Register(s)
-	if err != nil {
-		panic(err)
-	}
+	candy.Must(err)
 	rpc.HandleHTTP()
 	l, err := net.Listen("tcp", ":"+strconv.Itoa(*port))
-	if err != nil {
-		panic(err)
-	}
+	candy.Must(err)
 	log.Infof("start pserver at port %d", *port)
 	err = http.Serve(l, nil)
-	if err != nil {
-		panic(err)
-	}
+	candy.Must(err)
 }
-hash: b8f18ce6784bd3fadd9fed0b8443e7b658234ea785ae1f220723ae2c1f652aa7
-updated: 2017-06-27T14:05:48.925262819+08:00
+hash: a8faea3a363468a88917ddeb3b1c9ea36886fb2c622acbad42604fa9cb4d3855
+updated: 2017-07-11T10:04:40.786745417+08:00
 imports:
 - name: github.com/coreos/etcd
-  version: 61fc123e7a8b14a0a258aa3f5c4159861b1ec2e7
+  version: cb2a496c4ddd1c87a9f280e116649b599999ec79
   subpackages:
   - auth/authpb
   - clientv3
@@ -22,7 +22,9 @@ imports:
 - name: github.com/PaddlePaddle/recordio
   version: edfb82af0739c84f241c87390ec5649c7b28c129
 - name: github.com/sirupsen/logrus
-  version: 202f25545ea4cf9b191ff7f846df5d87c9382c2b
+  version: 7f976d3a76720c4c27af2ba716b85d2e0a7e38b1
+- name: github.com/topicai/candy
+  version: 1b9030d056fa9f8c4b1f9c91b52fe4b8ab4cd8cc
 - name: golang.org/x/net
   version: c8c74377599bd978aee1cf3b9b63a8634051cec2
   subpackages:
@@ -34,11 +36,11 @@ imports:
   - lex/httplex
   - trace
 - name: golang.org/x/sys
-  version: f7928cfef4d09d1b080aa2b6fd3ca9ba1567c733
+  version: abf9c25f54453410d0c6668e519582a9e1115027
   subpackages:
   - unix
 - name: golang.org/x/text
-  version: 4e9ab9ee170f2a39bd66c92b3e0a47ff47a4bc77
+  version: cfdf022e86b4ecfb646e1efbd7db175dd623a8fa
   subpackages:
   - secure/bidirule
   - transform
......
@@ -10,3 +10,4 @@ import:
   version: ^1.7.4-pre
 - package: github.com/sirupsen/logrus
   version: ^1.0.0
+- package: github.com/topicai/candy
if(WITH_TESTING)
go_test(master_test)
endif()
if(WITH_TESTING)
go_test(pserver_test DEPS paddle_go_optimizer)
endif()
if(WITH_TESTING)
go_test(pserver_client_test DEPS paddle_go_optimizer)
endif()
libpaddle_go_optimizer.a
 cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf)
 target_link_libraries(paddle_go_optimizer stdc++ m)
+# Copy library to the required place.
+# See: go/pserver/optimizer.go:
+# // #cgo LDFLAGS: ${SRCDIR}/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
+add_custom_command(TARGET paddle_go_optimizer POST_BUILD
+  COMMAND cp "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_go_optimizer.a" "${CMAKE_CURRENT_SOURCE_DIR}"
+)
 go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer)
 if(WITH_TESTING)
   # FIXME: this test requires pserver which is not managed by the test
......
@@ -16,7 +16,7 @@ import (
 const (
 	// PsDesired is etcd path for store desired pserver count
 	PsDesired = "/ps_desired"
-	// PsAddr is the base dir for pserver to store their addr
+	// PsPath is the base dir for pserver to store their addr
 	PsPath = "/ps/"
 	// PsCheckpoint is the etcd path for store checkpoints information
 	PsCheckpoint = "/checkpoints/"
@@ -189,9 +189,25 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) {
 	return idx, nil
 }
+// GetKey gets the value by the specified key
+func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	resp, err := e.etcdClient.Get(ctx, key)
+	cancel()
+	if err != nil {
+		return []byte{}, err
+	}
+	kvs := resp.Kvs
+	if len(kvs) == 0 {
+		return []byte{}, nil
+	}
+	v := kvs[0].Value
+	return v, nil
+}
 // PutKey put into etcd with value by key specified
-func (e *EtcdClient) PutKey(key string, value []byte, timeout int) error {
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(timeout))
+func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) error {
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	_, err := e.etcdClient.Put(ctx, key, string(value))
 	cancel()
 	if err != nil {
......
 package pserver
 // #cgo CFLAGS: -I ../../
-// //FIXME: ldflags contain "build" path
-// #cgo LDFLAGS: ${SRCDIR}/../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
+// #cgo LDFLAGS: ${SRCDIR}/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
 // #include "paddle/optimizer/optimizer.h"
 // #include <stdlib.h>
 // #include <string.h>
......
@@ -9,6 +9,7 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
@@ -21,14 +22,14 @@ import (
 // ElementType is the type of elements of a Parameter.
 type ElementType int
+// RPC error message.
 const (
-	// AlreadyInitialized is true if pserver is initialized
-	AlreadyInitialized = "pserver already initialized"
-	// Uninitialized is true if pserver not fully initialized
-	Uninitialized = "pserver not fully initialized"
+	AlreadyInitialized  = "pserver already initialized"
+	Uninitialized       = "pserver not fully initialized"
+	CheckpointMD5Failed = "checkpoint file MD5 validation failed"
 )
-// Supported element types
+// Supported element types.
 const (
 	Int32 ElementType = iota
 	UInt32
@@ -51,21 +52,15 @@ type ParameterWithConfig struct {
 	Config []byte // parameter configuration in Proto Buffer format
 }
-// ParameterCheckpoint is Parameter and State checkpoint
-type ParameterCheckpoint struct {
-	ParamConfig ParameterWithConfig
-	State       []byte
-}
-
-// checkpoint signature
+// checkpointMeta saves checkpoint metadata
 type checkpointMeta struct {
 	UUID      string `json:"uuid"`
-	Md5sum    string `json:"md5sum"`
-	Timestamp string `json:"timestamp"`
+	MD5       string `json:"md5"`
+	Timestamp int64  `json:"timestamp"`
 }
 // Checkpoint is the pserver shard persist in file
-type Checkpoint []ParameterCheckpoint
+type Checkpoint []parameterCheckpoint
 // Gradient is the gradient of the parameter.
 type Gradient Parameter
@@ -81,12 +76,53 @@ type Service struct {
 	optMap map[string]*optimizer
 }
+// parameterCheckpoint saves parameter checkpoint
+type parameterCheckpoint struct {
+	ParameterWithConfig
+	State []byte
+}
+
+// NewCheckpointFromFile loads parameters and state from checkpoint file
+func NewCheckpointFromFile(cpPath string, idx int, e *EtcdClient) (Checkpoint, error) {
+	v, err := e.GetKey(PsPath+string(idx), 3*time.Second)
+	if err != nil {
+		return nil, err
+	}
+
+	var cpMeta checkpointMeta
+	if err = json.Unmarshal(v, &cpMeta); err != nil {
+		return nil, err
+	}
+
+	fn := filepath.Join(cpPath, cpMeta.UUID)
+	if _, err = os.Stat(fn); os.IsNotExist(err) {
+		return nil, err
+	}
+	content, err := ioutil.ReadFile(fn)
+	if err != nil {
+		return nil, err
+	}
+
+	h := md5.New()
+	md5 := hex.EncodeToString(h.Sum(content))
+	if md5 != cpMeta.MD5 {
+		return nil, errors.New(CheckpointMD5Failed)
+	}
+
+	dec := gob.NewDecoder(bytes.NewReader(content))
+	cp := Checkpoint{}
+	if err = dec.Decode(cp); err != nil {
+		return nil, err
+	}
+	return cp, nil
+}
+
 // NewService creates a new service, will bypass etcd registration if no
-// endpoints specified.
-func NewService(idx int, seconds int, path string, client *EtcdClient, cp Checkpoint) (*Service, error) {
+// endpoints specified. It will recover from the checkpoint file if one
+// exists at the specified path.
+func NewService(idx int, interval time.Duration, path string, client *EtcdClient, cp Checkpoint) (*Service, error) {
 	s := &Service{
 		idx:                idx,
-		checkpointInterval: time.Second * time.Duration(seconds),
+		checkpointInterval: interval,
 		checkpointPath:     path,
 		client:             client,
 	}
@@ -95,9 +131,11 @@ func NewService(idx int, seconds int, path string, client *EtcdClient, cp Checkp
 	if cp != nil {
 		for _, item := range cp {
-			p := item.ParamConfig
-			st := item.State
-			s.optMap[p.Param.Name] = newOptimizer(p, st)
+			p := ParameterWithConfig{
+				Param:  item.Param,
+				Config: item.Config,
+			}
+			s.optMap[p.Param.Name] = newOptimizer(p, item.State)
 		}
 	}
 	return s, nil
@@ -186,13 +224,13 @@ func (s *Service) doCheckpoint() error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	cp := make([]ParameterCheckpoint, 0, len(s.optMap))
+	cp := make([]parameterCheckpoint, len(s.optMap))
 	index := 0
 	for name, opt := range s.optMap {
-		var pc ParameterCheckpoint
-		pc.ParamConfig.Param.Name = name
-		pc.ParamConfig.Param.ElementType = opt.elementType
-		pc.ParamConfig.Param.Content = opt.GetWeights()
+		var pc parameterCheckpoint
+		pc.Param.Name = name
+		pc.Param.ElementType = opt.elementType
+		pc.Param.Content = opt.GetWeights()
 		pc.State = opt.GetStates()
 		cp[index] = pc
 		index++
@@ -206,12 +244,12 @@ func (s *Service) doCheckpoint() error {
 	cpMeta := checkpointMeta{}
 	cpMeta.UUID = s.checkpointPath + strconv.Itoa(s.idx)
-	cpMeta.Timestamp = time.Now().String()
+	cpMeta.Timestamp = time.Now().UnixNano()
 	h := md5.New()
-	cpMeta.Md5sum = hex.EncodeToString(h.Sum(buf.Bytes()))
+	cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes()))
 	cpMetajson, _ := json.Marshal(cpMeta)
-	err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3)
+	err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second)
 	if err != nil {
 		return err
 	}
@@ -219,7 +257,11 @@ func (s *Service) doCheckpoint() error {
 		log.Info("checkpoint does not exists.")
 	} else {
 		err = os.Remove(cpMeta.UUID)
-		log.Infof("checkpoint %s already exsits, removing ", cpMeta.UUID)
+		if err != nil {
+			log.Infof("Removing checkpoint %s failed", cpMeta.UUID)
+		} else {
+			log.Infof("checkpoint %s already exsits, removing ", cpMeta.UUID)
+		}
 	}
 	f, err := os.Create(cpMeta.UUID)
 	defer f.Close()
......
if(WITH_TESTING)
go_test(network_helper_test)
endif()
@@ -8,7 +8,6 @@ add_subdirectory(gserver)
 add_subdirectory(pserver)
 add_subdirectory(trainer)
 add_subdirectory(scripts)
-add_subdirectory(optimizer)
 add_subdirectory(string)
 if(Boost_FOUND)
......
@@ -11,7 +11,7 @@ proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
 cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
 proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
 cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
-cc_library(operator SRCS operator.cc DEPS op_desc protobuf)
+cc_library(operator SRCS operator.cc DEPS op_desc device_context)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc)
 cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry operator)
......
 #pragma once
 #include <algorithm>
+#include <type_traits>
 #include "paddle/framework/attr_checker.h"
 #include "paddle/framework/op_desc.pb.h"
 #include "paddle/framework/op_proto.pb.h"
@@ -81,8 +82,6 @@ class OpProtoAndCheckerMaker {
     return op_checker_->AddAttrChecker<T>(name);
   }
-  void AddType(const std::string& op_type) { proto_->set_type(op_type); }
-
   void AddComment(const std::string& comment) {
     *(proto_->mutable_comment()) = comment;
   }
@@ -101,8 +100,11 @@ class OpRegistry {
     OpProto& op_proto = protos()[op_type];
     OpAttrChecker& op_checker = op_checkers()[op_type];
     ProtoMakerType(&op_proto, &op_checker);
-    PADDLE_ENFORCE(op_proto.IsInitialized(),
-                   "Fail to initialize %s's OpProto !", op_type);
+    *op_proto.mutable_type() = op_type;
+    PADDLE_ENFORCE(
+        op_proto.IsInitialized(),
+        "Fail to initialize %s's OpProto, because %s is not initialized",
+        op_type, op_proto.InitializationErrorString());
   }
   static OperatorBase* CreateOp(const OpDesc& op_desc) {
@@ -119,6 +121,7 @@ class OpRegistry {
       op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
     }
     op_checkers().at(op_type).Check(op->attrs_);
+    op->Init();
     return op;
   }
@@ -142,18 +145,73 @@ class OpRegistry {
 template <typename OpType, typename ProtoMakerType>
 class OpRegisterHelper {
  public:
-  OpRegisterHelper(std::string op_type) {
+  OpRegisterHelper(const char* op_type) {
     OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type);
   }
 };
-#define REGISTER_OP(__op_class, __op_maker_class, __op_type)         \
-  class __op_class##Register {                                       \
-   private:                                                          \
-    const static OpRegisterHelper<__op_class, __op_maker_class> reg; \
-  };                                                                 \
-  const OpRegisterHelper<__op_class, __op_maker_class>               \
-      __op_class##Register::reg(#__op_type);
+#define STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg)                        \
+  struct __test_global_namespace_##uniq_name##__ {};                          \
+  static_assert(std::is_same<::__test_global_namespace_##uniq_name##__,       \
+                             __test_global_namespace_##uniq_name##__>::value, \
+                msg)
+
+#define REGISTER_OP(__op_type, __op_class, __op_maker_class)                 \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(__reg_op__##__op_type,                      \
+                                 "REGISTER_OP must be in global namespace"); \
+  static ::paddle::framework::OpRegisterHelper<__op_class, __op_maker_class> \
+      __op_register_##__op_type##__(#__op_type);                             \
+  int __op_register_##__op_type##_handle__() { return 0; }
+
+#define REGISTER_OP_KERNEL(type, GPU_OR_CPU, PlaceType, KernelType)       \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                         \
+      __reg_op_kernel_##type##_##GPU_OR_CPU##__,                          \
+      "REGISTER_OP_KERNEL must be in global namespace");                  \
+  struct __op_kernel_register__##type##__ {                               \
+    __op_kernel_register__##type##__() {                                  \
+      ::paddle::framework::OperatorWithKernel::OpKernelKey key;           \
+      key.place_ = PlaceType();                                           \
+      ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \
+          .reset(new KernelType());                                       \
+    }                                                                     \
+  };                                                                      \
+  static __op_kernel_register__##type##__ __reg_kernel_##type##__;        \
+  int __op_kernel_register_##type##_handle_##GPU_OR_CPU##__() { return 0; }
+
+#define REGISTER_OP_GPU_KERNEL(type, KernelType) \
+  REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, KernelType)
+
+#define REGISTER_OP_CPU_KERNEL(type, KernelType) \
+  REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, KernelType)
+
+#define USE_OP_WITHOUT_KERNEL(op_type)                      \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                           \
+      __use_op_without_kernel_##op_type,                    \
+      "USE_OP_WITHOUT_KERNEL must be in global namespace"); \
+  extern int __op_register_##op_type##_handle__();          \
+  static int __use_op_ptr_##op_type##_without_kernel__      \
+      __attribute__((unused)) = __op_register_##op_type##_handle__()
+
+#define USE_OP_KERNEL(op_type, DEVICE_TYPE)                               \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                         \
+      __use_op_kernel_##op_type##_##DEVICE_TYPE##__,                      \
+      "USE_OP_KERNEL must be in global namespace");                       \
+  extern int __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__(); \
+  static int __use_op_ptr_##op_type##_##DEVICE_TYPE##_kernel__            \
+      __attribute__((unused)) =                                           \
+          __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__()
+
+#ifdef PADDLE_ONLY_CPU
+#define USE_OP(op_type)           \
+  USE_OP_WITHOUT_KERNEL(op_type); \
+  USE_OP_KERNEL(op_type, CPU);
+#else
+#define USE_OP(op_type)           \
+  USE_OP_WITHOUT_KERNEL(op_type); \
+  USE_OP_KERNEL(op_type, CPU);    \
+  USE_OP_KERNEL(op_type, GPU)
+#endif
 }  // namespace framework
 }  // namespace paddle
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "paddle/framework/operator.h"
#include "paddle/operators/demo_op.h"
using namespace paddle::framework;
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class CosineOp : public OperatorWithKernel { class CosineOp : public OperatorBase {
public: public:
void Run(const OpRunContext* context) const override { void Run(const std::shared_ptr<Scope>& scope,
printf("%s\n", DebugString().c_str()); const platform::DeviceContext& dev_ctx) const override {}
} void InferShape(const std::shared_ptr<Scope>& scope) const override {}
}; };
class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
...@@ -23,19 +19,17 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { ...@@ -23,19 +19,17 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
AddAttr<float>("scale", "scale of cosine op") AddAttr<float>("scale", "scale of cosine op")
.SetDefault(1.0) .SetDefault(1.0)
.LargerThan(0.0); .LargerThan(0.0);
AddType("cos");
AddComment("This is cos op"); AddComment("This is cos op");
} }
}; };
REGISTER_OP(CosineOp, CosineOpProtoAndCheckerMaker, cos_sim) class MyTestOp : public OperatorBase {
public:
void InferShape(const std::shared_ptr<Scope>& scope) const override {}
void Run(const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& dev_ctx) const override {}
class MyTestOp : public OperatorWithKernel {
public: public:
void Run(const OpRunContext* ctx) const override {
printf("%s\n", DebugString().c_str());
printf("test_attr = %d\n", ctx->op_->GetAttr<int>("test_attr"));
}
}; };
class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
...@@ -49,15 +43,17 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { ...@@ -49,15 +43,17 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
}; };
AddAttr<int>("test_attr", "a simple test attribute") AddAttr<int>("test_attr", "a simple test attribute")
.AddCustomChecker(my_checker); .AddCustomChecker(my_checker);
AddType("my_test_op");
AddComment("This is my_test op"); AddComment("This is my_test op");
} }
}; };
REGISTER_OP(MyTestOp, MyTestOpProtoAndCheckerMaker, my_test_op)
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
REGISTER_OP(cos_sim, paddle::framework::CosineOp,
paddle::framework::CosineOpProtoAndCheckerMaker);
REGISTER_OP(my_test_op, paddle::framework::MyTestOp,
paddle::framework::MyTestOpProtoAndCheckerMaker);
TEST(OpRegistry, CreateOp) { TEST(OpRegistry, CreateOp) {
paddle::framework::OpDesc op_desc; paddle::framework::OpDesc op_desc;
op_desc.set_type("cos_sim"); op_desc.set_type("cos_sim");
...@@ -72,9 +68,9 @@ TEST(OpRegistry, CreateOp) { ...@@ -72,9 +68,9 @@ TEST(OpRegistry, CreateOp) {
paddle::framework::OperatorBase* op = paddle::framework::OperatorBase* op =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
auto scope = std::make_shared<Scope>(); auto scope = std::make_shared<paddle::framework::Scope>();
auto dev_ctx = DeviceContext(); paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, &dev_ctx); op->Run(scope, dev_ctx);
float scale_get = op->GetAttr<float>("scale"); float scale_get = op->GetAttr<float>("scale");
ASSERT_EQ(scale_get, scale); ASSERT_EQ(scale_get, scale);
} }
...@@ -115,9 +111,9 @@ TEST(OpRegistry, DefaultValue) { ...@@ -115,9 +111,9 @@ TEST(OpRegistry, DefaultValue) {
paddle::framework::OperatorBase* op = paddle::framework::OperatorBase* op =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
auto scope = std::make_shared<Scope>(); auto scope = std::make_shared<paddle::framework::Scope>();
auto dev_ctx = DeviceContext(); paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, &dev_ctx); op->Run(scope, dev_ctx);
ASSERT_EQ(op->GetAttr<float>("scale"), 1.0); ASSERT_EQ(op->GetAttr<float>("scale"), 1.0);
} }
...@@ -169,14 +165,9 @@ TEST(OpRegistry, CustomChecker) { ...@@ -169,14 +165,9 @@ TEST(OpRegistry, CustomChecker) {
attr->set_i(4); attr->set_i(4);
paddle::framework::OperatorBase* op = paddle::framework::OperatorBase* op =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
auto dev_ctx = DeviceContext(); paddle::platform::CPUDeviceContext dev_ctx;
auto scope = std::make_shared<Scope>(); auto scope = std::make_shared<paddle::framework::Scope>();
op->Run(scope, &dev_ctx); op->Run(scope, dev_ctx);
int test_attr = op->GetAttr<int>("test_attr"); int test_attr = op->GetAttr<int>("test_attr");
ASSERT_EQ(test_attr, 4); ASSERT_EQ(test_attr, 4);
} }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
\ No newline at end of file
@@ -39,13 +39,5 @@ std::string OperatorBase::DebugString() const {
   return ss.str();
 }
-const Variable* OpRunContext::Input(int index) const {
-  return scope_->GetVariable(op_->inputs_[index]);
-}
-
-Variable* OpRunContext::Output(int index) const {
-  return scope_->GetVariable(op_->outputs_[index]);
-}
-
 }  // namespace framework
 }  // namespace paddle
\ No newline at end of file
@@ -14,44 +14,23 @@ limitations under the License. */
 #pragma once
-#include <paddle/framework/attr_checker.h>
-#include <paddle/framework/op_desc.pb.h>
-#include <paddle/framework/scope.h>
-#include <paddle/framework/tensor.h>
-#include <paddle/platform/device_context.h>
-#include <paddle/platform/place.h>
-#include <paddle/utils/Error.h>
 #include <boost/variant.hpp>
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include "paddle/framework/attr_checker.h"
+#include "paddle/framework/op_desc.pb.h"
+#include "paddle/framework/scope.h"
+#include "paddle/utils/Error.h"
 namespace paddle {
 namespace framework {
 class OperatorBase;
-class DeviceContext {};
-
-/**
- * OpRunContext is the only parameter of Operator's Run function.
- * Run will get input/output variables, state such as momentum and
- * device resource such as CUDA stream, cublas handle, etc. from
- * OpRunContext. User should construct it before run the Operator.
- */
-class OpRunContext {
- public:
-  OpRunContext(const OperatorBase* op, const std::shared_ptr<Scope> scope,
-               const DeviceContext* device_context)
-      : op_(op), scope_(scope), device_context_(device_context) {}
-
-  const Variable* Input(int index) const;
-  Variable* Output(int index) const;
-
- public:
-  const OperatorBase* op_;
-  const std::shared_ptr<Scope> scope_;
-  const DeviceContext* device_context_;
-};
 /**
  * OperatorBase has the basic element that Net will call to do computation.
  * Only CreateOperator from OpRegistry will new Operator directly. User
@@ -71,13 +50,20 @@ class OperatorBase {
   std::string DebugString() const;
+  /// Init will be called after CreateOperator, you can put some initialization
+  /// logic here.
+  virtual void Init() {}
+
   /// InferShape infer the size of Variables used by this Operator with
   /// information inside scope
   virtual void InferShape(const std::shared_ptr<Scope>& scope) const = 0;
   /// Net will call this function to Run an op.
   virtual void Run(const std::shared_ptr<Scope>& scope,
-                   const DeviceContext* dev_ctx) const = 0;
+                   const platform::DeviceContext& dev_ctx) const = 0;
+
+ protected:
+  std::string Type() const { return desc_.type(); }
 public:
   OpDesc desc_;
@@ -86,21 +72,113 @@ class OperatorBase {
   AttributeMap attrs_;
 };
+class OpKernel {
+ public:
+  /**
+   * KernelContext is the only parameter of Kernel Run function.
+   * Run will get input/output variables, state such as momentum and
+   * device resource such as CUDA stream, cublas handle, etc. from
+   * KernelContext. User should construct it before run the Operator.
+   */
+  class KernelContext {
+   public:
+    KernelContext(const OperatorBase* op, const std::shared_ptr<Scope>& scope,
+                  const platform::DeviceContext& device_context)
+        : op_(*op), scope_(scope), device_context_(device_context) {}
+
+    const Variable* Input(int index) const {
+      return scope_->GetVariable(op_.inputs_[index]);
+    }
+
+    Variable* Output(int index) const {
+      return scope_->GetVariable(op_.outputs_[index]);
+    }
+
+    const OperatorBase& op_;
+    const std::shared_ptr<Scope>& scope_;
+    const platform::DeviceContext& device_context_;
+  };
+
+  virtual void Compute(const KernelContext& context) const = 0;
+
+  virtual ~OpKernel() {}
+};
+
+template <typename T>
+struct VarToTensor {};
+
+template <>
+struct VarToTensor<Tensor*> {
+  Tensor* operator()(Variable* var) { return var->GetMutable<Tensor>(); }
+};
+
+template <>
+struct VarToTensor<const Tensor*> {
+  const Tensor* operator()(Variable* var) { return &var->Get<Tensor>(); }
+};
+
 class OperatorWithKernel : public OperatorBase {
  public:
-  virtual ~OperatorWithKernel() {}
+  struct OpKernelKey {
+    platform::Place place_;
+
+    OpKernelKey() = default;
+    OpKernelKey(const platform::DeviceContext& dev_ctx) {
+      place_ = dev_ctx.GetPlace();
+    }
-  virtual void InferShape(const std::shared_ptr<Scope>& scope) const {}
+    bool operator==(const OpKernelKey& o) const { return place_ == o.place_; }
+  };
+
+  struct OpKernelHash {
+    std::hash<bool> hash_;
+    size_t operator()(const OpKernelKey& key) const {
+      return hash_(platform::is_gpu_place(key.place_));
+    }
+  };
+
+  using OpKernelMap =
+      std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
   void Run(const std::shared_ptr<Scope>& scope,
-           const DeviceContext* dev_ctx) const {
-    OpRunContext op_ctx(this, scope, dev_ctx);
-    Run(&op_ctx);
+           const platform::DeviceContext& dev_ctx) const final {
+    auto& opKernel = AllOpKernels().at(Type()).at(OpKernelKey(dev_ctx));
+    opKernel->Compute(OpKernel::KernelContext(this, scope, dev_ctx));
+  }
+
+  static std::unordered_map<std::string /* op_type */, OpKernelMap>&
+  AllOpKernels() {
+    static std::unordered_map<std::string, OpKernelMap> g_all_op_kernels;
+    return g_all_op_kernels;
+  }
+
+  void InferShape(const std::shared_ptr<Scope>& scope) const final {
+    std::vector<const Tensor*> ins;
+    VarNamesToTensors(scope, inputs_, &ins);
+    std::vector<Tensor*> outs;
+    VarNamesToTensors(scope, outputs_, &outs);
+    InferShape(ins, outs);
+  };
+
+ private:
+  template <typename T>
+  void VarNamesToTensors(const std::shared_ptr<Scope>& scope,
+                         const std::vector<std::string>& var_names,
+                         std::vector<T>* container) const {
+    container->reserve(var_names.size());
+    VarToTensor<T> convert;
+    for (auto& name : var_names) {
+      auto var = scope->GetVariable(name);
+      if (var != nullptr) {
+        container->push_back(convert(var));
+      } else {
+        container->push_back(nullptr);
+      }
+    }
   }
-  /// when implement an Op, your should implement this function.
-  /// this function should be moved to OpKernel later
-  virtual void Run(const OpRunContext* context) const = 0;
+ protected:
+  virtual void InferShape(const std::vector<const Tensor*>& inputs,
+                          const std::vector<Tensor*>& outputs) const = 0;
 };
 }  // namespace framework
 }  // namespace paddle
......
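One subtlety in OperatorWithKernel above: OpKernelHash hashes only on is_gpu_place(key.place_), so every CPU key lands in one bucket and every GPU key in another, while operator== compares the full place. A self-contained analogue of that bucket-then-compare pattern, using plain standard C++ stand-ins rather than Paddle's real Place and OpKernel types:

#include <cassert>
#include <functional>
#include <string>
#include <unordered_map>

struct Place {  // stand-in for platform::Place
  bool is_gpu;
  int device;
};

struct KernelKey {  // stand-in for OperatorWithKernel::OpKernelKey
  Place place_;
  bool operator==(const KernelKey& o) const {
    return place_.is_gpu == o.place_.is_gpu && place_.device == o.place_.device;
  }
};

struct KernelHash {  // stand-in for OpKernelHash: bucket only by CPU-vs-GPU
  std::hash<bool> hash_;
  size_t operator()(const KernelKey& k) const { return hash_(k.place_.is_gpu); }
};

int main() {
  std::unordered_map<KernelKey, std::string, KernelHash> kernels;
  kernels[KernelKey{Place{false, 0}}] = "cpu kernel";
  kernels[KernelKey{Place{true, 0}}] = "gpu kernel";
  // Both entries may share hash buckets; equality picks the exact kernel.
  assert(kernels.at(KernelKey{Place{true, 0}}) == "gpu kernel");
  return 0;
}

The coarse hash is fine here because the map holds at most a handful of kernels per op; collisions are resolved by the equality comparison.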
@@ -19,62 +19,76 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
-class OperatorTest : public OperatorWithKernel {
+class OperatorTest : public OperatorBase {
  public:
-  void Run(const OpRunContext* ctx) const override {
-    float scale = ctx->op_->GetAttr<float>("scale");
-    PADDLE_ENFORCE(ctx->Input(0) == nullptr, "Input(0) should not initialized");
-    PADDLE_ENFORCE(ctx->Output(0) == nullptr,
-                   "Output(1) should not initialized");
-    auto output1 = ctx->scope_->CreateVariable("output1");
-    PADDLE_ENFORCE(output1 != nullptr, "should create output1 from scope");
-    printf("get attr %s = %f\n", "scale", scale);
-    printf("%s\n", DebugString().c_str());
+  void Init() override { x = 1; }
+  void InferShape(const std::shared_ptr<Scope>& scope) const override {}
+  void Run(const std::shared_ptr<Scope>& scope,
+           const platform::DeviceContext& dev_ctx) const override {
+    float scale = GetAttr<float>("scale");
+    ASSERT_NEAR(scale, 3.14, 1e-5);
+    ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
+    ASSERT_EQ(x, 1);
+    ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);
   }
+
+ public:
+  float x = 0;
 };
-class OperatorTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
+class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
  public:
-  OperatorTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
+  OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("input", "input of test op");
     AddOutput("output", "output of test op");
     AddAttr<float>("scale", "scale of cosine op")
         .SetDefault(1.0)
         .LargerThan(0.0);
-    AddType("test_operator");
     AddComment("This is test op");
   }
 };
-REGISTER_OP(OperatorTest, OperatorTestProtoAndCheckerMaker, test_operator)
+class OpWithKernelTest : public OperatorWithKernel {
+ protected:
+  void InferShape(const std::vector<const Tensor*>& inputs,
+                  const std::vector<Tensor*>& outputs) const override {}
+};
-TEST(OperatorBase, DebugString) {
-  OpDesc op_desc;
-  op_desc.set_type("test_operator");
-  std::vector<std::string> inputs = {"IN1", "IN2"};
-  for (auto& input : inputs) {
-    op_desc.add_inputs(input);
-  }
-  std::vector<std::string> outputs = {"OUT1", "OUT2"};
-  for (auto& output : outputs) {
-    op_desc.add_outputs(output);
-  }
+class CPUKernelTest : public OpKernel {
+ public:
+  void Compute(const KernelContext& context) const {
+    float scale = context.op_.GetAttr<float>("scale");
+    ASSERT_NEAR(scale, 3.14, 1e-5);
+    std::cout << "this is cpu kernel" << std::endl;
+    std::cout << context.op_.DebugString() << std::endl;
+  }
+};
+
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_OP(op_with_kernel, paddle::framework::OpWithKernelTest,
+            paddle::framework::OpKernelTestProtoAndCheckerMaker);
+REGISTER_OP_CPU_KERNEL(op_with_kernel, paddle::framework::CPUKernelTest);
+
+TEST(OpKernel, all) {
+  using namespace paddle::framework;
+
+  OpDesc op_desc;
+  op_desc.set_type("op_with_kernel");
+  *op_desc.mutable_inputs()->Add() = "IN1";
+  *op_desc.mutable_outputs()->Add() = "OUT1";
   auto attr = op_desc.mutable_attrs()->Add();
   attr->set_name("scale");
   attr->set_type(paddle::framework::AttrType::FLOAT);
-  float scale = 3.14;
-  attr->set_f(scale);
+  attr->set_f(3.14);
-  DeviceContext device_context;
+  paddle::platform::CPUDeviceContext cpu_device_context;
   auto scope = std::make_shared<Scope>();
   OperatorBase* op = paddle::framework::OpRegistry::CreateOp(op_desc);
-  ASSERT_EQ(op->inputs_, inputs);
-  ASSERT_EQ(op->outputs_, outputs);
-  ASSERT_EQ(op->GetAttr<float>("scale"), scale);
-  op->Run(scope, &device_context);
-}
+  op->Run(scope, cpu_device_context);
-}  // namespace framework
-}  // namespace paddle
+  delete op;
+}
\ No newline at end of file
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
if(WITH_GPU)
nv_library(add_op SRCS add_op.cc add_op.cu DEPS operator op_registry glog ddim)
else()
cc_library(add_op SRCS add_op.cc DEPS operator op_registry glog ddim)
endif()
cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
#include <paddle/framework/op_registry.h>
#include <paddle/framework/tensor.h>
#include <paddle/operators/add_op.h>
namespace paddle {
namespace operators {
class AddOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "Input size of AddOp must be two");
PADDLE_ENFORCE(outputs.size() == 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(
inputs[0] != nullptr && inputs[1] != nullptr && outputs[0] != nullptr,
"Inputs/Outputs of AddOp must all be set");
PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
"Two input of Add Op's dimension must be same.");
// Need set dims in Tensor
// outputs[0]->set_dims(inputs[0]->dims())
}
};
class AddOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AddOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of add op");
AddInput("Y", "The second input of add op");
AddOutput("Out", "The output of add op");
AddComment(R"DOC(
Two Element Add Operator.
The equation is: Out = X + Y
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
REGISTER_OP_CPU_KERNEL(
add_two, ::paddle::operators::AddKernel<::paddle::platform::CPUPlace>);
\ No newline at end of file
#include <paddle/operators/add_op.h>
#include <paddle/framework/op_registry.h>
REGISTER_OP_GPU_KERNEL(add_two,
paddle::operators::AddKernel<paddle::platform::GPUPlace>);
\ No newline at end of file
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class AddKernel : public framework::OpKernel {
public:
void Compute(const KernelContext &context) const override {
LOG(INFO) << "Add kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
#include <gtest/gtest.h>
#define private public
#include <paddle/framework/op_registry.h>
USE_OP(add_two);
TEST(AddOp, GetOpProto) {
auto& protos = paddle::framework::OpRegistry::protos();
auto it = protos.find("add_two");
ASSERT_NE(it, protos.end());
}
\ No newline at end of file
#pragma once
#include "paddle/framework/op_registry.h"
using namespace paddle::framework;
namespace paddle {
namespace operators {
class CosineOp : public OperatorWithKernel {
public:
void Run(const OpRunContext *context) const override {
printf("%s\n", DebugString().c_str());
}
};
class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
CosineOpProtoAndCheckerMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of cosine op");
AddOutput("output", "output of cosine op");
AddAttr<float>("scale", "scale of cosine op")
.SetDefault(1.0)
.LargerThan(0.0);
AddType("cos");
AddComment("This is cos op");
}
};
REGISTER_OP(CosineOp, CosineOpProtoAndCheckerMaker, cos_sim)
class MyTestOp : public OperatorWithKernel {
public:
void Run(const OpRunContext *context) const override {
printf("%s\n", DebugString().c_str());
}
};
class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
MyTestOpProtoAndCheckerMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of cosine op");
AddOutput("output", "output of cosine op");
auto my_checker = [](int i) {
PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
};
AddAttr<int>("test_attr", "a simple test attribute")
.AddCustomChecker(my_checker);
AddType("my_test_op");
AddComment("This is my_test op");
}
};
REGISTER_OP(MyTestOp, MyTestOpProtoAndCheckerMaker, my_test_op)
} // namespace operators
} // namespace paddle
 add_subdirectory(dynload)
-nv_test(cuda_test SRCS cuda_test.cu DEPS dyload_cuda)
+nv_test(cuda_test SRCS cuda_test.cu)
 cc_library(place SRCS place.cc)
 cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
 IF(WITH_GPU)
-    set(GPU_CTX_DEPS dyload_cuda dynamic_loader )
+    set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
 ELSE()
     set(GPU_CTX_DEPS)
 ENDIF()
......
-#include <paddle/platform/device_context.h>
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/platform/device_context.h"
 namespace paddle {
 namespace platform {
-namespace dynload {
-namespace dummy {
-
-// Make DeviceContext A library.
-int DUMMY_VAR_FOR_DEV_CTX = 0;
-
-}  // namespace dummy
-}  // namespace dynload
+template <>
+Eigen::DefaultDevice* DeviceContext::get_eigen_device<Eigen::DefaultDevice>() {
+  return reinterpret_cast<CPUDeviceContext*>(this)->eigen_device();
+}
+
+#ifndef PADDLE_ONLY_CPU
+template <>
+Eigen::GpuDevice* DeviceContext::get_eigen_device<Eigen::GpuDevice>() {
+  return reinterpret_cast<CUDADeviceContext*>(this)->eigen_device();
+}
+#endif
 }  // namespace platform
 }  // namespace paddle
\ No newline at end of file
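A short sketch of how the new get_eigen_device entry point is meant to be used: code holding only a base DeviceContext* can recover the matching Eigen device type without knowing the concrete context class (the Device.Init test further down does the GPU-side equivalent). The function name warm_up is purely illustrative, and the paddle headers are assumed to be on the include path:

#include "paddle/platform/device_context.h"

// Obtain the Eigen device through the base class; the template
// specializations above cast to the concrete context type.
void warm_up(paddle::platform::DeviceContext* ctx) {
  Eigen::DefaultDevice* dev = ctx->get_eigen_device<Eigen::DefaultDevice>();
  // ... evaluate Eigen expressions on *dev ...
  (void)dev;  // silence unused-variable warnings in this sketch
}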
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -22,8 +19,9 @@ limitations under the License. */ ...@@ -22,8 +19,9 @@ limitations under the License. */
#include "paddle/platform/dynload/curand.h" #include "paddle/platform/dynload/curand.h"
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#endif #endif
#include "paddle/platform/place.h" #include <paddle/platform/place.h>
#include "unsupported/Eigen/CXX11/Tensor" #include <memory>
#include <unsupported/Eigen/CXX11/Tensor>
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -31,9 +29,29 @@ namespace platform { ...@@ -31,9 +29,29 @@ namespace platform {
class DeviceContext { class DeviceContext {
public: public:
virtual ~DeviceContext() {} virtual ~DeviceContext() {}
virtual Place GetPlace() const = 0;
template <typename DeviceType>
DeviceType* get_eigen_device();
}; };
class CPUDeviceContext : public DeviceContext {}; class CPUDeviceContext : public DeviceContext {
public:
Eigen::DefaultDevice* eigen_device() {
if (!eigen_device_) {
eigen_device_.reset(new Eigen::DefaultDevice());
}
return eigen_device_.get();
}
Place GetPlace() const override {
Place retv = CPUPlace();
return retv;
}
private:
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
};
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
...@@ -57,8 +75,13 @@ class CUDADeviceContext : public DeviceContext { ...@@ -57,8 +75,13 @@ class CUDADeviceContext : public DeviceContext {
GPUPlaceGuard guard(gpu_place_); GPUPlaceGuard guard(gpu_place_);
paddle::platform::throw_on_error(cudaStreamCreate(&stream_), paddle::platform::throw_on_error(cudaStreamCreate(&stream_),
"cudaStreamCreate failed"); "cudaStreamCreate failed");
eigen_stream_ = new Eigen::CudaStreamDevice(&stream_); eigen_stream_.reset(new Eigen::CudaStreamDevice(&stream_));
eigen_device_ = new Eigen::GpuDevice(eigen_stream_); eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
}
Place GetPlace() const override {
Place retv = GPUPlace();
return retv;
} }
void Wait() { void Wait() {
@@ -68,7 +91,7 @@ class CUDADeviceContext : public DeviceContext {
  cudaStream_t stream() { return stream_; }

-  Eigen::GpuDevice eigen_device() { return *eigen_device_; }
+  Eigen::GpuDevice* eigen_device() { return eigen_device_.get(); }

  cublasHandle_t cublas_handle() {
    if (!blas_handle_) {
@@ -133,10 +156,8 @@ class CUDADeviceContext : public DeviceContext {
                       rand_generator_) == CURAND_STATUS_SUCCESS,
                   "curandDestroyGenerator failed");
    }
-    delete eigen_stream_;
-    delete eigen_device_;
+    eigen_stream_.reset();
+    eigen_device_.reset();
    paddle::platform::throw_on_error(cudaStreamDestroy(stream_),
                                     "cudaStreamDestroy failed");
  }
@@ -145,8 +166,8 @@ class CUDADeviceContext : public DeviceContext {
  GPUPlace gpu_place_;
  cudaStream_t stream_;

-  Eigen::CudaStreamDevice* eigen_stream_;
-  Eigen::GpuDevice* eigen_device_;
+  std::unique_ptr<Eigen::CudaStreamDevice> eigen_stream_;
+  std::unique_ptr<Eigen::GpuDevice> eigen_device_;

  cublasHandle_t blas_handle_{nullptr};
@@ -155,6 +176,8 @@ class CUDADeviceContext : public DeviceContext {
  int random_seed_;
  curandGenerator_t rand_generator_{nullptr};
};
#endif

}  // namespace platform
}  // namespace paddle
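The new `get_eigen_device<DeviceType>()` template, together with the two explicit specializations added in device_context.cc above, lets calling code fetch the matching Eigen device from a type-erased `DeviceContext*`. A minimal sketch of how a caller might use it (illustrative only; `ScaleInPlace` and its signature are invented for this example and are not part of the commit):

// Sketch only -- assumes the device_context.h above is on the include path
// and the Eigen Tensor module is available.
#include "paddle/platform/device_context.h"
#include "unsupported/Eigen/CXX11/Tensor"

template <typename DeviceType>
void ScaleInPlace(paddle::platform::DeviceContext* ctx, float* data, int n,
                  float k) {
  // Dispatches through the specializations in device_context.cc: a
  // CPUDeviceContext yields an Eigen::DefaultDevice, a CUDADeviceContext an
  // Eigen::GpuDevice bound to the context's CUDA stream.
  DeviceType* device = ctx->template get_eigen_device<DeviceType>();
  Eigen::TensorMap<Eigen::Tensor<float, 1>> t(data, n);
  t.device(*device) = t * k;  // Eigen runs the loop/kernel on that device.
}

Note the ownership story this change establishes: `eigen_stream_` and `eigen_device_` become `std::unique_ptr` members, the manual `delete` pair disappears from the destructor, and `eigen_device()` hands out a non-owning raw pointer, so the context keeps sole ownership of the Eigen objects.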
@@ -15,13 +15,26 @@ limitations under the License. */
#include "paddle/platform/device_context.h"
#include "gtest/gtest.h"

-TEST(CUDADeviceContext, Init) {
+using DEVICE_GPU = Eigen::GpuDevice;
+
+TEST(Device, Init) {
+  int count = paddle::platform::GetDeviceCount();
+  for (int i = 0; i < count; i++) {
+    paddle::platform::DeviceContext* device_context =
+        new paddle::platform::CUDADeviceContext(i);
+    Eigen::GpuDevice* gpu_device =
+        device_context->template get_eigen_device<DEVICE_GPU>();
+    ASSERT_NE(nullptr, gpu_device);
+    delete device_context;
+  }
+}
+
+TEST(Device, CUDADeviceContext) {
  int count = paddle::platform::GetDeviceCount();
  for (int i = 0; i < count; i++) {
    paddle::platform::CUDADeviceContext* device_context =
        new paddle::platform::CUDADeviceContext(i);
-    Eigen::GpuDevice gpu_device = device_context->eigen_device();
-    ASSERT_NE(nullptr, gpu_device.stream());
+    Eigen::GpuDevice* gpu_device = device_context->eigen_device();
+    ASSERT_NE(nullptr, gpu_device);
    cudnnHandle_t cudnn_handle = device_context->cudnn_handle();
    ASSERT_NE(nullptr, cudnn_handle);
    cublasHandle_t cublas_handle = device_context->cublas_handle();
...
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
-nv_library(dyload_cuda SRCS cublas.cc cudnn.cc curand.cc)
+nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc)
@@ -67,20 +67,20 @@ extern void *cublas_dso_handle;
  __macro(cublasSgemm); \
  __macro(cublasDgemm); \
  __macro(cublasSgeam); \
-  __macro(cublasDgeam);
-DECLARE_DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate);
-DECLARE_DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy);
-DECLARE_DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetStream);
-DECLARE_DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetPointerMode);
-DECLARE_DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasGetPointerMode);
-DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgemmBatched);
-DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgemmBatched);
-DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(cublasCgemmBatched);
-DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(cublasZgemmBatched);
-DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetrfBatched);
-DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetriBatched);
-DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetrfBatched);
+  __macro(cublasDgeam); \
+  __macro(cublasCreate_v2); \
+  __macro(cublasDestroy_v2); \
+  __macro(cublasSetStream_v2); \
+  __macro(cublasSetPointerMode_v2); \
+  __macro(cublasGetPointerMode_v2); \
+  __macro(cublasSgemmBatched); \
+  __macro(cublasDgemmBatched); \
+  __macro(cublasCgemmBatched); \
+  __macro(cublasZgemmBatched); \
+  __macro(cublasSgetrfBatched); \
+  __macro(cublasSgetriBatched); \
+  __macro(cublasDgetrfBatched); \
+  __macro(cublasDgetriBatched)

CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP);
...
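This change folds the cuBLAS symbols into a single X-macro list: `CUBLAS_BLAS_ROUTINE_EACH` now generates every wrapper with one pass of `DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP`, instead of mixing standalone `_WRAP` and `_V2_WRAP` invocations. A stripped-down illustration of the X-macro technique (toy names and bodies, not the real Paddle macros):

// Toy version of the pattern above: one list of names, one generator
// macro applied to the whole list. Compiles as C++11.
#include <cstdio>

#define ROUTINE_EACH(__macro) \
  __macro(alpha);             \
  __macro(beta)

#define DECLARE_STUB(__name) \
  void __name##_stub() { std::printf("calling %s\n", #__name); }

ROUTINE_EACH(DECLARE_STUB);  // expands to alpha_stub() and beta_stub()

int main() {
  alpha_stub();  // prints "calling alpha"
  beta_stub();   // prints "calling beta"
}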
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

+#include <Python.h>
#include <paddle/framework/scope.h>
#include <pybind11/pybind11.h>

@@ -43,4 +44,4 @@ All parameter, weight, gradient are variables in Paddle.
      py::return_value_policy::reference);

  return m.ptr();
}
\ No newline at end of file
"""
Default scope function.
`Paddle` manages Scope as programming language's scope. It just a
thread-local stack of Scope. Top of that stack is current scope, the bottom
of that stack is all scopes' parent.
Invoking `create_var/get_var` can `create/get` variable in current scope.
Invoking `enter_local_scope/leave_local_scope` can create or destroy local
scope.
A `scoped_function` will take a `function` as input. That function will be
invoked in a new local scope.
"""
import paddle.v2.framework.core
import threading

__tl_scope__ = threading.local()

__all__ = [
    'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'create_var',
    'get_var', 'scoped_function'
]


def get_cur_scope():
    """
    Get current scope.

    :rtype: paddle.v2.framework.core.Scope
    """
    cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None)
    if cur_scope_stack is None:
        __tl_scope__.cur_scope = list()
    if len(__tl_scope__.cur_scope) == 0:
        __tl_scope__.cur_scope.append(paddle.v2.framework.core.Scope(None))
    return __tl_scope__.cur_scope[-1]
def enter_local_scope():
    """
    Enter a new local scope.
    """
    cur_scope = get_cur_scope()
    new_scope = paddle.v2.framework.core.Scope(cur_scope)
    __tl_scope__.cur_scope.append(new_scope)


def leave_local_scope():
    """
    Leave the current local scope.
    """
    __tl_scope__.cur_scope.pop()
def create_var(name):
    """
    Create a variable in the current scope.
    """
    return get_cur_scope().create_var(name)


def get_var(name):
    """
    Get a variable from the current scope.
    """
    return get_cur_scope().get_var(name)
def scoped_function(func):
    """
    Invoke `func` inside a new local scope.

    :param func: a callable that will be run in the new scope.
    :type func: callable
    """
    enter_local_scope()
    try:
        func()
    finally:
        leave_local_scope()
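A minimal usage sketch of the module above (illustrative only: it assumes the compiled `paddle.v2.framework.core` extension is importable, and the variable name `hidden` is invented for the example):

from paddle.v2.framework.default_scope_funcs import (create_var, get_var,
                                                     scoped_function)

def build():
    # Runs inside the fresh local scope pushed by scoped_function.
    create_var("hidden")
    assert get_var("hidden") is not None

scoped_function(build)
# The local scope has been popped again, so "hidden" stays out of the
# enclosing (global) scope.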
-add_python_test(test_framework test_protobuf.py test_scope.py)
+add_python_test(test_framework test_protobuf.py test_scope.py
+                test_default_scope_funcs.py)
from paddle.v2.framework.default_scope_funcs import *
import unittest


class TestDefaultScopeFuncs(unittest.TestCase):
    def test_cur_scope(self):
        self.assertIsNotNone(get_cur_scope())

    def test_none_variable(self):
        self.assertIsNone(get_var("test"))

    def test_create_var_get_var(self):
        var_a = create_var("var_a")
        self.assertIsNotNone(var_a)
        self.assertIsNotNone(get_cur_scope().get_var('var_a'))
        enter_local_scope()
        self.assertIsNotNone(get_cur_scope().get_var('var_a'))
        leave_local_scope()

    def test_var_get_int(self):
        def __new_scope__():
            i = create_var("var_i")
            self.assertFalse(i.is_int())
            i.set_int(10)
            self.assertTrue(i.is_int())
            self.assertEqual(10, i.get_int())

        for _ in xrange(10):
            scoped_function(__new_scope__)


if __name__ == '__main__':
    unittest.main()