diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61b989dc698798eca932516e558c63f107ef2754..efb4dcb2dfbc63bb6905961b054cdef860cf4573 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,10 +21,10 @@ sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29 hooks: - id: clang-formater -- repo: https://github.com/dnephin/pre-commit-golang - sha: e4693a4c282b4fc878eda172a929f7a6508e7d16 +- repo: https://github.com/PaddlePaddle/pre-commit-golang + sha: 16398aeccf263adaf53b2495eed0406347d76281 hooks: - id: go-fmt - files: (.*\.go) - - id: go-lint - files: (.*\.go) + types: [go] + - id: gometalinter + types: [go] diff --git a/.travis.yml b/.travis.yml index 2cf7666fb5d0c47034676864a52c3d3dbce19683..376c693602b56fe719decfeb41c217497e143e12 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,6 +41,8 @@ before_install: - pip install rarfile - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" + - go get -u github.com/alecthomas/gometalinter + - gometalinter --install - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: diff --git a/CMakeLists.txt b/CMakeLists.txt index fb1c85bf742c80308edb009c080cb0da6d409ee0..2a6b0a20e441676c85c9ed8f8ad1a6e7abdf1ea8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,6 @@ # limitations under the License cmake_minimum_required(VERSION 3.0) - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) @@ -137,7 +136,8 @@ if(WITH_GPU) endif(WITH_GPU) if(USE_NNPACK) - list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt") + include(external/nnpack) + list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) endif(USE_NNPACK) add_subdirectory(proto) diff --git a/Dockerfile b/Dockerfile index ed5910d93b41dba8d50b2ba01c59c635797edd29..8cfb16928c95dcbfac08383d32562ff67933d873 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ apt-get install -y \ git python-pip python-dev openssh-server bison \ - wget unzip tar xz-utils bzip2 gzip coreutils ntp \ + wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-numpy python-matplotlib gcc g++ \ automake locales clang-format-3.8 swig doxygen cmake \ diff --git a/Dockerfile.android b/Dockerfile.android index fa24f6f06c4e76444c83bcf13fe312afdcb6c348..c0fa58c384f9ebcae60477ffce49ea4ffa929db9 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -14,6 +14,17 @@ RUN apt-get update && \ wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \ apt-get clean -y +# Install Go and glide +RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ + tar -C /usr/local -xzf go.tgz && \ + mkdir /root/gopath && \ + mkdir /root/gopath/bin && \ + mkdir /root/gopath/src && \ + rm go.tgz +ENV GOROOT=/usr/local/go GOPATH=/root/gopath +# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. +ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin + # git credential to skip password typing RUN git config --global credential.helper store diff --git a/cmake/configure.cmake b/cmake/configure.cmake index a4f98ec7d4af652d0dd0650f4906696ff3a4efb9..7afab5d5344b704a9329e313a81379032ba0cc97 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -102,12 +102,19 @@ if(WITH_GOLANG) message(FATAL_ERROR "no glide executeble found: $ENV{GOPATH}/bin/glide") endif() - add_custom_target(go_vendor) - add_custom_command(TARGET go_vendor + # this command will only run when the file it depends is missing + # or has changed, or the output is missing. + add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide COMMAND env GOPATH=${GOPATH} ${GLIDE} install + COMMAND touch ${CMAKE_BINARY_DIR}/glide + DEPENDS ${PROJ_ROOT}/go/glide.lock WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" - ) - add_dependencies(go_vendor go_path) + ) + + # depends on the custom command which outputs + # ${CMAKE_BINARY_DIR}/glide, the custom command does not need to + # run every time this target is built. + add_custom_target(go_vendor DEPENDS ${CMAKE_BINARY_DIR}/glide go_path) endif() endif(WITH_GOLANG) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 6bbcd730e1b5ac49415cac676352e6df00eb6eb5..656e1a0803c6e389d70f37f592c3aa2e95a2bcd4 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -27,7 +27,8 @@ set(IGNORE_PATTERN .*cblas\\.h.* .*\\.pb\\.txt .*LtrDataProvider.* - .*MultiDataProvider.*) + .*MultiDataProvider.* + .*pb.*) # add_style_check_target # @@ -52,14 +53,13 @@ macro(add_style_check_target TARGET_NAME) endif() endforeach() if(LINT MATCHES ON) + # cpplint code style get_filename_component(base_filename ${filename} NAME) set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.cpplint) - add_custom_command(OUTPUT ${CUR_GEN} - PRE_BUILD - COMMAND env ${py_env} "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" - "--filter=${STYLE_FILTER}" - "--write-success=${CUR_GEN}" ${filename} - DEPENDS ${filename} + add_custom_command(TARGET ${TARGET_NAME} PRE_BUILD + COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" + "--filter=${STYLE_FILTER}" + "--write-success=${CUR_GEN}" ${filename} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() endforeach() diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake index dcfbc5d0129d7763daaf17c33d2b7791e87d3018..5e3e437a8da9624df35a5c754fe77be73f20361d 100644 --- a/cmake/cross_compiling/android.cmake +++ b/cmake/cross_compiling/android.cmake @@ -108,6 +108,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") ENDIF() IF(ANDROID_ABI STREQUAL "arm64-v8a") SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android) + SET(CMAKE_SYSTEM_PROCESSOR aarch64) ENDIF() SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-") ENDIF() @@ -166,7 +167,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") ENDIF() IF(ANDROID_ABI STREQUAL "arm64-v8a") - LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) + LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) ENDIF() STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}") @@ -193,6 +194,10 @@ ELSE() SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN}) ENDIF() SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI}) - SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE}) - SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON}) + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE}) + IF(ANDROID_ABI STREQUAL "armeabi-v7a") + SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON}) + ENDIF() + ENDIF() ENDIF() diff --git a/paddle/function/nnpack/nnpack.cmake b/cmake/external/nnpack.cmake similarity index 54% rename from paddle/function/nnpack/nnpack.cmake rename to cmake/external/nnpack.cmake index 7182730ae8f133bdc4f73bfc46fa8acbe5f3b603..d42bcb0f329041462bd8b568052fbb8226d18e4e 100644 --- a/paddle/function/nnpack/nnpack.cmake +++ b/cmake/external/nnpack.cmake @@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK") find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include) find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib) find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib) if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB) set(NNPACK_FOUND ON) INCLUDE_DIRECTORIES(${NNPACK_INC_DIR}) + + set(NNPACK_LIBS) + list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB}) + if (NNPACK_UKERNELS_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB}) + endif() + if (NNPACK_CPUFEATURES_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB}) + endif() + if(NOT ANDROID) + list(APPEND NNPACK_LIBS "rt") + endif() else() message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})") endif() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 48c054d17fa0b06998bd6b2a2a2b324afd2439e9..534be0abe246ac70950d85ad05441825c8ca768a 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -185,6 +185,10 @@ function(cc_library TARGET_NAME) add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) endif() + + # cpplint code style + add_style_check_target(${TARGET_NAME} ${cc_library_SRCS}) + else(cc_library_SRCS) if (cc_library_DEPS) merge_static_libs(${TARGET_NAME} ${cc_library_DEPS}) @@ -286,8 +290,22 @@ function(go_library TARGET_NAME) set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") endif() - # Add dummy code to support `make target_name` under Terminal Command set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) + + # This custom command will always run since it depends on a not + # existing file. + add_custom_command( + OUTPUT dummy_rebulid_${TARGET_NAME} + COMMAND cmake -E touch ${dummyfile} + ) + # Create a custom target that depends on the custom command output + # file, so the custom command can be referenced as a dependency by + # `add_dependencies`. + add_custom_target(rebuild_${TARGET_NAME} + DEPENDS dummy_rebulid_${TARGET_NAME} + ) + + # Add dummy code to support `make target_name` under Terminal Command file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") if (go_library_SHARED OR go_library_shared) add_library(${TARGET_NAME} SHARED ${dummyfile}) @@ -298,6 +316,12 @@ function(go_library TARGET_NAME) add_dependencies(${TARGET_NAME} ${go_library_DEPS}) endif(go_library_DEPS) + # The "source file" of the library is `${dummyfile}` which never + # change, so the target will never rebuild. Make the target depends + # on the custom command that touches the library "source file", so + # rebuild will always happen. + add_dependencies(${TARGET_NAME} rebuild_${TARGET_NAME}) + set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}") file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") @@ -338,7 +362,7 @@ function(go_test TARGET_NAME) string(REPLACE "${PADDLE_GO_PATH}" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${go_test_DEPS}) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test -race -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" ".${CMAKE_CURRENT_SOURCE_REL_DIR}" WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") diff --git a/go/cmd/master/master.go b/go/cmd/master/master.go index 54fa254863156455f66fa87de9077042a45f9735..9eaf8c04ae01fe7eebc92c51803bfcf977995ee3 100644 --- a/go/cmd/master/master.go +++ b/go/cmd/master/master.go @@ -11,6 +11,7 @@ import ( "github.com/namsral/flag" log "github.com/sirupsen/logrus" + "github.com/topicai/candy" "github.com/PaddlePaddle/Paddle/go/master" "github.com/PaddlePaddle/Paddle/go/utils/networkhelper" @@ -20,11 +21,18 @@ func main() { port := flag.Int("port", 8080, "port of the master server.") ttlSec := flag.Int("ttl", 60, "etcd lease TTL in seconds.") endpoints := flag.String("endpoints", "http://127.0.0.1:2379", "comma separated etcd endpoints. If empty, fault tolerance will not be enabled.") - taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.") - taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.") - chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.") + taskTimeoutDur := flag.Duration("task-timout-dur", 20*time.Minute, "task timout duration.") + taskTimeoutMax := flag.Int("task-timeout-max", 3, "max timtout count for each task before it being declared failed task.") + chunkPerTask := flag.Int("chunk-per-task", 10, "chunk per task.") + logLevel := flag.String("log-level", "info", + "log level, possible values: debug, info, warning, error, fatal, panic") flag.Parse() + level, e := log.ParseLevel(*logLevel) + candy.Must(e) + + log.SetLevel(level) + if *endpoints == "" { log.Warningln("-endpoints not set, fault tolerance not be enabled.") } diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index b331b8126cadc2c5df516fb241913415b2e3e73d..652d7ba315d72ff19931b82a4b0d1c30b2ff8f37 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -40,7 +40,7 @@ func main() { idx = *index } else { e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, *etcdTimeout) - idx, err = e.Register() + idx, err = e.Register(*port) candy.Must(err) cp, err = pserver.NewCheckpointFromFile(*checkpointPath, idx, e) diff --git a/go/master/c/client.go b/go/master/c/client.go index 31f431197454c2ec6a25624d37b60876d99f0087..2cbe164c7b406b189f44ec850796203f24779205 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -23,7 +23,6 @@ import ( log "github.com/sirupsen/logrus" ) -var nullPtr = unsafe.Pointer(uintptr(0)) var mu sync.Mutex var handleMap = make(map[C.paddle_master_client]*master.Client) var curHandle C.paddle_master_client @@ -114,13 +113,13 @@ func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { if err != nil { // Error // TODO: return the type of error? - *record = (*C.uchar)(nullPtr) + *record = (*C.uchar)(nil) return -1 } if len(r) == 0 { // Empty record - *record = (*C.uchar)(nullPtr) + *record = (*C.uchar)(nil) return 0 } diff --git a/go/master/client.go b/go/master/client.go index a2ca3f3ef8ce300e3df09a302d74b56ee23c6d10..90b99470978d21480eb2d8097e7dec217b9524eb 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -2,6 +2,7 @@ package master import ( "os" + "time" "github.com/PaddlePaddle/Paddle/go/connection" "github.com/PaddlePaddle/recordio" @@ -36,9 +37,9 @@ func (c *Client) getRecords() { for { t, err := c.getTask() if err != nil { - // TODO(helin): wait before move on with next // getTask call. - log.Errorln(err) + log.Errorf("Get task failed, sleep 3 seconds and continue, %s", err) + time.Sleep(3 * time.Second) continue } @@ -68,7 +69,10 @@ func (c *Client) getRecords() { // We treat a task as finished whenever the last data // instance of the task is read. This is not exactly // correct, but a reasonable approximation. - c.taskFinished(t.Meta.ID) + err = c.taskFinished(t.Meta.ID) + if err != nil { + log.Errorln(err) + } } } diff --git a/go/master/client_internal_test.go b/go/master/client_internal_test.go index 49263474c8fe2410ffb6db93a9647f5ab066b06b..70dc09bf9461142ff6498355a5858ba9a1320510 100644 --- a/go/master/client_internal_test.go +++ b/go/master/client_internal_test.go @@ -66,11 +66,21 @@ func TestGetFinishTask(t *testing.T) { for i := 0; i < totalTask*chunkPerTask; i++ { w := recordio.NewWriter(f, -1, -1) - w.Write(nil) + _, err = w.Write(nil) + if err != nil { + panic(err) + } + // call Close to force RecordIO writing a chunk. - w.Close() + err = w.Close() + if err != nil { + panic(err) + } + } + err = f.Close() + if err != nil { + panic(err) } - f.Close() // Manually intialize client to avoid calling c.getRecords() c := &Client{} @@ -79,7 +89,11 @@ func TestGetFinishTask(t *testing.T) { ch := make(chan string, 1) ch <- addr go c.monitorMaster(ch) - c.SetDataset([]string{path}) + err = c.SetDataset([]string{path}) + if err != nil { + panic(err) + } + checkOnePass := func(i int) { var tasks []Task for idx := 0; idx < totalTask; idx++ { diff --git a/go/master/client_test.go b/go/master/client_test.go index 6666d3860c412daa8711fbfa2d729a261b3fd887..bc92dc5ac973d62434b71e09705143ac8fbbd2fa 100644 --- a/go/master/client_test.go +++ b/go/master/client_test.go @@ -57,14 +57,30 @@ func TestNextRecord(t *testing.T) { w := recordio.NewWriter(f, -1, -1) for i := 0; i < total; i++ { - w.Write([]byte{byte(i)}) + _, err = w.Write([]byte{byte(i)}) + if err != nil { + panic(err) + } + } + + err = w.Close() + if err != nil { + panic(err) + } + + err = f.Close() + if err != nil { + panic(err) } - w.Close() - f.Close() + curAddr := make(chan string, 1) curAddr <- fmt.Sprintf(":%d", p) c := master.NewClient(curAddr, 10) - c.SetDataset([]string{path}) + err = c.SetDataset([]string{path}) + if err != nil { + panic(err) + } + for pass := 0; pass < 50; pass++ { received := make(map[byte]bool) for i := 0; i < total; i++ { diff --git a/go/master/etcd_client.go b/go/master/etcd_client.go index 04c1394e963d1eb541b80b91407fb55b0d1e1f2a..69dc6a8268748ad9a72eb10fc2789982f565d291 100644 --- a/go/master/etcd_client.go +++ b/go/master/etcd_client.go @@ -30,7 +30,7 @@ type EtcdClient struct { // NewEtcdClient creates a new EtcdClient. func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) { log.Debugf("Connecting to etcd at %v", endpoints) - // TODO(helin): gracefully shutdown etcd store. Becuase etcd + // TODO(helin): gracefully shutdown etcd store. Because etcd // store holds a etcd lock, even though the lock will expire // when the lease timeout, we need to implement graceful // shutdown to release the lock. @@ -60,7 +60,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat } log.Debugf("Successfully acquired lock at %s.", lockPath) - put := clientv3.OpPut(addrPath, string(addr)) + put := clientv3.OpPut(addrPath, addr) resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit() if err != nil { return nil, err diff --git a/go/master/inmem_store.go b/go/master/inmem_store.go index bcd549b20e46381783bad11caa08cb7f4ba40add..57e75dc4e01b4bafa8153bcc7fbc82a9eb2b08f5 100644 --- a/go/master/inmem_store.go +++ b/go/master/inmem_store.go @@ -4,7 +4,7 @@ import "sync" // InMemStore is an in memory implementation of Store interface. // -// It does not tolerate the fault that casues the program to crash. +// It does not tolerate the fault that causes the program to crash. type InMemStore struct { mu sync.Mutex buf []byte diff --git a/go/master/service.go b/go/master/service.go index a6050ab99437244dade83f2943f6649453d47fff..262735f421ad7ae04050e9264a177ee4c46e68d0 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -160,7 +160,7 @@ func (s *Service) recover() (bool, error) { // snapshot *must* be called with s.mu being held. func (s *Service) snapshot() error { - // TOOD(helin): etcd request has a size limit, so the snapshot + // TODO(helin): etcd request has a size limit, so the snapshot // size is limited by the max request size. We should either // divide the snapshot into smaller chunks and save under // different keys, or configure the request size to be big @@ -215,6 +215,7 @@ func readChunks(globPaths []string) ([]Chunk, error) { } count := index.NumChunks() + log.Infof("readChunks: file %s has %d chunks", path, count) for i := 0; i < count; i++ { chunk := Chunk{ Path: path, @@ -288,7 +289,6 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) { log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) s.taskQueues.Todo = append(s.taskQueues.Todo, t) - return } func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { diff --git a/go/pserver/client/c/cclient.go b/go/pserver/client/c/cclient.go index 7ddaceb7ed33db32e19a191402100a0c0efa241a..718b4304c80791b4d8a8816f256c8fa93e0b1ead 100644 --- a/go/pserver/client/c/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -34,7 +34,6 @@ import ( log "github.com/sirupsen/logrus" ) -var nullPtr = unsafe.Pointer(uintptr(0)) var mu sync.Mutex var handleMap = make(map[C.paddle_pserver_client]*client.Client) var curHandle C.paddle_pserver_client @@ -63,7 +62,7 @@ func remove(client C.paddle_pserver_client) *client.Client { } func cArrayToSlice(p unsafe.Pointer, len int) []byte { - if p == nullPtr { + if p == nil { return nil } @@ -101,11 +100,11 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcd_endpoints *C.char, selected int) C.paddle_pserver_client { +func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client { // TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters) - addr := C.GoString(etcd_endpoints) - etcd_client := client.NewEtcd(addr) - c := client.NewClient(etcd_client, etcd_client.Desired(), selector(selected != 0)) + addr := C.GoString(etcdEndpoints) + etcdClient := client.NewEtcd(addr) + c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0)) return add(c) } @@ -124,20 +123,20 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int { } //export paddle_init_param -func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { +func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, paramConfig unsafe.Pointer, configLen C.int) C.int { et := pserver.ElementType(param.element_type) name := C.GoString(param.name) content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len)) pc := pserver.ParameterWithConfig{ Param: pserver.Parameter{Name: name, ElementType: et, Content: content}, - Config: cArrayToSlice(param_config, int(config_len)), + Config: cArrayToSlice(paramConfig, int(configLen)), } c := get(client) err := c.InitParam(pc) if err != nil { if err.Error() == pserver.AlreadyInitialized { - log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.", name) + log.Warningf("parameter %s already initialized, treat paddle_init_param as successful.", name) return C.PSERVER_OK } log.Errorln(err) @@ -153,7 +152,7 @@ func paddle_finish_init_params(client C.paddle_pserver_client) C.int { err := c.FinishInitParams() if err != nil { if err.Error() == pserver.AlreadyInitialized { - log.Warningln("parameters already initialized, treat paddle_finish_init_params as sucessful.") + log.Warningln("parameters already initialized, treat paddle_finish_init_params as successful.") return C.PSERVER_OK } @@ -223,12 +222,12 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, p := ps[i] param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) - if unsafe.Pointer(param) == nullPtr { + if unsafe.Pointer(param) == nil { log.Errorln("must pre-allocate parameter.") return C.PSERVER_ERROR } - if unsafe.Pointer(param.content) != nullPtr { + if unsafe.Pointer(param.content) != nil { if int(param.content_len) != len(p.Content) { log.Errorf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content)) return C.PSERVER_ERROR diff --git a/go/pserver/client/c/test/test_train.py b/go/pserver/client/c/test/test_train.py index d6922672f4c1253e62cfe54965f6c2f3b5e6c7bf..e9264592b4f18fddf68b198d73bf907206e77a3f 100644 --- a/go/pserver/client/c/test/test_train.py +++ b/go/pserver/client/c/test/test_train.py @@ -1,5 +1,23 @@ import paddle.v2 as paddle import paddle.v2.dataset.uci_housing as uci_housing +import paddle.v2.master as master +import os +import cPickle as pickle + +etcd_ip = os.getenv("MASTER_IP", "127.0.0.1") +etcd_endpoint = "http://" + etcd_ip + ":2379" + + +def cloud_reader(): + print "connecting to master, etcd endpoints: ", etcd_endpoint + master_client = master.client(etcd_endpoint, 5, 64) + master_client.set_dataset( + ["/pfs/dlnel/public/dataset/uci_housing/uci_housing-*-of-*"]) + while 1: + r, e = master_client.next_record() + if not r: + break + yield pickle.loads(r) def main(): @@ -22,13 +40,13 @@ def main(): # create optimizer of new remote updater to pserver optimizer = paddle.optimizer.Momentum(momentum=0) - #TODO(zhihong) : replace optimizer with new OptimizerConfig - + print "etcd endoint: ", etcd_endpoint trainer = paddle.trainer.SGD(cost=cost, parameters=parameters, update_equation=optimizer, is_local=False, - pserver_spec="localhost:3000") + pserver_spec=etcd_endpoint, + use_etcd=True) # event_handler to print training and testing info def event_handler(event): @@ -47,11 +65,11 @@ def main(): print "Test %d, %.2f" % (event.pass_id, result.cost) # training + # NOTE: use uci_housing.train() as reader for non-paddlecloud training trainer.train( reader=paddle.batch( paddle.reader.shuffle( - uci_housing.train(), buf_size=500), - batch_size=2), + cloud_reader, buf_size=500), batch_size=2), feeding={'x': 0, 'y': 1}, event_handler=event_handler, diff --git a/go/pserver/client/client.go b/go/pserver/client/client.go index aa8bfe30c26fcc0875ad479ecd562700ccefa5a3..b4a45e1c21056550ef9264746bcf58a8abb369a1 100644 --- a/go/pserver/client/client.go +++ b/go/pserver/client/client.go @@ -233,7 +233,7 @@ func (c *Client) Save(path string) error { func strHash(s string) uint32 { h := fnv.New32a() - h.Write([]byte(s)) + _, _ = h.Write([]byte(s)) return h.Sum32() } diff --git a/go/pserver/client/client_test.go b/go/pserver/client/client_test.go index 27f4ff2380b3b5aa01485838eb4a876a8863d901..5c89882a297323034be2875a6d4cb71d715eb0c2 100644 --- a/go/pserver/client/client_test.go +++ b/go/pserver/client/client_test.go @@ -79,15 +79,33 @@ func initEtcdClient() { log.Errorf("err %v", err) } ctx, cancel := context.WithTimeout(context.Background(), timeout) - client.Delete(ctx, pserver.PsDesired) - client.Delete(ctx, pserver.PsPath) - client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver)) + _, err = client.Delete(ctx, pserver.PsDesired) + if err != nil { + panic(err) + } + + _, err = client.Delete(ctx, pserver.PsPath) + if err != nil { + panic(err) + } + + _, err = client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver)) + if err != nil { + panic(err) + } + ports := initClient() for i := 0; i < numPserver; i++ { - client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i])) + _, err = client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i])) + if err != nil { + panic(err) + } } cancel() - client.Close() + err = client.Close() + if err != nil { + panic(err) + } } type selector bool @@ -164,7 +182,7 @@ func testClient(t *testing.T, c *client.Client) { wg.Add(1) go func(gs []pserver.Gradient) { - err = c.SendGrads(gs) + err := c.SendGrads(gs) if err != nil { t.Fatal(err) } diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go index 1fd3479aa88ccbbe7c5067da1e9886b65352e847..953065b427ed52d39f1253ea94d485b765ea5dc2 100644 --- a/go/pserver/client/etcd_client.go +++ b/go/pserver/client/etcd_client.go @@ -12,7 +12,7 @@ import ( ) const ( - DefaultEtcdTimeout time.Duration = 5 * time.Second + defaultEtcdTimeout time.Duration = 5 * time.Second ) // EtcdClient is used by pserver client that is a part of trainer process. @@ -47,7 +47,7 @@ func (p *EtcdClient) Desired() int { psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) if err != nil { - log.Errorf("psDesired %s invalid %v", psDesired, err) + log.Errorf("psDesired %d invalid %v", psDesired, err) time.Sleep(p.timeout) continue } @@ -106,11 +106,11 @@ func NewEtcd(endpoints string) *EtcdClient { for { cli, err = clientv3.New(clientv3.Config{ Endpoints: ep, - DialTimeout: DefaultEtcdTimeout, + DialTimeout: defaultEtcdTimeout, }) if err != nil { log.Errorf("Init etcd connection failed: %v", err) - time.Sleep(DefaultEtcdTimeout) + time.Sleep(defaultEtcdTimeout) continue } break @@ -118,7 +118,7 @@ func NewEtcd(endpoints string) *EtcdClient { log.Infof("Connected to etcd: %s\n", endpoints) client := &EtcdClient{ client: cli, - timeout: DefaultEtcdTimeout, + timeout: defaultEtcdTimeout, endpoints: ep, } return client diff --git a/go/pserver/etcd_client.go b/go/pserver/etcd_client.go index 4a694b97f47b2ab85d1e109ef7545d104194b5cf..e70e826975b26db302a6799e9171cff970153aac 100644 --- a/go/pserver/etcd_client.go +++ b/go/pserver/etcd_client.go @@ -49,7 +49,7 @@ func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *Et // Register registers the pserver on etcd // // Register returns the index of the current pserver. -func (e *EtcdClient) Register() (int, error) { +func (e *EtcdClient) Register(port int) (int, error) { var err error e.externalIP, err = networkhelper.GetExternalIP() @@ -116,7 +116,7 @@ func (e *EtcdClient) Register() (int, error) { for { ctx, cancel := context.WithTimeout(context.Background(), time.Second) var err error - pserverIdx, err = e.registerPserverEtcd(ctx) + pserverIdx, err = e.registerPserverEtcd(ctx, port) cancel() if err != nil { log.Warn(err) @@ -140,7 +140,7 @@ func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) ( } // registerPserverEtcd registers pserver node on etcd using transaction. -func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { +func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, error) { var idx int _, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error { registered := false @@ -156,8 +156,9 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { log.Fatal(err) } // find the first id and write info - c.Put(psKey, e.externalIP, clientv3.WithLease(resp.ID)) - log.Debugf("set pserver node %s with value %s", psKey, e.externalIP) + pserverAddr := e.externalIP + ":" + strconv.Itoa(port) + c.Put(psKey, pserverAddr, clientv3.WithLease(resp.ID)) + log.Debugf("set pserver node %s with value %s", psKey, pserverAddr) ch, kaerr := e.etcdClient.KeepAlive(context.TODO(), resp.ID) if kaerr != nil { log.Errorf("keepalive etcd node error: %v", kaerr) @@ -176,10 +177,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { break } } - if registered == true { + if registered { return nil } - return errors.New("not registerd, may due to already have enough pservers") + return errors.New("not registered, may due to already have enough pservers") }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) if err != nil { @@ -210,8 +211,5 @@ func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) err ctx, cancel := context.WithTimeout(context.Background(), timeout) _, err := e.etcdClient.Put(ctx, key, string(value)) cancel() - if err != nil { - return err - } - return nil + return err } diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index d6b7fafd59c0453b9f40019166d01ebdc9775117..151a3f80332b0e62767586f9f769c839ba19ce1e 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -14,8 +14,6 @@ import ( log "github.com/sirupsen/logrus" ) -var nullPtr = unsafe.Pointer(uintptr(0)) - type optimizer struct { opt *C.struct_paddle_optimizer elementType ElementType @@ -23,7 +21,7 @@ type optimizer struct { } func cArrayToSlice(p unsafe.Pointer, len int) []byte { - if p == nullPtr { + if p == nil { return nil } @@ -92,8 +90,8 @@ func (o *optimizer) UpdateParameter(g Gradient) error { } func (o *optimizer) Cleanup() { - if unsafe.Pointer(o.opt) != nullPtr { + if unsafe.Pointer(o.opt) != nil { C.paddle_release_optimizer(o.opt) - o.opt = (*C.struct_paddle_optimizer)(nullPtr) + o.opt = (*C.struct_paddle_optimizer)(nil) } } diff --git a/go/pserver/service.go b/go/pserver/service.go index fec2ec61dc67756439d9fa478788d1f155876538..c723959d6b87524762e2f874bb5e4d5bd567cd00 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -211,7 +211,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { // learning optimization methods are stochastic in // nature. This race condition is allowed deliberately // to save the program from making a copy of the - // paramter content. + // parameter content. parameter.Name = name parameter.ElementType = opt.elementType parameter.Content = opt.GetWeights() @@ -219,7 +219,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { } // pserver save checkpoint -func (s *Service) doCheckpoint() error { +func (s *Service) doCheckpoint() (err error) { <-s.initialized s.mu.Lock() defer s.mu.Unlock() @@ -237,9 +237,9 @@ func (s *Service) doCheckpoint() error { } var buf bytes.Buffer encoder := gob.NewEncoder(&buf) - err := encoder.Encode(cp) + err = encoder.Encode(cp) if err != nil { - return err + return } cpMeta := checkpointMeta{} @@ -248,10 +248,14 @@ func (s *Service) doCheckpoint() error { h := md5.New() cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes())) - cpMetajson, _ := json.Marshal(cpMeta) + cpMetajson, err := json.Marshal(cpMeta) + if err != nil { + return + } + err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second) if err != nil { - return err + return } if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) { log.Info("checkpoint does not exists.") @@ -264,15 +268,32 @@ func (s *Service) doCheckpoint() error { } } f, err := os.Create(cpMeta.UUID) - defer f.Close() if err != nil { - return err + return } + + defer func() { + closeErr := f.Close() + if closeErr != nil { + if err != nil { + log.Errorln(closeErr) + } else { + // Set closeErr as return value. + err = closeErr + } + } + }() + writer := bufio.NewWriter(f) _, err = writer.Write(buf.Bytes()) - writer.Flush() if err != nil { - return err + return } - return nil + + err = writer.Flush() + if err != nil { + return + } + + return } diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index 5fb3d1c73bc56e921f13aafd27c25224e259b3fe..0b9b83d42974151d49250bdf0e7c397f59bf6a62 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -843,7 +843,8 @@ public: bool useSparseUpdater); static ParameterUpdater* createNewRemoteUpdater( OptimizationConfig* config, - const std::string pserverSpec) throw(UnsupportError); + const std::string pserverSpec, + const bool useEtcd) throw(UnsupportError); ~ParameterUpdater(); /** diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp index 1aaefdfb8107a2eaa0432211fd7df4f5f12d537f..5934cb898b5f6adc74c237b1733a7459d8437a28 100644 --- a/paddle/api/ParameterUpdater.cpp +++ b/paddle/api/ParameterUpdater.cpp @@ -33,11 +33,12 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater( ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( OptimizationConfig *config, - const std::string pserverSpec) throw(UnsupportError) { + const std::string pserverSpec, + const bool useEtcd) throw(UnsupportError) { #ifndef PADDLE_WITHOUT_GOLANG auto updater = new ParameterUpdater(); updater->m->updater.reset(new paddle::NewRemoteParameterUpdater( - config->m->getConfig(), pserverSpec)); + config->m->getConfig(), pserverSpec, useEtcd)); return updater; #else throw UnsupportError(); diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 8415ce67e90397b31864a90cda54b81c19b3b34e..760d84e51e7473d359a415e4790251db3d139ab2 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -1,24 +1,32 @@ # ddim lib -cc_library(ddim SRCS ddim.cc) +cc_library(ddim SRCS ddim.cc DEPS eigen3) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_test(tensor_test SRCS tensor_test.cc DEPS ddim) + +cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory) +cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) +cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) + cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) -cc_test(enforce_test SRCS enforce_test.cc) + proto_library(attr_type SRCS attr_type.proto) proto_library(op_proto SRCS op_proto.proto DEPS attr_type) -cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) proto_library(op_desc SRCS op_desc.proto DEPS attr_type) +cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) -cc_library(operator SRCS operator.cc DEPS op_desc device_context) + +cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) + cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry operator) + py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto) # Generate an empty __init__.py to make framework_py_proto as a valid python module. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) proto_library(net_proto SRCS net_proto.proto DEPS op_proto) -cc_library(net SRCS net.cc DEPS net_proto) +cc_library(net SRCS net.cc DEPS operator net_proto op_registry) +cc_test(net_op_test SRCS net_op_test.cc DEPS net) diff --git a/paddle/framework/attr_checker.h b/paddle/framework/attr_checker.h index c0c33d81149ac2fc2a9a57d90931ef32375fe1d0..ea5614a45f3a77a851358aff80abbc276c9972ba 100644 --- a/paddle/framework/attr_checker.h +++ b/paddle/framework/attr_checker.h @@ -4,8 +4,9 @@ #include #include #include +#include #include -#include "paddle/framework/enforce.h" +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { @@ -41,6 +42,35 @@ class DefaultValueSetter { T default_value_; }; +template +class EnumInContainer { + public: + explicit EnumInContainer(const std::unordered_set& c) : container_(c) {} + void operator()(T& val) const { + PADDLE_ENFORCE(container_.find(val) != container_.end(), + "Value %s is not in enum container %s", val, + ContainerDebugString()); + } + + private: + std::string ContainerDebugString() const { + std::ostringstream sout; + sout << "["; + size_t cnt = 0; + for (auto& v : container_) { + sout << v; + ++cnt; + if (cnt != container_.size()) { + sout << " ,"; + } + } + sout << "]"; + return sout.str(); + } + + std::unordered_set container_; +}; + // check whether a certain attribute fit its limits // an attribute can have more than one limits template @@ -50,6 +80,11 @@ class TypedAttrChecker { public: TypedAttrChecker(const std::string& attr_name) : attr_name_(attr_name) {} + TypedAttrChecker& InEnum(const std::unordered_set& range) { + value_checkers_.push_back(EnumInContainer(range)); + return *this; + } + TypedAttrChecker& LargerThan(const T& lower_bound) { value_checkers_.push_back(LargerThanChecker(lower_bound)); return *this; diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 3f949a6595ea326b97ac567daf9b35a68c8cf7f8..545c1dcc2a1682839d90194002fdbb748d85e808 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -1,9 +1,24 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #include "paddle/framework/ddim.h" +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { -///@cond HIDDEN +/// @cond HIDDEN template Dim make_dim(const int* d) { @@ -50,7 +65,7 @@ void make_ddim(DDim& ddim, const int* dims, int n) { } } -///@endcond +/// @endcond DDim make_ddim(std::initializer_list dims) { DDim result(make_dim(0)); @@ -64,11 +79,11 @@ DDim make_ddim(const std::vector& dims) { return result; } -///@cond HIDDEN +/// @cond HIDDEN // XXX For some reason, putting this in an anonymous namespace causes errors class DynamicMutableIndexer : public boost::static_visitor { public: - DynamicMutableIndexer(int idx) : idx_(idx) {} + explicit DynamicMutableIndexer(int idx) : idx_(idx) {} template int& operator()(Dim& dim) const { @@ -81,7 +96,7 @@ class DynamicMutableIndexer : public boost::static_visitor { class DynamicConstIndexer : public boost::static_visitor { public: - DynamicConstIndexer(int idx) : idx_(idx) {} + explicit DynamicConstIndexer(int idx) : idx_(idx) {} template int operator()(const Dim& dim) const { @@ -92,7 +107,7 @@ class DynamicConstIndexer : public boost::static_visitor { int idx_; }; -///@endcond +/// @endcond int& DDim::operator[](int idx) { return boost::apply_visitor(DynamicMutableIndexer(idx), var); @@ -102,6 +117,8 @@ int DDim::operator[](int idx) const { return boost::apply_visitor(DynamicConstIndexer(idx), var); } +ssize_t DDim::size() const { return arity(*this); } + bool DDim::operator==(DDim d) const { if (var.which() != d.getVar().which()) { return false; @@ -155,11 +172,11 @@ int get(const DDim& ddim, int idx) { return ddim[idx]; } void set(DDim& ddim, int idx, int value) { ddim[idx] = value; } -///@cond HIDDEN +/// @cond HIDDEN struct VectorizeVisitor : public boost::static_visitor<> { std::vector& vector; - VectorizeVisitor(std::vector& v) : vector(v) {} + explicit VectorizeVisitor(std::vector& v) : vector(v) {} template void operator()(const T& t) { @@ -169,7 +186,7 @@ struct VectorizeVisitor : public boost::static_visitor<> { void operator()(const Dim<1>& t) { vector.push_back(t.head); } }; -///@endcond +/// @endcond std::vector vectorize(const DDim& ddim) { std::vector result; @@ -178,16 +195,59 @@ std::vector vectorize(const DDim& ddim) { return result; } +struct ProductVisitor : public boost::static_visitor { + template + ssize_t operator()(const Dim& dim) { + return product(dim); + } +}; + ssize_t product(const DDim& ddim) { - ssize_t result = 1; - std::vector v = vectorize(ddim); - for (auto i : v) { - result *= i; + ProductVisitor visitor; + return boost::apply_visitor(visitor, ddim); +} + +struct SliceVectorizeVisitor : public boost::static_visitor<> { + std::vector& vector; + int begin; + int end; + + SliceVectorizeVisitor(std::vector& v, int b, int e) + : vector(v), begin(b), end(e) { + PADDLE_ENFORCE(begin < end, + "Begin index must be less than end index in ddim slice."); + PADDLE_ENFORCE(begin >= 0, + "Begin index can't be less than zero in ddim slice."); } - return result; + + template + void operator()(const Dim& dim) { + if (begin == 0) { + vector.push_back(dim.head); + } else { + --begin; + } + --end; + if (end > 0) { + this->operator()(dim.tail); + } + } + + void operator()(const Dim<1>& dim) { + PADDLE_ENFORCE(end == 1, "End index in ddim slice is out of bound."); + vector.push_back(dim.head); + } +}; + +DDim slice_ddim(const DDim& dim, int begin, int end) { + std::vector vec; + vec.reserve(end - begin); + SliceVectorizeVisitor visitor(vec, begin, end); + boost::apply_visitor(visitor, dim); + return make_ddim(vec); } -///\cond HIDDEN +/// \cond HIDDEN struct ArityVisitor : boost::static_visitor { template @@ -196,15 +256,15 @@ struct ArityVisitor : boost::static_visitor { } }; -///\endcond +/// \endcond int arity(const DDim& d) { return boost::apply_visitor(ArityVisitor(), d); } -///\cond HIDDEN +/// \cond HIDDEN struct DDimPrinter : boost::static_visitor { std::ostream& os; - DDimPrinter(std::ostream& os_) : os(os_) {} + explicit DDimPrinter(std::ostream& os_) : os(os_) {} template void operator()(const T& t) { @@ -212,7 +272,7 @@ struct DDimPrinter : boost::static_visitor { } }; -///\endcond +/// \endcond std::ostream& operator<<(std::ostream& os, const DDim& ddim) { DDimPrinter printer(os); @@ -220,5 +280,9 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) { return os; } +DDim::DDim(std::initializer_list init_list) { + *this = make_ddim(init_list); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 223c4180bee45e21547364441476b27051daca56..9fcc657edcd5459d0a42a64d708603a4bcd53cf0 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -1,11 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #pragma once #include #include #include #include - #include "paddle/framework/dim.h" +#include "paddle/platform/enforce.h" +#include "unsupported/Eigen/CXX11/Tensor" namespace paddle { namespace framework { @@ -27,7 +42,9 @@ struct DDim { DDim() : var(Dim<1>()) {} template - DDim(const Dim& in) : var(in) {} + explicit DDim(const Dim& in) : var(in) {} + + /*implicit*/ DDim(std::initializer_list init_list); template DDim& operator=(const Dim& in) { @@ -57,6 +74,8 @@ struct DDim { DDim operator+(DDim d) const; DDim operator*(DDim d) const; + + ssize_t size() const; }; /** @@ -81,6 +100,15 @@ std::vector vectorize(const DDim& ddim); ssize_t product(const DDim& ddim); +/** + * \brief Slice a ddim + * + * Slice dim with [begin, end). + * e.g. DDim d = make_ddim({1,2,3,4,5}); + * slice_ddim(d, 1, 3); ====> {2,3} + */ +DDim slice_ddim(const DDim& dim, int begin, int end); + /** * \brief What is the length of this dimension? * diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index 36eef02370e0196c2af2c05f49176b70ce69235a..9d18a2972ce62139430b240b4599854b14290a32 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -49,9 +49,30 @@ TEST(DDim, Equality) { // arity of a DDim EXPECT_EQ(paddle::framework::arity(ddim), 3); + EXPECT_EQ(ddim.size(), 3); // product of a DDim EXPECT_EQ(paddle::framework::product(vddim), 45); + EXPECT_EQ( + paddle::framework::product(paddle::framework::make_ddim({3, 2, 5, 3})), + 90); + + // slice a DDim + paddle::framework::DDim ddim2 = + paddle::framework::make_ddim({1, 2, 3, 4, 5, 6}); + paddle::framework::DDim ss = paddle::framework::slice_ddim(ddim2, 2, 5); + EXPECT_EQ(arity(ss), 3); + EXPECT_EQ(ss[0], 3); + EXPECT_EQ(ss[1], 4); + EXPECT_EQ(ss[2], 5); + paddle::framework::DDim ss2 = paddle::framework::slice_ddim(ddim2, 0, 6); + EXPECT_EQ(arity(ss2), 6); + EXPECT_EQ(ss2[0], 1); + EXPECT_EQ(ss2[1], 2); + EXPECT_EQ(ss2[2], 3); + EXPECT_EQ(ss2[3], 4); + EXPECT_EQ(ss2[4], 5); + EXPECT_EQ(ss2[5], 6); } TEST(DDim, Print) { diff --git a/paddle/framework/dim_test.cu b/paddle/framework/dim_test.cu index 05217415196f3ec3ce9b5de7cb2f82c9de960ba7..3898d0a447aa502813b3cb5e86c29eebb814ff84 100644 --- a/paddle/framework/dim_test.cu +++ b/paddle/framework/dim_test.cu @@ -1,100 +1,101 @@ #include #include -#include "paddle/framework/dim.h" #include "gtest/gtest.h" +#include "paddle/framework/dim.h" __global__ void test(paddle::framework::Dim<2>* o) { - o[0] = paddle::framework::make_dim(5, 6); + o[0] = paddle::framework::make_dim(5, 6); } __global__ void dyn_idx_gpu(int* o) { - auto d = paddle::framework::make_dim(5, 6); - o[0] = d[1]; + auto d = paddle::framework::make_dim(5, 6); + o[0] = d[1]; } TEST(Dim, Equality) { - // construct a Dim on the CPU - auto a = paddle::framework::make_dim(3, 4); - EXPECT_EQ(paddle::framework::get<0>(a), 3); - EXPECT_EQ(paddle::framework::get<1>(a), 4); - - // construct a Dim on the GPU - thrust::device_vector> t(2); - test<<<1,1>>>(thrust::raw_pointer_cast(t.data())); - a = t[0]; - EXPECT_EQ(paddle::framework::get<0>(a), 5); - EXPECT_EQ(paddle::framework::get<1>(a), 6); - - // linearization - auto b = paddle::framework::make_dim(7, 8); - EXPECT_EQ(paddle::framework::linearize(a, b), 83); - - // product - EXPECT_EQ(paddle::framework::product(a), 30); - - // mutate a Dim - paddle::framework::get<1>(b) = 10; - EXPECT_EQ(paddle::framework::get<0>(b), 7); - EXPECT_EQ(paddle::framework::get<1>(b), 10); - - // dynamic access - paddle::framework::get(b, 0) = 8; - b[1] = 11; - EXPECT_EQ(paddle::framework::get<0>(b), 8); - EXPECT_EQ(paddle::framework::get<1>(b), 11); - EXPECT_EQ(paddle::framework::get(b, 0), 8); - EXPECT_EQ(b[1], 11); - - // dynamic access on GPU - thrust::device_vector r(1); - dyn_idx_gpu<<<1,1>>>(thrust::raw_pointer_cast(r.data())); - int res = r[0]; - EXPECT_EQ(res, 6); - - // ex_prefix_mul - paddle::framework::Dim<3> c = paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5)); - EXPECT_EQ(paddle::framework::get<0>(c), 1); - EXPECT_EQ(paddle::framework::get<1>(c), 3); - EXPECT_EQ(paddle::framework::get<2>(c), 12); - - // generate from an index - auto size = paddle::framework::make_dim(4, 5, 2); - c = paddle::framework::Dim<3>(14, size); - EXPECT_EQ(paddle::framework::get<0>(c), 2); - EXPECT_EQ(paddle::framework::get<1>(c), 3); - EXPECT_EQ(paddle::framework::get<2>(c), 0); - c = paddle::framework::Dim<3>(25, size); - EXPECT_EQ(paddle::framework::get<0>(c), 1); - EXPECT_EQ(paddle::framework::get<1>(c), 1); - EXPECT_EQ(paddle::framework::get<2>(c), 1); + // construct a Dim on the CPU + auto a = paddle::framework::make_dim(3, 4); + EXPECT_EQ(paddle::framework::get<0>(a), 3); + EXPECT_EQ(paddle::framework::get<1>(a), 4); + + // construct a Dim on the GPU + thrust::device_vector> t(2); + test<<<1, 1>>>(thrust::raw_pointer_cast(t.data())); + a = t[0]; + EXPECT_EQ(paddle::framework::get<0>(a), 5); + EXPECT_EQ(paddle::framework::get<1>(a), 6); + + // linearization + auto b = paddle::framework::make_dim(7, 8); + EXPECT_EQ(paddle::framework::linearize(a, b), 83); + + // product + EXPECT_EQ(paddle::framework::product(a), 30); + + // mutate a Dim + paddle::framework::get<1>(b) = 10; + EXPECT_EQ(paddle::framework::get<0>(b), 7); + EXPECT_EQ(paddle::framework::get<1>(b), 10); + + // dynamic access + paddle::framework::get(b, 0) = 8; + b[1] = 11; + EXPECT_EQ(paddle::framework::get<0>(b), 8); + EXPECT_EQ(paddle::framework::get<1>(b), 11); + EXPECT_EQ(paddle::framework::get(b, 0), 8); + EXPECT_EQ(b[1], 11); + + // dynamic access on GPU + thrust::device_vector r(1); + dyn_idx_gpu<<<1, 1>>>(thrust::raw_pointer_cast(r.data())); + int res = r[0]; + EXPECT_EQ(res, 6); + + // ex_prefix_mul + paddle::framework::Dim<3> c = + paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5)); + EXPECT_EQ(paddle::framework::get<0>(c), 1); + EXPECT_EQ(paddle::framework::get<1>(c), 3); + EXPECT_EQ(paddle::framework::get<2>(c), 12); + + // generate from an index + auto size = paddle::framework::make_dim(4, 5, 2); + c = paddle::framework::Dim<3>(14, size); + EXPECT_EQ(paddle::framework::get<0>(c), 2); + EXPECT_EQ(paddle::framework::get<1>(c), 3); + EXPECT_EQ(paddle::framework::get<2>(c), 0); + c = paddle::framework::Dim<3>(25, size); + EXPECT_EQ(paddle::framework::get<0>(c), 1); + EXPECT_EQ(paddle::framework::get<1>(c), 1); + EXPECT_EQ(paddle::framework::get<2>(c), 1); } TEST(Dim, Bool) { - auto a = paddle::framework::make_dim(3, 4); - auto b = paddle::framework::make_dim(5, 6); - auto c = paddle::framework::make_dim(3, 4); - - // in_bounds check - EXPECT_TRUE(paddle::framework::contained(a, b)); - EXPECT_FALSE(paddle::framework::contained(b, a)); - - // comparison - EXPECT_TRUE(a == a); - EXPECT_FALSE(a == b); - EXPECT_TRUE(a == c); + auto a = paddle::framework::make_dim(3, 4); + auto b = paddle::framework::make_dim(5, 6); + auto c = paddle::framework::make_dim(3, 4); + + // in_bounds check + EXPECT_TRUE(paddle::framework::contained(a, b)); + EXPECT_FALSE(paddle::framework::contained(b, a)); + + // comparison + EXPECT_TRUE(a == a); + EXPECT_FALSE(a == b); + EXPECT_TRUE(a == c); } TEST(Dim, Print) { - { - std::stringstream ss; - auto a = paddle::framework::make_dim(2, 3); - ss << a; - EXPECT_EQ(ss.str(), "2, 3"); - } - { - std::stringstream ss; - ss << paddle::framework::make_dim(8); - EXPECT_EQ(ss.str(), "8"); - } + { + std::stringstream ss; + auto a = paddle::framework::make_dim(2, 3); + ss << a; + EXPECT_EQ(ss.str(), "2, 3"); + } + { + std::stringstream ss; + ss << paddle::framework::make_dim(8); + EXPECT_EQ(ss.str(), "8"); + } } diff --git a/paddle/framework/eigen.h b/paddle/framework/eigen.h new file mode 100644 index 0000000000000000000000000000000000000000..4ba4fd4d110330805faf2468bd406cb23c6f1b1c --- /dev/null +++ b/paddle/framework/eigen.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/tensor.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { +namespace framework { + +// EigenDim converts paddle::platform::DDim into Eigen::DSizes. +template +struct EigenDim { + using Type = Eigen::DSizes; + + static Type From(const DDim& dims) { + PADDLE_ENFORCE(arity(dims) == D, "D must match arity(DDim)"); + Type ret; + for (int d = 0; d < arity(dims); d++) { + ret[d] = dims[d]; + } + return ret; + } +}; + +// Interpret paddle::platform::Tensor as EigenTensor and EigenConstTensor. +template +struct EigenTensor { + // TODO(qijun) Now, default type in unaligned, and we will make a benchmark on + // the speed of aligned and unaligned version in future. + using Type = Eigen::TensorMap>; + + using ConstType = + Eigen::TensorMap>; + + static Type From(Tensor& tensor, DDim dims) { + return Type(tensor.data(), EigenDim::From(dims)); + } + + static Type From(Tensor& tensor) { return From(tensor, tensor.dims_); } + + static ConstType From(const Tensor& tensor, DDim dims) { + return ConstType(tensor.data(), EigenDim::From(dims)); + } + + static ConstType From(const Tensor& tensor) { + return From(tensor, tensor.dims_); + } +}; + +template +struct EigenVector : public EigenTensor { + // Flatten is to reshape a Tensor into a one dimension EigenVector + static typename EigenTensor::Type Flatten(Tensor& tensor) { + return EigenTensor::From( + tensor, make_ddim({static_cast(product(tensor.dims_))})); + } + + static typename EigenTensor::ConstType Flatten(const Tensor& tensor) { + return EigenTensor::From( + tensor, make_ddim({static_cast(product(tensor.dims_))})); + } +}; + +template +using EigenMatrix = EigenTensor; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/eigen_test.cc b/paddle/framework/eigen_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..a9fa728e49a0dcc781e520a22c1ee5f921c4c733 --- /dev/null +++ b/paddle/framework/eigen_test.cc @@ -0,0 +1,101 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "paddle/framework/eigen.h" +#include + +namespace paddle { +namespace framework { + +TEST(EigenDim, From) { + EigenDim<3>::Type ed = EigenDim<3>::From(make_ddim({1, 2, 3})); + ASSERT_EQ(1, ed[0]); + ASSERT_EQ(2, ed[1]); + ASSERT_EQ(3, ed[2]); +} + +TEST(Eigen, Tensor) { + Tensor t; + float* p = t.mutable_data(make_ddim({1, 2, 3}), platform::CPUPlace()); + for (int i = 0; i < 1 * 2 * 3; i++) { + p[i] = static_cast(i); + } + + EigenTensor::Type et = EigenTensor::From(t); + + ASSERT_EQ(1, et.dimension(0)); + ASSERT_EQ(2, et.dimension(1)); + ASSERT_EQ(3, et.dimension(2)); + + for (int i = 0; i < 1; i++) { + for (int j = 0; j < 2; j++) { + for (int k = 0; k < 3; k++) { + ASSERT_NEAR((i * 2 + j) * 3 + k, et(i, j, k), 1e-6f); + } + } + } +} + +TEST(Eigen, VectorFrom) { + Tensor t; + float* p = t.mutable_data(make_ddim({6}), platform::CPUPlace()); + for (int i = 0; i < 6; i++) { + p[i] = static_cast(i); + } + + EigenVector::Type ev = EigenVector::From(t); + + ASSERT_EQ(6, ev.dimension(0)); + + for (int i = 0; i < 6; i++) { + ASSERT_NEAR(i, ev(i), 1e-6f); + } +} + +TEST(Eigen, VectorFlatten) { + Tensor t; + float* p = t.mutable_data(make_ddim({1, 2, 3}), platform::CPUPlace()); + for (int i = 0; i < 1 * 2 * 3; i++) { + p[i] = static_cast(i); + } + + EigenVector::Type ev = EigenVector::Flatten(t); + + ASSERT_EQ(1 * 2 * 3, ev.dimension(0)); + + for (int i = 0; i < 1 * 2 * 3; i++) { + ASSERT_NEAR(i, ev(i), 1e-6f); + } +} + +TEST(Eigen, Matrix) { + Tensor t; + float* p = t.mutable_data(make_ddim({2, 3}), platform::CPUPlace()); + for (int i = 0; i < 2 * 3; i++) { + p[i] = static_cast(i); + } + + EigenMatrix::Type em = EigenMatrix::From(t); + + ASSERT_EQ(2, em.dimension(0)); + ASSERT_EQ(3, em.dimension(1)); + + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 3; j++) { + ASSERT_NEAR(i * 3 + j, em(i, j), 1e-6f); + } + } +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/enforce.h b/paddle/framework/enforce.h deleted file mode 100644 index 56cb7f95647e81efef58b156002d0d378ee22820..0000000000000000000000000000000000000000 --- a/paddle/framework/enforce.h +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include - -namespace paddle { -namespace framework { - -/** - * @brief Enforce exception. Inherits std::exception - * - * All enforce condition not met, will throw an EnforceNotMet exception. - */ -class EnforceNotMet : public std::exception { - public: - EnforceNotMet(const std::string& msg, const char* file, int fileline) { - std::ostringstream sout; - sout << msg << " at [" << file << ":" << fileline << "];"; - all_msg_ = sout.str(); - } - - const char* what() const noexcept override { return all_msg_.c_str(); } - - private: - std::string all_msg_; -}; - -// From https://stackoverflow.com/questions/30130930/ -// __buildin_expect is in C++ 11 standard. Since the condition which enforced -// should be true in most situation, it will make the compiler generate faster -// code by adding `UNLIKELY` macro. -#define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) - -/** - * @brief Throw a EnforceNotMet exception, automatically filled __FILE__ & - * __LINE__ - * - * This macro take __VA_ARGS__, user can pass any type if that type can - * serialize to std::ostream - */ -#define PADDLE_THROW(...) \ - do { \ - throw ::paddle::framework::EnforceNotMet( \ - ::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \ - } while (0) - -/** - * @brief Enforce a condition, otherwise throw an EnforceNotMet - */ -#define PADDLE_ENFORCE(condition, ...) \ - do { \ - if (UNLIKELY(!(condition))) { \ - PADDLE_THROW(__VA_ARGS__); \ - } \ - } while (0) - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/net.cc b/paddle/framework/net.cc index 854ad8e33e9900ed4e54eceadc33c1ef36c1d116..501536657d76cc50b1cc4104007edd4b47758aea 100644 --- a/paddle/framework/net.cc +++ b/paddle/framework/net.cc @@ -1,20 +1,70 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + #include "paddle/framework/net.h" namespace paddle { namespace framework { -PlainNet::PlainNet(const NetDesc& def) {} +void PlainNet::CompleteAddOp(bool calc) { + add_op_done_ = true; + if (!calc) return; -void PlainNet::InferShape(const ScopePtr& scope) const { + std::unordered_set input_set; + std::unordered_set output_set; + std::unordered_set temp_output; for (auto& op : ops_) { - op.InferShape(); + for (auto& ipt : op->inputs_) { + if (!Contains(output_set, ipt)) { // Not other op's output + input_set.insert(ipt); + } else { + temp_output.insert(ipt); + } + } + + for (auto& opt : op->outputs_) { + output_set.insert(opt); + } } + inputs_.reserve(input_set.size()); + std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs_)); + + outputs_.reserve(output_set.size()); + std::vector tmp_index; + tmp_index.reserve(temp_output.size()); + int idx = 0; + for (auto& opt : output_set) { + if (Contains(temp_output, opt)) { + tmp_index.push_back(idx); + } + outputs_.push_back(opt); + ++idx; + } + + attrs_["temporary_index"] = tmp_index; } -void PlainNet::Run(const ScopePtr& scope, const DeviceContext& ctx) const { +std::string PlainNet::DebugString() const { + std::ostringstream os; + os << this->type_ << ":" << std::endl; for (auto& op : ops_) { - op.Run(ctx); + os << "\t" << op->DebugString() << std::endl; } + return os.str(); } + } // namespace framework } // namespace paddle diff --git a/paddle/framework/net.h b/paddle/framework/net.h index 0481d8f47ccbc171c0b752d48493498371ed8d4d..19c5fa223b4e75b1f06ca14ded053cebfd8bffe2 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -1,99 +1,50 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once -#include "paddle/framework/net_proto.pb.h" +#include +#include #include "paddle/framework/op_proto.pb.h" +#include "paddle/framework/op_registry.h" #include "paddle/framework/scope.h" #include "paddle/platform/device_context.h" namespace paddle { namespace framework { -using namespace paddle::platform; - -// operator's index stored in a network. -typedef int OpIndex; -/** - * NOTE following codes are some definitions of unimplemented concepts. - * We write some basic implementation to make Net compilable. These APIs will - * keep updating if the concepts related are implemented. - */ - -struct OpDesc; -struct OpAttrs {}; - -class Operator { - public: - Operator(const OpDesc &def) {} - void InferShape() const {} - void Run(const DeviceContext &ctx) const {} -}; - /** - * @brief Network that manage the operators it has. + * @brief Network is also a type of Operator + * + * It will manage the operators it has. * - * Network is the container and controller of a set of operators, user can build - * a real network from a NetDesc which is a protobuf message and use - * Network.Run() * to run all the operators in the network. + * Network is the container and controller of a set of operators. * A network object knows all Operators belonging to this network. Variables, * which are inputs and outputs of these operators, are created and managed by a * hierarchy of Scope objects. * - * This is the base class of network, all the networks should implement the apis + * This is the base class of network, all the networks should implement the APIs * it defines. */ -class Net { +class Net : public OperatorBase { public: - /** - * @brief Infer shapes of all inputs and outputs of operators. - */ - virtual void InferShape(const ScopePtr &scope) const = 0; - /** - * @brief Run the network. - * - * Run all the operators and return success(true) or not, with all the - * variables are located in `scope`. `context` describes the detail execution - * environment for ops. `begin` and `end` specify the scope of `ops_` to run, - * If no positive indexes are provided, all operators in `ops_` will run. - */ - virtual void Run(const ScopePtr &scope, const DeviceContext &ctx) const = 0; - - /** - * @brief Add an Operator according to `def`. - */ - virtual OpIndex AddOp(const OpProto &def) = 0; - - /** - * @brief Add optimizer operators acctording to `attrs`. - */ - virtual void AddOptimizerOps(const OpAttrs &attrs) = 0; - - /** - * @brief Add backward operators. - */ - virtual void AddBackwardOps() = 0; - - /** - * @brief Create a network. - */ - static std::unique_ptr Create(const NetDesc &def = NetDesc()); - - virtual ~Net() {} + virtual void AddOp(const OperatorPtr& op) = 0; + virtual void CompleteAddOp(bool calc) = 0; }; +using NetPtr = std::shared_ptr; + /** * @brief a basic implementation of Net. * @@ -103,18 +54,14 @@ class Net { class PlainNet : public Net { public: /** - * @brief Initialize a PlainNet. - * - * Initialize from a network describe by `def`. NetDesc is the definition of - * a network. - */ - PlainNet(const NetDesc &def); - - /** - * Infer all the operators' input and output varialbes' shapes, will be called + * Infer all the operators' input and output variables' shapes, will be called * before every mini-batch */ - virtual void InferShape(const ScopePtr &scope) const override; + void InferShape(const ScopePtr& scope) const override { + for (auto& op : ops_) { + op->InferShape(scope); + } + } /** * @brief Run the network. @@ -123,49 +70,34 @@ class PlainNet : public Net { * scope will be used instead. If no OpContext is provicded, default context * will be used. */ - virtual void Run(const ScopePtr &scope, - const DeviceContext &ctx) const override; + void Run(const ScopePtr& scope, + const platform::DeviceContext& dev_ctx) const override { + for (auto& op : ops_) { + op->Run(scope, dev_ctx); + } + } /** - * @brief Add an operator to this network. + * @brief Add an operator by ptr */ - virtual OpIndex AddOp(const OpProto &def) override; + void AddOp(const OperatorPtr& op) override { + PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); + ops_.push_back(op); + } - /** - * @brief Add all optimizer operators related into the network. - */ - virtual void AddOptimizerOps(const OpAttrs &attrs) override; + void CompleteAddOp(bool calculate = true) override; - /** - * @brief Add all backward operators related into the network. - */ - virtual void AddBackwardOps() override; - - virtual ~PlainNet() override {} + std::string DebugString() const override; - protected: - /** - * @brief Build the network. - * - * Create operators accordding to `def`, will be called by the constructor. - */ - void BuildNet(const NetDesc &def); - - /** - * @brief Add an operator into this network. - * - * Add a operator which is identified as `type` and has attributes described - * in `attrs`, the `inputs` are the keys of readonly input variables, - * `outputs` are keys of mutable output variables. An `OpIndex` will be - * returned to indicate the offset of the new operator in `ops_`. - */ - OpIndex AddOp(const std::string &type, const std::vector &inputs, - const std::vector &outputs, - const OpAttrs &attrs = OpAttrs()); + std::vector ops_; private: - // the operators owned by `Network`. - std::vector ops_; + bool add_op_done_{false}; + + template + static bool Contains(T container, KeyType key) { + return container.find(key) != container.end(); + } }; } // namespace framework diff --git a/paddle/framework/net_op_test.cc b/paddle/framework/net_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..e814a7e43d7ae7af0974d1a7c8b072bde5ba0238 --- /dev/null +++ b/paddle/framework/net_op_test.cc @@ -0,0 +1,67 @@ +#include +#include +#include +#include + +namespace pd = paddle::framework; + +static int infer_shape_cnt = 0; +static int run_cnt = 0; + +class TestOp : public pd::OperatorBase { + public: + void InferShape(const paddle::framework::ScopePtr& scope) const override { + ++infer_shape_cnt; + } + void Run(const paddle::framework::ScopePtr& scope, + const paddle::platform::DeviceContext& dev_ctx) const override { + ++run_cnt; + } +}; + +template +void AssertSameVectorWithoutOrder(const std::vector& expected, + const std::vector& actual) { + ASSERT_EQ(expected.size(), actual.size()); + std::unordered_set expected_set; + for (auto& tmp : expected) { + expected_set.insert(tmp); + } + for (auto& act : actual) { + ASSERT_NE(expected_set.end(), expected_set.find(act)); + } +} + +TEST(OpKernel, all) { + auto net = std::make_shared(); + ASSERT_NE(net, nullptr); + + auto op1 = std::make_shared(); + op1->inputs_ = {"x", "w1", "b1"}; + op1->outputs_ = {"y"}; + net->AddOp(op1); + + auto op2 = std::make_shared(); + op2->inputs_ = {"y", "w2", "b2"}; + op2->outputs_ = {"z"}; + net->AddOp(op2); + + net->CompleteAddOp(); + AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, net->inputs_); + AssertSameVectorWithoutOrder({"y", "z"}, net->outputs_); + auto tmp_idx_iter = net->attrs_.find("temporary_index"); + ASSERT_NE(net->attrs_.end(), tmp_idx_iter); + auto& tmp_idx = boost::get>(tmp_idx_iter->second); + ASSERT_EQ(1UL, tmp_idx.size()); + ASSERT_EQ("y", net->outputs_[tmp_idx[0]]); + + auto scope = std::make_shared(); + paddle::platform::CPUDeviceContext dev_ctx; + + net->InferShape(scope); + net->Run(scope, dev_ctx); + ASSERT_EQ(2, infer_shape_cnt); + ASSERT_EQ(2, run_cnt); + + ASSERT_THROW(net->AddOp(op2), std::runtime_error); +} diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index 4b35e04e681b414c36cf6d9aee9e64dd68ba5da9..1d14535c50b542733663a6900a8b5f2033290ea6 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #include namespace paddle { @@ -33,4 +47,4 @@ void AttrTypeHelper::SetAttrType>(AttrProto* attr) { attr->set_type(paddle::framework::AttrType::STRINGS); } } // namespace framework -} // namespace paddle \ No newline at end of file +} // namespace paddle diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 6be6ae15c24e34386c61f19026bdad07b558adf7..c41fe10729501698fd07f59456f64ac26df77f08 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -61,7 +62,14 @@ class OpProtoAndCheckerMaker { OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) : proto_(proto), op_checker_(op_checker) {} - ~OpProtoAndCheckerMaker() { CheckNoDuplicatedAttrs(); } + ~OpProtoAndCheckerMaker() { + PADDLE_ENFORCE(validated_, "should call Validate after build"); + } + + void Validate() { + validated_ = true; + CheckNoDuplicatedInOutAttrs(); + } protected: void AddInput(const std::string& name, const std::string& comment, @@ -163,19 +171,26 @@ Add a mark to which output is temporary is helpful for future optimization. } } - void CheckNoDuplicatedAttrs() { + void CheckNoDuplicatedInOutAttrs() { std::unordered_set names; - size_t cnt = 0; + auto checker = [&](const std::string& name) { + PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name); + names.insert(name); + }; for (auto& attr : proto_->attrs()) { - names.insert(attr.name()); - ++cnt; + checker(attr.name()); + } + for (auto& input : proto_->inputs()) { + checker(input.name()); + } + for (auto& output : proto_->outputs()) { + checker(output.name()); } - PADDLE_ENFORCE(names.size() == cnt, - "Cannot register two attribute in same name!"); } OpProto* proto_; OpAttrChecker* op_checker_; + bool validated_{false}; bool has_multiple_input_{false}; bool has_multiple_output_{false}; bool has_temporary_output_{false}; @@ -183,6 +198,8 @@ Add a mark to which output is temporary is helpful for future optimization. class OpRegistry { using OpCreator = std::function; + using VarIndexMap = std::unordered_map; + using VarNameList = std::vector; public: template @@ -190,36 +207,71 @@ class OpRegistry { creators()[op_type] = [] { return new OpType; }; OpProto& op_proto = protos()[op_type]; OpAttrChecker& op_checker = op_checkers()[op_type]; - ProtoMakerType(&op_proto, &op_checker); + auto maker = ProtoMakerType(&op_proto, &op_checker); + maker.Validate(); *op_proto.mutable_type() = op_type; PADDLE_ENFORCE( op_proto.IsInitialized(), "Fail to initialize %s's OpProto, because %s is not initialized", op_type, op_proto.InitializationErrorString()); + + VarIndexMaps()[op_type].reset(new VarIndexMap()); + auto& varmap = *VarIndexMaps()[op_type]; + int idx = 0; + for (auto& var : op_proto.inputs()) { + varmap[var.name()] = idx++; + } + idx = 0; + for (auto& var : op_proto.outputs()) { + varmap[var.name()] = idx++; + } + } + + static OperatorPtr CreateOp(const std::string& type, + const VarNameList& inputs, + const VarNameList& outputs, + const AttributeMap& attrs) { + auto op_create_it = creators().find(type); + PADDLE_ENFORCE(op_create_it != creators().end(), + "Operator %s cannot be found", type); + + auto op = op_create_it->second(); + op->type_ = type; + op->inputs_ = inputs; + op->outputs_ = outputs; + op->attrs_ = attrs; + op_checkers().at(type).Check(op->attrs_); + + GenerateTempVariableName(op); + + { + auto var_index_it = VarIndexMaps().find(type); + if (var_index_it != VarIndexMaps().end()) { + op->in_out_idxs_ = var_index_it->second; + } + } + + op->Init(); + return OperatorPtr(op); } static OperatorPtr CreateOp(const OpDesc& op_desc) { - std::string op_type = op_desc.type(); - OperatorPtr op(creators().at(op_type)()); - op->desc_ = op_desc; - op->inputs_.reserve((size_t)op_desc.inputs_size()); + std::vector inputs; + inputs.reserve((size_t)op_desc.inputs_size()); std::copy(op_desc.inputs().begin(), op_desc.inputs().end(), - std::back_inserter(op->inputs_)); - op->outputs_.reserve((size_t)op_desc.outputs_size()); + std::back_inserter(inputs)); + + std::vector outputs; + outputs.reserve((size_t)op_desc.outputs_size()); std::copy(op_desc.outputs().begin(), op_desc.outputs().end(), - std::back_inserter(op->outputs_)); + std::back_inserter(outputs)); + + AttributeMap attrs; for (auto& attr : op_desc.attrs()) { - op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr); + attrs[attr.name()] = AttrTypeHelper::GetAttrValue(attr); } - op_checkers().at(op_type).Check(op->attrs_); - op->Init(); - return op; - } - private: - static std::unordered_map& creators() { - static std::unordered_map creators_; - return creators_; + return CreateOp(op_desc.type(), inputs, outputs, attrs); } static std::unordered_map& protos() { @@ -227,6 +279,29 @@ class OpRegistry { return protos_; }; + private: + static std::unordered_map>& + VarIndexMaps() { + static std::unordered_map> maps_; + return maps_; + } + + static void GenerateTempVariableName(OperatorBase* op) { + static std::atomic gUniqId(0UL); + for (auto& outname : op->outputs_) { + if (outname == OperatorBase::TMP_VAR_NAME()) { + outname += op->type_; + outname += "@"; + outname += std::to_string(gUniqId.fetch_add(1)); + } + } + } + + static std::unordered_map& creators() { + static std::unordered_map creators_; + return creators_; + } + static std::unordered_map& op_checkers() { static std::unordered_map op_checkers_; return op_checkers_; @@ -241,12 +316,18 @@ class OpRegisterHelper { } }; +/** + * check if MACRO is used in GLOBAL NAMESPACE. + */ #define STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \ struct __test_global_namespace_##uniq_name##__ {}; \ static_assert(std::is_same<::__test_global_namespace_##uniq_name##__, \ __test_global_namespace_##uniq_name##__>::value, \ msg) +/** + * Macro to Register Operator. + */ #define REGISTER_OP(__op_type, __op_class, __op_maker_class) \ STATIC_ASSERT_GLOBAL_NAMESPACE(__reg_op__##__op_type, \ "REGISTER_OP must be in global namespace"); \ @@ -254,27 +335,36 @@ class OpRegisterHelper { __op_register_##__op_type##__(#__op_type); \ int __op_register_##__op_type##_handle__() { return 0; } -#define REGISTER_OP_KERNEL(type, GPU_OR_CPU, PlaceType, KernelType) \ +/** + * Macro to Register OperatorKernel. + */ +#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...) \ STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_op_kernel_##type##_##GPU_OR_CPU##__, \ + __reg_op_kernel_##type##_##DEVICE_TYPE##__, \ "REGISTER_OP_KERNEL must be in global namespace"); \ struct __op_kernel_register__##type##__ { \ __op_kernel_register__##type##__() { \ ::paddle::framework::OperatorWithKernel::OpKernelKey key; \ key.place_ = PlaceType(); \ ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \ - .reset(new KernelType()); \ + .reset(new __VA_ARGS__()); \ } \ }; \ static __op_kernel_register__##type##__ __reg_kernel_##type##__; \ - int __op_kernel_register_##type##_handle_##GPU_OR_CPU##__() { return 0; } + int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; } -#define REGISTER_OP_GPU_KERNEL(type, KernelType) \ - REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, KernelType) +// (type, KernelType) +#define REGISTER_OP_GPU_KERNEL(type, ...) \ + REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__) -#define REGISTER_OP_CPU_KERNEL(type, KernelType) \ - REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, KernelType) +// (type, KernelType) +#define REGISTER_OP_CPU_KERNEL(type, ...) \ + REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) +/** + * Macro to mark what Operator and Kernel we will use and tell the compiler to + * link them into target. + */ #define USE_OP_WITHOUT_KERNEL(op_type) \ STATIC_ASSERT_GLOBAL_NAMESPACE( \ __use_op_without_kernel_##op_type, \ @@ -292,15 +382,16 @@ class OpRegisterHelper { __attribute__((unused)) = \ __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__() -#ifdef PADDLE_ONLY_CPU -#define USE_OP(op_type) \ +// use Operator with only cpu kernel. +#define USE_OP_CPU(op_type) \ USE_OP_WITHOUT_KERNEL(op_type); \ - USE_OP_KERNEL(op_type, CPU); + USE_OP_KERNEL(op_type, CPU) +#ifdef PADDLE_ONLY_CPU +#define USE_OP(op_type) USE_OP_CPU(op_type) #else -#define USE_OP(op_type) \ - USE_OP_WITHOUT_KERNEL(op_type); \ - USE_OP_KERNEL(op_type, CPU); \ +#define USE_OP(op_type) \ + USE_OP_CPU(op_type); \ USE_OP_KERNEL(op_type, GPU) #endif diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 4791d4aaab4cfe19d1cca7741ce259f8f7aeb18a..32a7e88a894fb61a460443b7d593a6cf44bc98c5 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -1,6 +1,8 @@ #include "paddle/framework/op_registry.h" #include +namespace pd = paddle::framework; + namespace paddle { namespace framework { class CosineOp : public OperatorBase { @@ -28,8 +30,6 @@ class MyTestOp : public OperatorBase { void InferShape(const ScopePtr& scope) const override {} void Run(const ScopePtr& scope, const platform::DeviceContext& dev_ctx) const override {} - - public: }; class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { @@ -91,7 +91,7 @@ TEST(OpRegistry, IllegalAttr) { try { paddle::framework::OperatorPtr op __attribute__((unused)) = paddle::framework::OpRegistry::CreateOp(op_desc); - } catch (paddle::framework::EnforceNotMet err) { + } catch (std::runtime_error& err) { caught = true; std::string msg = "larger_than check fail"; const char* err_msg = err.what(); @@ -138,7 +138,7 @@ TEST(OpRegistry, CustomChecker) { try { paddle::framework::OperatorPtr op __attribute__((unused)) = paddle::framework::OpRegistry::CreateOp(op_desc); - } catch (paddle::framework::EnforceNotMet err) { + } catch (std::runtime_error& err) { caught = true; std::string msg = "Attribute 'test_attr' is required!"; const char* err_msg = err.what(); @@ -157,7 +157,7 @@ TEST(OpRegistry, CustomChecker) { try { paddle::framework::OperatorPtr op __attribute__((unused)) = paddle::framework::OpRegistry::CreateOp(op_desc); - } catch (paddle::framework::EnforceNotMet err) { + } catch (std::runtime_error& err) { caught = true; std::string msg = "'test_attr' must be even!"; const char* err_msg = err.what(); @@ -182,3 +182,35 @@ TEST(OpRegistry, CustomChecker) { int test_attr = op->GetAttr("test_attr"); ASSERT_EQ(test_attr, 4); } + +class TestAttrProtoMaker : public pd::OpProtoAndCheckerMaker { + public: + TestAttrProtoMaker(pd::OpProto* proto, pd::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddAttr("scale", "scale of test op"); + AddAttr("scale", "scale of test op"); + } +}; + +TEST(ProtoMaker, DuplicatedAttr) { + pd::OpProto op_proto; + pd::OpAttrChecker op_checker; + auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker); + ASSERT_THROW(proto_maker.Validate(), std::runtime_error); +} + +class TestInOutProtoMaker : public pd::OpProtoAndCheckerMaker { + public: + TestInOutProtoMaker(pd::OpProto* proto, pd::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("input", "input of test op"); + AddInput("input", "input of test op"); + } +}; + +TEST(ProtoMaker, DuplicatedInOut) { + pd::OpProto op_proto; + pd::OpAttrChecker op_checker; + auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker); + ASSERT_THROW(proto_maker.Validate(), std::runtime_error); +} diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 8f7adff8b3982e91a3d7f6d598cd62d5005d5f17..1e57e9a20f3eecfac266d67276347ad4b5b780f9 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -12,32 +12,92 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include + #include "paddle/framework/operator.h" namespace paddle { namespace framework { +template <> +Eigen::DefaultDevice* KernelContext::GetEigenDevice< + platform::CPUPlace, Eigen::DefaultDevice>() const { + return device_context_.get_eigen_device(); +} + +#ifndef PADDLE_ONLY_CPU +template <> +Eigen::GpuDevice* +KernelContext::GetEigenDevice() const { + return device_context_.get_eigen_device(); +} +#endif + +const std::string& OperatorBase::Input(const std::string& name) const { + auto it = in_out_idxs_->find(name); + PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", + name); + + if (attrs_.count("input_format") == 0) { + return inputs_[it->second]; + } else { + const auto& input_format = GetAttr>("input_format"); + int idx = input_format[it->second]; + return inputs_.at(idx); + } +} + +std::vector OperatorBase::Inputs(const std::string& name) const { + auto input_format = GetAttr>("input_format"); + auto offset = in_out_idxs_->at(name); + + return std::vector{ + inputs_.begin() + input_format.at(offset), + inputs_.begin() + input_format.at(offset + 1)}; +} + +const std::string& OperatorBase::Output(const std::string& name) const { + auto it = in_out_idxs_->find(name); + PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", + name); + + if (attrs_.count("output_format") == 0) { + return outputs_[it->second]; + } else { + const auto& output_format = GetAttr>("output_format"); + int idx = output_format[it->second]; + return outputs_.at(idx); + } +} + +std::vector OperatorBase::Outputs(const std::string& name) const { + auto output_format = GetAttr>("output_format"); + auto offset = in_out_idxs_->at(name); + + return std::vector{ + outputs_.begin() + output_format.at(offset), + outputs_.begin() + output_format.at(offset + 1)}; +} + std::string OperatorBase::DebugString() const { std::stringstream ss; - ss << "=================\n"; - ss << "type = " << desc_.type() << "\n"; - ss << "inputs = ["; - for (auto& ipt : inputs_) { - ss << ipt << ", "; - } - ss << "]\n"; - ss << "outputs = ["; - for (auto& opt : outputs_) { - ss << opt << ", "; + ss << "Op(" << type_ << "), inputs:("; + for (size_t i = 0; i < inputs_.size(); ++i) { + ss << inputs_[i]; + if (i != inputs_.size() - 1) { + ss << ", "; + } } - ss << "]\n"; - ss << "attr_keys = ["; - for (auto& attr : attrs_) { - ss << attr.first << ", "; + ss << "), outputs:("; + for (size_t i = 0; i < outputs_.size(); ++i) { + ss << outputs_[i]; + if (i != outputs_.size() - 1) { + ss << ", "; + } } - ss << "]\n"; + ss << ")."; return ss.str(); } } // namespace framework -} // namespace paddle \ No newline at end of file +} // namespace paddle diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index cf79f379fae1e0d9bfa21cea4d22cf0264fcd9a6..5f046d6293d5dbb9fd594b0c13aa8d62012cf915 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -14,21 +14,38 @@ limitations under the License. */ #pragma once -#include -#include -#include -#include -#include -#include -#include #include #include #include #include +#include "paddle/framework/attr_checker.h" +#include "paddle/framework/op_desc.pb.h" +#include "paddle/framework/op_proto.pb.h" +#include "paddle/framework/scope.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/place.h" +#include "paddle/utils/Error.h" + namespace paddle { namespace framework { +template +struct EigenDeviceConverter; + +template <> +struct EigenDeviceConverter { + using EigenDeviceType = Eigen::DefaultDevice; +}; + +#ifndef PADDLE_ONLY_CPU +template <> +struct EigenDeviceConverter { + using EigenDeviceType = Eigen::GpuDevice; +}; +#endif + class OperatorBase; using OperatorPtr = std::shared_ptr; /** @@ -39,6 +56,13 @@ using OperatorPtr = std::shared_ptr; */ class OperatorBase { public: + /// If a variable is a empty variable, that name will be used. + static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; } + + /// If a variable is a temporary variable, that name will be set in Python, + /// but it will be convert to a unique name in scope after OpCreator. + static std::string TMP_VAR_NAME() { return "@TEMP@"; } + virtual ~OperatorBase() {} template @@ -48,7 +72,7 @@ class OperatorBase { return boost::get(attrs_.at(name)); } - std::string DebugString() const; + virtual std::string DebugString() const; /// Init will be called after CreateOperator, you can put some initialization /// logic here. @@ -62,14 +86,76 @@ class OperatorBase { virtual void Run(const ScopePtr& scope, const platform::DeviceContext& dev_ctx) const = 0; - protected: - std::string Type() const { return desc_.type(); } + // Get a input with argument's name described in `op_proto` + const std::string& Input(const std::string& name) const; + // Get a input which has multiple variables. + // TODO add a vector_view to prevent memory copy. + std::vector Inputs(const std::string& name) const; + // Get a output with argument's name described in `op_proto` + const std::string& Output(const std::string& name) const; + // Get an output which has multiple variables. + // TODO add a vector_view to prevent memory copy. + std::vector Outputs(const std::string& name) const; public: - OpDesc desc_; + std::string type_; std::vector inputs_; std::vector outputs_; AttributeMap attrs_; + // store the arguments' offset described in op_desc. + std::shared_ptr> in_out_idxs_; +}; + +class KernelContext { + public: + KernelContext(const OperatorBase* op, const std::shared_ptr& scope, + const platform::DeviceContext& device_context) + : op_(*op), scope_(scope), device_context_(device_context) {} + + const Variable* Input(int index) const { + return scope_->GetVariable(op_.inputs_[index]); + } + + Variable* Output(int index) const { + return scope_->GetVariable(op_.outputs_[index]); + } + + const Variable* Input(const std::string& name) const { + return scope_->GetVariable(op_.Input(name)); + } + + const Variable* Output(const std::string& name) const { + return scope_->GetVariable(op_.Output(name)); + } + + const std::vector Inputs(const std::string& name) const { + auto names = op_.Inputs(name); + std::vector res; + std::transform( + names.begin(), names.end(), res.begin(), + [this](const std::string& name) { return scope_->GetVariable(name); }); + return res; + } + + const std::vector Outputs(const std::string& name) const { + auto names = op_.Outputs(name); + std::vector res; + std::transform( + names.begin(), names.end(), res.begin(), + [this](const std::string& name) { return scope_->GetVariable(name); }); + return res; + } + + template ::EigenDeviceType> + DeviceType* GetEigenDevice() const; + + platform::Place GetPlace() const { return device_context_.GetPlace(); } + + const OperatorBase& op_; + const std::shared_ptr& scope_; + const platform::DeviceContext& device_context_; }; class OpKernel { @@ -80,24 +166,6 @@ class OpKernel { * device resource such as CUDA stream, cublas handle, etc. from * KernelContext. User should construct it before run the Operator. */ - class KernelContext { - public: - KernelContext(const OperatorBase* op, const ScopePtr& scope, - const platform::DeviceContext& device_context) - : op_(*op), scope_(scope), device_context_(device_context) {} - - const Variable* Input(int index) const { - return scope_->GetVariable(op_.inputs_[index]); - } - - Variable* Output(int index) const { - return scope_->GetVariable(op_.outputs_[index]); - } - - const OperatorBase& op_; - const ScopePtr& scope_; - const platform::DeviceContext& device_context_; - }; virtual void Compute(const KernelContext& context) const = 0; @@ -142,8 +210,8 @@ class OperatorWithKernel : public OperatorBase { void Run(const ScopePtr& scope, const platform::DeviceContext& dev_ctx) const final { - auto& opKernel = AllOpKernels().at(Type()).at(OpKernelKey(dev_ctx)); - opKernel->Compute(OpKernel::KernelContext(this, scope, dev_ctx)); + auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); + opKernel->Compute(KernelContext(this, scope, dev_ctx)); } static std::unordered_map& @@ -151,6 +219,7 @@ class OperatorWithKernel : public OperatorBase { static std::unordered_map g_all_op_kernels; return g_all_op_kernels; } + void InferShape(const std::shared_ptr& scope) const final { std::vector ins; VarNamesToTensors(scope, inputs_, &ins); diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index d0c3153faef6537cb4260c9d7fe093acb8f839f0..8e55d0111f39b2f632cf5a49c2ad3f210683652c 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -19,14 +19,17 @@ limitations under the License. */ namespace paddle { namespace framework { -class OperatorTest : public OperatorBase { +static int op_run_num = 0; + +class OpWithoutKernelTest : public OperatorBase { public: void Init() override { x = 1; } void InferShape(const ScopePtr& scope) const override {} void Run(const ScopePtr& scope, const platform::DeviceContext& dev_ctx) const override { - float scale = GetAttr("scale"); - ASSERT_NEAR(scale, 3.14, 1e-5); + op_run_num++; + ASSERT_EQ((int)inputs_.size(), 1); + ASSERT_EQ((int)outputs_.size(), 1); ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr); ASSERT_EQ(x, 1); ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr); @@ -36,15 +39,14 @@ class OperatorTest : public OperatorBase { float x = 0; }; -class OperatorTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { +class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { public: - OperatorTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) + OpeWithoutKernelTestProtoAndCheckerMaker(OpProto* proto, + OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("input", "input of test op"); AddOutput("output", "output of test op"); - AddAttr("scale", "scale of cosine op") - .SetDefault(1.0) - .LargerThan(0.0); + AddAttr("scale", "scale of cosine op"); AddComment("This is test op"); } }; @@ -52,8 +54,8 @@ class OperatorTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { } // namespace framework } // namespace paddle -REGISTER_OP(test_operator, paddle::framework::OperatorTest, - paddle::framework::OperatorTestProtoAndCheckerMaker); +REGISTER_OP(test_operator, paddle::framework::OpWithoutKernelTest, + paddle::framework::OpeWithoutKernelTestProtoAndCheckerMaker); TEST(OperatorBase, all) { paddle::framework::OpDesc op_desc; @@ -63,18 +65,17 @@ TEST(OperatorBase, all) { auto attr = op_desc.mutable_attrs()->Add(); attr->set_name("scale"); attr->set_type(paddle::framework::AttrType::FLOAT); - float scale = 3.14; - attr->set_f(scale); + attr->set_f(3.14); paddle::platform::CPUDeviceContext device_context; auto scope = std::make_shared(); paddle::framework::OperatorPtr op = paddle::framework::OpRegistry::CreateOp(op_desc); - ASSERT_EQ(op->GetAttr("scale"), scale); scope->CreateVariable("OUT1"); + ASSERT_EQ(paddle::framework::op_run_num, 0); op->Run(scope, device_context); - std::cout << op->DebugString() << std::endl; + ASSERT_EQ(paddle::framework::op_run_num, 1); } namespace paddle { @@ -84,8 +85,8 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { public: OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("input", "input of test op"); - AddOutput("output", "output of test op"); + AddInput("x", "input of test op"); + AddOutput("y", "output of test op"); AddAttr("scale", "scale of cosine op") .SetDefault(1.0) .LargerThan(0.0); @@ -93,19 +94,76 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { } }; +static int cpu_kernel_run_num = 0; + class OpWithKernelTest : public OperatorWithKernel { protected: void InferShape(const std::vector& inputs, const std::vector& outputs) const override {} }; +template class CPUKernelTest : public OpKernel { public: - void Compute(const KernelContext& context) const { - float scale = context.op_.GetAttr("scale"); - ASSERT_NEAR(scale, 3.14, 1e-5); + void Compute(const KernelContext& ctx) const { std::cout << "this is cpu kernel" << std::endl; - std::cout << context.op_.DebugString() << std::endl; + std::cout << ctx.op_.DebugString() << std::endl; + cpu_kernel_run_num++; + ASSERT_EQ(ctx.op_.Input("x"), "IN1"); + ASSERT_EQ(ctx.op_.Output("y"), "OUT1"); + } +}; + +// multiple inputs test +class OperatorMultiInputsTest : public OperatorBase { + public: + void Init() override { x = 1; } + void InferShape(const std::shared_ptr& scope) const override {} + void Run(const std::shared_ptr& scope, + const platform::DeviceContext& dev_ctx) const override { + ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr); + ASSERT_EQ(x, 1); + ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr); + ASSERT_EQ(Input("x"), "IN1"); + ASSERT_EQ(Input("y"), "OUT1"); + } + + public: + float x = 0; +}; + +class OpKernelTestMultiInputsProtoAndCheckerMaker + : public OpProtoAndCheckerMaker { + public: + OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto, + OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInputs("xs", "inputs of test op"); + AddInput("k", "input of test op"); + AddOutputs("ys", "outputs of test op"); + AddAttr("scale", "scale of cosine op") + .SetDefault(1.0) + .LargerThan(0.0); + AddComment("This is test op"); + } +}; + +class CPUKernalMultiInputsTest : public OpKernel { + public: + void Compute(const KernelContext& ctx) const { + auto xs = ctx.op_.Inputs("xs"); + ASSERT_EQ(xs.size(), 3UL); + ASSERT_EQ(xs[0], "x0"); + ASSERT_EQ(xs[1], "x1"); + ASSERT_EQ(xs[2], "x2"); + + auto k = ctx.op_.Input("k"); + ASSERT_EQ(k, "k0"); + + auto ys = ctx.op_.Outputs("ys"); + ASSERT_EQ(ys.size(), 2UL); + ASSERT_EQ(ys[0], "y0"); + ASSERT_EQ(ys[1], "y1"); } }; @@ -114,8 +172,10 @@ class CPUKernelTest : public OpKernel { REGISTER_OP(op_with_kernel, paddle::framework::OpWithKernelTest, paddle::framework::OpKernelTestProtoAndCheckerMaker); -REGISTER_OP_CPU_KERNEL(op_with_kernel, paddle::framework::CPUKernelTest); +REGISTER_OP_CPU_KERNEL(op_with_kernel, + paddle::framework::CPUKernelTest); +// test with single input TEST(OpKernel, all) { paddle::framework::OpDesc op_desc; op_desc.set_type("op_with_kernel"); @@ -131,5 +191,51 @@ TEST(OpKernel, all) { paddle::framework::OperatorPtr op = paddle::framework::OpRegistry::CreateOp(op_desc); + ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 0); + op->Run(scope, cpu_device_context); + ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 1); +} + +REGISTER_OP(op_multi_inputs_with_kernel, paddle::framework::OpWithKernelTest, + paddle::framework::OpKernelTestMultiInputsProtoAndCheckerMaker); +REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel, + paddle::framework::CPUKernalMultiInputsTest); + +// test with multi inputs +TEST(OpKernel, multi_inputs) { + using namespace paddle::framework; + + OpDesc op_desc; + op_desc.set_type("op_multi_inputs_with_kernel"); + *op_desc.mutable_inputs()->Add() = "x0"; + *op_desc.mutable_inputs()->Add() = "x1"; + *op_desc.mutable_inputs()->Add() = "x2"; + *op_desc.mutable_inputs()->Add() = "k0"; + *op_desc.mutable_outputs()->Add() = "y0"; + *op_desc.mutable_outputs()->Add() = "y1"; + auto attr = op_desc.mutable_attrs()->Add(); + attr->set_name("scale"); + attr->set_type(paddle::framework::AttrType::FLOAT); + attr->set_f(3.14); + + auto attr0 = op_desc.mutable_attrs()->Add(); + attr0->set_name("input_format"); + attr0->set_type(paddle::framework::AttrType::INTS); + auto input_format = attr0->mutable_ints(); + input_format->Add(0); // x0 + input_format->Add(3); // k + input_format->Add(4); // end + + auto attr1 = op_desc.mutable_attrs()->Add(); + attr1->set_name("output_format"); + attr1->set_type(paddle::framework::AttrType::INTS); + auto output_format = attr1->mutable_ints(); + output_format->Add(0); // y0 + output_format->Add(2); // y1 + + paddle::platform::CPUDeviceContext cpu_device_context; + auto scope = std::make_shared(); + + OperatorPtr op(paddle::framework::OpRegistry::CreateOp(op_desc)); op->Run(scope, cpu_device_context); } diff --git a/paddle/framework/tensor.cc b/paddle/framework/tensor.cc new file mode 100644 index 0000000000000000000000000000000000000000..964f15ab66bca7da75824e192e61600c29e572c0 --- /dev/null +++ b/paddle/framework/tensor.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include + +namespace paddle { +namespace framework {} +} // namespace paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 62e0710a8244ce80e35d2cf9a922c4fd9f1a1b69..93c6fad5d3d9f3de100d30161e6e438eb43816a2 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -17,19 +17,34 @@ limitations under the License. */ #include #include #include +#include #include "paddle/framework/ddim.h" -#include "paddle/framework/enforce.h" #include "paddle/memory/memory.h" +#include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "unsupported/Eigen/CXX11/Tensor" namespace paddle { +namespace pybind { +namespace details { // forward declare +template +struct CastToPyBufferImpl; +} // namespace details +} // namespace pybind namespace framework { class Tensor { - public: - Tensor() : numel_(0), offset_(0) {} + template + friend struct paddle::pybind::details::CastToPyBufferImpl; + + template + friend struct EigenTensor; - Tensor& operator=(const Tensor& src) = delete; + template + friend struct EigenVector; + + public: + Tensor() : offset_(0) {} template const T* data() const { @@ -39,21 +54,40 @@ class Tensor { } template - T* mutable_data(DDim dims, paddle::platform::Place place) { + T* data() { + CheckDims(); + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + + offset_); + } + + template + T* mutable_data(DDim dims, platform::Place place) { set_dims(dims); return mutable_data(place); } template - T* mutable_data(paddle::platform::Place place) { - PADDLE_ENFORCE(numel_ > 0, - "Tensor::numel_ must be larger than zero to call " + T* mutable_data(platform::Place place) { + PADDLE_ENFORCE(product(dims_) > 0, + "Tensor's numel must be larger than zero to call " "Tensor::mutable_data. Call Tensor::set_dim first."); if (holder_ == nullptr || !(holder_->place() == place) /* some versions of boost::variant don't have operator!= */ - || holder_->size() < numel_ * sizeof(T) + offset_) { - holder_.reset(new PlaceholderImpl(place, numel_ * sizeof(T))); + || holder_->size() < product(dims_) * sizeof(T) + offset_) { + if (platform::is_cpu_place(place)) { + holder_.reset(new PlaceholderImpl( + boost::get(place), product(dims_) * sizeof(T))); + } else if (platform::is_gpu_place(place)) { +#ifdef PADDLE_ONLY_CPU + PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); +#else + holder_.reset(new PlaceholderImpl( + boost::get(place), product(dims_) * sizeof(T))); +#endif + } else { + PADDLE_THROW("Unknown 'place'."); + } offset_ = 0; } return reinterpret_cast(reinterpret_cast(holder_->ptr()) + @@ -69,12 +103,12 @@ class Tensor { } template - void CopyFrom(const Tensor& src, paddle::platform::Place dst_place) { + void CopyFrom(const Tensor& src, platform::Place dst_place) { PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) && platform::is_cpu_place(dst_place), "Tensor::CopyFrom only support CPU now."); src.CheckDims(); - size_t size = src.numel_ * sizeof(T); + size_t size = product(src.dims_) * sizeof(T); set_dims(src.dims()); const void* src_ptr = static_cast(src.data()); void* dst_ptr = static_cast(mutable_data(dst_place)); @@ -108,7 +142,6 @@ class Tensor { return; } dims_ = dims; - numel_ = product(dims_); } DDim dims() const { return dims_; } @@ -119,52 +152,52 @@ class Tensor { struct Placeholder { virtual ~Placeholder() {} virtual void* ptr() const = 0; - virtual paddle::platform::Place place() const = 0; + virtual platform::Place place() const = 0; virtual size_t size() const = 0; + virtual std::type_index type() const = 0; }; - template + template struct PlaceholderImpl : public Placeholder { private: + template class Deleter { public: - Deleter(platform::Place place) : place_(place) {} - void operator()(T* ptr) { - paddle::memory::Free(place_, static_cast(ptr)); - } + Deleter(PType place) : place_(place) {} + void operator()(T* ptr) { memory::Free(place_, static_cast(ptr)); } private: - paddle::platform::Place place_; + PType place_; }; public: - PlaceholderImpl(paddle::platform::Place place, size_t size) - : ptr_(static_cast(paddle::memory::Alloc(place, size)), - Deleter(place)), + PlaceholderImpl(PlaceType place, size_t size) + : ptr_(static_cast(memory::Alloc(place, size)), + Deleter(place)), place_(place), size_(size) {} virtual void* ptr() const { return static_cast(ptr_.get()); } virtual size_t size() const { return size_; } virtual paddle::platform::Place place() const { return place_; } + virtual std::type_index type() const { return std::type_index(typeid(T)); } - std::unique_ptr ptr_; - paddle::platform::Place place_; // record the place of ptr_. - size_t size_; // size of the memory block. + std::unique_ptr> ptr_; + platform::Place place_; // record the place of ptr_. + size_t size_; // size of the memory block. }; template inline void CheckDims() const { PADDLE_ENFORCE(holder_ != nullptr, "Tenosr holds no memory. Call Tensor::mutable_data first."); - PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_, + PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, "Tensor's dims_ is out of bound. Call Tensor::mutable_data " "first to re-allocate memory."); } std::shared_ptr holder_; // holds the memory block if allocated. DDim dims_; - size_t numel_; // cache of `product(dims_)` size_t offset_; // marks the begin of tensor data area. }; diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 255f69372f4f06609471b9ff7a9b9ce790fcddf0..8a7cbbd0de6fd6aaafa8649abb8628e971bc49c1 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -33,7 +33,7 @@ TEST(Tensor, DataAssert) { bool caught = false; try { src_tensor.data(); - } catch (paddle::framework::EnforceNotMet err) { + } catch (std::runtime_error& err) { caught = true; std::string msg = "Tenosr holds no memory. Call Tensor::mutable_data first."; @@ -47,7 +47,7 @@ TEST(Tensor, DataAssert) { /* following tests are not available at present because Memory::Alloc() and Memory::Free() have not been ready. - +*/ TEST(Tensor, MutableData) { using namespace paddle::framework; using namespace paddle::platform; @@ -72,7 +72,7 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); EXPECT_EQ(p1, p2); } - +#ifdef __CUDACC__ { Tensor src_tensor; float* p1 = nullptr; @@ -94,6 +94,7 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); EXPECT_EQ(p1, p2); } +#endif } TEST(Tensor, ShareDataFrom) { @@ -106,11 +107,13 @@ TEST(Tensor, ShareDataFrom) { bool caught = false; try { dst_tensor.ShareDataFrom(src_tensor); - } catch (EnforceNotMet err) { + } catch (std::runtime_error& err) { caught = true; - std::string msg = "Tenosr holds no memory. Call Tensor::mutable_data -first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); -++i) { ASSERT_EQ(what[i], msg[i]); + std::string msg = + "Tenosr holds no memory. Call Tensor::mutable_data first."; + const char* what = err.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); } } ASSERT_TRUE(caught); @@ -120,6 +123,7 @@ first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } +#ifdef __CUDACC__ { Tensor src_tensor; Tensor dst_tensor; @@ -127,6 +131,7 @@ first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); dst_tensor.ShareDataFrom(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } +#endif } TEST(Tensor, Slice) { @@ -155,6 +160,7 @@ TEST(Tensor, Slice) { EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); } +#ifdef __CUDACC__ { Tensor src_tensor; src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); @@ -176,6 +182,7 @@ TEST(Tensor, Slice) { EXPECT_EQ(slice_data_address, slice_mutable_data_address); EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); } +#endif } TEST(Tensor, CopyFrom) { @@ -203,4 +210,3 @@ TEST(Tensor, CopyFrom) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } } -*/ \ No newline at end of file diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 1518a8a654cfb54376a49760dc5873733c916937..2bec00cdb2d32d01a5a24e662bcca07f4154939c 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -11,7 +11,6 @@ if(WITH_GPU) endif() if(USE_NNPACK) - include(nnpack/nnpack.cmake) list(APPEND cpp_files nnpack/NNPACKConvOp.cpp) if(WITH_TESTING) add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp) @@ -37,6 +36,7 @@ if(WITH_GPU) add_simple_unittest(MulOpTest) add_simple_unittest(CosSimOpTest) add_simple_unittest(RowConvOpTest) + add_simple_unittest(CropOpTest) endif() add_simple_unittest(ConvOpTest) diff --git a/paddle/function/CropOp.cpp b/paddle/function/CropOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f12ee43e3d72f9ac776eaff93914228850694dd2 --- /dev/null +++ b/paddle/function/CropOp.cpp @@ -0,0 +1,177 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CropOp.h" +#include "paddle/function/TensorShape.h" +#include "paddle/math/Vector.h" + +namespace paddle { + +template <> +void Crop(real* outputs, + const real* inputs, + const TensorShape inShape, + const TensorShape outShape, + const FuncConfig& conf) { + std::vector crop_corner = + conf.get>("crop_corner"); + int cCrop = crop_corner[1]; + int hCrop = crop_corner[2]; + int wCrop = crop_corner[3]; + + int num = inShape[0]; + int inC = inShape[1]; + int inH = inShape[2]; + int inW = inShape[3]; + + int outC = outShape[1]; + int outH = outShape[2]; + int outW = outShape[3]; + + for (int n = 0; n < num; n++) { + for (int c = 0; c < outC; c++) { + for (int h = 0; h < outH; h++) { + int outoff = ((n * outC + c) * outH + h) * outW; + int inoff = ((n * inC + c + cCrop) * inH + h + hCrop) * inW + wCrop; + memcpy(outputs + outoff, inputs + inoff, outW * sizeof(real)); + } + } + } +} + +template <> +void CropGrad(const real* inGrad, + real* outGrad, + const TensorShape inShape, + const TensorShape outShape, + const FuncConfig& conf) { + std::vector crop_corner = + conf.get>("crop_corner"); + int cCrop = crop_corner[1]; + int hCrop = crop_corner[2]; + int wCrop = crop_corner[3]; + + int num = outShape[0]; + int outC = outShape[1]; + int outH = outShape[2]; + int outW = outShape[3]; + + int inC = inShape[1]; + int inH = inShape[2]; + int inW = inShape[3]; + + for (int n = 0; n < num; n++) { + for (int c = 0; c < inC; c++) { + for (int h = 0; h < inH; h++) { + int outoff = ((n * outC + c + cCrop) * outH + h + hCrop) * outW + wCrop; + int inoff = ((n * inC + c) * inH + h) * inW; + CpuVector inG = CpuVector(inW, const_cast(inGrad + inoff)); + CpuVector outG = CpuVector(inW, outGrad + outoff); + outG += inG; + } + } + } +} + +/** + * \brief Crop input according to the specify corner and shape. + * The input and output is a 4D tensor. In CropFunc, we only + * crop the 2nd to 4th dimension. + * + * Argument in this Function: + * \param pad_ A struct object contains the cropping corner and shape. + * \param inputs A 4D tensor, only one input. + * \param outputs A 4D tensor, the output value after cropping. + * + * For example, + * Input(2,2,2,3) = [ + * [ [[1,2,3], [3,4,5]], + * [[2,3,5], [1,6,7]] ], + * [ [[4,3,1], [1,8,7]], + * [[3,8,9], [2,3,5]] ] + * ] # the input shape is (2,2,2,3) + * + * pad_: if corner = (0,1,1) and crop_shape = (2,1,2) + * Output(2,2,1,2) = [ + * [ [[4,5]], + * [[6,7]] ], + * [ [[8,7]], + * [[3,5]] ] + * ] # the input shape is (2,2,2,3) + */ +template +class CropFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { conf_ = config; } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); + CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + + TensorShape inShape = inputs[0].shape(); + TensorShape outShape = outputs[0].shape(); + + Crop(outputs[0].data(), + inputs[0].data(), + inShape, + outShape, + conf_); + } + +private: + FuncConfig conf_; +}; + +/** + * \brief The backward propagation of cropping Function. + * + * Argument in this Function: + * \param crop_ The same meaning as it in CropFunc. + * \param inputs The gradient with respect to the output value of CropFunc. + * \param outputs The gradient with respect to the input value of CropFunc. + */ + +template +class CropGradFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { conf_ = config; } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + + TensorShape outShape = outputs[0].shape(); + TensorShape inShape = inputs[0].shape(); + + CropGrad(inputs[0].data(), + outputs[0].data(), + inShape, + outShape, + conf_); + } + +private: + FuncConfig conf_; +}; + +REGISTER_TYPED_FUNC(Crop, CPU, CropFunc); +REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(Crop, GPU, CropFunc); +REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc); +#endif + +} // namespace paddle diff --git a/paddle/function/CropOp.h b/paddle/function/CropOp.h new file mode 100644 index 0000000000000000000000000000000000000000..87986fbdc7e33aeb24d947e82a5d67ba23f532de --- /dev/null +++ b/paddle/function/CropOp.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Function.h" + +namespace paddle { + +/** + * \brief This funtion crops inputs according to the specify start point and + *shape. + * + * \param[out] outputs save results. + * \param[in] inputs input data. + * \param[in] inShape the shape of input tensor. + * \param[in] conf the cropping config + */ +template +void Crop(real* outputs, + const real* inputs, + const TensorShape inShape, + const TensorShape outShape, + const FuncConfig& conf); + +/** + * \brief Cropping operation backward. + * + * \param[out] inGrad gradients of previous layer + * \param[in] outGrad output gradient + * \param[in] inShape the shape of input tensor. + * \param[in] conf the cropping config + */ +template +void CropGrad(const real* inGrad, + real* outGrad, + const TensorShape inShape, + const TensorShape outShape, + const FuncConfig& conf); +} // namespace paddle diff --git a/paddle/function/CropOpGpu.cu b/paddle/function/CropOpGpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..37ce6de0647e5e06a231710b5a53089533de2407 --- /dev/null +++ b/paddle/function/CropOpGpu.cu @@ -0,0 +1,113 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "hl_base.h" +#include "CropOp.h" + +namespace paddle { + +__global__ void KeCrop(real* outputs, const real* inputs, + int inC, int inH, int inW, + int cropC, int cropH, int cropW, + int outC, int outH, int outW, int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % outW; + const int h = (idx / outW) % outH; + const int c = (idx / outW / outH) % outC; + const int n = idx / outW / outH / outC; + + const int off = ((n * inC + c + cropC) * inH + h + cropH) * inW + cropW + w; + outputs[idx] = inputs[off]; + } +} + +template <> +void Crop(real* outputs, + const real* inputs, + const TensorShape inShape, + const TensorShape outShape, + const FuncConfig& conf) { + std::vector crop_corner = conf.get>("crop_corner"); + int cropC = crop_corner[1]; + int cropH = crop_corner[2]; + int cropW = crop_corner[3]; + + int num = inShape[0]; + int inC = inShape[1]; + int inH = inShape[2]; + int inW = inShape[3]; + + int outC = outShape[1]; + int outH = outShape[2]; + int outW = outShape[3]; + + size_t nth = num * outC * outH * outW; + int blockSize = 1024; + int gridSize = (nth + blockSize - 1) / blockSize; + + KeCrop<<>> + (outputs, inputs, inC, inH, inW, cropC, cropH, cropW, + outC, outH, outW, nth); + CHECK_SYNC("Crop"); +} + +__global__ void KeCropDiff(const real* inGrad, real* outGrad, + int inC, int inH, int inW, + int cropC, int cropH, int cropW, + int outC, int outH, int outW, int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % inW; + const int h = (idx / inW) % inH; + const int c = (idx / inW / inH) % inC; + const int n = idx / inW / inH / inC; + + const int off = ((n * outC + c + cropC) * outH + h + cropH) * outW + cropW + w; + + outGrad[off] += inGrad[idx]; + } +} + +template <> +void CropGrad(const real* inGrad, + real* outGrad, + const TensorShape inShape, + const TensorShape outShape, + const FuncConfig& conf) { + std::vector crop_corner = conf.get>("crop_corner"); + int cropC = crop_corner[1]; + int cropH = crop_corner[2]; + int cropW = crop_corner[3]; + + int num = outShape[0]; + int outC = outShape[1]; + int outH = outShape[2]; + int outW = outShape[3]; + + int inC = inShape[1]; + int inH = inShape[2]; + int inW = inShape[3]; + + size_t nth = num * inC * inH * inW; + int blockSize = 1024; + int gridSize = (nth + blockSize - 1) / blockSize; + + KeCropDiff <<>> + (inGrad, outGrad, inC, inH, inW, cropC, cropH, cropW, + outC, outH, outW, nth); + CHECK_SYNC("CropGrad"); +} + +} // namespace paddle diff --git a/paddle/function/CropOpTest.cpp b/paddle/function/CropOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f11abfdf6f752857e0a75c62fb2b5c089c206d9 --- /dev/null +++ b/paddle/function/CropOpTest.cpp @@ -0,0 +1,49 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "FunctionTest.h" + +namespace paddle { + +TEST(Crop, real) { + for (size_t numSamples : {5, 32}) { + for (size_t channels : {5, 5, 32}) { + for (size_t imgSizeH : {5, 33, 100}) { + for (size_t imgSizeW : {5, 32, 96}) { + VLOG(3) << " numSamples=" << numSamples << " channels=" << channels + << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; + for (bool test_grad : {false, true}) { + CpuGpuFuncCompare compare( + test_grad ? "CropGrad" : "Crop", + FuncConfig() + .set>("crop_corner", {0, 1, 1, 1}) + .set>("crop_shape", {0, 2, 3, 3})); + TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; + TensorShape outDims{numSamples, 2, 3, 3}; + compare.addInputs( + BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); + compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, + test_grad ? inDims : outDims, + test_grad ? ADD_TO : ASSIGN_TO), + test_grad ? ADD_TO : ASSIGN_TO); + compare.run(); + } + } + } + } + } +} + +} // namespace paddle diff --git a/paddle/function/GemmConvOp.cpp b/paddle/function/GemmConvOp.cpp index a40e5d9d2e76605525f0956445fc43c693933cf8..00880effc59cc80b2761fb6a4d9f3246439afd3f 100644 --- a/paddle/function/GemmConvOp.cpp +++ b/paddle/function/GemmConvOp.cpp @@ -117,8 +117,7 @@ public: ConvFunctionBase::init(config); } - virtual void check(const BufferArgs& inputs, - const BufferArgs& outputs) override { + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { const TensorShape& input = inputs[0].shape(); const TensorShape& filter = inputs[1].shape(); const TensorShape& output = outputs[0].shape(); @@ -217,8 +216,7 @@ public: ConvFunctionBase::init(config); } - virtual void check(const BufferArgs& inputs, - const BufferArgs& outputs) override { + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { const TensorShape& output = inputs[0].shape(); const TensorShape& filter = inputs[1].shape(); const TensorShape& input = outputs[0].shape(); @@ -311,8 +309,7 @@ public: ConvFunctionBase::init(config); } - virtual void check(const BufferArgs& inputs, - const BufferArgs& outputs) override { + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { const TensorShape& output = inputs[0].shape(); const TensorShape& input = inputs[1].shape(); const TensorShape& filter = outputs[0].shape(); diff --git a/paddle/function/NaiveConvOp.cpp b/paddle/function/NaiveConvOp.cpp index 4348f0f775e9442c50a3c45b9a8e6dad5c6b198d..e0692fa06d6e0c35cfa742ca3eac7fe2037b1a80 100644 --- a/paddle/function/NaiveConvOp.cpp +++ b/paddle/function/NaiveConvOp.cpp @@ -90,8 +90,7 @@ public: ConvFunctionBase::init(config); } - virtual void check(const BufferArgs& inputs, - const BufferArgs& outputs) override { + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { const TensorShape& input = inputs[0].shape(); const TensorShape& filter = inputs[1].shape(); const TensorShape& output = outputs[0].shape(); diff --git a/paddle/function/RowConvOpGpu.cu b/paddle/function/RowConvOpGpu.cu index c0b947e224313abaf4fadfb8293dc78ca085ff84..d9dcc7d59d1e3c222f5a7ce448daa8d7edb6c978 100644 --- a/paddle/function/RowConvOpGpu.cu +++ b/paddle/function/RowConvOpGpu.cu @@ -32,7 +32,7 @@ __global__ void KeRowConv(real* y, const real* x, const real* w, for (int i = tidy; i < context; i += blky) { sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0; } - + __syncthreads(); for (int i = 0; i < numSeq; ++i) { @@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy, int yoff = start + j; // transpose - sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; - sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0; + sh_x[tidx][tidy] = (xoff < width && yoff < end) ? + x[yoff * width + xoff] : 0.0; + sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? + dy[yoff * width + xoff] : 0.0; __syncthreads(); if (tidy < (context - 1)) { yoff = yoff - context + 1; - sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0; + sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? + dy[yoff * width + xoff] : 0.0; } __syncthreads(); @@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy, int yoff = start + j; // transpose - sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; + sh_x[tidx][tidy] = (xoff < width && yoff < end) ? + x[yoff * width + xoff] : 0.0; __syncthreads(); for (int t = 0; t < context; t++) { - sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0; + sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && + yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0; __syncthreads(); real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx]; @@ -239,7 +244,7 @@ __global__ void KeRowConvBwData(real* dx, const real* w, const real* dy, for (int i = tidy; i < context; i += blky) { sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0; } - + __syncthreads(); for (int i = 0; i < numSeq; ++i) { @@ -312,7 +317,7 @@ void RowConvGrad(const GpuMatrix& outG, dim3 dimBlock(32, 32); dim3 dimGrid(DIVUP(width, dimBlock.x), 1); real* dw = filterG.getData(); - if (contextLength <= 32) { + if (contextLength <= 32) { KeRowConvBwWeight<32, 32, 32> <<>> (dw, x, dy, starts, height, width, numSeq, contextLength); diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index e8080c3d714b324f072a380f738b9764477dfe04..f0ec77a5d00333993427fb8d0bc938c884e50c95 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/function/ConvOp.h" DEFINE_bool(nnpack_allocate_outside, - false, + true, "Allocate and free workspace memory outside the NNPACK interface."); DEFINE_int32(nnpack_num_threads, 0, @@ -58,18 +58,10 @@ public: workspaceBuffer_ = nullptr; workspaceSize_ = 0; - threadpool_ = nullptr; - if (FLAGS_nnpack_num_threads) { - threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads); - VLOG(3) << "Number of threads " - << pthreadpool_get_threads_count(threadpool_); - } + create_nnpack_threadpool(); } ~NNPACKConvFunction() { - if (threadpool_) { - pthreadpool_destroy(threadpool_); - } if (workspaceBuffer_) { free(workspaceBuffer_); } @@ -225,14 +217,25 @@ public: } } + static void create_nnpack_threadpool() { + if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) { + threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads); + VLOG(3) << "Number of threads " + << pthreadpool_get_threads_count(threadpool_); + } + } + private: nnp_convolution_algorithm algorithm_; nnp_convolution_transform_strategy transform_strategy_; void* workspaceBuffer_; size_t workspaceSize_; - pthreadpool_t threadpool_; + static pthreadpool_t threadpool_; }; +template +pthreadpool_t NNPACKConvFunction::threadpool_ = nullptr; + REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction); } // namespace paddle diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 2e839f640503b8f4e390fc87d9d59960dbc37f6e..cfa80a89365af5111746eec9599d16e37532a9f7 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -403,7 +403,7 @@ public: : layerName_(layerName) { addEvaluator(std::move(evaluator)); } - virtual void eval(const NeuralNetwork& nn) override { + void eval(const NeuralNetwork& nn) override { const LayerPtr& layer = nn.getLayer(layerName_); CHECK(layer) << "Nonexisted layer: " << layerName_ << " in submodel " << nn.getName(); diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 9a972466d66ba1417b2c31e66dc375b3da229aa8..9ddd449de7500f5682d59469328f06971c6e83bf 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -636,7 +636,7 @@ void lenToStarts(std::vector& starts) { } starts.back() = pos; } -} +} // namespace void RecurrentGradientMachine::calcSequenceStartPositions() { std::vector starts(commonSeqInfo_.size() + 1); diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/gserver/layers/AgentLayer.cpp index 15e7411b5fde0fa3a532394cf7d0e8477ef052d0..bdae7e623ae0472d4fe5ef3a88fc1e93bbf1e52c 100644 --- a/paddle/gserver/layers/AgentLayer.cpp +++ b/paddle/gserver/layers/AgentLayer.cpp @@ -124,7 +124,7 @@ void copyElements(const IVector& srcVec, dest[index[i]] = src[i]; } } -} +} // namespace void GatherAgentLayer::forwardIds(PassType passType) { IVectorPtr realId = realLayers_[0]->getOutputLabel(); diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/gserver/layers/CropLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..69ad913420bdb6e1b2ed0618b7f9b78d7477be99 --- /dev/null +++ b/paddle/gserver/layers/CropLayer.cpp @@ -0,0 +1,146 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CropLayer.h" +#include "paddle/utils/Stat.h" +namespace paddle { + +REGISTER_LAYER(crop, CropLayer); + +bool CropLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_LE(static_cast(inputLayers_.size()), 2); + CHECK_GE(static_cast(inputLayers_.size()), 1); + crop_axis_ = config_.axis(); + for (int i = 0; i < config_.offset_size(); i++) { + crop_offsets_.push_back(config_.offset(i)); + } + + // 1. get input_0 shape + auto& input0_img_conf = config_.inputs(0).image_conf(); + inDims_ = TensorShape({0, + input0_img_conf.channels(), + input0_img_conf.has_img_size_y() + ? input0_img_conf.img_size_y() + : input0_img_conf.img_size(), + input0_img_conf.img_size()}); + // 2. get target dims from config + if (config_.inputs_size() == 1) { + targetDims_ = TensorShape({config_.shape(0), + config_.shape(1), + config_.shape(2), + config_.shape(3)}); + } else { + // 2. get input_1 shape + auto& input1_img_conf = config_.inputs(1).image_conf(); + targetDims_ = TensorShape({0, + input1_img_conf.channels(), + input1_img_conf.has_img_size_y() + ? input1_img_conf.img_size_y() + : input1_img_conf.img_size(), + input1_img_conf.img_size()}); + } + + // 3. get final crop corner + int dimSize = 4; + crop_corner_ = {0, 0, 0, 0}; + for (int i = 0; i < dimSize; i++) { + if (i >= crop_axis_) { + if (crop_offsets_.size() > 1) { + crop_corner_[i] = crop_offsets_[i - crop_axis_]; + } else { + crop_corner_[i] = crop_offsets_[0]; + } + } + } + + outDims_ = TensorShape(4); + + createFunction( + forward_, "Crop", FuncConfig().set("crop_corner", crop_corner_)); + createFunction( + backward_, "CropGrad", FuncConfig().set("crop_corner", crop_corner_)); + + return true; +} + +void CropLayer::setOutDims() { + MatrixPtr input = inputLayers_[1]->getOutputValue(); + size_t batchSize = input->getHeight(); + // get target dims from input_1 + if (config_.inputs_size() == 2) { + targetDims_.setDim(0, batchSize); + int ch = config_.inputs(0).image_conf().channels(); + if (ch != 0) targetDims_.setDim(1, ch); + int h = inputLayers_[1]->getOutput().getFrameHeight(); + if (h != 0) targetDims_.setDim(2, h); + int w = inputLayers_[1]->getOutput().getFrameWidth(); + if (w != 0) targetDims_.setDim(3, w); + } + // get final crop shape from target dims and crop axis + std::vector crop_shape; + int dimSize = 4; + for (int i = 0; i < dimSize; i++) { + if (i >= crop_axis_) { + crop_shape.push_back(targetDims_[i]); + } else { + crop_shape.push_back(inDims_[i]); + } + } + + outDims_.reshape( + {crop_shape[0], crop_shape[1], crop_shape[2], crop_shape[3]}); + output_.setFrameHeight(crop_shape[2]); + output_.setFrameWidth(crop_shape[3]); +} + +void CropLayer::setInDims() { + MatrixPtr input = inputLayers_[0]->getOutputValue(); + size_t batchSize = input->getHeight(); + inDims_.setDim(0, batchSize); + int h = inputLayers_[0]->getOutput().getFrameHeight(); + if (h != 0) inDims_.setDim(2, h); + int w = inputLayers_[0]->getOutput().getFrameWidth(); + if (w != 0) inDims_.setDim(3, w); +} + +void CropLayer::forward(PassType passType) { + Layer::forward(passType); + setInDims(); + setOutDims(); + int size = outDims_[1] * outDims_[2] * outDims_[3]; + resetOutput(outDims_[0], size); + MatrixPtr outV = getOutputValue(); + REGISTER_TIMER_INFO("CropForward", getName().c_str()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getInputValue(0), inDims_); + outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO); + forward_[0]->calc(inputs, outputs); +} + +void CropLayer::backward(const UpdateCallback& callback) { + (void)callback; + REGISTER_TIMER_INFO("CropBackward", getName().c_str()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getOutputGrad(), outDims_); + outputs.addArg(*getInputGrad(0), inDims_, ADD_TO); + backward_[0]->calc(inputs, outputs); +} +} // namespace paddle diff --git a/paddle/gserver/layers/CropLayer.h b/paddle/gserver/layers/CropLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..6b6202621023575c1c83049ecbd019656c726e3f --- /dev/null +++ b/paddle/gserver/layers/CropLayer.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" + +namespace paddle { + +/** + * \brief This layer crop input according to the specify conf. + * input_0: input to be cropped + * input_1: optional reference input + * axis: start dimension to be croped + * offset: offset of cropping in each dimension + * shape: if reference input layer was not setted, + * crop input as this shape conf + */ +class CropLayer : public Layer { +public: + explicit CropLayer(const LayerConfig& config) : Layer(config) {} + + ~CropLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +protected: + void setOutDims(); + void setInDims(); + + int32_t crop_axis_; + std::vector crop_offsets_; + std::vector crop_corner_; + TensorShape inDims_; + TensorShape targetDims_; + TensorShape outDims_; +}; +} // namespace paddle diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index 4b92b5d163ad107c0783beae45f8c936112fcccf..d5621412caee843e24a0d0c9b7096402765738c7 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -359,12 +359,11 @@ void Layer::backwardActivation() { /* Do error clipping */ if (config_.error_clipping_threshold() > 0.0f) { if (FLAGS_log_error_clipping) { - CpuVector outGradVec(0, nullptr); - outGradVec.subVecFrom( - output_.grad->getData(), 0, output_.grad->getElementCnt()); - real maxAbsGrad = outGradVec.getAbsMax(); + VectorPtr outGradVec = Vector::create( + output_.grad->getData(), output_.grad->getElementCnt(), useGpu_); + real maxAbsGrad = outGradVec->getAbsMax(); if (maxAbsGrad > config_.error_clipping_threshold()) { - real avgAbsGrad = outGradVec.getAbsSum() / outGradVec.getSize(); + real avgAbsGrad = outGradVec->getAbsSum() / outGradVec->getSize(); LOG(INFO) << " layer=" << config_.name() << " need clipping," << " max error=" << maxAbsGrad << " avg error=" << avgAbsGrad; } diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 92f6cbcfe5a0e23c5939b1689a3e339367450387..a43adc7ce7db937bd62ea9bf1533b8a5899c259a 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -56,7 +56,7 @@ add_test(NAME test_DetectionOutput add_unittest_without_exec(test_ConvUnify test_ConvUnify.cpp LayerGradUtil.cpp) - + add_test(NAME test_ConvUnify COMMAND test_ConvUnify) ################# test_BatchNorm ####################### diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 67251f08e34faff57d9e6fd6a1163ba655619a8b..9af083468c0f01218117211f9e4931ca0669e96a 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1802,6 +1802,34 @@ TEST(Layer, RowConvLayer) { } } +TEST(Layer, CropLayer) { + TestConfig config; + // config input_0 + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ImageConfig* img = input->mutable_image_conf(); + img->set_channels(4); + img->set_img_size(16); + config.layerConfig.set_axis(2); + config.layerConfig.add_offset(0); + config.layerConfig.add_offset(0); + + // config input_1 + config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); + input = config.layerConfig.add_inputs(); + img = input->mutable_image_conf(); + img->set_channels(2); + img->set_img_size(8); + + // config crop layer + config.layerConfig.set_type("crop"); + config.layerConfig.set_name("cropLayer"); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "crop", 100, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/memory/detail/memory_block.cc b/paddle/memory/detail/memory_block.cc index bc67bcef0fdf9358460d72da5f443b48744c6838..fc40993208323f1f5d18103165c8835b5f829613 100644 --- a/paddle/memory/detail/memory_block.cc +++ b/paddle/memory/detail/memory_block.cc @@ -152,6 +152,6 @@ MemoryBlock* MemoryBlock::metadata() const { reinterpret_cast(this) - 1)); } -} // detail -} // memory -} // paddle +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc index 1579174b1a6ff08824629d833d01411cff651f48..f61e67a32906083881dd7f47433521876be9b355 100644 --- a/paddle/memory/detail/system_allocator.cc +++ b/paddle/memory/detail/system_allocator.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/memory/detail/system_allocator.h" #include "paddle/platform/assert.h" -#include "paddle/platform/error.h" +#include "paddle/platform/enforce.h" #include "paddle/platform/gpu_info.h" #include // for malloc and free @@ -128,8 +128,7 @@ void GPUAllocator::Free(void* p, size_t size, size_t index) { // process is terminating, in which case we don't care if // cudaFree succeeds. if (err != cudaErrorCudartUnloading) { - platform::throw_on_error(err, - "cudaFree{Host} failed in GPUAllocator::Free."); + PADDLE_ENFORCE(err, "cudaFree{Host} failed in GPUAllocator::Free."); } } diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 40bb326512c118178184120d4bc26dc127689ff3..a37720e5093342f5e02bd9a15a3099de434d6396 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -1,6 +1,55 @@ -if(WITH_GPU) - nv_library(add_op SRCS add_op.cc add_op.cu DEPS operator op_registry glog ddim) -else() - cc_library(add_op SRCS add_op.cc DEPS operator op_registry glog ddim) -endif() +function(op_library TARGET) + # op_library is a function to create op library. The interface is same as + # cc_library. But it handle split GPU/CPU code and link some common library + # for ops. + set(cc_srcs) + set(cu_srcs) + set(op_common_deps operator op_registry) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + + foreach(src ${op_library_SRCS}) + if (${src} MATCHES ".*\\.cu$") + list(APPEND cu_srcs ${src}) + elseif(${src} MATCHES ".*\\.cc$") + list(APPEND cc_srcs ${src}) + else() + message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu") + endif() + endforeach() + + list(LENGTH cc_srcs cc_srcs_len) + if (${cc_srcs_len} EQUAL 0) + message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file") + endif() + + list(LENGTH cu_srcs cu_srcs_len) + list(LENGTH op_library_DEPS dep_len) + if (${cu_srcs_len} EQUAL 0 AND ${dep_len} EQUAL 0) + message(WARNING "The op library ${TARGET} not support GPU!") + endif() + + if (WITH_GPU) + nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} + ${op_common_deps}) + else() + cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS} + ${op_common_deps}) + endif() +endfunction() + +op_library(add_op SRCS add_op.cc add_op.cu) cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) + +op_library(mul_op SRCS mul_op.cc mul_op.cu) +op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) +op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc) +op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) + +op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op + softmax_op net) + +op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 2766f0bf258ed863a4297c1e4a2be4673cbf3044..41d044cdb72b5fb2a7f8654e8ad103778e0857d1 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -1,6 +1,20 @@ -#include -#include -#include +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/add_op.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/tensor.h" namespace paddle { namespace operators { @@ -17,8 +31,7 @@ protected: "Inputs/Outputs of AddOp must all be set"); PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(), "Two input of Add Op's dimension must be same."); - // Need set dims in Tensor - // outputs[0]->set_dims(inputs[0]->dims()) + outputs[0]->set_dims(inputs[0]->dims()); } }; @@ -36,9 +49,10 @@ The equation is: Out = X + Y )DOC"); } }; -} // namespace op +} // namespace operators } // namespace paddle REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker); -REGISTER_OP_CPU_KERNEL( - add_two, ::paddle::operators::AddKernel<::paddle::platform::CPUPlace>); \ No newline at end of file +typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float> + AddKernel_CPU_float; +REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float); diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index 5979345fffd68d71ba09dc96874d8ff9471bdbcc..0edf142ee4e5f359ea14be02dbf3f7f8855f6db1 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -1,5 +1,6 @@ -#include -#include +#include "paddle/operators/add_op.h" +#include "paddle/framework/op_registry.h" +typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float; REGISTER_OP_GPU_KERNEL(add_two, - paddle::operators::AddKernel); \ No newline at end of file + AddKernel_GPU_float); \ No newline at end of file diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index 17d459dbc86af73fa86934a5ccce9da509c59c6b..39d54a63bd16cdafeec1cfcd86ef5d142382e880 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -1,17 +1,41 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #pragma once -#include -#include +#include "glog/logging.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" namespace paddle { namespace operators { -template +template class AddKernel : public framework::OpKernel { public: - void Compute(const KernelContext &context) const override { - LOG(INFO) << "Add kernel in " << typeid(Place).name(); + void Compute(const framework::KernelContext& context) const override { + auto input0 = context.Input(0)->Get(); + auto input1 = context.Input(1)->Get(); + auto* output = context.Output(0)->GetMutable(); + + output->mutable_data(context.GetPlace()); + + framework::EigenVector::Flatten(*output).device( + *(context.GetEigenDevice())) = + framework::EigenVector::Flatten(input0) + + framework::EigenVector::Flatten(input1); } }; -} // namespace op +} // namespace operators } // namespace paddle diff --git a/paddle/operators/add_op_test.cc b/paddle/operators/add_op_test.cc index f554ac1bef3255f136ad4407a7a1096bdc2b1db5..53b354fedcacf2176aed8b504daf2046bdf96bb6 100644 --- a/paddle/operators/add_op_test.cc +++ b/paddle/operators/add_op_test.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #include #define private public #include diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..01e96f4c4817466e3266ca57a0d0ae2368b3e097 --- /dev/null +++ b/paddle/operators/fc_op.cc @@ -0,0 +1,76 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/net.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +class FullyConnectedOp : public framework::PlainNet { +public: + void Init() override { + AddOp(framework::OpRegistry::CreateOp("mul", + { + Input("X"), Input("W"), + }, + {Output("before_act")}, + {})); + auto b = Input("b"); + if (b != framework::OperatorBase::EMPTY_VAR_NAME()) { + AddOp(framework::OpRegistry::CreateOp("rowwise_add", + {Output("before_act"), Input("b")}, + {Output("before_act")}, + {})); + } + + auto activation = GetAttr("activation"); + AddOp(framework::OpRegistry::CreateOp( + activation, {Output("before_act")}, {Output("Y")}, {})); + CompleteAddOp(false); + } +}; + +class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker { +public: + FullyConnectedOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "the input of fc operator"); + AddInput("W", "the weight of fc operator"); + AddInput("b", "the bias of fc operator"); + + AddOutput("Y", "the output of fc operator"); + AddOutput( + "before_act", "the before activation output of fc operator", true); + AddAttr("activation", "The activation key for fc layer") + .SetDefault("sigmoid") + .InEnum({"sigmoid", "softmax"}); + + //! TODO(yuyang18): Complete comment; + AddComment("FullyConnected Operator"); + } +}; +} // namespace operators +} // namespace paddle + +USE_OP(mul); +USE_OP(rowwise_add); +USE_OP(sigmoid); +USE_OP(softmax); + +REGISTER_OP(fc, + paddle::operators::FullyConnectedOp, + paddle::operators::FullyConnectedOpMaker); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..713b2a5dc83d8dd5a3d944101591d75cb19fe04f --- /dev/null +++ b/paddle/operators/mul_op.cc @@ -0,0 +1,60 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include + +namespace paddle { +namespace operators { + +class MulOp : public framework::OperatorWithKernel { +protected: + void InferShape( + const std::vector &inputs, + const std::vector &outputs) const override { + PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs"); + auto dim0 = inputs[0]->dims(); + auto dim1 = inputs[1]->dims(); + PADDLE_ENFORCE(dim0.size() == 2 && dim1.size() == 2, + "The input of mul op must be matrix"); + PADDLE_ENFORCE( + dim0[1] == dim1[0], + "First matrix's width must be equal with second matrix's height."); + PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output"); + outputs[0]->set_dims({dim0[0], dim1[1]}); + } +}; + +class MulOpMaker : public framework::OpProtoAndCheckerMaker { +public: + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of mul op"); + AddInput("Y", "The second input of mul op"); + AddOutput("Out", "The output of mul op"); + AddComment(R"DOC( +Two Element Mul Operator. + +The equation is: Out = X * Y +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker); +REGISTER_OP_CPU_KERNEL( + mul, paddle::operators::MulKernel); diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..201723df247993c5cc1650edbe4f74441e3217d4 --- /dev/null +++ b/paddle/operators/mul_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include + +REGISTER_OP_GPU_KERNEL(mul, + paddle::operators::MulKernel); \ No newline at end of file diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h new file mode 100644 index 0000000000000000000000000000000000000000..ce8a0169e0cbaafb7e90d2227c9597fff463883d --- /dev/null +++ b/paddle/operators/mul_op.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include + +namespace paddle { +namespace operators { + +template +class MulKernel : public framework::OpKernel { +public: + void Compute(const framework::KernelContext &context) const override { + LOG(INFO) << "Mul kernel in " << typeid(Place).name(); + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..414bafd0468033813d50d4d6723e68ee9347eaac --- /dev/null +++ b/paddle/operators/rowwise_add_op.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +namespace paddle { +namespace operators { + +class RowWiseAddOp : public framework::OperatorWithKernel { +protected: + void InferShape( + const std::vector &inputs, + const std::vector &outputs) const override { + PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs is needed by rowwise add"); + auto dim0 = inputs[0]->dims(); + auto dim1 = inputs[1]->dims(); + + PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix"); + PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector"); + PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same"); + PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1"); + outputs[0]->set_dims(inputs[0]->dims()); + } +}; + +class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker { +public: + RowWiseAddOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The left input of row-wise add op, must be matrix"); + AddInput("b", "The right input of row-wise add op, must be vector"); + AddOutput("Out", "The output of row-wise add op"); + AddComment(R"DOC(Row-wise Add operator + +for i in xrange(X.shape[0]): + Out = X[i] + b +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP(rowwise_add, + paddle::operators::RowWiseAddOp, + paddle::operators::RowWiseAddOpMaker); +REGISTER_OP_CPU_KERNEL( + rowwise_add, + paddle::operators::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..2c4bfbf93a1064a47a19c991fa6655b5d67e83cb --- /dev/null +++ b/paddle/operators/rowwise_add_op.cu @@ -0,0 +1,6 @@ +#include +#include + +REGISTER_OP_GPU_KERNEL( + rowwise_add, + paddle::operators::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h new file mode 100644 index 0000000000000000000000000000000000000000..35f43e6376be6239021e7a9bacb849b93d5226b5 --- /dev/null +++ b/paddle/operators/rowwise_add_op.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include + +namespace paddle { +namespace operators { + +template +class RowWiseAddKernel : public framework::OpKernel { +public: + void Compute(const framework::KernelContext &context) const override { + LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name(); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..04df87a3add2af7daa127a072f7b690f6cf94327 --- /dev/null +++ b/paddle/operators/sgd_op.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/sgd_op.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/tensor.h" + +namespace paddle { +namespace operators { + +class SGDOp : public framework::OperatorWithKernel { +protected: + void InferShape( + const std::vector &inputs, + const std::vector &outputs) const override { + PADDLE_ENFORCE(inputs.size() == 2, "Input size of SGDOp must be two"); + PADDLE_ENFORCE(outputs.size() == 1, "Output size of SGDOp must be one"); + PADDLE_ENFORCE(inputs[0] != nullptr, "inputs[0] mast be set"); + PADDLE_ENFORCE(inputs[1] != nullptr, "inputs[1] mast be set"); + PADDLE_ENFORCE(outputs[0] != nullptr, "outputs[0] mast be set"); + PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(), + "Two input of SGD Op's dimension must be same."); + outputs[0]->set_dims(inputs[0]->dims()); + } +}; + +class SGDOpMaker : public framework::OpProtoAndCheckerMaker { +public: + SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("param", "input parameter"); + AddInput("grad", "input gradient"); + AddOutput("param_out", "output parameter"); + AddAttr("learning_rate", "learning rate of sgd"); + AddComment(R"DOC( + +Simplest sgd algorithm. + +param_out = param - learning_rate * grad; + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +REGISTER_OP(sgd, paddle::operators::SGDOp, paddle::operators::SGDOpMaker); +typedef paddle::operators::SGDOpKernel<::paddle::platform::CPUPlace, float> + SGDOpKernel_CPU_float; +REGISTER_OP_CPU_KERNEL(sgd, SGDOpKernel_CPU_float); diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..400425db10896e3970fc7468e34aba596a536184 --- /dev/null +++ b/paddle/operators/sgd_op.cu @@ -0,0 +1,5 @@ +#include "paddle/operators/sgd_op.h" +#include "paddle/framework/op_registry.h" + +typedef paddle::operators::SGDOpKernel<::paddle::platform::GPUPlace, float> SGDOpKernel_GPU_float; +REGISTER_OP_GPU_KERNEL(sgd, SGDOpKernel_GPU_float); \ No newline at end of file diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h new file mode 100644 index 0000000000000000000000000000000000000000..4b2d214618e5c7c15695bd66604139d805255c47 --- /dev/null +++ b/paddle/operators/sgd_op.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "glog/logging.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +template +class SGDOpKernel : public framework::OpKernel { +public: + void Compute(const framework::KernelContext& ctx) const override { + auto param = ctx.Input("param")->Get(); + auto grad = ctx.Input("grad")->Get(); + auto* param_out = ctx.Output(0)->GetMutable(); + float lr = ctx.op_.GetAttr("learning_rate"); + + param_out->mutable_data(ctx.GetPlace()); + + framework::EigenVector::Flatten(*param_out) + .device(*(ctx.GetEigenDevice())) = + framework::EigenVector::Flatten(param) - + lr * framework::EigenVector::Flatten(grad); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/sgd_op_test.cc b/paddle/operators/sgd_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..75137259f5e608b259b073101353e5818bb17c92 --- /dev/null +++ b/paddle/operators/sgd_op_test.cc @@ -0,0 +1,22 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +USE_OP(sgd); +TEST(SGDOp, GetOpProto) { + auto& protos = paddle::framework::OpRegistry::protos(); + auto it = protos.find("sgd"); + ASSERT_NE(it, protos.end()); +} diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..45ae277c538ca90716febaf2f3d92b560149d147 --- /dev/null +++ b/paddle/operators/sigmoid_op.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +namespace paddle { +namespace operators { + +class SigmoidOp : public framework::OperatorWithKernel { +protected: + void InferShape( + const std::vector &inputs, + const std::vector &outputs) const override { + PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input"); + PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output"); + outputs[0]->set_dims(inputs[0]->dims()); + } +}; + +class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { +public: + SigmoidOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "sigmoid input"); + AddInput("Y", "sigmoid output"); + AddComment("Sigmoid function"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP(sigmoid, + paddle::operators::SigmoidOp, + paddle::operators::SigmoidOpMaker); +REGISTER_OP_CPU_KERNEL( + sigmoid, paddle::operators::SigmoidKernel); diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..79d5222348f610b1b016a2df06e8b1e0a4fac66c --- /dev/null +++ b/paddle/operators/sigmoid_op.cu @@ -0,0 +1,5 @@ +#include +#include + +REGISTER_OP_GPU_KERNEL( + sigmoid, paddle::operators::SigmoidKernel); diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h new file mode 100644 index 0000000000000000000000000000000000000000..42173343f3e364729ecd190fc554b8c45ecfca8d --- /dev/null +++ b/paddle/operators/sigmoid_op.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include + +namespace paddle { +namespace operators { + +template +class SigmoidKernel : public framework::OpKernel { +public: + void Compute(const framework::KernelContext &context) const override { + LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name(); + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..4ca7be359e210d7a31aef94e498f37a1ad4879a2 --- /dev/null +++ b/paddle/operators/softmax_op.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ +#include +#include + +namespace paddle { +namespace operators { + +class SoftmaxOp : public framework::OperatorWithKernel { +protected: + void InferShape( + const std::vector &inputs, + const std::vector &outputs) const override { + PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax"); + PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax"); + + outputs[0]->set_dims(inputs[0]->dims()); + } +}; + +class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { +public: + SoftmaxOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "input of softmax"); + AddOutput("Y", "output of softmax"); + AddComment("Softmax Op"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); +REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..903eef1b62231d65e2f9ec7a1f57fca0f4c4605c --- /dev/null +++ b/paddle/operators/softmax_op.cu @@ -0,0 +1,5 @@ +#include +#include + +REGISTER_OP_GPU_KERNEL( + softmax, paddle::operators::SoftmaxKernel); diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h new file mode 100644 index 0000000000000000000000000000000000000000..74e9e2786b11b9a87cd9700d8458d4e611a8d4bb --- /dev/null +++ b/paddle/operators/softmax_op.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include + +namespace paddle { +namespace operators { + +template +class SoftmaxKernel : public framework::OpKernel { +public: + void Compute(const framework::KernelContext &context) const override { + LOG(INFO) << "Softmax kernel in " << typeid(Place).name(); + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/optimizer/parameter_optimizer_test.cpp b/paddle/optimizer/parameter_optimizer_test.cpp index 4e6254d9e4dab48279b4a880695959526d30d70c..edf4ae37a9beee2911d23dd1ab23e67a18065b1b 100644 --- a/paddle/optimizer/parameter_optimizer_test.cpp +++ b/paddle/optimizer/parameter_optimizer_test.cpp @@ -1,3 +1,19 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + #include "parameter_optimizer.h" #include #include @@ -5,21 +21,18 @@ #include "gtest/gtest.h" #include "lr_policy.h" -using namespace paddle; -using namespace paddle::optimizer; - -Tensor* FillTensor(size_t size) { - Tensor* param = new Tensor(size); - Tensor& p = *param; +paddle::optimizer::Tensor* FillTensor(size_t size) { + paddle::optimizer::Tensor* param = new paddle::optimizer::Tensor(size); + paddle::optimizer::Tensor& p = *param; for (size_t i = 0; i < p.size(); ++i) { p[i] = (float)rand() / (float)RAND_MAX; } return param; } -Tensor* FixedTensor(size_t size) { - Tensor* param = new Tensor(size); - Tensor& p = *param; +paddle::optimizer::Tensor* FixedTensor(size_t size) { + paddle::optimizer::Tensor* param = new paddle::optimizer::Tensor(size); + paddle::optimizer::Tensor& p = *param; for (size_t i = 0; i < p.size(); ++i) { p[i] = i; } @@ -28,7 +41,8 @@ Tensor* FixedTensor(size_t size) { class OptimizerTest : public testing::Test { public: - // init tensor shape + virtual ~OptimizerTest() {} + // init paddle::optimizer::Tensor shape const size_t kSize = 5; virtual void SetUp() { @@ -38,34 +52,36 @@ public: virtual void TearDown() {} void CreateSGD() { - Tensor* parameter = FixedTensor(kSize); - config_.set_optimizer(OptimizerConfig::SGD); + paddle::optimizer::Tensor* parameter = FixedTensor(kSize); + config_.set_optimizer(paddle::OptimizerConfig::SGD); config_.mutable_sgd()->set_momentum(0.0); config_.mutable_sgd()->set_decay(0.0); config_.mutable_sgd()->set_nesterov(false); - config_.set_lr_policy(OptimizerConfig::Const); + config_.set_lr_policy(paddle::OptimizerConfig::Const); config_.mutable_const_lr()->set_learning_rate(0.1); std::string str = config_.SerializeAsString(); - ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter); + paddle::optimizer::ParameterOptimizer* opt = + paddle::optimizer::ParameterOptimizer::Create(str, parameter); opts_.push_back(opt); } void CreateAdam() { - Tensor* parameter = FixedTensor(kSize); - config_.set_optimizer(OptimizerConfig::Adam); + paddle::optimizer::Tensor* parameter = FixedTensor(kSize); + config_.set_optimizer(paddle::OptimizerConfig::Adam); config_.mutable_adam()->set_beta_1(0.9); config_.mutable_adam()->set_beta_2(0.1); config_.mutable_adam()->set_epsilon(1e-3); config_.mutable_adam()->set_decay(0.0); - config_.set_lr_policy(OptimizerConfig::Const); + config_.set_lr_policy(paddle::OptimizerConfig::Const); config_.mutable_const_lr()->set_learning_rate(0.1); std::string str = config_.SerializeAsString(); - ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter); + paddle::optimizer::ParameterOptimizer* opt = + paddle::optimizer::ParameterOptimizer::Create(str, parameter); opts_.push_back(opt); } void TestGetWeight() { - Tensor* p = FixedTensor(kSize); + paddle::optimizer::Tensor* p = FixedTensor(kSize); for (size_t i = 0; i < opts_.size(); ++i) { int s = 0; float* newp = (float*)opts_[i]->get_weight(&s); @@ -76,7 +92,7 @@ public: } void TestUpdate() { - Tensor* g = FixedTensor(kSize); + paddle::optimizer::Tensor* g = FixedTensor(kSize); for (size_t i = 0; i < opts_.size(); ++i) { opts_[i]->Update(g); } @@ -91,8 +107,8 @@ public: } private: - std::vector opts_; - OptimizerConfig config_; + std::vector opts_; + paddle::OptimizerConfig config_; }; TEST_F(OptimizerTest, TestGetWeight) { TestGetWeight(); } diff --git a/paddle/optimizer/serialization_test.cpp b/paddle/optimizer/serialization_test.cpp index d2454140dc243b40ed8348578360b30894213838..e4d97cbdba545c4ba5adf5b30efd3fc9f3f744ee 100644 --- a/paddle/optimizer/serialization_test.cpp +++ b/paddle/optimizer/serialization_test.cpp @@ -1,19 +1,32 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + #include "serialization.h" #include "gtest/gtest.h" -using namespace paddle; -using namespace paddle::optimizer; - TEST(TensorToProto, Case1) { - Tensor t(3), t1(3); + paddle::optimizer::Tensor t(3), t1(3); for (size_t i = 0; i < t.size(); ++i) { t[i] = i; t1[i] = 0; } - TensorProto proto; - TensorToProto(t, &proto); - ProtoToTensor(proto, &t1); + paddle::TensorProto proto; + paddle::optimizer::TensorToProto(t, &proto); + paddle::optimizer::ProtoToTensor(proto, &t1); for (size_t i = 0; i < t1.size(); ++i) { EXPECT_EQ(t1[i], t[i]); } diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 6ac4035c0f863c5f63d17b523a7a8be668ff3da0..bd77bb7daa50e0b273f110624ddf6f4b79a3ceab 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -8,6 +8,8 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) +cc_test(enforce_test SRCS enforce_test.cc) + IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) ELSE() diff --git a/paddle/platform/cpu_info.cc b/paddle/platform/cpu_info.cc index 1905cfeee63038e2bbcff84200d66eaabf073273..78e1fa9df56b1623bfd9a53c6a37524d29648afc 100644 --- a/paddle/platform/cpu_info.cc +++ b/paddle/platform/cpu_info.cc @@ -22,7 +22,6 @@ limitations under the License. */ #endif #include "gflags/gflags.h" -#include "paddle/platform/error.h" DEFINE_double(fraction_of_cpu_memory_to_use, 1, "Default use 100% of CPU memory for PaddlePaddle," @@ -41,8 +40,8 @@ inline size_t CpuTotalPhysicalMemory() { if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; return 0L; #else - long pages = sysconf(_SC_PHYS_PAGES); - long page_size = sysconf(_SC_PAGE_SIZE); + int64_t pages = sysconf(_SC_PHYS_PAGES); + int64_t page_size = sysconf(_SC_PAGE_SIZE); return pages * page_size; #endif } diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index 960ef0a5955bfe5f7d33b7c8e4524176b0dbfda6..9c1d94e9e703caf2db92ca4a8eac975317e6b945 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -15,14 +15,15 @@ namespace paddle { namespace platform { template <> -Eigen::DefaultDevice* DeviceContext::get_eigen_device() { - return reinterpret_cast(this)->eigen_device(); +Eigen::DefaultDevice* DeviceContext::get_eigen_device() + const { + return reinterpret_cast(this)->eigen_device(); } #ifndef PADDLE_ONLY_CPU template <> -Eigen::GpuDevice* DeviceContext::get_eigen_device() { - return reinterpret_cast(this)->eigen_device(); +Eigen::GpuDevice* DeviceContext::get_eigen_device() const { + return reinterpret_cast(this)->eigen_device(); } #endif diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 51c8e1391324649d8e845e902a5632f6bca1fa58..fe6f13e399a78f9e5230ae52b0f67ab465af373b 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -11,18 +11,19 @@ limitations under the License. */ #pragma once -#include "paddle/framework/enforce.h" +#include "paddle/platform/enforce.h" +#include "paddle/platform/place.h" + #ifndef PADDLE_ONLY_CPU #include "paddle/platform/dynload/cublas.h" #include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/dynload/curand.h" -#include "paddle/platform/error.h" #include "paddle/platform/gpu_info.h" #define EIGEN_USE_GPU #endif -#include #include -#include +#include "paddle/platform/place.h" +#include "unsupported/Eigen/CXX11/Tensor" namespace paddle { namespace platform { @@ -33,17 +34,14 @@ class DeviceContext { virtual Place GetPlace() const = 0; template - DeviceType* get_eigen_device(); + DeviceType* get_eigen_device() const; }; class CPUDeviceContext : public DeviceContext { public: - Eigen::DefaultDevice* eigen_device() { - if (!eigen_device_) { - eigen_device_.reset(new Eigen::DefaultDevice()); - } - return eigen_device_.get(); - } + CPUDeviceContext() { eigen_device_.reset(new Eigen::DefaultDevice()); } + + Eigen::DefaultDevice* eigen_device() const { return eigen_device_.get(); } Place GetPlace() const override { Place retv = CPUPlace(); @@ -74,8 +72,7 @@ class CUDADeviceContext : public DeviceContext { public: explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) { GPUPlaceGuard guard(gpu_place_); - paddle::platform::throw_on_error(cudaStreamCreate(&stream_), - "cudaStreamCreate failed"); + PADDLE_ENFORCE(cudaStreamCreate(&stream_), "cudaStreamCreate failed"); eigen_stream_.reset(new Eigen::CudaStreamDevice(&stream_)); eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get())); } @@ -86,23 +83,22 @@ class CUDADeviceContext : public DeviceContext { } void Wait() { - paddle::platform::throw_on_error(cudaStreamSynchronize(stream_), - "cudaStreamSynchronize failed"); + PADDLE_ENFORCE(cudaStreamSynchronize(stream_), + "cudaStreamSynchronize failed"); } cudaStream_t stream() { return stream_; } - Eigen::GpuDevice* eigen_device() { return eigen_device_.get(); } + Eigen::GpuDevice* eigen_device() const { return eigen_device_.get(); } cublasHandle_t cublas_handle() { if (!blas_handle_) { GPUPlaceGuard guard(gpu_place_); - PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_) == - CUBLAS_STATUS_SUCCESS, + PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_), "cublasCreate failed"); - PADDLE_ENFORCE(paddle::platform::dynload::cublasSetStream( - blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS, - "cublasSetStream failed"); + PADDLE_ENFORCE( + paddle::platform::dynload::cublasSetStream(blas_handle_, stream_), + "cublasSetStream failed"); } return blas_handle_; } @@ -110,12 +106,11 @@ class CUDADeviceContext : public DeviceContext { cudnnHandle_t cudnn_handle() { if (!dnn_handle_) { GPUPlaceGuard guard(gpu_place_); - PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_) == - CUDNN_STATUS_SUCCESS, + PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_), "cudnnCreate failed"); - PADDLE_ENFORCE(paddle::platform::dynload::cudnnSetStream( - dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS, - "cudnnSetStream failed"); + PADDLE_ENFORCE( + paddle::platform::dynload::cudnnSetStream(dnn_handle_, stream_), + "cudnnSetStream failed"); } return dnn_handle_; } @@ -124,16 +119,15 @@ class CUDADeviceContext : public DeviceContext { if (!rand_generator_) { GPUPlaceGuard guard(gpu_place_); PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator( - &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == - CURAND_STATUS_SUCCESS, + &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT), "curandCreateGenerator failed"); PADDLE_ENFORCE( paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed( - rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS, + rand_generator_, random_seed_), "curandSetPseudoRandomGeneratorSeed failed"); - PADDLE_ENFORCE(paddle::platform::dynload::curandSetStream( - rand_generator_, stream_) == CURAND_STATUS_SUCCESS, - "curandSetStream failed"); + PADDLE_ENFORCE( + paddle::platform::dynload::curandSetStream(rand_generator_, stream_), + "curandSetStream failed"); } return rand_generator_; } @@ -141,26 +135,23 @@ class CUDADeviceContext : public DeviceContext { ~CUDADeviceContext() { Wait(); if (blas_handle_) { - PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_) == - CUBLAS_STATUS_SUCCESS, + PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_), "cublasDestroy failed"); } if (dnn_handle_) { - PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_) == - CUDNN_STATUS_SUCCESS, + PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_), "cudnnDestroy failed"); } if (rand_generator_) { - PADDLE_ENFORCE(paddle::platform::dynload::curandDestroyGenerator( - rand_generator_) == CURAND_STATUS_SUCCESS, - "curandDestroyGenerator failed"); + PADDLE_ENFORCE( + paddle::platform::dynload::curandDestroyGenerator(rand_generator_), + "curandDestroyGenerator failed"); } eigen_stream_.reset(); eigen_device_.reset(); - paddle::platform::throw_on_error(cudaStreamDestroy(stream_), - "cudaStreamDestroy failed"); + PADDLE_ENFORCE(cudaStreamDestroy(stream_), "cudaStreamDestroy failed"); } private: diff --git a/paddle/platform/dynload/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc index dd914e006d54c423ffea56ffaaafe7dcba416361..ae9a0a982c73de05821579d22b7f9ad99f24a92b 100644 --- a/paddle/platform/dynload/dynamic_loader.cc +++ b/paddle/platform/dynload/dynamic_loader.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include #include "gflags/gflags.h" #include "glog/logging.h" -#include "paddle/framework/enforce.h" +#include "paddle/platform/enforce.h" DEFINE_string(cudnn_dir, "", "Specify path for loading libcudnn.so. For instance, " diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h new file mode 100644 index 0000000000000000000000000000000000000000..b06ab8a2f184e7bb7dd9cb39f377b087c5258dc4 --- /dev/null +++ b/paddle/platform/enforce.h @@ -0,0 +1,148 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include + +#ifndef PADDLE_ONLY_CPU + +#include "paddle/platform/dynload/cublas.h" +#include "paddle/platform/dynload/cudnn.h" +#include "paddle/platform/dynload/curand.h" + +#include +#include +#include +#include +#include + +#endif // PADDLE_ONLY_CPU + +namespace paddle { +namespace platform { + +// Because most enforce conditions would evaluate to true, we can use +// __builtin_expect to instruct the C++ compiler to generate code that +// always forces branch prediction of true. +// This generates faster binary code. __builtin_expect is since C++11. +// For more details, please check https://stackoverflow.com/a/43870188/724872. +#define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) + +template +inline void throw_on_error(T e) { + throw_on_error(e, ""); +} + +template +inline typename std::enable_if::type throw_on_error( + int stat, const Args&... args) { + if (UNLIKELY(!(stat))) { + throw std::runtime_error( + string::Sprintf(args...) + + string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); + } +} + +#ifndef PADDLE_ONLY_CPU + +template +inline typename std::enable_if::type throw_on_error( + cudaError_t e, const Args&... args) { + if (UNLIKELY(e)) { + // clang-format off + throw thrust::system_error( + e, thrust::cuda_category(), + string::Sprintf(args...) + + string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); + // clang-format on + } +} + +template +inline typename std::enable_if::type throw_on_error( + curandStatus_t stat, const Args&... args) { + if (stat != CURAND_STATUS_SUCCESS) { + // clang-format off + throw thrust::system_error( + cudaErrorLaunchFailure, thrust::cuda_category(), + string::Sprintf(args...) + + string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); + // clang-format on + } +} + +template +inline typename std::enable_if::type throw_on_error( + cudnnStatus_t stat, const Args&... args) { + if (stat == CUDNN_STATUS_SUCCESS) { + return; + } else { + // clang-format off + throw std::runtime_error( + platform::dynload::cudnnGetErrorString(stat) + + string::Sprintf(args...) + + string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); + // clang-format on + } +} + +template +inline typename std::enable_if::type throw_on_error( + cublasStatus_t stat, const Args&... args) { + std::string err; + if (stat == CUBLAS_STATUS_SUCCESS) { + return; + } else if (stat == CUBLAS_STATUS_NOT_INITIALIZED) { + err = "CUBLAS: not initialized, "; + } else if (stat == CUBLAS_STATUS_ALLOC_FAILED) { + err = "CUBLAS: alloc failed, "; + } else if (stat == CUBLAS_STATUS_INVALID_VALUE) { + err = "CUBLAS: invalid value, "; + } else if (stat == CUBLAS_STATUS_ARCH_MISMATCH) { + err = "CUBLAS: arch mismatch, "; + } else if (stat == CUBLAS_STATUS_MAPPING_ERROR) { + err = "CUBLAS: mapping error, "; + } else if (stat == CUBLAS_STATUS_EXECUTION_FAILED) { + err = "CUBLAS: execution failed, "; + } else if (stat == CUBLAS_STATUS_INTERNAL_ERROR) { + err = "CUBLAS: internal error, "; + } else if (stat == CUBLAS_STATUS_NOT_SUPPORTED) { + err = "CUBLAS: not supported, "; + } else if (stat == CUBLAS_STATUS_LICENSE_ERROR) { + err = "CUBLAS: license error, "; + } + throw std::runtime_error(err + string::Sprintf(args...) + + string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); +} + +#endif // PADDLE_ONLY_CPU + +#define PADDLE_THROW(...) \ + do { \ + throw std::runtime_error( \ + string::Sprintf(__VA_ARGS__) + \ + string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); \ + } while (0) + +#define PADDLE_ENFORCE(...) \ + do { \ + ::paddle::platform::throw_on_error(__VA_ARGS__); \ + } while (0) + +} // namespace platform +} // namespace paddle diff --git a/paddle/framework/enforce_test.cc b/paddle/platform/enforce_test.cc similarity index 85% rename from paddle/framework/enforce_test.cc rename to paddle/platform/enforce_test.cc index f8da1a192f63a54324d80725c9d2f156fb11a481..d7152f81509a35e4ce36d5649e7d209f51e34b86 100644 --- a/paddle/framework/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include +#include "paddle/platform/enforce.h" +#include "gtest/gtest.h" TEST(ENFORCE, OK) { PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); @@ -23,13 +23,14 @@ TEST(ENFORCE, FAILED) { bool in_catch = false; try { PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); - } catch (paddle::framework::EnforceNotMet err) { + } catch (const std::runtime_error& error) { + // your error handling code here in_catch = true; std::string msg = "Enforce is not ok 123 at all"; - const char* what = err.what(); + const char* what = error.what(); for (size_t i = 0; i < msg.length(); ++i) { ASSERT_EQ(what[i], msg[i]); } } ASSERT_TRUE(in_catch); -} \ No newline at end of file +} diff --git a/paddle/platform/error.h b/paddle/platform/error.h deleted file mode 100644 index 93424bb61096503a4843da7942853a113f612e3b..0000000000000000000000000000000000000000 --- a/paddle/platform/error.h +++ /dev/null @@ -1,87 +0,0 @@ -#pragma once - -#include -#include -#include - -#ifndef PADDLE_ONLY_CPU - -#include -#include -#include -#include -#include - -#endif // PADDLE_ONLY_CPU - -namespace paddle { -namespace platform { - -#ifndef PADDLE_ONLY_CPU - -inline void throw_on_error(cudaError_t e, const char* message) { - if (e) { - throw thrust::system_error(e, thrust::cuda_category(), message); - } -} - -inline void throw_on_error(curandStatus_t stat, const char* message) { - if (stat != CURAND_STATUS_SUCCESS) { - throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(), - message); - } -} - -inline void throw_on_error(cudnnStatus_t stat, const char* message) { - std::stringstream ss; - if (stat == CUDNN_STATUS_SUCCESS) { - return; - } else { - ss << cudnnGetErrorString(stat); - ss << ", " << message; - throw std::runtime_error(ss.str()); - } -} - -inline void throw_on_error(cublasStatus_t stat, const char* message) { - std::stringstream ss; - if (stat == CUBLAS_STATUS_SUCCESS) { - return; - } else if (stat == CUBLAS_STATUS_NOT_INITIALIZED) { - ss << "CUBLAS: not initialized"; - } else if (stat == CUBLAS_STATUS_ALLOC_FAILED) { - ss << "CUBLAS: alloc failed"; - } else if (stat == CUBLAS_STATUS_INVALID_VALUE) { - ss << "CUBLAS: invalid value"; - } else if (stat == CUBLAS_STATUS_ARCH_MISMATCH) { - ss << "CUBLAS: arch mismatch"; - } else if (stat == CUBLAS_STATUS_MAPPING_ERROR) { - ss << "CUBLAS: mapping error"; - } else if (stat == CUBLAS_STATUS_EXECUTION_FAILED) { - ss << "CUBLAS: execution failed"; - } else if (stat == CUBLAS_STATUS_INTERNAL_ERROR) { - ss << "CUBLAS: internal error"; - } else if (stat == CUBLAS_STATUS_NOT_SUPPORTED) { - ss << "CUBLAS: not supported"; - } else if (stat == CUBLAS_STATUS_LICENSE_ERROR) { - ss << "CUBLAS: license error"; - } - ss << ", " << message; - throw std::runtime_error(ss.str()); -} - -inline void throw_on_error(cublasStatus_t stat) { - const char* message = ""; - throw_on_error(stat, message); -} - -#endif // PADDLE_ONLY_CPU - -inline void throw_on_error(int stat, const char* message) { - if (stat) { - throw std::runtime_error(message + (", stat = " + std::to_string(stat))); - } -} - -} // namespace platform -} // namespace paddle diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc index 12dc01d1a1e2df7549a19e7b8d825c7dbd16c062..edeb3ecd7bf8b87333813eee5b40f71030f6609f 100644 --- a/paddle/platform/gpu_info.cc +++ b/paddle/platform/gpu_info.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/platform/gpu_info.h" #include "gflags/gflags.h" -#include "paddle/platform/error.h" +#include "paddle/platform/enforce.h" DEFINE_double(fraction_of_gpu_memory_to_use, 0.95, "Default use 95% of GPU memory for PaddlePaddle," @@ -25,7 +25,7 @@ namespace platform { int GetDeviceCount() { int count; - throw_on_error( + PADDLE_ENFORCE( cudaGetDeviceCount(&count), "cudaGetDeviceCount failed in paddle::platform::GetDeviceCount"); return count; @@ -33,19 +33,19 @@ int GetDeviceCount() { int GetCurrentDeviceId() { int device_id; - throw_on_error( + PADDLE_ENFORCE( cudaGetDevice(&device_id), "cudaGetDevice failed in paddle::platform::GetCurrentDeviceId"); return device_id; } void SetDeviceId(int id) { - throw_on_error(cudaSetDevice(id), + PADDLE_ENFORCE(cudaSetDevice(id), "cudaSetDevice failed in paddle::platform::SetDeviceId"); } void GpuMemoryUsage(size_t &available, size_t &total) { - throw_on_error(cudaMemGetInfo(&available, &total), + PADDLE_ENFORCE(cudaMemGetInfo(&available, &total), "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage"); } @@ -84,21 +84,26 @@ size_t GpuMaxChunkSize() { void GpuMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream) { - PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream)); + PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream), + "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync"); } void GpuMemcpySync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind) { - PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind)); + PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind), + "cudaMemcpy failed in paddle::platform::GpuMemcpySync"); // note: cudaMemcpy may actually be asynchronous with respect to the caller, // block on stream 0 to make sure the copy has completed - PADDLE_ENFORCE(cudaStreamSynchronize(0)); + PADDLE_ENFORCE( + cudaStreamSynchronize(0), + "cudaStreamSynchronize failed in paddle::platform::GpuMemcpySync"); } void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, size_t count, cudaStream_t stream) { PADDLE_ENFORCE( - cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream)); + cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream), + "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeer"); } } // namespace platform } // namespace paddle diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc index 0704820aa05079401eb56814d689d6e280311edb..b31515e1f028acac885a506ff1c20479407a05e3 100644 --- a/paddle/platform/place.cc +++ b/paddle/platform/place.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include "paddle/platform/place.h" namespace paddle { @@ -7,7 +21,7 @@ namespace detail { class PlacePrinter : public boost::static_visitor<> { public: - PlacePrinter(std::ostream &os) : os_(os) {} + explicit PlacePrinter(std::ostream &os) : os_(os) {} void operator()(const CPUPlace &) { os_ << "CPUPlace"; } void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; } diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index af85fdeecb57729d7fb580ebd4c59c1afc61d61a..6354dd211d5d036e1b5971babaf624e8f847a92b 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1 +1,2 @@ -cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python) +cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python + add_op fc_op sgd_op) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index f9f87acf15a6b62c343cc0e3db9ebc7e0aabb786..54707a2859693af4a80692bf5cebab59c43ffbc3 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -13,15 +13,49 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#include +#include #include +#include +#include #include +#include +#include +#include namespace py = pybind11; namespace pd = paddle::framework; +USE_OP(add_two); +USE_OP_WITHOUT_KERNEL(fc); +USE_OP(sgd); + PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of Paddle Paddle"); + py::class_(m, "Tensor", py::buffer_protocol()) + .def_buffer([](pd::Tensor& self) -> py::buffer_info { + return paddle::pybind::CastToPyBuffer(self); + }) + .def("get_dims", + [](const pd::Tensor& self) { return pd::vectorize(self.dims()); }) + .def("set_dims", + [](pd::Tensor& self, const std::vector& dim) { + self.set_dims(pd::make_ddim(dim)); + }) + .def("alloc_float", + [](pd::Tensor& self) { + self.mutable_data(paddle::platform::CPUPlace()); + }) + .def("alloc_int", + [](pd::Tensor& self) { + self.mutable_data(paddle::platform::CPUPlace()); + }) + .def("set", paddle::pybind::PyTensorSetFromArray) + .def("set", paddle::pybind::PyTensorSetFromArray) + .def("shape", + [](pd::Tensor& self) { return pd::vectorize(self.dims()); }); + py::class_(m, "Variable", R"DOC(Variable Class. All parameter, weight, gradient are variables in Paddle. @@ -32,7 +66,12 @@ All parameter, weight, gradient are variables in Paddle. *var.GetMutable() = val; }) .def("get_int", - [](const pd::Variable& var) -> int { return var.Get(); }); + [](const pd::Variable& var) -> int { return var.Get(); }) + .def("get_tensor", + [](pd::Variable& self) -> pd::Tensor* { + return self.GetMutable(); + }, + py::return_value_policy::reference); py::class_>(m, "Scope") .def(py::init&>()) @@ -43,5 +82,47 @@ All parameter, weight, gradient are variables in Paddle. &pd::Scope::CreateVariable, py::return_value_policy::reference); + //! @note: Be careful! PyBind will return std::string as an unicode, not + //! Python str. If you want a str object, you should cast them in Python. + m.def("get_all_op_protos", []() -> std::vector { + auto& protos = pd::OpRegistry::protos(); + std::vector ret_values; + for (auto it = protos.begin(); it != protos.end(); ++it) { + PADDLE_ENFORCE(it->second.IsInitialized(), + "OpProto must all be initialized"); + std::string str; + PADDLE_ENFORCE(it->second.SerializeToString(&str), + "Serialize OpProto Error. This could be a bug of Paddle."); + ret_values.push_back(py::bytes(str)); + } + return ret_values; + }); + m.def_submodule( + "var_names", + "The module will return special predefined variable name in Paddle") + .def("empty", pd::OperatorBase::EMPTY_VAR_NAME) + .def("temp", pd::OperatorBase::TMP_VAR_NAME); + + py::class_(m, "DeviceContext") + .def_static("cpu_context", []() -> paddle::platform::DeviceContext* { + return new paddle::platform::CPUDeviceContext(); + }); + + py::class_(m, "Operator") + .def("__str__", &pd::OperatorBase::DebugString) + .def_static("create", + [](py::bytes protobin) { + pd::OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + return pd::OpRegistry::CreateOp(desc); + }) + .def("infer_shape", &pd::OperatorBase::InferShape) + .def("run", &pd::OperatorBase::Run) + .def("outputs", [](const pd::OperatorPtr& op) { return op->outputs_; }); + return m.ptr(); } diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h new file mode 100644 index 0000000000000000000000000000000000000000..b96516643ab55b9615ccafdc41d3290590987d95 --- /dev/null +++ b/paddle/pybind/tensor_bind.h @@ -0,0 +1,95 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include + +namespace py = pybind11; + +namespace paddle { + +namespace pybind { + +namespace details { + +template +struct CastToPyBufferImpl; + +template +struct CastToPyBufferImpl { + py::buffer_info operator()(framework::Tensor &tensor) { + PADDLE_THROW("This type of tensor cannot be expose to Python"); + return py::buffer_info(); + } +}; + +template +struct CastToPyBufferImpl { + using CUR_TYPE = typename std::tuple_element>::type; + py::buffer_info operator()(framework::Tensor &tensor) { + PADDLE_ENFORCE(paddle::platform::is_cpu_place(tensor.holder_->place()), + "Only CPU tensor can cast to numpy array"); + + if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) { + auto dim_vec = framework::vectorize(tensor.dims()); + std::vector dims_outside; + std::vector strides; + dims_outside.resize(dim_vec.size()); + strides.resize(dim_vec.size()); + + size_t prod = 1; + for (size_t i = dim_vec.size(); i != 0; --i) { + dims_outside[i - 1] = (size_t)dim_vec[i - 1]; + strides[i - 1] = sizeof(CUR_TYPE) * prod; + prod *= dims_outside[i - 1]; + } + + return py::buffer_info( + tensor.mutable_data(tensor.holder_->place()), + sizeof(CUR_TYPE), + py::format_descriptor::format(), + (size_t)framework::arity(tensor.dims()), + dims_outside, + strides); + } else { + constexpr bool less = I + 1 < std::tuple_size>::value; + return CastToPyBufferImpl()(tensor); + } + } +}; +} // namespace details +inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) { + auto buffer_info = details::CastToPyBufferImpl()(tensor); + return buffer_info; +} + +template +void PyTensorSetFromArray( + framework::Tensor &self, + py::array_t array) { + std::vector dims; + dims.reserve(array.ndim()); + for (size_t i = 0; i < array.ndim(); ++i) { + dims.push_back((int)array.shape()[i]); + } + + self.set_dims(framework::make_ddim(dims)); + auto *dst = self.mutable_data(paddle::platform::CPUPlace()); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); +} + +} // namespace pybind +} // namespace paddle diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index ab60f1a38dd4cd1d9799c0019dccae5f1c7d4310..3860facb099950a5287d3f6b89c3de38f588f568 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -155,7 +155,8 @@ RUN apt-get update &&\ paddle version ${DOCKERFILE_CUDNN_DSO} ${DOCKERFILE_GPU_ENV} - +ADD go/cmd/pserver/pserver /usr/bin/ +ADD go/cmd/master/master /usr/bin/ # default command shows the paddle version and exit CMD ["paddle", "version"] EOF diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index bfa10c91553563bddac8c1b41bf21490fb89d3cf..56d290be4ab04a9f6974023159aa8571d27f8dd5 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,9 +2,9 @@ set -xe -mkdir -p /paddle/build -cd /paddle/build -rm -f /paddle/install 2>/dev/null || true +mkdir -p /paddle/build_android +cd /paddle/build_android +rm -rf /paddle/install 2>/dev/null || true cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=armeabi-v7a \ @@ -21,6 +21,3 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ .. make -j `nproc` make install - -export PATH=/paddle/install/bin:/paddle/install/opt/paddle/bin:$PATH -paddle version diff --git a/paddle/scripts/travis/check_style.sh b/paddle/scripts/travis/check_style.sh index 4754bdd4c80de9700d92b0e33ecfdfc582f42813..ec499a839ac6593bac788f4cca5e33afbed73010 100755 --- a/paddle/scripts/travis/check_style.sh +++ b/paddle/scripts/travis/check_style.sh @@ -1,7 +1,7 @@ #!/bin/bash function abort(){ echo "Your change doesn't follow PaddlePaddle's code style." 1>&2 - echo "Please use pre-commit to reformat your code and git push again." 1>&2 + echo "Please use pre-commit to check what is wrong." 1>&2 exit 1 } @@ -13,8 +13,14 @@ export PATH=/usr/bin:$PATH pre-commit install clang-format --version +# set up go environment for running gometalinter +mkdir -p $GOPATH/src/github.com/PaddlePaddle/ +ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle +cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd - + if ! pre-commit run -a ; then - git diff --exit-code + git diff + exit 1 fi trap : 0 diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index b359d9da2167bf459504e15c3140b3d956f417f3..a830ceba5772846cd9255a3eeb26e8d6a17dcfbc 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -28,6 +28,17 @@ NewRemoteParameterUpdater::NewRemoteParameterUpdater( newGradients_(nullptr), pserverSpec_(pserverSpec) {} +NewRemoteParameterUpdater::NewRemoteParameterUpdater( + const OptimizationConfig &config, + const std::string pserverSpec, + const bool useEtcd) + : trainerConfig_(config), + parameterClient_(-1), + newParameters_(nullptr), + newGradients_(nullptr), + pserverSpec_(pserverSpec), + useEtcd_(useEtcd) {} + void NewRemoteParameterUpdater::init( const std::vector ¶meters) { ParameterUpdater::init(parameters); @@ -38,8 +49,13 @@ void NewRemoteParameterUpdater::init( } // create parameter server client. - parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), - FLAGS_trainer_id == 0); + if (useEtcd_) { + parameterClient_ = paddle_new_etcd_pserver_client( + (char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); + } else { + parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), + FLAGS_trainer_id == 0); + } // init new parameter and gradient. newParameters_ = initNewParameter(PARAMETER_VALUE); diff --git a/paddle/trainer/NewRemoteParameterUpdater.h b/paddle/trainer/NewRemoteParameterUpdater.h index dfed00bc216b1d41bb7520619b76702f9fe650f2..6223ba427c9b94494c2bee8f0847442f1b0574c9 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.h +++ b/paddle/trainer/NewRemoteParameterUpdater.h @@ -32,6 +32,9 @@ class NewRemoteParameterUpdater : public ParameterUpdater { public: NewRemoteParameterUpdater(const OptimizationConfig& config, const std::string pserverSpec); + NewRemoteParameterUpdater(const OptimizationConfig& config, + const std::string pserverSpec, + const bool useEtcd); ~NewRemoteParameterUpdater() { releaseNewParameter(newParameters_); releaseNewParameter(newGradients_); @@ -111,6 +114,8 @@ protected: paddle_parameter** newGradients_; /// the specification of parameter server "host1:port,host1:port" std::string pserverSpec_; + /// true if pserverSpec_ is etcd endpoint, else pserverSpec_ is pserver addr + bool useEtcd_; }; } // namespace paddle diff --git a/paddle/utils/DynamicLoader.h b/paddle/utils/DynamicLoader.h index 9b5ad21724afd7176f958619e7e10d12dc08fa49..2e5ff76a06152b6a12818f06baaeaa6a69726ba8 100644 --- a/paddle/utils/DynamicLoader.h +++ b/paddle/utils/DynamicLoader.h @@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifndef DYNAMIC_LOAD_H_ -#define DYNAMIC_LOAD_H_ +#pragma once #include #include @@ -59,5 +58,3 @@ void GetWarpCTCDsoHandle(void** dso_handle); * */ void GetLapackDsoHandle(void** dso_handle); - -#endif // DYNAMIC_LOAD_H_ diff --git a/paddle/utils/ThreadLocal.h b/paddle/utils/ThreadLocal.h index b5e2862546212041a774599ec664b95e56224a07..0a27b8b97b83a9066af23039a317c437ea56777a 100644 --- a/paddle/utils/ThreadLocal.h +++ b/paddle/utils/ThreadLocal.h @@ -51,7 +51,7 @@ template class ThreadLocal { public: ThreadLocal() { - CHECK(pthread_key_create(&threadSpecificKey_, dataDestructor) == 0); + CHECK_EQ(pthread_key_create(&threadSpecificKey_, dataDestructor), 0); } ~ThreadLocal() { pthread_key_delete(threadSpecificKey_); } @@ -65,7 +65,7 @@ public: if (!p && createLocal) { p = new T(); int ret = pthread_setspecific(threadSpecificKey_, p); - CHECK(ret == 0); + CHECK_EQ(ret, 0); } return p; } @@ -79,7 +79,7 @@ public: if (T* q = get(false)) { dataDestructor(q); } - CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); + CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0); } /** @@ -112,7 +112,7 @@ private: template class ThreadLocalD { public: - ThreadLocalD() { CHECK(pthread_key_create(&threadSpecificKey_, NULL) == 0); } + ThreadLocalD() { CHECK_EQ(pthread_key_create(&threadSpecificKey_, NULL), 0); } ~ThreadLocalD() { pthread_key_delete(threadSpecificKey_); for (auto t : threadMap_) { @@ -127,7 +127,7 @@ public: T* p = (T*)pthread_getspecific(threadSpecificKey_); if (!p) { p = new T(); - CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); + CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0); updateMap(p); } return p; @@ -141,7 +141,7 @@ public: if (T* q = (T*)pthread_getspecific(threadSpecificKey_)) { dataDestructor(q); } - CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); + CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0); updateMap(p); } diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 37cd16c79890738f6d8966579e15686c653d4df3..83f72c137bdf5e55f28be908321bd2ccd6c906fe 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -472,10 +472,16 @@ message LayerConfig { // blank label used in ctc loss optional uint32 blank = 52 [default = 0]; - // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which + // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which // controls the scope of pooling operation. can be set > 0. // leave empty or set to -1 to disable this stride pooling. optional int32 seq_pool_stride = 53 [default = -1]; + + // for crop layer + optional int32 axis = 54 [default = 2]; + repeated uint32 offset = 55; + repeated uint32 shape = 56; + } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 826ba2834a820d11e69feec5569ef3537194e3c3..ab81e67579e39a34e3ace18d14434eb86b66fa5b 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1575,7 +1575,13 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('fc') class FCLayer(LayerBase): - def __init__(self, name, size, inputs, bias=True, **xargs): + def __init__(self, + name, + size, + inputs, + bias=True, + error_clipping_threshold=None, + **xargs): super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) @@ -1592,6 +1598,8 @@ class FCLayer(LayerBase): self.create_input_parameter(input_index, psize, dims, sparse, format) self.create_bias_parameter(bias, self.config.size) + if error_clipping_threshold is not None: + self.config.error_clipping_threshold = error_clipping_threshold @config_layer('selective_fc') @@ -1990,6 +1998,23 @@ class PadLayer(LayerBase): self.config.size = out_ch * out_h * out_w +@config_layer('crop') +class CropLayer(LayerBase): + def __init__(self, name, inputs, axis, offset, shape, **xargs): + super(CropLayer, self).__init__(name, 'crop', 0, inputs=inputs, **xargs) + self.config.axis = axis + self.config.offset.extend(offset) + self.config.shape.extend(shape) + + # get channel, width and height from input_0 layer + input_layer = self.get_input_layer(0) + image_conf = self.config.inputs[0].image_conf + image_conf.img_size = input_layer.width + image_conf.img_size_y = input_layer.height + image_conf.channels = input_layer.size / (input_layer.width * + input_layer.height) + + @config_layer('batch_norm') class BatchNormLayer(LayerBase): layer_type = 'batch_norm' diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 78aa0778f8d1dca9fae82f0411be5a00e636cbc9..fdb6f83f2ba510232714fb8a9c7c1af837a753ff 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -127,6 +127,7 @@ __all__ = [ 'dropout_layer', 'prelu_layer', 'gated_unit_layer', + 'crop_layer', ] @@ -218,6 +219,7 @@ class LayerType(object): SMOOTH_L1 = 'smooth_l1' PRELU = 'prelu' + CROP_LAYER = 'crop' @staticmethod def is_layer_type(type_name): @@ -5970,3 +5972,52 @@ def gated_unit_layer(input, name="%s_gated_act" % name, input=dotmul_operator(input_proj, gate), layer_attr=layer_attr) + + +@wrap_name_default() +@layer_support() +def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): + """ + The crop layer crops images by offset and shape. User can set crop shape by + args 'shape' explicitly or by reference input layer. + + The example usage is: + + .. code-block:: python + crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3]) + + :param input: The input layer.If two inputs were setted, + the second input will be regarded as reference input + :type input: LayerOutput or Sequence + :param offset: The crop offset + :type offset: Sequence + :param axis: start axis to be cropped. To image input layer: + - 0: batch size + - 1: channels + - 2: height + - 3: width + :type partial_sum: int + :param shape: The shape to be cropped. Default is None. + :type shape: Sequence | None + :param name: Name of this layer. + :type name: basestring + :return: LayerOutput object. + :rtype: LayerOutput + """ + if isinstance(input, LayerOutput): + input = [input] + else: + assert isinstance(input, collections.Sequence) + l = Layer( + inputs=[x.name for x in input], + axis=axis, + offset=offset, + shape=shape, + name=name, + type=LayerType.CROP_LAYER, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name=name, + layer_type=LayerType.CROP_LAYER, + parents=input, + size=l.config.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py new file mode 100644 index 0000000000000000000000000000000000000000..8314a7e9a5586647c70ff010156817110919c72b --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py @@ -0,0 +1,21 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +data = data_layer(name='data', size=2016, height=48, width=42) +refernce_data = data_layer(name='data', size=768, height=16, width=16) + +conv = img_conv_layer( + input=data, + filter_size=3, + num_channels=1, + num_filters=16, + padding=1, + act=LinearActivation(), + bias_attr=True) + +pool = img_pool_layer(input=conv, pool_size=2, stride=2, pool_type=MaxPooling()) + +crop = crop_layer(input=[pool, refernce_data], axis=2) + +outputs(pad) diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 3ba5c31871807027e452df5d889b3b403e1c6414..3c75ca4c3abf1e94fc00b87f3af51d1cbf6dc430 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,7 +20,6 @@ import trainer import event import data_type import topology -import data_feeder import networks import evaluator from . import dataset @@ -31,7 +30,6 @@ import op import pooling import inference import networks -import py_paddle.swig_paddle as api import minibatch import plot import image @@ -47,7 +45,6 @@ __all__ = [ 'data_type', 'attr', 'pooling', - 'data_feeder', 'dataset', 'reader', 'topology', @@ -61,6 +58,7 @@ __all__ = [ def init(**kwargs): + import py_paddle.swig_paddle as api args = [] args_dict = {} # NOTE: append arguments if they are in ENV diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index 2698251b9e15046eb14f71c3f5b0546ecbb4a5dd..98dfb85a0ea57050bf8dd8d46fca9574801d8eb3 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from py_paddle import DataProviderConverter import collections import paddle.trainer.PyDataProvider2 as pydp2 diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 2e4beb6882789249db09705f3f4d6c5c19e492cd..90830515c1e8e6f5260cfca631e02a3a52cedbe5 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -26,8 +26,9 @@ import sentiment import wmt14 import mq2007 import flowers +import voc2012 __all__ = [ 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment' - 'uci_housing', 'wmt14', 'mq2007', 'flowers' + 'uci_housing', 'wmt14', 'mq2007', 'flowers', 'voc2012' ] diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index 4a2eb59c340f5d0d3818170e56d730330e0bab29..645f3cc0dce70752c20569523e4bab440861f6a1 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -22,6 +22,8 @@ import importlib import paddle.v2.dataset import cPickle import glob +import cPickle as pickle +import random __all__ = [ 'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader', @@ -170,8 +172,6 @@ def convert(output_path, name_prefix, max_lines_to_shuffle=1000): import recordio - import cPickle as pickle - import random """ Convert data from reader to recordio format files. @@ -201,8 +201,10 @@ def convert(output_path, def write_data(w, lines): random.shuffle(lines) for i, d in enumerate(lines): - d = pickle.dumps(d, pickle.HIGHEST_PROTOCOL) - w[i % num_shards].write(d) + # FIXME(Yancey1989): + # dumps with protocol: pickle.HIGHEST_PROTOCOL + o = pickle.dumps(d) + w[i % num_shards].write(o) w = open_writers() lines = [] diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index fd71b341662ca6f540ce44a86348e782561a97d7..cffb319ad8f56ccddba3fef63e1b6ec68e5bac1e 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -212,19 +212,19 @@ def gen_pair(querylist, partial_order="full"): for j in range(i + 1, len(querylist)): query_right = querylist[j] if query_left.relevance_score > query_right.relevance_score: - labels.append(1) + labels.append([1]) docpairs.append([ np.array(query_left.feature_vector), np.array(query_right.feature_vector) ]) elif query_left.relevance_score < query_right.relevance_score: - labels.append(1) + labels.append([1]) docpairs.append([ np.array(query_right.feature_vector), np.array(query_left.feature_vector) ]) for label, pair in zip(labels, docpairs): - yield label, pair[0], pair[1] + yield np.array(label), pair[0], pair[1] def gen_list(querylist): diff --git a/python/paddle/v2/dataset/tests/voc2012_test.py b/python/paddle/v2/dataset/tests/voc2012_test.py new file mode 100644 index 0000000000000000000000000000000000000000..31e72ebf5eac0508d12783f9ceaa6eef0fa6d353 --- /dev/null +++ b/python/paddle/v2/dataset/tests/voc2012_test.py @@ -0,0 +1,42 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.v2.dataset.voc2012 +import unittest + + +class TestVOC(unittest.TestCase): + def check_reader(self, reader): + sum = 0 + label = 0 + for l in reader(): + self.assertEqual(l[0].size, 3 * l[1].size) + sum += 1 + return sum + + def test_train(self): + count = self.check_reader(paddle.v2.dataset.voc_seg.train()) + self.assertEqual(count, 2913) + + def test_test(self): + count = self.check_reader(paddle.v2.dataset.voc_seg.test()) + self.assertEqual(count, 1464) + + def test_val(self): + count = self.check_reader(paddle.v2.dataset.voc_seg.val()) + self.assertEqual(count, 1449) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/dataset/voc2012.py b/python/paddle/v2/dataset/voc2012.py new file mode 100644 index 0000000000000000000000000000000000000000..617e212d67fbe37f9d9663e9c83c62045411fa77 --- /dev/null +++ b/python/paddle/v2/dataset/voc2012.py @@ -0,0 +1,85 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Image dataset for segmentation. +The 2012 dataset contains images from 2008-2011 for which additional +segmentations have been prepared. As in previous years the assignment +to training/test sets has been maintained. The total number of images +with segmentation has been increased from 7,062 to 9,993. +""" + +import tarfile +import io +import numpy as np +from paddle.v2.dataset.common import download +from paddle.v2.image import * +from PIL import Image + +__all__ = ['train', 'test', 'val'] + +VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/\ +VOCtrainval_11-May-2012.tar' + +VOC_MD5 = '6cd6e144f989b92b3379bac3b3de84fd' +SET_FILE = 'VOCdevkit/VOC2012/ImageSets/Segmentation/{}.txt' +DATA_FILE = 'VOCdevkit/VOC2012/JPEGImages/{}.jpg' +LABEL_FILE = 'VOCdevkit/VOC2012/SegmentationClass/{}.png' + +CACHE_DIR = 'voc2012' + + +def reader_creator(filename, sub_name): + + tarobject = tarfile.open(filename) + name2mem = {} + for ele in tarobject.getmembers(): + name2mem[ele.name] = ele + + def reader(): + set_file = SET_FILE.format(sub_name) + sets = tarobject.extractfile(name2mem[set_file]) + for line in sets: + line = line.strip() + data_file = DATA_FILE.format(line) + label_file = LABEL_FILE.format(line) + data = tarobject.extractfile(name2mem[data_file]).read() + label = tarobject.extractfile(name2mem[label_file]).read() + data = Image.open(io.BytesIO(data)) + label = Image.open(io.BytesIO(label)) + data = np.array(data) + label = np.array(label) + yield data, label + + return reader + + +def train(): + """ + Create a train dataset reader containing 2913 images in HWC order. + """ + return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'trainval') + + +def test(): + """ + Create a test dataset reader containing 1464 images in HWC order. + """ + return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'train') + + +def val(): + """ + Create a val dataset reader containing 1449 images in HWC order. + """ + return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'val') diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py index fd6050fa339d280ad54e40128ea6bae25132c873..7589cc9917f26375d595e200245d5ba099bc38d7 100644 --- a/python/paddle/v2/event.py +++ b/python/paddle/v2/event.py @@ -9,8 +9,6 @@ There are: * BeginPass * EndPass """ -import py_paddle.swig_paddle as api - __all__ = [ 'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult' ] @@ -18,6 +16,7 @@ __all__ = [ class WithMetric(object): def __init__(self, evaluator): + import py_paddle.swig_paddle as api if not isinstance(evaluator, api.Evaluator): raise TypeError("Evaluator should be api.Evaluator type") self.__evaluator__ = evaluator diff --git a/python/paddle/v2/framework/create_op_creation_methods.py b/python/paddle/v2/framework/create_op_creation_methods.py new file mode 100644 index 0000000000000000000000000000000000000000..7248c3f52a9902e8c08ac2f1405801a5710459e5 --- /dev/null +++ b/python/paddle/v2/framework/create_op_creation_methods.py @@ -0,0 +1,250 @@ +import paddle.v2.framework.core as core +import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 +import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 +import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2 +import cStringIO + + +def get_all_op_protos(): + """ + Get all registered op proto from Paddle C++ + :return: list of OpProto + """ + protostrs = core.get_all_op_protos() + ret_values = [] + for pbstr in protostrs: + op_proto = op_proto_pb2.OpProto.FromString(str(pbstr)) + ret_values.append(op_proto) + return ret_values + + +class OpDescCreationMethod(object): + """ + A Functor object to convert user input(use key word args) to OpDesc based on + OpProto. + + :param op_proto: The OpProto object. + :type op_proto: op_proto_pb2.OpProto + """ + + def __init__(self, op_proto): + if not isinstance(op_proto, op_proto_pb2.OpProto): + raise TypeError("Argument should be OpProto") + self.__op_proto__ = op_proto + + def __call__(self, *args, **kwargs): + """ + Convert user input to OpDesc. Only key-word args are supported. + :return: OpDesc based on user input + :rtype: op_desc_pb2.OpDesc + """ + if len(args) != 0: + raise ValueError("Only keyword arguments is supported by Paddle") + op_desc = op_desc_pb2.OpDesc() + + # Inputs + ipts, ipt_format, _ = OpDescCreationMethod.extract_input_or_output( + "input", kwargs, self.__op_proto__.inputs) + op_desc.inputs.extend(ipts) + if ipt_format is not None: + op_desc.attrs.extend([ipt_format]) + + # Outputs + outs, out_format, tmp_index = OpDescCreationMethod.extract_input_or_output( + "output", kwargs, self.__op_proto__.outputs) + op_desc.outputs.extend(outs) + if out_format is not None: + op_desc.attrs.extend([out_format]) + if len(tmp_index) != 0: + tmp_index_attr = op_desc.attrs.add() + tmp_index_attr.type = attr_type_pb2.INTS + tmp_index_attr.name = "temporary_index" + tmp_index_attr.ints.extend(tmp_index) + + # Types + op_desc.type = self.__op_proto__.type + + # Attrs + for attr in self.__op_proto__.attrs: + if attr.generated: + continue + user_defined_attr = kwargs.get(attr.name, None) + if user_defined_attr is not None: + new_attr = op_desc.attrs.add() + new_attr.name = attr.name + new_attr.type = attr.type + if attr.type == attr_type_pb2.INT: + new_attr.i = user_defined_attr + elif attr.type == attr_type_pb2.FLOAT: + new_attr.f = user_defined_attr + elif attr.type == attr_type_pb2.STRING: + new_attr.s = user_defined_attr + elif attr.type == attr_type_pb2.INTS: + new_attr.ints.extend(user_defined_attr) + elif attr.type == attr_type_pb2.FLOATS: + new_attr.floats.extend(user_defined_attr) + elif attr.type == attr_type_pb2.STRINGS: + new_attr.strings.extend(user_defined_attr) + else: + raise NotImplementedError("Not support attribute type " + + attr.type) + + return op_desc + + @staticmethod + def extract_input_or_output(in_out, kwargs, meta): + """ + Extract input variable names or output variable names from key-word + arguments, which base on VarProtos. + + :param in_out: "input" or "output" + :param kwargs: key-word arguments that user inputted. + :param meta: a list of VarProto + :return: The three object will be return. The variable names. The + input_format or output_format attribute(None if the input or output is + not multiple). The temporary variable index list. + """ + multiple = OpDescCreationMethod.any_is_true((m.multiple for m in meta)) + tmp_index = [] + retv = [] + if multiple: + var_format = op_desc_pb2.AttrDesc() + var_format.type = attr_type_pb2.INTS + var_format.name = "%s_format" % in_out + var_format.ints.append(0) + + for var in meta: + var_name = var.name + + if var.temporary: + var_name = [core.var_names.temp()] + tmp_index.append(len(retv)) + else: + var_name = kwargs.get(var_name, []) + if not isinstance(var_name, list): + var_name = [var_name] + retv.extend(var_name) + var_format.ints.append(len(var_name) + var_format.ints[-1]) + return retv, var_format, tmp_index + else: + for var in meta: + if var.temporary: + retv.append(kwargs.get(var.name, core.var_names.temp())) + tmp_index.append(len(retv)) + else: + retv.append(kwargs.get(var.name, core.var_names.empty())) + return retv, None, tmp_index + + @staticmethod + def any_is_true(generator): + """ + Reduce a bool array to one. If any of them is True, then return True. + """ + for flag in generator: + if flag: + return True + return False + + +def get_docstring_from_op_proto(op_proto): + """ + Generate docstring from a OpProto + :param op_proto: a OpProto instance. + :type op_proto: op_proto_pb2.OpProto + :return: docstring + """ + if not isinstance(op_proto, op_proto_pb2.OpProto): + raise TypeError("Input must be OpProto") + f = cStringIO.StringIO() + f.write(op_proto.comment) + f.write("\n") + + def __append_param__(name, comment, type): + # Maybe replace the following line with template engine is better. + f.write(":param ") + f.write(name) + f.write(": ") + f.write(comment) + f.write("\n") + f.write(":type ") + f.write(name) + f.write(": ") + f.write(type) + f.write("\n") + + for ipt in op_proto.inputs: + __append_param__(ipt.name, ipt.comment, "list | basestr" + if ipt.multiple else "basestr") + + temp_var_prefix = \ + "This is a temporary variable. It does not have to set by user. " + for opt in op_proto.outputs: + __append_param__(opt.name, opt.comment if not opt.temporary else + temp_var_prefix + opt.comment, "list | basestr" + if opt.multiple else "basestr") + + for attr in op_proto.attrs: + attr_type = None + if attr.type == attr_type_pb2.INT: + attr_type = "int" + elif attr.type == attr_type_pb2.FLOAT: + attr_type = "float" + elif attr.type == attr_type_pb2.STRING: + attr_type = "basestr" + elif attr.type == attr_type_pb2.INTS: + attr_type = "list of int" + elif attr.type == attr_type_pb2.FLOATS: + attr_type = "list of float" + elif attr.type == attr_type_pb2.STRINGS: + attr_type = "list of basestr" + + if attr_type is None: + raise RuntimeError("Not supported attribute type " + attr.type) + + __append_param__(attr.name, attr.comment, attr_type) + + return f.getvalue() + + +def create_op_creation_method(op_proto): + """ + Generate op creation method for an OpProto + """ + method = OpDescCreationMethod(op_proto) + + def __impl__(*args, **kwargs): + opdesc = method(*args, **kwargs) + return core.Operator.create(opdesc.SerializeToString()) + + __impl__.__doc__ = get_docstring_from_op_proto(op_proto) + __impl__.all_input_args = [var.name for var in op_proto.inputs] + __impl__.all_output_args = [var.name for var in op_proto.outputs] + __impl__.all_attr_args = [attr.name for attr in op_proto.attrs] + + return __impl__ + + +class OpCreationsHolder(object): + """ + A object will holds all op creation methods. + + Use `op_creations.xxx_op` to access them. + """ + pass + + +op_creations = OpCreationsHolder() + + +def __bootstrap__(): + """ + Bootstrap function for this module. It will dynamic create all op creation + methods in runtime. + """ + for op_proto in get_all_op_protos(): + func = create_op_creation_method(op_proto) + func.__name__ = str(op_proto.type) + setattr(op_creations, func.__name__, func) + + +__bootstrap__() diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 7023e82b5f08eb49fa1fee27118a7907d58312e2..ec076e40c9312fee7f3ba030dc69208069fd45a8 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -1,2 +1,3 @@ add_python_test(test_framework test_protobuf.py test_scope.py - test_default_scope_funcs.py) + test_default_scope_funcs.py test_op_creation_methods.py + test_tensor.py test_fc_op.py test_add_two_op.py test_sgd_op.py) diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py new file mode 100644 index 0000000000000000000000000000000000000000..237f9b7eb0d525a2c8431523a2d90b7e32493d53 --- /dev/null +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -0,0 +1,50 @@ +import paddle.v2.framework.core as core +import unittest +import numpy +import paddle.v2.framework.create_op_creation_methods as creation + + +class OpTestMeta(type): + def __new__(cls, name, bases, attrs): + obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs) + + def test_all(self): + func = getattr(creation.op_creations, self.type, None) + self.assertIsNotNone(func) + + scope = core.Scope(None) + kwargs = dict() + + for in_name in func.all_input_args: + if hasattr(self, in_name): + kwargs[in_name] = in_name + var = scope.create_var(in_name).get_tensor() + arr = getattr(self, in_name) + var.set_dims(arr.shape) + var.set(arr) + else: + kwargs[in_name] = "@EMPTY@" + + for out_name in func.all_output_args: + if hasattr(self, out_name): + kwargs[out_name] = out_name + scope.create_var(out_name).get_tensor() + + for attr_name in func.all_attr_args: + if hasattr(self, attr_name): + kwargs[attr_name] = getattr(self, attr_name) + + op = func(**kwargs) + + op.infer_shape(scope) + + ctx = core.DeviceContext.cpu_context() + op.run(scope, ctx) + + for out_name in func.all_output_args: + actual = numpy.array(scope.get_var(out_name).get_tensor()) + expect = getattr(self, out_name) + numpy.testing.assert_almost_equal(actual, expect) + + obj.test_all = test_all + return obj diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py new file mode 100644 index 0000000000000000000000000000000000000000..a06d7a78ecf838a49e5f2808d3686c6b92faa8ce --- /dev/null +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -0,0 +1,17 @@ +import unittest +from op_test_util import OpTestMeta +import numpy + + +class TestAddOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "add_two" + self.X = numpy.random.random((342, 345)).astype("float32") + self.Y = numpy.random.random((342, 345)).astype("float32") + self.Out = self.X + self.Y + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py new file mode 100644 index 0000000000000000000000000000000000000000..59e7e61249e2a7d49a17e5d87209f03b8f35f730 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_fc_op.py @@ -0,0 +1,43 @@ +import paddle.v2.framework.core as core +import unittest +import numpy +import paddle.v2.framework.create_op_creation_methods as creation + + +class TestFc(unittest.TestCase): + def test_fc(self): + scope = core.Scope(None) + x = scope.create_var("X") + x_tensor = x.get_tensor() + x_tensor.set_dims([1000, 784]) + x_tensor.alloc_float() + + w = scope.create_var("W") + w_tensor = w.get_tensor() + w_tensor.set_dims([784, 100]) + w_tensor.alloc_float() + + w_tensor.set(numpy.random.random((784, 100)).astype("float32")) + + # Set a real numpy array here. + # x_tensor.set(numpy.array([])) + + op = creation.op_creations.fc(X="X", Y="Y", W="W") + + for out in op.outputs(): + if scope.get_var(out) is None: + scope.create_var(out).get_tensor() + + tensor = scope.get_var("Y").get_tensor() + op.infer_shape(scope) + self.assertEqual([1000, 100], tensor.shape()) + + ctx = core.DeviceContext.cpu_context() + + op.run(scope, ctx) + + # After complete all ops, check Y is expect or not. + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_op_creation_methods.py b/python/paddle/v2/framework/tests/test_op_creation_methods.py new file mode 100644 index 0000000000000000000000000000000000000000..41db7c0d535aa920b34d6cc346090a8c15bfb110 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_op_creation_methods.py @@ -0,0 +1,254 @@ +import unittest +import paddle.v2.framework.create_op_creation_methods as creation +import paddle.v2.framework.core as core +import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 +import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 +import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2 + + +class TestGetAllProtos(unittest.TestCase): + def test_all(self): + all_protos = creation.get_all_op_protos() + self.assertNotEqual(0, len(all_protos)) + + for each in all_protos: + self.assertTrue(each.IsInitialized()) + + +class TestOpDescCreationMethod(unittest.TestCase): + def test_plain_input_output(self): + op = op_proto_pb2.OpProto() + op.type = "test" + ipt = op.inputs.add() + ipt.name = "X" + ipt.comment = "not matter" + + ipt = op.inputs.add() + ipt.name = "Y" + ipt.comment = "not matter" + + opt = op.outputs.add() + opt.name = "Z" + opt.comment = "not matter" + + op.comment = "not matter" + + self.assertTrue(op.IsInitialized()) + + method = creation.OpDescCreationMethod(op) + output = method(X="a", Y="b", Z="c") + + expected = op_desc_pb2.OpDesc() + expected.type = "test" + expected.inputs.extend(["a", "b"]) + expected.outputs.append("c") + self.assertEqual(expected, output) + + def test_multiple_input_plain_output(self): + op = op_proto_pb2.OpProto() + op.type = "fc" + ipt = op.inputs.add() + ipt.name = "X" + ipt.comment = "" + ipt.multiple = True + + ipt = op.inputs.add() + ipt.name = "W" + ipt.comment = "" + ipt.multiple = True + + ipt = op.inputs.add() + ipt.name = "b" + ipt.comment = "" + + out = op.outputs.add() + out.name = "Y" + out.comment = "" + + op.comment = "" + self.assertTrue(op.IsInitialized()) + method = creation.OpDescCreationMethod(op) + + generated1 = method(X="x", W="w", b="b", Y="y") + expected1 = op_desc_pb2.OpDesc() + expected1.inputs.extend(['x', 'w', 'b']) + expected1.outputs.extend(['y']) + expected1.type = 'fc' + attr = expected1.attrs.add() + attr.name = 'input_format' + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 1, 2, 3]) + self.assertEqual(expected1, generated1) + + generated2 = method( + X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y') + expected2 = op_desc_pb2.OpDesc() + expected2.inputs.extend(['x1', 'x2', 'x3', 'w1', 'w2', 'w3', 'b']) + expected2.outputs.extend(['y']) + expected2.type = 'fc' + attr = expected2.attrs.add() + attr.name = 'input_format' + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 3, 6, 7]) + self.assertEqual(expected2, generated2) + + def test_attrs(self): + op = op_proto_pb2.OpProto() + op.type = "test" + ipt = op.inputs.add() + ipt.name = 'X' + ipt.comment = "" + + def __add_attr__(name, type): + attr = op.attrs.add() + attr.name = name + attr.comment = "" + attr.type = type + + __add_attr__("int_attr", attr_type_pb2.INT) + __add_attr__("float_attr", attr_type_pb2.FLOAT) + __add_attr__("string_attr", attr_type_pb2.STRING) + __add_attr__("ints_attr", attr_type_pb2.INTS) + __add_attr__("floats_attr", attr_type_pb2.FLOATS) + __add_attr__("strings_attr", attr_type_pb2.STRINGS) + + op.comment = "" + self.assertTrue(op.IsInitialized()) + + method = creation.OpDescCreationMethod(op) + + generated = method( + X="a", + int_attr=10, + float_attr=3.2, + string_attr="test_str", + ints_attr=[0, 1, 2, 3, 4], + floats_attr=[0.2, 3.2, 4.5], + strings_attr=["a", "b", "c"]) + + expected = op_desc_pb2.OpDesc() + expected.type = "test" + expected.inputs.extend(['a']) + attr = expected.attrs.add() + attr.name = "int_attr" + attr.type = attr_type_pb2.INT + attr.i = 10 + + attr = expected.attrs.add() + attr.name = "float_attr" + attr.type = attr_type_pb2.FLOAT + attr.f = 3.2 + + attr = expected.attrs.add() + attr.name = "string_attr" + attr.type = attr_type_pb2.STRING + attr.s = "test_str" + + attr = expected.attrs.add() + attr.name = "ints_attr" + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 1, 2, 3, 4]) + + attr = expected.attrs.add() + attr.name = 'floats_attr' + attr.type = attr_type_pb2.FLOATS + attr.floats.extend([0.2, 3.2, 4.5]) + + attr = expected.attrs.add() + attr.name = 'strings_attr' + attr.type = attr_type_pb2.STRINGS + attr.strings.extend(['a', 'b', 'c']) + + self.assertEqual(expected, generated) + + def test_input_temporary_output(self): + op = op_proto_pb2.OpProto() + op.type = "test" + out = op.outputs.add() + out.name = "OUT" + out.comment = "" + + out = op.outputs.add() + out.name = "TMP" + out.comment = "" + out.temporary = True + + out = op.outputs.add() + out.name = "OUT2" + out.comment = "" + op.comment = "" + + method = creation.OpDescCreationMethod(op) + generated = method(OUT="a", OUT2="b") + desc = op_desc_pb2.OpDesc() + desc.outputs.extend(["a", core.var_names.temp(), "b"]) + desc.type = "test" + attr = desc.attrs.add() + attr.name = "temporary_index" + attr.type = attr_type_pb2.INTS + attr.ints.append(2) + self.assertEqual(generated, desc) + + +class TestOpCreationDocStr(unittest.TestCase): + def test_all(self): + op = op_proto_pb2.OpProto() + op.type = "test" + op.comment = """Test Op. + +This op is used for unit test, not a real op. +""" + a = op.inputs.add() + a.name = "a" + a.comment = "Input a for test op" + a.multiple = True + + b = op.inputs.add() + b.name = "b" + b.comment = "Input b for test op" + self.assertTrue(op.IsInitialized()) + + o1 = op.outputs.add() + o1.name = "output" + o1.comment = "The output of test op" + + o2 = op.outputs.add() + o2.name = "temp output" + o2.comment = "The temporary output of test op" + o2.temporary = True + + test_str = op.attrs.add() + test_str.name = "str_attr" + test_str.type = attr_type_pb2.STRING + test_str.comment = "A string attribute for test op" + + actual = creation.get_docstring_from_op_proto(op) + expected_docstring = '''Test Op. + +This op is used for unit test, not a real op. + +:param a: Input a for test op +:type a: list | basestr +:param b: Input b for test op +:type b: basestr +:param output: The output of test op +:type output: basestr +:param temp output: This is a temporary variable. It does not have to set by user. The temporary output of test op +:type temp output: basestr +:param str_attr: A string attribute for test op +:type str_attr: basestr +''' + self.assertEqual(expected_docstring, actual) + + +class TestOpCreations(unittest.TestCase): + def test_all(self): + add_op = creation.op_creations.add_two(X="a", Y="b", Out="z") + self.assertIsNotNone(add_op) + # Invoke C++ DebugString() + self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).', + str(add_op)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py new file mode 100644 index 0000000000000000000000000000000000000000..405d73b224fa153e50b4ec408a921f2bdaab46aa --- /dev/null +++ b/python/paddle/v2/framework/tests/test_sgd_op.py @@ -0,0 +1,18 @@ +import unittest +import numpy +from op_test_util import OpTestMeta + + +class TestSGD(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "sgd" + self.param = numpy.random.random((342, 345)).astype("float32") + self.grad = numpy.random.random((342, 345)).astype("float32") + self.learning_rate = 0.1 + self.param_out = self.param - self.learning_rate * self.grad + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..b72aff3b9cd16595c7e81856642196b2bb61a790 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_tensor.py @@ -0,0 +1,45 @@ +import paddle.v2.framework.core as core +import unittest +import numpy + + +class TestScope(unittest.TestCase): + def test_int_tensor(self): + scope = core.Scope(None) + var = scope.create_var("test_tensor") + tensor = var.get_tensor() + + tensor.set_dims([1000, 784]) + tensor.alloc_int() + + tensor_array = numpy.array(tensor) + self.assertEqual((1000, 784), tensor_array.shape) + tensor_array[3, 9] = 1 + tensor_array[19, 11] = 2 + tensor.set(tensor_array) + + tensor_array_2 = numpy.array(tensor) + self.assertEqual(1.0, tensor_array_2[3, 9]) + self.assertEqual(2.0, tensor_array_2[19, 11]) + + def test_float_tensor(self): + scope = core.Scope(None) + var = scope.create_var("test_tensor") + tensor = var.get_tensor() + + tensor.set_dims([1000, 784]) + tensor.alloc_float() + + tensor_array = numpy.array(tensor) + self.assertEqual((1000, 784), tensor_array.shape) + tensor_array[3, 9] = 1.0 + tensor_array[19, 11] = 2.0 + tensor.set(tensor_array) + + tensor_array_2 = numpy.array(tensor) + self.assertAlmostEqual(1.0, tensor_array_2[3, 9]) + self.assertAlmostEqual(2.0, tensor_array_2[19, 11]) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 34b7308601390a4ccb0c19ef10d2c7a60b3fa576..40134a3270c3579fd2f6a891af66ff241050f60c 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -1,9 +1,7 @@ import numpy -import py_paddle.swig_paddle as api import collections import topology import minibatch -from data_feeder import DataFeeder __all__ = ['infer', 'Inference'] @@ -28,6 +26,7 @@ class Inference(object): """ def __init__(self, output_layer, parameters): + import py_paddle.swig_paddle as api topo = topology.Topology(output_layer) gm = api.GradientMachine.createFromConfigProto( topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE]) @@ -40,6 +39,7 @@ class Inference(object): self.__data_types__ = topo.data_type() def iter_infer(self, input, feeding=None): + from data_feeder import DataFeeder feeder = DataFeeder(self.__data_types__, feeding) batch_size = len(input) diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py index 70f9e43c9683033233d48a750668771a4c7ba045..4c041fb509903008a7a5648a112b2472ed856aea 100644 --- a/python/paddle/v2/master/client.py +++ b/python/paddle/v2/master/client.py @@ -10,8 +10,9 @@ class client(object): client is a client to the master server. """ - def __init__(self, addr, buf_size): - self.c = lib.paddle_new_master_client(addr, buf_size) + def __init__(self, etcd_endpoints, timeout, buf_size): + self.c = lib.paddle_new_etcd_master_client(etcd_endpoints, timeout, + buf_size) def close(self): lib.paddle_release_master_client(self.c) diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 390c22ee552c506fde1567efba1326a6d735ad2e..ba581980334fec6226a537af2cf53b3465d32c1e 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,5 +1,3 @@ -import py_paddle.swig_paddle as swig_api - import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.optimizers as v1_optimizers """ @@ -18,6 +16,7 @@ __all__ = [ class Optimizer(object): def __init__(self, **kwargs): + import py_paddle.swig_paddle as swig_api if 'batch_size' in kwargs: del kwargs['batch_size'] # not important for python library. @@ -36,23 +35,27 @@ class Optimizer(object): For each optimizer(SGD, Adam), GradientMachine should enable different buffers. """ + import py_paddle.swig_paddle as swig_api tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__) assert isinstance(tmp, swig_api.ParameterOptimizer) return tmp.getParameterTypes() def __create_local_updater__(self): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__) def __create_remote_updater__(self, pass_num, use_sparse_updater): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createRemoteUpdater( self.__opt_conf__, pass_num, use_sparse_updater) - def __create_new_remote_updater__(self, pserver_spec): + def __create_new_remote_updater__(self, pserver_spec, use_etcd): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createNewRemoteUpdater( - self.__opt_conf__, pserver_spec) + self.__opt_conf__, pserver_spec, use_etcd) def create_updater(self, is_local, num_passes, use_sparse_updater, - pserver_spec): + pserver_spec, use_etcd): """ create proper parameter_updater by configuration. :param is_local: create local or remote parameter updater @@ -78,7 +81,7 @@ class Optimizer(object): num_passes, use_sparse_updater) else: parameter_updater = self.__create_new_remote_updater__( - pserver_spec) + pserver_spec, use_etcd) return parameter_updater @@ -268,6 +271,7 @@ ModelAverage = v1_optimizers.ModelAverage L2Regularization = v1_optimizers.L2Regularization if __name__ == '__main__': + import py_paddle.swig_paddle as swig_api swig_api.initPaddle('--use_gpu=false') for opt in [ Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(), diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index bbaf8bfa979fbbf460561ebf7077b75b9c41a11a..a9cba8ca0b1efd4149463f6c7bf2dcdfbea350c9 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -1,5 +1,4 @@ import numpy as np -import py_paddle.swig_paddle as api from paddle.proto.ParameterConfig_pb2 import ParameterConfig import paddle.trainer.config_parser as cp import struct @@ -124,6 +123,7 @@ class Parameters(object): :return: parameter value :rtype: np.ndarray """ + import py_paddle.swig_paddle as api shape = self.get_shape(key) if len(self.__gradient_machines__) == 0: @@ -223,7 +223,7 @@ class Parameters(object): :type gradient_machine: api.GradientMachine :return: """ - + import py_paddle.swig_paddle as api if not isinstance(gradient_machine, api.GradientMachine): raise ValueError("gradient_machine should be api.GradientMachine") @@ -359,6 +359,7 @@ def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr): :return: :rtype: api.Parameter """ + import py_paddle.swig_paddle as api param = __get_parameter_in_gradient_machine__(gradient_machine, name) vec = param.getBuf(api.PARAMETER_VALUE) assert isinstance(vec, api.Vector) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 96c6c4b89a2f2e2c3ecb95213e0e0191b1998f50..76bae0bb12b6c33f88530386f9cc19ae9b59f457 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -2,12 +2,6 @@ Module Trainer """ import collections -import gzip -import os - -import py_paddle.swig_paddle as api - -from data_feeder import DataFeeder from topology import Topology from . import event as v2_event from . import optimizer as v2_optimizer @@ -51,7 +45,8 @@ class SGD(object): update_equation, extra_layers=None, is_local=True, - pserver_spec=None): + pserver_spec=None, + use_etcd=True): if not isinstance(parameters, v2_parameters.Parameters): raise TypeError('parameters should be parameters') @@ -59,6 +54,7 @@ class SGD(object): if not isinstance(update_equation, v2_optimizer.Optimizer): raise TypeError("update equation parameter must be " "paddle.v2.optimizer.Optimizer") + import py_paddle.swig_paddle as api topology = Topology(cost, extra_layers=extra_layers) self.__optimizer__ = update_equation self.__topology__ = topology @@ -66,6 +62,7 @@ class SGD(object): self.__topology_in_proto__ = topology.proto() self.__is_local__ = is_local self.__pserver_spec__ = pserver_spec + self.__use_etcd__ = use_etcd self.__use_sparse_updater__ = self.__topology__.use_sparse_updater() # # In local mode, disable sparse_remote_update. @@ -124,13 +121,15 @@ class SGD(object): :type feeding: dict|list :return: """ + import py_paddle.swig_paddle as api + from data_feeder import DataFeeder if event_handler is None: event_handler = default_event_handler __check_train_args__(**locals()) self.__parameter_updater__ = self.__optimizer__.create_updater( self.__is_local__, num_passes, self.__use_sparse_updater__, - self.__pserver_spec__) + self.__pserver_spec__, self.__use_etcd__) self.__parameter_updater__.init(self.__gradient_machine__) self.__gradient_machine__.start() @@ -187,6 +186,8 @@ class SGD(object): :type feeding: dict :return: """ + import py_paddle.swig_paddle as api + from data_feeder import DataFeeder feeder = DataFeeder(self.__data_types__, feeding) evaluator = self.__gradient_machine__.makeEvaluator() out_args = api.Arguments.createArguments(0) diff --git a/python/setup.py.in b/python/setup.py.in index 271ee6e5526981ad94710315d1472b0f4069a1aa..65a26940d4d703ea4fbb5022523a90716982ec10 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -19,7 +19,9 @@ setup_requires=["requests", "recordio", "matplotlib", "rarfile", - "scipy>=0.19.0"] + "scipy>=0.19.0", + "Pillow", + "nltk"] if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: setup_requires+=["opencv-python"]