Commit fd46fe38 authored by dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into variable_input

......@@ -57,8 +57,11 @@ function(cc_binary TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_binary_SRCS})
add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
link_paddle_exe(${TARGET_NAME})
if(cc_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
add_dependencies(${TARGET_NAME} ${cc_binary_DEPS})
endif()
endfunction(cc_binary)
# The dependency to target tensor implies that if any of
......@@ -74,8 +77,11 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
link_paddle_test(${TARGET_NAME})
if(cc_test_DEPS)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS})
add_dependencies(${TARGET_NAME} ${cc_test_DEPS})
endif()
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(cc_test)
......@@ -106,8 +112,11 @@ function(nv_binary TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies})
link_paddle_exe(${TARGET_NAME})
if(nv_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
endif()
endfunction(nv_binary)
# The dependency to target tensor implies that if any of
......@@ -123,7 +132,10 @@ function(nv_test TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
link_paddle_test(${TARGET_NAME})
if(nv_test_DEPS)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS})
add_dependencies(${TARGET_NAME} ${nv_test_DEPS})
endif()
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(nv_test)
......@@ -207,6 +207,11 @@ trans_full_matrix_projection
Aggregate Layers
================
AggregateLevel
--------------
.. autoclass:: paddle.v2.layer.AggregateLevel
:noindex:
.. _api_v2.layer_pooling:
pooling
......@@ -248,6 +253,11 @@ block_expand
.. _api_v2.layer_expand:
ExpandLevel
-----------
.. autoclass:: paddle.v2.layer.ExpandLevel
:noindex:
expand
------
.. autoclass:: paddle.v2.layer.expand
......
A few months ago, when we were trying to replace CMake with Bazel, @emailweixu suggested that we rewrite those handy Bazel functions using CMake. Now it seems that it's the right time to get this done, as we are facing problems from the porting of Majel and the development of the new parameter server using Go and C++.
Here are some initial thoughts. Your comments are welcome!
### Required CMake Function
I think we need only the following few CMake functions to keep a project description lean and clean:
| C++ | CUDA C++ | Go |
|---|---|---|
| cc_library | nv_library | go_library |
| cc_binary | nv_binary | go_binary |
| cc_test | nv_test | go_test |
- The `_library` functions generate .a files from source code.
- The `_binary` functions generate executable binary files.
- The `_test` functions generate executable unit test files. They work like `_binary` but link `-lgtest` and `-lgtest_main`.
The difference between `nv_` functions and `cc_` functions is that the former use `nvcc` instead of the system-default C++ compiler.
Both `nv_` and `cc_` functions enable C++11 (-std=c++11).
Also,
- to describe external dependencies, we need `external_library`.
- to build shared libraries, we need `shared_library`.
### An Example Project
Suppose that we have the aforementioned functions defined in our `/cmake` directory. The following example `CMakeLists.txt` describes a project including the following source files:
- tensor.h
- tensor.cc
- tensor_test.cc
- ops.h
- ops.cu
- ops_test.cu
- api.go
- api_test.go
Suppose that ops.cu depends on CUDNN.
```cmake
# cc_binary parses tensor.cc and figures out that the target also depends
# on tensor.h.
cc_binary(tensor
SRCS
tensor.cc)
# The dependency on target tensor implies that if any of
# tensor{.h,.cc,_test.cc} is changed, tensor_test needs to be re-built.
cc_test(tensor_test
SRCS
tensor_test.cc
DEPS
tensor)
# I don't have a clear idea what parameters external_library needs to
# have. @gangliao as a CMake expert would have better ideas.
external_library(cudnn
....)
# Suppose that ops.cu depends on external target CUDNN. Also, ops.cu
# includes global functions that take Tensor as their parameters, so
# ops depends on tensor. This implies that if any of tensor.{h,cc} or
# ops.{h,cu} is changed, ops needs to be re-built.
nv_library(ops
SRCS
ops.cu
DEPS
tensor
cudnn) # cudnn is defined above.
nv_test(ops_test
SRCS
ops_test.cu
DEPS
ops)
# Because api.go defines a Go wrapper to ops and tensor, it depends on
# both. This implies that if any of tensor.{h,cc}, ops.{h,cu}, or
# api.go is changed, api needs to be re-built.
go_library(api
SRCS
api.go
DEPS
tensor # Because ops depends on tensor, this line is optional.
ops)
go_test(api_test
SRCS
api_test.go
DEPS
api)
# This builds libapi.so. shared_library might use CMake target
# api_shared so as to distinguish it from the above target api.
shared_library(api
DEPS
api)
```
### Implementation
As the above example CMakeLists.txt executes, each function invocation adds "nodes" to a dependency graph. It also uses this graph to generate CMake commands including `add_executable`, `add_dependencies`, `target_link_libraries`, and `add_test`.
......@@ -26,10 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py)
SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)
SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fPIC -Wall")
IF(WITH_COVERAGE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
ENDIF(WITH_COVERAGE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign")
SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_parameter
......
cmake_minimum_required(VERSION 3.0)
if(GTEST_INCLUDE_DIR AND GTEST_LIBRARIES)
message("-- Found gtest (include: ${GTEST_INCLUDE_DIR}, library: ${GTEST_LIBRARIES})")
else()
# find #include <majel/xx.h>
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
include_directories(${PARENT_DIR})
# find cmake directory modules
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
# enable c++11
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# enable gtest
set(THIRD_PARTY_PATH ./third_party)
set(WITH_TESTING ON)
include(external/gtest)
endif()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
project(cxx_go CXX C Go)
include(cmake/golang.cmake)
include(cmake/flags.cmake)
ExternalGoProject_Add(pserver github.com/PaddlePaddle/Paddle/paddle/go/pserver)
add_go_library(client STATIC pserver)
add_subdirectory(test)
package main
/*
#include <stdlib.h>
#include <string.h>
typedef enum {
PADDLE_ELEMENT_TYPE_INT32 = 0,
PADDLE_ELEMENT_TYPE_UINT32 = 1,
PADDLE_ELEMENT_TYPE_INT64 = 2,
PADDLE_ELEMENT_TYPE_UINT64 = 3,
PADDLE_ELEMENT_TYPE_FLOAT32 = 4,
PADDLE_ELEMENT_TYPE_FLOAT64 = 5,
} paddle_element_type;
typedef struct {
char* name;
paddle_element_type element_type;
unsigned char* content;
int content_len;
} paddle_parameter, paddle_gradient;
static inline void paddle_release_param(paddle_parameter* param) {
if (param != NULL) {
if (param->name != NULL) {
free(param->name);
}
if (param->content != NULL) {
free(param->content);
}
free(param);
}
}
typedef int client;
*/
import "C"
import (
"log"
"sync"
"unsafe"
"github.com/PaddlePaddle/Paddle/paddle/go/pserver"
)
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex
var handleMap = make(map[C.client]*pserver.Client)
var curHandle C.client
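// C callers never see Go pointers: they get integer handles, and the
// *pserver.Client objects stay in the Go-side handleMap above. This
// also respects cgo's rule against storing Go pointers in C memory.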
func add(c *pserver.Client) C.client {
mu.Lock()
defer mu.Unlock()
client := curHandle
curHandle++
handleMap[client] = c
return client
}
func get(client C.client) *pserver.Client {
mu.Lock()
defer mu.Unlock()
return handleMap[client]
}
func remove(client C.client) *pserver.Client {
mu.Lock()
defer mu.Unlock()
h := handleMap[client]
delete(handleMap, client)
return h
}
func cArrayToSlice(p unsafe.Pointer, len int) []byte {
if p == nullPtr {
return nil
}
// create a Go slice backed by a C array,
// reference: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
return (*[1 << 30]byte)(p)[:len:len]
}
//export paddle_new_pserver_client
func paddle_new_pserver_client(addr *C.char) C.client {
c := pserver.NewClient(C.GoString(addr))
return add(c)
}
//export paddle_pserver_client_release
func paddle_pserver_client_release(client C.client) {
c := remove(client)
c.Cleanup()
}
//export paddle_begin_init_params
func paddle_begin_init_params(client C.client, pserver_config unsafe.Pointer, config_len C.int) C.int {
c := get(client)
b := cArrayToSlice(pserver_config, int(config_len))
selected, err := c.BeginInitParams(b)
if err != nil {
log.Println(err)
return -1
}
if selected {
return 1
}
return 0
}
//export paddle_init_param
func paddle_init_param(client C.client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int {
et := pserver.ElementType(param.element_type)
name := C.GoString(param.name)
content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len))
pc := pserver.ParameterWithConfig{
Param: pserver.Parameter{Name: name, ElementType: et, Content: content},
Config: cArrayToSlice(param_config, int(config_len)),
}
c := get(client)
err := c.InitParam(pc)
if err != nil {
log.Println(err)
return -1
}
return 0
}
//export paddle_finish_init_params
func paddle_finish_init_params(client C.client) C.int {
c := get(client)
err := c.FinishInitParams()
if err != nil {
log.Println(err)
return -1
}
return 0
}
//export paddle_send_grads
func paddle_send_grads(client C.client, grads *C.paddle_gradient, total C.int) C.int {
var gs []pserver.Gradient
for i := 0; i < int(total); i++ {
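// Equivalent to &grads[i] in C: advance the base pointer by i elements.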
grad := (*C.paddle_gradient)(unsafe.Pointer((uintptr(unsafe.Pointer(grads)) + uintptr(i)*unsafe.Sizeof(*grads))))
et := pserver.ElementType(grad.element_type)
name := C.GoString(grad.name)
content := cArrayToSlice(unsafe.Pointer(grad.content), int(grad.content_len))
gs = append(gs, pserver.Gradient{Name: name, ElementType: et, Content: content})
}
c := get(client)
err := c.SendGrads(gs)
if err != nil {
log.Println(err)
return -1
}
return 0
}
//export paddle_get_params
func paddle_get_params(client C.client, names **C.char, dst **C.paddle_parameter, total C.int) C.int {
var ns []string
for i := 0; i < int(total); i++ {
name := *(**C.char)(unsafe.Pointer((uintptr(unsafe.Pointer(names)) + uintptr(i)*unsafe.Sizeof(*names))))
ns = append(ns, C.GoString(name))
}
c := get(client)
ps, err := c.GetParams(ns)
if err != nil {
log.Println(err)
return -1
}
for i := 0; i < int(total); i++ {
if i >= len(ps) {
break
}
p := ps[i]
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
nameReady := false
contentAllocated := false
if unsafe.Pointer(param) == nullPtr {
param = (*C.paddle_parameter)(C.calloc(1, C.size_t(unsafe.Sizeof(*param))))
} else {
if unsafe.Pointer(param.name) != nullPtr {
if n := C.GoString(param.name); n != p.Name {
log.Println("Warning: the pre-allocated parameter name does not match the parameter name, it will be freed.", n, p.Name)
C.free(unsafe.Pointer(param.name))
} else {
nameReady = true
}
}
if unsafe.Pointer(param.content) != nullPtr {
if int(param.content_len) == len(p.Content) {
contentAllocated = true
} else {
log.Println("Warning: the pre-allocated content len does not match parameter content len, the pre-allocated content will be freed.", param.content_len, len(p.Content))
C.free(unsafe.Pointer(param.content))
}
}
}
if !nameReady {
param.name = C.CString(p.Name)
}
if !contentAllocated {
param.content = (*C.uchar)(C.malloc(C.size_t(len(p.Content))))
}
C.memcpy(unsafe.Pointer(param.content), unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content)))
param.content_len = C.int(len(p.Content))
param.element_type = C.paddle_element_type(p.ElementType)
}
return 0
}
//export paddle_save_model
func paddle_save_model(client C.client, path *C.char) C.int {
p := C.GoString(path)
c := get(client)
err := c.SaveModel(p)
if err != nil {
log.Println(err)
return -1
}
return 0
}
func main() {} // Required but ignored
if(NOT CMAKE_Go_COMPILER)
if(NOT "$ENV{GO_COMPILER}" STREQUAL "")
get_filename_component(CMAKE_Go_COMPILER_INIT $ENV{GO_COMPILER} PROGRAM PROGRAM_ARGS CMAKE_Go_FLAGS_ENV_INIT)
if(CMAKE_Go_FLAGS_ENV_INIT)
set(CMAKE_Go_COMPILER_ARG1 "${CMAKE_Go_FLAGS_ENV_INIT}" CACHE STRING "First argument to Go compiler")
endif()
if(NOT EXISTS ${CMAKE_Go_COMPILER_INIT})
message(SEND_ERROR "Could not find compiler set in environment variable GO_COMPILER:\n$ENV{GO_COMPILER}.")
endif()
endif()
set(Go_BIN_PATH
$ENV{GOPATH}
$ENV{GOROOT}
$ENV{GOROOT}/../bin
$ENV{GO_COMPILER}
/usr/bin
/usr/local/bin
)
if(CMAKE_Go_COMPILER_INIT)
set(CMAKE_Go_COMPILER ${CMAKE_Go_COMPILER_INIT} CACHE PATH "Go Compiler")
else()
find_program(CMAKE_Go_COMPILER
NAMES go
PATHS ${Go_BIN_PATH}
)
EXEC_PROGRAM(${CMAKE_Go_COMPILER} ARGS version OUTPUT_VARIABLE GOLANG_VERSION)
STRING(REGEX MATCH "go[0-9]+.[0-9]+.[0-9]+[ /A-Za-z0-9]*" VERSION "${GOLANG_VERSION}")
message("-- The Golang compiler identification is ${VERSION}")
message("-- Check for working Golang compiler: ${CMAKE_Go_COMPILER}")
endif()
endif()
mark_as_advanced(CMAKE_Go_COMPILER)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CMakeGoCompiler.cmake.in
${CMAKE_PLATFORM_INFO_DIR}/CMakeGoCompiler.cmake @ONLY)
set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER")
set(CMAKE_Go_COMPILER "@CMAKE_Go_COMPILER@")
set(CMAKE_Go_COMPILER_LOADED 1)
set(CMAKE_Go_SOURCE_FILE_EXTENSIONS go)
set(CMAKE_Go_LINKER_PREFERENCE 40)
set(CMAKE_Go_OUTPUT_EXTENSION .o)
set(CMAKE_Go_OUTPUT_EXTENSION_REPLACE 1)
set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER")
if(NOT CMAKE_Go_COMPILE_OBJECT)
set(CMAKE_Go_COMPILE_OBJECT "go tool compile -l -N -o <OBJECT> <SOURCE> ")
endif()
if(NOT CMAKE_Go_LINK_EXECUTABLE)
set(CMAKE_Go_LINK_EXECUTABLE "go tool link -o <TARGET> <OBJECTS> ")
endif()
set(CMAKE_Go_COMPILER_WORKS 1 CACHE INTERNAL "")
# Setting Paddle Compile Flags
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
include(CheckCXXSymbolExists)
include(CheckTypeSize)
function(CheckCompilerCXX11Flag)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang"
# Apple Clang is a different compiler from upstream Clang and has different version numbers.
# https://gist.github.com/yamaya/2924292
if(APPLE) # cmake < 3.0 compiler id "Clang" on Mac OS X
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.1)
message(FATAL_ERROR "Unsupported AppleClang version. AppleClang >= 5.1 required.")
endif()
else()
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3)
message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.3 required.")
endif()
endif()
endif()
endfunction()
CheckCompilerCXX11Flag()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# Common gpu architectures: Kepler, Maxwell
foreach(capability 30 35 50)
list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}")
endforeach()
if (CUDA_VERSION VERSION_GREATER "7.0" OR CUDA_VERSION VERSION_EQUAL "7.0")
list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52")
endif()
# Modern gpu architectures: Pascal
if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION VERSION_EQUAL "8.0")
list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60")
endif()
set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS})
\ No newline at end of file
set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go")
file(MAKE_DIRECTORY ${GOPATH})
function(ExternalGoProject_Add TARG)
add_custom_target(${TARG} env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get ${ARGN})
endfunction(ExternalGoProject_Add)
function(add_go_executable NAME)
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
-o "${CMAKE_CURRENT_BINARY_DIR}/${NAME}"
${CMAKE_GO_FLAGS} ${GO_SOURCE}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
add_custom_target(${NAME} ALL DEPENDS ${OUTPUT_DIR}/.timestamp ${ARGN})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${NAME} DESTINATION bin)
endfunction(add_go_executable)
function(ADD_GO_LIBRARY NAME BUILD_TYPE)
if(BUILD_TYPE STREQUAL "STATIC")
set(BUILD_MODE -buildmode=c-archive)
set(LIB_NAME "lib${NAME}.a")
else()
set(BUILD_MODE -buildmode=c-shared)
if(APPLE)
set(LIB_NAME "lib${NAME}.dylib")
else()
set(LIB_NAME "lib${NAME}.so")
endif()
endif()
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
${CMAKE_GO_FLAGS} ${GO_SOURCE}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
add_custom_target(${NAME} ALL DEPENDS ${OUTPUT_DIR}/.timestamp ${ARGN})
if(NOT BUILD_TYPE STREQUAL "STATIC")
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} DESTINATION bin)
endif()
endfunction(ADD_GO_LIBRARY)
cmake_minimum_required(VERSION 3.0)
include_directories(/env/gopath/src/github.com/PaddlePaddle/Paddle/paddle/go/cclient/build/)
add_executable(main main.c)
add_dependencies(main client)
set (CMAKE_EXE_LINKER_FLAGS "-pthread")
target_link_libraries(main /env/gopath/src/github.com/PaddlePaddle/Paddle/paddle/go/cclient/build/libclient.a) # ${GTEST_LIBRARIES})
#include "libclient.h"
//#include "gtest/gtest.h"
void panic() {
// TODO(helin): fix: gtest using cmake is not working, using this
// hacky way for now.
*(volatile int*)0 = 0;  // deliberately write through NULL to crash
}
int main() {
char addr[] = "localhost:3000";
client c = paddle_new_pserver_client(addr);
retry:
if (paddle_begin_init_params(c, NULL, 0)) {
paddle_parameter param;
char name_a[] = "param_a";
char name_b[] = "param_b";
char content[] = {0x00, 0x11, 0x22};
param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param.name = name_a;
param.content = content;
param.content_len = 3;
if (paddle_init_param(c, param, NULL, 0) != 0) {
goto retry;
}
param.element_type = PADDLE_ELEMENT_TYPE_INT32;
param.name = name_b;
param.content = content;
param.content_len = 3;
if (paddle_init_param(c, param, NULL, 0) != 0) {
goto retry;
}
if (paddle_finish_init_params(c) != 0) {
goto retry;
}
} else {
panic();
}
char content[] = {0x00, 0x11, 0x22};
paddle_gradient grads[2] = {
{"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3},
{"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}};
if (paddle_send_grads(c, grads, 2) != 0) {
panic();
}
paddle_parameter* params[2] = {NULL, NULL};
char* names[] = {"param_a", "param_b"};
if (paddle_get_params(c, names, params, 2) != 0) {
panic();
}
// get parameters again by reusing the allocated parameter buffers.
if (paddle_get_params(c, names, params, 2) != 0) {
panic();
}
paddle_release_param(params[0]);
paddle_release_param(params[1]);
if (paddle_save_model(c, "/tmp/") != 0) {
panic();
}
return 0;
}
package pserver
// ElementType is the type of elements of a Parameter.
type ElementType int
// Supported element types
const (
Int32 ElementType = iota
UInt32
Int64
UInt64
Float32
Float64
)
// Parameter is a piece of data to sync with the parameter server.
type Parameter struct {
Name string
ElementType ElementType
Content []byte
}
// ParameterWithConfig contains the parameter and the configuration.
type ParameterWithConfig struct {
Param Parameter
Config []byte // parameter configuration in Proto Buffer format
}
// Gradient is the gradient of the parameter.
type Gradient Parameter
// Client is the client to parameter servers.
type Client struct {
}
// NewClient creates a new client.
func NewClient(addr string) *Client {
return &Client{}
}
// BeginInitParams begins to initialize parameters on parameter
// servers.
//
// BeginInitParams will be called from multiple trainers, only one
// trainer will be selected to initialize the parameters on parameter
// servers. Other trainers will be blocked until the initialization is
// done, and they need to get the initialized parameters from
// parameter servers using GetParams.
func (c *Client) BeginInitParams(pserverConfigProto []byte) (selected bool, err error) {
return true, nil
}
// InitParam initializes the parameter on parameter servers.
func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error {
return nil
}
// FinishInitParams tells parameter servers client has sent all
// parameters to parameter servers as initialization.
func (c *Client) FinishInitParams() error {
return nil
}
// SendGrads sends gradients to parameter servers for updating
// parameters.
func (c *Client) SendGrads(grads []Gradient) error {
return nil
}
// GetParams gets parameters from parameter servers.
func (c *Client) GetParams(names []string) ([]Parameter, error) {
return nil, nil
}
// SaveModel indicates parameters to save the parameter to the given
// path.
func (c *Client) SaveModel(path string) error {
return nil
}
// Cleanup cleans up the client states.
func (c *Client) Cleanup() {
}
cc_library(majel SRCS place.cc)
cc_library(place SRCS place.cc)
cc_library(ddim SRCS ddim.cc)
if(WITH_TESTING)
add_subdirectory(test)
......
......@@ -106,34 +106,84 @@ Because `variant` may be thought of as "multi-type, single value", we can utiliz
arr[make_ddim({0, 1})] = 1.0
```
## implement Tensor in Paddle
## Implement Tensor in Paddle
We want to create a Tensor class to replace Vector and Matrix, and to support high-dimensional data. The operations on Tensor are implemented on both CPU and GPU. We also want to make sure that the Tensor interface is friendly to its callers.
Tensor is only responsible for describing computation. It does not take charge of memory allocation policy, handles to CUDA library contexts (e.g. cublasHandle, cudnnHandle), or dispatching CUDA kernels; Paddle already implements hardware initialization and resource management.
Before writing code, please make sure you have looked through the Majel source code and grasped the design philosophy of `DArray` in Majel.
To assign subtasks to our colleagues, we have to discuss how to divide the work into independent subtasks.
- [ ] 1. First, we need to consider the third-party dependencies in Majel.
### Memory Management
`Allocation` manages a block of memory on a device (CPU/GPU). We use `Place` to describe the memory location. The details of memory allocation and deallocation are implemented in `Allocator` and `DeAllocator`. Related low-level APIs such as `hl_malloc_device()` and `hl_malloc_host()` are provided by Paddle.
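To make the pieces concrete, here is a rough sketch (the class shape and constructor below are assumptions for illustration, not the final API):
```cpp
// Hypothetical sketch: an Allocation owns a block of memory whose
// location is described by a Place. std::malloc/std::free stand in for
// Paddle's hl_malloc_host() and its matching free; a GpuPlace branch
// would call hl_malloc_device() instead.
#include <cstddef>
#include <cstdlib>

#include <boost/variant.hpp>

struct CpuPlace {};
struct GpuPlace {
  int device;
};
typedef boost::variant<CpuPlace, GpuPlace> Place;

class Allocation {
public:
  Allocation(Place place, std::size_t size) : place_(place), size_(size) {
    // Only the CPU path is sketched; a real implementation would
    // dispatch on place_ (e.g., with boost::apply_visitor).
    ptr_ = std::malloc(size_);
  }
  ~Allocation() { std::free(ptr_); }

  void* ptr() const { return ptr_; }
  std::size_t size() const { return size_; }

private:
  Place place_;
  std::size_t size_;
  void* ptr_;
};
```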
### Dim and Array
#### Dim
`Dim` describes the dimension information of an array.
Majel heavily uses `boost.variant`, but we don't want to integrate `boost` into PaddlePaddle. It's better to replace boost with a lightweight implementation: https://github.com/mapbox/variant. Mapbox variant has the same speedy performance as `boost::variant` but is faster to compile, results in smaller binaries, and has no dependencies.
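As a hedged sketch of how small the switch would be (stand-in `Dim1`/`Dim2` types here instead of the real `Dim<N>`):
```cpp
// mapbox/variant mirrors the boost::variant usage in ddim.cc below, so
// DDimVar could switch with little more than a header/namespace change.
#include <iostream>

#include <mapbox/variant.hpp>

struct Dim1 {
  int head;
};
struct Dim2 {
  int head, tail;
};

using DimVar = mapbox::util::variant<Dim1, Dim2>;

struct ArityVisitor {
  int operator()(const Dim1&) const { return 1; }
  int operator()(const Dim2&) const { return 2; }
};

int main() {
  DimVar v = Dim2{3, 4};
  // apply_visitor works the same way as boost::apply_visitor.
  std::cout << mapbox::util::apply_visitor(ArityVisitor{}, v) << "\n";
  return 0;
}
```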
`DDimVar` is an alias of a specialized instance of the boost.variant class template.
> @gangliao
`DDim` is introduced to represent a dynamically sized dimension.
For example:
```
Dim<2> d1 = make_dim(3, 3);
DDim d2 = make_ddim({1, 2, 3});
```
- [ ] 2. Re-implement `Place` and `Allocation/Memory`
You must give `Dim` a concrete number of dimensions at compile time, whereas `DDim` can represent a dynamically sized dimension.
#### Array
`Array` represents a tensor with a specific type and size.
`DArrayVar` is an alias of a specialized instance of the boost.variant class template.
`DArray` is introduced to represent a dynamically typed array.
For example:
```
Array<float, 2> a1(Dim<2>(2, 2));
DArray a2 = make_darray(make_ddim({3, 4}), 0.0, CpuPlace());
```
I found that @wangkuiyi submitted a pull request that includes `Place`. @gangliao and @qijun could re-implement `Allocation`, because we had GPU development experience before joining the Paddle team.
You must specify the type and dimensionality of an `Array`, whereas `DArray` can represent a dynamically typed array.
> @wangkuiyi @gangliao @qijun
- [ ] 3. Re-implement `Dim`.
Please refer to the `Learn from Majel` section for more details.
`Dim` is an excellent implementation in Majel.
### ArrayView
> ???
`ViewIterator` is a class template which implements basic iterator operations, including increment (++), decrement (--), dereference (*), equality comparison (==), and so on.
- [ ] 4. Re-implement `Array/Tensor`.
`ArrayView` is an encapsulation of `Array`, which introduces extra iterator methods, such as `begin()` and `end()`. The `begin()` method returns an iterator pointing to the first element in the ArrayView, and the `end()` method returns an iterator pointing to the past-the-end element in the ArrayView.
`ArrayView` makes visiting and manipulating an array more efficient, flexible, and safe.
A global function `make_view` is provided to transform an array into its corresponding ArrayView.
```
template<typename T, int D>
ArrayView<T, D> make_view(const Array<T, D>& in) {
return in;
}
```
A global function `make_iterator` is provided to make an iterator over an array.
```
template<typename T, int D>
ViewIterator<ArrayView<T, D>> make_iterator(const Array<T, D>& in, Dim<D> idx) {
return make_iterator(make_view(in), idx);
}
```
> Prerequisites: 1 - 3
### Basic Operations
- [ ] 5. Re-implement fundamental operators for `Array/Tensor`.
The operations that manipulate DArray are defined as global functions, such as `ones`, `zeros`, `reshape`, `gemm` and so on.
> Prerequisites: 1 - 4
An array is transformed into an ArrayView and then passed to the operation, which launches on a specific device (CPU/GPU).
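A minimal, self-contained sketch of that flow, with reduced stand-ins for `Array`, `ArrayView`, and `make_view`, and only the CPU path of a hypothetical `zeros`:
```cpp
// Sketch of the "array -> view -> device kernel" dispatch described
// above. The real Array/ArrayView come from Majel; these stand-ins only
// show how a global operation routes through a view.
#include <vector>

template <typename T, int D>
struct Array {
  std::vector<T> data;
};

template <typename T, int D>
struct ArrayView {
  T* first;
  T* last;
  T* begin() const { return first; }
  T* end() const { return last; }
};

template <typename T, int D>
ArrayView<T, D> make_view(Array<T, D>& in) {
  return {in.data.data(), in.data.data() + in.data.size()};
}

// The kernel only ever sees a view, never the owning Array.
template <typename T, int D>
void zeros(ArrayView<T, D> view) {
  for (T& e : view) {
    e = T(0);
  }
}

// User-facing global function: transform the array into a view, then
// launch on the chosen device (here, the CPU path).
template <typename T, int D>
void zeros(Array<T, D>& in) {
  zeros(make_view(in));
}
```
Under this scheme, a GPU path would add a `zeros` overload that launches a CUDA kernel on the view, while callers keep calling `zeros(array)`.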
#include "paddle/majel/ddim.h"
namespace majel {
///@cond HIDDEN
template <int i>
Dim<i> make_dim(const int* d) {
return Dim<i>(*d, make_dim<i - 1>(d + 1));
}
template <>
Dim<1> make_dim<1>(const int* d) {
return Dim<1>(*d);
}
void make_ddim(DDim& ddim, const int* dims, int n) {
switch (n) {
case 1:
ddim = make_dim<1>(dims);
break;
case 2:
ddim = make_dim<2>(dims);
break;
case 3:
ddim = make_dim<3>(dims);
break;
case 4:
ddim = make_dim<4>(dims);
break;
case 5:
ddim = make_dim<5>(dims);
break;
case 6:
ddim = make_dim<6>(dims);
break;
case 7:
ddim = make_dim<7>(dims);
break;
case 8:
ddim = make_dim<8>(dims);
break;
case 9:
ddim = make_dim<9>(dims);
break;
default:
throw std::invalid_argument(
"Dynamic dimensions must have between [1, 9] dimensions.");
}
}
///@endcond
DDim make_ddim(std::initializer_list<int> dims) {
DDim result(make_dim(0));
make_ddim(result, dims.begin(), dims.size());
return result;
}
DDim make_ddim(const std::vector<int>& dims) {
DDim result(make_dim(0));
make_ddim(result, &dims[0], dims.size());
return result;
}
///@cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes errors
class DynamicMutableIndexer : public boost::static_visitor<int&> {
public:
DynamicMutableIndexer(int idx) : idx_(idx) {}
template <int D>
int& operator()(Dim<D>& dim) const {
return dim[idx_];
}
private:
int idx_;
};
class DynamicConstIndexer : public boost::static_visitor<int> {
public:
DynamicConstIndexer(int idx) : idx_(idx) {}
template <int D>
int operator()(const Dim<D>& dim) const {
return dim[idx_];
}
private:
int idx_;
};
///@endcond
int& DDim::operator[](int idx) {
return boost::apply_visitor(DynamicMutableIndexer(idx), var);
}
int DDim::operator[](int idx) const {
return boost::apply_visitor(DynamicConstIndexer(idx), var);
}
bool DDim::operator==(DDim d) const {
if (var.which() != d.getVar().which()) {
return false;
} else {
std::vector<int> v1 = vectorize(*this);
std::vector<int> v2 = vectorize(d);
for (unsigned int i = 0; i < v1.size(); i++) {
if (v1[i] != v2[i]) {
return false;
}
}
return true;
}
}
bool DDim::operator!=(DDim d) const { return !(*this == d); }
DDim DDim::operator+(DDim d) const {
std::vector<int> v1 = vectorize(*this);
std::vector<int> v2 = vectorize(d);
std::vector<int> v3;
assert(v1.size() == v2.size());
for (unsigned int i = 0; i < v1.size(); i++) {
v3.push_back(v1[i] + v2[i]);
}
return make_ddim(v3);
}
DDim DDim::operator*(DDim d) const {
std::vector<int> v1 = vectorize(*this);
std::vector<int> v2 = vectorize(d);
std::vector<int> v3;
assert(v1.size() == v2.size());
for (unsigned int i = 0; i < v1.size(); i++) {
v3.push_back(v1[i] * v2[i]);
}
return make_ddim(v3);
}
int get(const DDim& ddim, int idx) { return ddim[idx]; }
void set(DDim& ddim, int idx, int value) { ddim[idx] = value; }
///@cond HIDDEN
struct VectorizeVisitor : public boost::static_visitor<> {
std::vector<int>& vector;
VectorizeVisitor(std::vector<int>& v) : vector(v) {}
template <typename T>
void operator()(const T& t) {
vector.push_back(t.head);
this->operator()(t.tail);
}
void operator()(const Dim<1>& t) { vector.push_back(t.head); }
};
///@endcond
std::vector<int> vectorize(const DDim& ddim) {
std::vector<int> result;
VectorizeVisitor visitor(result);
boost::apply_visitor(visitor, ddim);
return result;
}
ssize_t product(const DDim& ddim) {
ssize_t result = 1;
std::vector<int> v = vectorize(ddim);
for (auto i : v) {
result *= i;
}
return result;
}
///\cond HIDDEN
struct ArityVisitor : boost::static_visitor<int> {
template <int D>
int operator()(Dim<D>) const {
return D;
}
};
///\endcond
int arity(const DDim& d) { return boost::apply_visitor(ArityVisitor(), d); }
///\cond HIDDEN
struct DDimPrinter : boost::static_visitor<void> {
std::ostream& os;
DDimPrinter(std::ostream& os_) : os(os_) {}
template <typename T>
void operator()(const T& t) {
os << t;
}
};
///\endcond
std::ostream& operator<<(std::ostream& os, const majel::DDim& ddim) {
DDimPrinter printer(os);
boost::apply_visitor(printer, ddim);
return os;
}
} // namespace majel
#pragma once
#include <boost/variant.hpp>
#include <initializer_list>
#include <stdexcept>
#include <vector>
#include "paddle/majel/dim.h"
namespace majel {
namespace {
typedef boost::variant<Dim<1>,
Dim<2>,
Dim<3>,
Dim<4>,
Dim<5>,
Dim<6>,
Dim<7>,
Dim<8>,
Dim<9>>
DDimVar;
}
/**
* \brief A dynamically sized dimension.
*
* The number of dimensions must be between [1, 9].
*/
struct DDim {
DDimVar var;
DDim() : var(Dim<1>()) {}
template <int D>
DDim(const Dim<D>& in) : var(in) {}
template <int D>
DDim& operator=(const Dim<D>& in) {
var = in;
return *this;
}
int& operator[](int idx);
int operator[](int idx) const;
template <typename Visitor>
typename Visitor::result_type apply_visitor(Visitor& visitor) {
return var.apply_visitor(visitor);
}
template <typename Visitor>
typename Visitor::result_type apply_visitor(Visitor& visitor) const {
return var.apply_visitor(visitor);
}
DDimVar getVar() { return var; }
bool operator==(DDim d) const;
bool operator!=(DDim d) const;
DDim operator+(DDim d) const;
DDim operator*(DDim d) const;
};
/**
* \brief Make a DDim from std::vector<int>
*
* \param dims An vector of ints. Must be sized between [1, 9]
*/
DDim make_ddim(const std::vector<int>& dims);
/**
* \brief Make a DDim from an initializer list
*
* \param dims An initializer list of ints. Must be sized between [1, 9]
*
*/
DDim make_ddim(std::initializer_list<int> dims);
int get(const DDim& dim, int idx);
void set(DDim& dim, int idx, int val);
std::vector<int> vectorize(const DDim& ddim);
ssize_t product(const DDim& ddim);
/**
* \brief What is the length of this dimension?
*
* \param Dynamic dimension to inspect
*/
int arity(const DDim& ddim);
std::ostream& operator<<(std::ostream&, const majel::DDim&);
} // namespace majel
namespace boost {
template <typename T>
T get(const majel::DDim& in) {
return boost::get<T>(in.var);
}
} // namespace boost
#pragma once
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#if defined(__APPLE__) && defined(__CUDA_ARCH__) && !defined(NDEBUG)
#include <stdio.h>
#define MAJEL_ASSERT(e) \
do { \
if (!(e)) { \
printf( \
"%s:%d Assertion `%s` failed.\n", __FILE__, __LINE__, TOSTRING(e)); \
asm("trap;"); \
} \
} while (0)
#define MAJEL_ASSERT_MSG(e, m) \
do { \
if (!(e)) { \
printf("%s:%d Assertion `%s` failed (%s).\n", \
__FILE__, \
__LINE__, \
TOSTRING(e), \
m); \
asm("trap;"); \
} \
} while (0)
#else
#include <assert.h>
#define MAJEL_ASSERT(e) assert(e)
#define MAJEL_ASSERT_MSG(e, m) assert((e) && (m))
#endif
#pragma once
#ifdef __CUDACC__
#define HOSTDEVICE __host__ __device__
#define HOST __host__
#else
#define HOSTDEVICE
#define HOST
#endif
#pragma once
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <type_traits>
#include "paddle/majel/detail/cuda_assert.h"
#include "paddle/majel/detail/hostdevice.h"
namespace majel {
// Statically sized, statically indexed dimension
template <int i>
struct Dim {
static constexpr int dimensions = i;
template <typename... Args>
HOSTDEVICE Dim(int _head, Args... _tail) : head(_head), tail(_tail...) {
static_assert(sizeof...(_tail) == i - 1,
"Dim initialized with the wrong number of parameters");
}
HOSTDEVICE
Dim(int _head, const Dim<i - 1>& _tail) : head(_head), tail(_tail) {}
HOSTDEVICE
Dim() : head(0), tail() {}
/** Construct a Dim from a linear index and size. Uses Fortran order
* indexing. */
HOSTDEVICE
Dim(int idx, const Dim<i>& size)
: head(idx % size.head), tail(idx / size.head, size.tail) {}
/** Construct a Dim with each dimension set to the given index */
HOSTDEVICE
Dim(int idx) : head(idx), tail(idx) {}
HOSTDEVICE
bool operator==(const Dim<i>& o) const {
return (head == o.head) && (tail == o.tail);
}
HOSTDEVICE
bool operator!=(const Dim<i>& o) const { return !(*this == o); }
HOSTDEVICE
int& operator[](int idx);
HOSTDEVICE
int operator[](int idx) const;
HOST std::string to_string() const;
int head;
Dim<i - 1> tail;
};
// Base case specialization
template <>
struct Dim<1> {
static constexpr int dimensions = 1;
HOSTDEVICE
Dim(int _head) : head(_head) {}
HOSTDEVICE
Dim() : head(0) {}
HOSTDEVICE
Dim(int idx, const Dim<1>& size) : head(idx) {
#ifndef __CUDA_ARCH__
if (idx >= size.head) {
throw std::invalid_argument("Index out of range.");
}
#else
MAJEL_ASSERT(idx < size.head);
#endif
}
HOSTDEVICE
bool operator==(const Dim<1>& o) const { return (head == o.head); }
HOSTDEVICE
bool operator!=(const Dim<1>& o) const { return !(*this == o); }
HOSTDEVICE
int& operator[](int idx);
HOSTDEVICE
int operator[](int idx) const;
int head;
};
namespace {
// Helper for accessing Dim classes
template <int i>
struct DimGetter {
// Return a copy if Dim is const
template <typename D>
HOSTDEVICE static int impl(const D& d) {
return DimGetter<i - 1>::impl(d.tail);
}
// Return a reference if Dim is mutable
template <typename D>
HOSTDEVICE static int& impl(D& d) {
return DimGetter<i - 1>::impl(d.tail);
}
};
// Eureka! We found the element!
template <>
struct DimGetter<0> {
// Return a copy if Dim is const
template <typename D>
HOSTDEVICE static int impl(const D& d) {
return d.head;
}
// Return a reference if Dim is mutable
template <typename D>
HOSTDEVICE static int& impl(D& d) {
return d.head;
}
};
template <int D>
HOSTDEVICE int& indexer(Dim<D>& dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx < 0) {
throw std::invalid_argument("Tried to access a negative dimension");
}
#else
MAJEL_ASSERT(idx >= 0);
#endif
if (idx == 0) {
return dim.head;
}
return indexer(dim.tail, idx - 1);
}
template <>
HOSTDEVICE int& indexer<1>(Dim<1>& dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx != 0) {
throw std::invalid_argument("Invalid index");
}
#else
MAJEL_ASSERT(idx == 0);
#endif
return dim.head;
}
template <int D>
HOSTDEVICE int indexer(const Dim<D>& dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx < 0) {
throw std::invalid_argument("Tried to access a negative dimension");
}
#else
MAJEL_ASSERT(idx >= 0);
#endif
if (idx == 0) {
return dim.head;
}
return indexer(dim.tail, idx - 1);
}
template <>
HOSTDEVICE int indexer<1>(const Dim<1>& dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx != 0) {
throw std::invalid_argument("Invalid index");
}
#else
MAJEL_ASSERT(idx == 0);
#endif
return dim.head;
}
} // namespace
// Static access to constant Dim
template <int i, int l>
HOSTDEVICE int get(const Dim<l>& d) {
return DimGetter<i>::impl(d);
}
// Static access to mutable Dim
template <int i, int l>
HOSTDEVICE int& get(Dim<l>& d) {
return DimGetter<i>::impl(d);
}
// Dynamic access to constant Dim
template <int l>
HOSTDEVICE int Dim<l>::operator[](int i) const {
return indexer(*this, i);
}
// Dynamic access to mutable Dim
template <int l>
HOSTDEVICE int& Dim<l>::operator[](int i) {
return indexer(*this, i);
}
// Dynamic access to constant Dim
inline HOSTDEVICE int Dim<1>::operator[](int i) const {
return indexer(*this, i);
}
// Dynamic access to mutable Dim
inline HOSTDEVICE int& Dim<1>::operator[](int i) { return indexer(*this, i); }
// Dynamic access to constant Dim
// without std::enable_if will try to instantiate this on get<0>(d)
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int>::type get(const Dim<l>& d,
int i) {
return d[i];
}
// Dynamic access to mutable Dim
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int&>::type get(Dim<l>& d, int i) {
return d[i];
}
// Dot product of two dims
template <int i>
HOSTDEVICE int linearize(const Dim<i>& a, const Dim<i>& b) {
return a.head * b.head + linearize(a.tail, b.tail);
}
// Base case dot product of two Dims
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline int linearize(const Dim<1>& a, const Dim<1>& b) {
return a.head * b.head;
}
// Product of a Dim
template <int i>
HOSTDEVICE int product(const Dim<i>& a, int prod = 1) {
return prod * a.head * product(a.tail);
}
// Base case product of a Dim
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline int product(const Dim<1>& a, int prod) {
return prod * a.head;
}
// Is 0 <= idx_i < size_i for all i?
template <int i>
HOSTDEVICE bool contained(const Dim<i>& idx, const Dim<i>& size) {
return ((0 <= idx.head) && (idx.head < size.head) &&
contained(idx.tail, size.tail));
}
// Base case of is 0 <= idx_i < size_i ?
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline bool contained(const Dim<1>& idx, const Dim<1>& size) {
return ((0 <= idx.head) && (idx.head < size.head));
}
/**
* \brief Check if a size and a stride create a Fortran order contiguous
* block of memory.
*/
template <int i>
HOST bool contiguous(const Dim<i>& size, const Dim<i>& stride, int mul = 1) {
if (product(size) == 0) return true;
int contiguous_stride = get<0>(size) == 1 ? 0 : mul;
return (get<0>(stride) == contiguous_stride &&
contiguous(size.tail, stride.tail, mul * get<0>(size)));
}
///\cond HIDDEN
// Base case of contiguous, check the nth stride is the size of
// the prefix multiply of n-1 dims.
template <>
inline bool contiguous(const Dim<1>& size, const Dim<1>& stride, int mul) {
if (get<0>(size) == 0) return true;
int contiguous_stride = get<0>(size) == 1 ? 0 : mul;
return get<0>(stride) == contiguous_stride;
}
///\endcond
/**
* \brief Compute exclusive prefix-multiply of a Dim.
*/
template <int i>
HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i>& src, int mul = 1) {
return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
}
///\cond HIDDEN
// Base case of ex_prefix_mul
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline Dim<1> ex_prefix_mul(const Dim<1>& src, int mul) {
return Dim<1>(mul);
}
///\endcond
/**
* \brief Calculate strides of a contiguous array of the given size
*
* Sets the stride for any dimension with an extent of 1 to 0.
* \param size Dim object containing the size of the array.
* \param base The base stride to use.
* \return Dim object the same size as \p size with the strides.
*/
template <int i>
HOSTDEVICE Dim<i> contiguous_strides(const Dim<i>& size, int base = 1) {
int stride = size.head == 1 ? 0 : base;
return Dim<i>(stride, contiguous_strides(size.tail, base * size.head));
}
///\cond HIDDEN
// Base case of contiguous_strides
template <>
HOSTDEVICE inline Dim<1> contiguous_strides(const Dim<1>& size, int base) {
int stride = size.head == 1 ? 0 : base;
return Dim<1>(stride);
}
///\endcond
/**
* Add two dimensions together
*/
template <int i>
HOSTDEVICE Dim<i> dim_plus(const Dim<i>& a, const Dim<i>& b) {
return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
}
// Base case
template <>
HOSTDEVICE inline Dim<1> dim_plus(const Dim<1>& a, const Dim<1>& b) {
return Dim<1>(a.head + b.head);
}
template <int i>
HOSTDEVICE Dim<i> operator+(const Dim<i>& lhs, const Dim<i>& rhs) {
return dim_plus(lhs, rhs);
}
/**
* Multiply two dimensions together
*/
template <int i>
HOSTDEVICE Dim<i> dim_mult(const Dim<i>& a, const Dim<i>& b) {
return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
}
// Base case
template <>
HOSTDEVICE inline Dim<1> dim_mult(const Dim<1>& a, const Dim<1>& b) {
return Dim<1>(a.head * b.head);
}
template <int i>
HOSTDEVICE Dim<i> operator*(const Dim<i>& lhs, const Dim<i>& rhs) {
return dim_mult(lhs, rhs);
}
/**
* \brief Normalize strides to ensure any dimension with extent 1
* has stride 0.
*
* \param size Dim object containing the size of an array
* \param stride Dim object containing stride of an array
* \return Dim object the same size as \p size with normalized strides
*
*/
template <int i>
HOSTDEVICE Dim<i> normalize_strides(const Dim<i>& size, const Dim<i>& stride) {
int norm_stride = size.head == 1 ? 0 : stride.head;
return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
}
///\cond HIDDEN
template <>
HOSTDEVICE inline Dim<1> normalize_strides(const Dim<1>& size,
const Dim<1>& stride) {
int norm_stride = size.head == 1 ? 0 : stride.head;
return Dim<1>(norm_stride);
}
///\endcond
/**
* Helper function to create a Dim
*
* \param idxes The type of Dim constructed depends on the number of params
*
*/
template <typename... Args>
HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
return Dim<sizeof...(Args)>(idxes...);
}
// Allows us to output a Dim
// XXX For some reason, overloading fails to resolve this correctly
template <int i>
typename std::enable_if<(i > 1), std::ostream&>::type operator<<(
std::ostream& os, const majel::Dim<i>& d) {
os << d.head << ", " << d.tail;
return os;
}
// Base case that allows us to output a Dim
// XXX I wish this could be an overload instead of a template
template <int i>
typename std::enable_if<(i == 1), std::ostream&>::type operator<<(
std::ostream& os, const majel::Dim<i>& d) {
os << d.head;
return os;
}
template <int i>
HOST std::string Dim<i>::to_string() const {
std::stringstream stream;
stream << *this;
return stream.str();
}
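// Splits a linear (Fortran-order) index into per-dimension coordinates
// within the given extents.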
template <int D>
HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
Dim<D> result;
for (int i = 0; i < D - 1; ++i) {
result[i] = linear_index % extents[i];
linear_index /= extents[i];
}
result[D - 1] = linear_index;
return result;
}
} // namespace majel
#include <majel/place.h>
#include "paddle/majel/place.h"
namespace majel {
......@@ -16,7 +16,7 @@ public:
void operator()(const GpuPlace& p) { os_ << "GpuPlace(" << p.device << ")"; }
};
} // namespace majel
} // namespace detail
static Place the_default_place;
......
cc_test(place_test
SRCS place_test.cc
DEPS majel)
DEPS place)
cc_test(ddim_test
SRCS ddim_test.cc
DEPS ddim)
if(WITH_GPU)
nv_test(cuda_test SRCS cuda_test.cu)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
endif()
//#include <stdexcept>
//#include <unittest/unittest.h>
#include <sstream>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/majel/ddim.h"
TEST(DDim, Equality) {
// construct a DDim from an initialization list
majel::DDim ddim = majel::make_ddim({9, 1, 5});
EXPECT_EQ(ddim[0], 9);
EXPECT_EQ(ddim[1], 1);
EXPECT_EQ(ddim[2], 5);
// construct a DDim from a vector
std::vector<int> vec({9, 1, 5});
majel::DDim vddim = majel::make_ddim(vec);
EXPECT_EQ(ddim[0], 9);
EXPECT_EQ(ddim[1], 1);
EXPECT_EQ(ddim[2], 5);
// mutate a DDim
ddim[1] = 2;
EXPECT_EQ(ddim[1], 2);
majel::set(ddim, 0, 6);
EXPECT_EQ(majel::get(ddim, 0), 6);
// vectorize a DDim
std::vector<int> res_vec = majel::vectorize(vddim);
EXPECT_EQ(res_vec[0], 9);
EXPECT_EQ(res_vec[1], 1);
EXPECT_EQ(res_vec[2], 5);
majel::Dim<3> d(3, 2, 1);
res_vec = majel::vectorize(majel::DDim(d));
EXPECT_EQ(res_vec[0], 3);
EXPECT_EQ(res_vec[1], 2);
EXPECT_EQ(res_vec[2], 1);
// add two DDims
majel::DDim ddim_sum = ddim + vddim;
EXPECT_EQ(ddim_sum[0], 15);
EXPECT_EQ(ddim_sum[1], 3);
EXPECT_EQ(ddim_sum[2], 10);
// multiply two DDims
majel::DDim ddim_mul = ddim * vddim;
EXPECT_EQ(ddim_mul[0], 54);
EXPECT_EQ(ddim_mul[1], 2);
EXPECT_EQ(ddim_mul[2], 25);
// arity of a DDim
EXPECT_EQ(majel::arity(ddim), 3);
// product of a DDim
EXPECT_EQ(majel::product(vddim), 45);
}
TEST(DDim, Print) {
// print a DDim
std::stringstream ss;
majel::DDim ddim = majel::make_ddim({2, 3, 4});
ss << ddim;
EXPECT_EQ("2, 3, 4", ss.str());
}
#include <thrust/device_vector.h>
#include <sstream>
#include "paddle/majel/dim.h"
#include "gtest/gtest.h"
__global__ void test(majel::Dim<2>* o) {
o[0] = majel::make_dim(5, 6);
}
__global__ void dyn_idx_gpu(int* o) {
auto d = majel::make_dim(5, 6);
o[0] = d[1];
}
TEST(Dim, Equality) {
// construct a Dim on the CPU
auto a = majel::make_dim(3, 4);
EXPECT_EQ(majel::get<0>(a), 3);
EXPECT_EQ(majel::get<1>(a), 4);
// construct a Dim on the GPU
thrust::device_vector<majel::Dim<2>> t(2);
test<<<1,1>>>(thrust::raw_pointer_cast(t.data()));
a = t[0];
EXPECT_EQ(majel::get<0>(a), 5);
EXPECT_EQ(majel::get<1>(a), 6);
// linearization
auto b = majel::make_dim(7, 8);
EXPECT_EQ(majel::linearize(a, b), 83);
// product
EXPECT_EQ(majel::product(a), 30);
// mutate a Dim
majel::get<1>(b) = 10;
EXPECT_EQ(majel::get<0>(b), 7);
EXPECT_EQ(majel::get<1>(b), 10);
// dynamic access
majel::get(b, 0) = 8;
b[1] = 11;
EXPECT_EQ(majel::get<0>(b), 8);
EXPECT_EQ(majel::get<1>(b), 11);
EXPECT_EQ(majel::get(b, 0), 8);
EXPECT_EQ(b[1], 11);
// dynamic access on GPU
thrust::device_vector<int> r(1);
dyn_idx_gpu<<<1,1>>>(thrust::raw_pointer_cast(r.data()));
int res = r[0];
EXPECT_EQ(res, 6);
// ex_prefix_mul
majel::Dim<3> c = majel::ex_prefix_mul(majel::Dim<3>(3, 4, 5));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 3);
EXPECT_EQ(majel::get<2>(c), 12);
// contiguous_strides
c = majel::contiguous_strides(majel::Dim<3>(10, 1, 10));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 0);
EXPECT_EQ(majel::get<2>(c), 10);
c = majel::contiguous_strides(majel::Dim<3>(10, 10, 1));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 10);
EXPECT_EQ(majel::get<2>(c), 0);
c = majel::contiguous_strides(majel::Dim<3>(1, 10, 10));
EXPECT_EQ(majel::get<0>(c), 0);
EXPECT_EQ(majel::get<1>(c), 1);
EXPECT_EQ(majel::get<2>(c), 10);
c = majel::contiguous_strides(majel::Dim<3>(2, 3, 4));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 2);
EXPECT_EQ(majel::get<2>(c), 6);
// generate from an index
auto size = majel::make_dim(4, 5, 2);
c = majel::Dim<3>(14, size);
EXPECT_EQ(majel::get<0>(c), 2);
EXPECT_EQ(majel::get<1>(c), 3);
EXPECT_EQ(majel::get<2>(c), 0);
c = majel::Dim<3>(25, size);
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 1);
EXPECT_EQ(majel::get<2>(c), 1);
}
TEST(Dim, Bool) {
auto a = majel::make_dim(3, 4);
auto b = majel::make_dim(5, 6);
auto c = majel::make_dim(3, 4);
// in_bounds check
EXPECT_TRUE(majel::contained(a, b));
EXPECT_FALSE(majel::contained(b, a));
// comparison
EXPECT_TRUE(a == a);
EXPECT_FALSE(a == b);
EXPECT_TRUE(a == c);
// contiguous check
int x = 4, y = 5, z = 2;
majel::Dim<3> sizef(x, y, z);
majel::Dim<3> stridea(1, x, x*y);
majel::Dim<3> strideb(2, 2*x, 2*x*y);
majel::Dim<3> stridec(1, x, 2*x*y);
EXPECT_TRUE(majel::contiguous(sizef, stridea));
EXPECT_FALSE(majel::contiguous(sizef, strideb));
EXPECT_FALSE(majel::contiguous(sizef, stridec));
}
TEST(Dim, Print) {
{
std::stringstream ss;
auto a = majel::make_dim(2, 3);
ss << a;
EXPECT_EQ(ss.str(), "2, 3");
}
{
std::stringstream ss;
ss << majel::make_dim(8);
EXPECT_EQ(ss.str(), "8");
}
}
#include "majel/place.h"
#include "paddle/majel/place.h"
#include <sstream>
#include "gtest/gtest.h"
#include "paddle/utils/Logging.h"
TEST(Place, Equality) {
majel::CpuPlace cpu;
......@@ -37,4 +38,5 @@ TEST(Place, Print) {
ss << majel::CpuPlace();
EXPECT_EQ("CpuPlace", ss.str());
}
LOG(INFO) << "\n[----------] Done \n";
}
......@@ -225,6 +225,24 @@ class LayerType(object):
class AggregateLevel(object):
"""
PaddlePaddle supports three sequence types:
- :code:`SequenceType.NO_SEQUENCE` means the sample is not a sequence.
- :code:`SequenceType.SEQUENCE` means the sample is a sequence.
- :code:`SequenceType.SUB_SEQUENCE` means the sample is a nested sequence,
each timestep of which is also a sequence.
Accordingly, AggregateLevel supports two modes:
- :code:`AggregateLevel.EACH_TIMESTEP` means the aggregation acts on each
timestep of a sequence; both :code:`SUB_SEQUENCE` and :code:`SEQUENCE` will
be aggregated to :code:`NO_SEQUENCE`.
- :code:`AggregateLevel.EACH_SEQUENCE` means the aggregation acts on each
sequence of a nested sequence; :code:`SUB_SEQUENCE` will be aggregated to
:code:`SEQUENCE`.
"""
EACH_TIMESTEP = 'non-seq'
EACH_SEQUENCE = 'seq'
......@@ -1454,6 +1472,19 @@ def first_seq(input,
class ExpandLevel(object):
"""
Please refer to AggregateLevel first.
ExpandLevel supports two modes:
- :code:`ExpandLevel.FROM_TIMESTEP` means the expansion acts on each
timestep of a sequence; :code:`NO_SEQUENCE` will be expanded to
:code:`SEQUENCE` or :code:`SUB_SEQUENCE`.
- :code:`ExpandLevel.FROM_SEQUENCE` means the expansion acts on each
sequence of a nested sequence; :code:`SEQUENCE` will be expanded to
:code:`SUB_SEQUENCE`.
"""
FROM_TIMESTEP = AggregateLevel.EACH_TIMESTEP
FROM_SEQUENCE = AggregateLevel.EACH_SEQUENCE
......
import numpy as np
try:
import cv2
except ImportError:
cv2 = None
from cv2 import resize
except:
print(
"import cv2 error, please install opencv-python: pip install opencv-python"
)
__all__ = [
"load_image", "resize_short", "to_chw", "center_crop", "random_crop",
......@@ -76,7 +76,7 @@ def resize_short(im, size):
h_new = size * h / w
else:
w_new = size * w / h
im = resize(im, (h_new, w_new), interpolation=cv2.INTER_CUBIC)
im = cv2.resize(im, (h_new, w_new), interpolation=cv2.INTER_CUBIC)
return im
......
......@@ -404,8 +404,8 @@ class RecurrentLayerOutput(Layer):
LayerV2 = Layer
data = DataLayerV2
data.__name__ = 'data'
AggregateLevel = conf_helps.layers.AggregateLevel
ExpandLevel = conf_helps.layers.ExpandLevel
AggregateLevel = conf_helps.AggregateLevel
ExpandLevel = conf_helps.ExpandLevel
memory = MemoryV2
memory.__name__ = 'memory'
memory.__doc__ = conf_helps.memory.__doc__
......
......@@ -18,7 +18,6 @@ setup(name='paddle',
"numpy",
"protobuf==${PROTOBUF_VERSION}",
"matplotlib",
"opencv-python",
],
packages=packages,
package_dir={
......