diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4117f077219d3b8fc097631073eafa748ff918bc..23bb27e77b9eab0c322a71a8ff570d12d1050377 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,8 +61,11 @@ option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
option(WITH_CONTRIB "Compile the third-party contribution" OFF)
+option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debugging." OFF)
option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
+option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocol" OFF)
+option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
@@ -131,6 +134,10 @@ if (NOT DEFINED WITH_MKLDNN)
set(WITH_MKLDNN OFF)
endif()
endif()
+
+if (REPLACE_ENFORCE_GLOG)
+ add_definitions("-DREPLACE_ENFORCE_GLOG")
+endif()
########################################################################################
include(external/mklml) # download mklml package
@@ -153,12 +160,24 @@ include(external/cares)
if(WITH_DISTRIBUTE)
if(WITH_GRPC)
include(external/grpc)
+ message(STATUS "Use grpc framework.")
else()
+ message(STATUS "Use brpc framework.")
include(external/leveldb)
include(external/brpc)
endif()
endif()
+if(WITH_BRPC_RDMA)
+ message(STATUS "Use brpc with rdma.")
+ if(WITH_GRPC)
+ message(FATAL_ERROR "Can't use grpc with brpc rdma.")
+ endif()
+ if(NOT WITH_DISTRIBUTE)
+ message(FATAL_ERROR "Can't use brpc rdma in no distribute env.")
+ endif()
+endif()
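+
+# For example, the only flag combination that enables rdma under the checks
+# above is: cmake .. -DWITH_DISTRIBUTE=ON -DWITH_GRPC=OFF -DWITH_BRPC_RDMA=ON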
+
include(external/snappy) # download snappy
include(external/snappystream)
include(external/threadpool)
@@ -178,7 +197,7 @@ include(inference_lib) # add paddle fluid inference libraries
include_directories("${PADDLE_SOURCE_DIR}")
-include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include")
+include_directories("${PADDLE_SOURCE_DIR}/paddle/legacy/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c")
@@ -222,7 +241,7 @@ add_subdirectory(proto)
if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY)
# "add_subdirectory(go)" should be placed after the following loine,
# because it depends on paddle/optimizer.
- add_subdirectory(paddle/optimizer)
+ add_subdirectory(paddle/legacy/optimizer)
endif()
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b1b02bcc2f4fd14297715bcf5bfd1617e3d5f0c9..b878f37a5b8e807e5aa346e0074a741f2f8b6cc5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -159,4 +159,4 @@ This will enable VLOG messages generated by `buddy_allocator.{h,cc}` and in the
- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
-- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/math)
+- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/math)
diff --git a/README.md b/README.md
index 8d89c6b1ec9e4aefbd64328dedb4e8c7cc50c21b..eb99ed21d02650ef16cc7da91836909c02895be9 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,6 @@
[](https://travis-ci.org/PaddlePaddle/Paddle)
[](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html)
[](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html)
-[](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[](https://github.com/PaddlePaddle/Paddle/releases)
[](LICENSE)
@@ -19,6 +18,8 @@ learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
+### Latest PaddlePaddle Version: [Fluid](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid)
+
## Features
- **Flexibility**
diff --git a/benchmark/fluid/args.py b/benchmark/fluid/args.py
index 99c9d79b068f5886012fd702d84d0666b9d197b5..a79f25ccc6ace1594f3f331633130eaace5e175b 100644
--- a/benchmark/fluid/args.py
+++ b/benchmark/fluid/args.py
@@ -125,6 +125,10 @@ def parse_args():
parser.add_argument(
'--use_inference_transpiler',
action='store_true',
- help='If set, uses inference transpiler to optimize the program.')
+ help='If set, use inference transpiler to optimize the program.')
+ parser.add_argument(
+ '--no_random',
+ action='store_true',
+ help='If set, keep the random seed and do not shuffle the data.')
args = parser.parse_args()
return args
diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py
old mode 100755
new mode 100644
index dcd4d9ea95d816029317a29055b5ca8273ac9f43..94ea7bd6aca7c9595037a2dacc5e36d4c77827e7
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@@ -132,10 +132,6 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
exe.run(startup_prog)
# Use inference_transpiler to speedup
- if args.use_inference_transpiler:
- t = fluid.InferenceTranspiler()
- t.transpile(infer_prog, place)
-
if not args.use_reader_op:
feed_var_list = [
var for var in train_prog.global_block().vars.itervalues()
@@ -186,6 +182,10 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))),
# evaluation
if not args.no_test and batch_acc and not args.use_reader_op:
+ if args.use_inference_transpiler:
+ t = fluid.InferenceTranspiler()
+ t.transpile(infer_prog, place)
+
pass_test_acc = test(exe, infer_prog, test_reader, feeder,
batch_acc)
print(", Test Accuracy: %f" % pass_test_acc)
@@ -316,6 +316,8 @@ def main():
args = parse_args()
print_arguments(args)
print_paddle_envs()
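+    # keep a fixed random seed so runs are reproducible when --no_random is set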
+ if args.no_random:
+ fluid.default_startup_program().random_seed = 1
# the unique trainer id, starting from 0, needed by trainer
# only
diff --git a/benchmark/fluid/models/resnet.py b/benchmark/fluid/models/resnet.py
index 9ed1093c54a501cc93dbbf9c3651fe70914ce26b..d44a9c07d31cfae9d54ad5949b85c77e60eae258 100644
--- a/benchmark/fluid/models/resnet.py
+++ b/benchmark/fluid/models/resnet.py
@@ -197,12 +197,12 @@ def get_model(args):
optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
batched_train_reader = paddle.batch(
- paddle.reader.shuffle(
+ train_reader if args.no_random else paddle.reader.shuffle(
train_reader, buf_size=5120),
batch_size=args.batch_size * args.gpus,
drop_last=True)
batched_test_reader = paddle.batch(
- train_reader, batch_size=args.batch_size, drop_last=True)
+ test_reader, batch_size=args.batch_size, drop_last=True)
return avg_cost, inference_program, optimizer, batched_train_reader,\
batched_test_reader, batch_acc
diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index e3b9d94215a858c5c9a34e1b7e97540f1876801d..6ed51c648478efb9784d0c43b169c285e740e0f3 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -83,18 +83,20 @@ else()
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
endif()
-find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
+if(WITH_SYSTEM_BLAS)
+ find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})
-find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
+ find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
${REFERENCE_CBLAS_LIB_SEARCH_PATHS})
-if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
- set(CBLAS_FOUND ON)
- set(CBLAS_PROVIDER REFERENCE)
- set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
- set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY})
- add_definitions(-DPADDLE_USE_REFERENCE_CBLAS)
- message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
+ if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
+ set(CBLAS_FOUND ON)
+ set(CBLAS_PROVIDER REFERENCE)
+ set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
+ set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY})
+ add_definitions(-DPADDLE_USE_REFERENCE_CBLAS)
+ message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
+ endif()
endif()
if(IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND)
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index 6a8b15a6b60a2e5635dc78fc877f0c8da9a2a998..e4af34d10ed92c501dd805addb62747c91c00978 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -174,3 +174,7 @@ endif(WITH_GOLANG)
if(WITH_GRPC)
add_definitions(-DPADDLE_WITH_GRPC)
endif(WITH_GRPC)
+
+if(WITH_BRPC_RDMA)
+ add_definitions(-DPADDLE_WITH_BRPC_RDMA)
+endif(WITH_BRPC_RDMA)
diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake
index 8e2c913b2caae0c4eeb844d2b51a8975e81c1592..30b227b6452abf44171a1a4e04569e66b16e67a4 100644
--- a/cmake/external/brpc.cmake
+++ b/cmake/external/brpc.cmake
@@ -14,6 +14,15 @@
INCLUDE(ExternalProject)
+find_library(SSL_LIBRARY NAMES ssl)
+ADD_LIBRARY(ssl SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET ssl PROPERTY IMPORTED_LOCATION ${SSL_LIBRARY})
+
+find_library(CRYPTO_LIBRARY NAMES crypto)
+ADD_LIBRARY(crypto SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET crypto PROPERTY IMPORTED_LOCATION ${CRYPTO_LIBRARY})
+
+
SET(BRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/brpc)
SET(BRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/brpc)
SET(BRPC_INCLUDE_DIR "${BRPC_INSTALL_DIR}/include" CACHE PATH "brpc include directory." FORCE)
@@ -22,14 +31,14 @@ SET(BRPC_LIBRARIES "${BRPC_INSTALL_DIR}/lib/libbrpc.a" CACHE FILEPATH "brpc libr
INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR})
# Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args
-set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf")
+set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib")
# If a minimal .a is needed, you can set WITH_DEBUG_SYMBOLS=OFF
ExternalProject_Add(
extern_brpc
${EXTERNAL_PROJECT_LOG_ARGS}
- GIT_REPOSITORY "https://github.com/brpc/brpc"
- GIT_TAG "6d153dd7ff00f960ae6895c9c5fff0ce9f07aff2"
+ GIT_REPOSITORY "https://github.com/gongweibao/brpc"
+ GIT_TAG "7dc04defad1fd4173aae170c3fcbde131b65155a"
PREFIX ${BRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -42,6 +51,8 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_PREFIX_PATH=${prefix_path}
-DBRPC_WITH_GLOG=ON
+ -DIOBUF_WITH_HUGE_BLOCK=ON
+ -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA}
${EXTERNAL_OPTIONAL_ARGS}
LIST_SEPARATOR |
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR}
@@ -49,7 +60,7 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
)
-ADD_DEPENDENCIES(extern_brpc protobuf leveldb gflags glog gtest snappy)
+ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog gtest snappy)
ADD_LIBRARY(brpc STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES})
ADD_DEPENDENCIES(brpc extern_brpc)
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 9c42044ec163e9db1dd21d5c3915b010c30fdf1c..eafb11b6f21e226fc68556a78d675dea94080140 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -96,6 +96,20 @@ if(NOT APPLE AND NOT ANDROID)
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
endif(NOT APPLE AND NOT ANDROID)
+set_property(GLOBAL PROPERTY FLUID_MODULES "")
+# find_fluid_modules collects all fluid modules; the resulting list is used
+# to build the paddle fluid static library and the inference libs
+function(find_fluid_modules TARGET_NAME)
+ get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
+ string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
+ string(FIND "${__target_path}" "fluid" pos)
+ if(pos GREATER 1)
+ get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
+ set(fluid_modules ${fluid_modules} ${TARGET_NAME})
+ set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}")
+ endif()
+endfunction(find_fluid_modules)
+
function(merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
@@ -243,13 +257,14 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
- target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
- add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
+ target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
+ add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if (${cc_test_SERIAL})
set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1)
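+    # fill newly allocated memory with non-zero values so reads of
+    # uninitialized memory are more likely to surface in tests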
+ set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
endif()
endif()
endfunction(cc_test)
@@ -309,11 +324,12 @@ function(nv_test TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
- target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog)
- add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog)
+ target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
+ add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(${TARGET_NAME} ${TARGET_NAME})
if (nv_test_SERIAL)
set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1)
+ set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
endif()
endif()
endfunction(nv_test)
@@ -561,7 +577,7 @@ function(py_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME}
- COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
+ COMMAND env FLAGS_init_allocated_mem=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index 850098297e1456487cb8a7b83dffd3d2b0478689..c6979713231f631f8757e4139d6f685d4554b54e 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -12,19 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-set_property(GLOBAL PROPERTY FLUID_MODULES "")
-# find all fluid modules is used for paddle fluid static library
-function(find_fluid_modules TARGET_NAME)
- get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
- string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
- string(FIND "${__target_path}" "fluid" pos)
- if(pos GREATER 1)
- get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
- set(fluid_modules ${fluid_modules} ${TARGET_NAME})
- set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}")
- endif()
-endfunction(find_fluid_modules)
-
# make package for paddle fluid shared and static library
function(copy TARGET)
set(options "")
@@ -154,7 +141,7 @@ set(inference_deps paddle_fluid_shared paddle_fluid)
if(WITH_CONTRIB)
message(STATUS "installing contrib")
set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference")
- if (WITH_ANAKIN)
+ if (WITH_ANAKIN AND WITH_GPU)
copy(contrib_anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
SRCS
${PADDLE_BINARY_DIR}/paddle/contrib/inference/libinference_anakin_api* # compiled anakin api
@@ -163,9 +150,9 @@ if(WITH_CONTRIB)
list(APPEND inference_deps contrib_anakin_inference_lib)
endif()
- copy(contrib_inference_lib DEPS paddle_inference_api
+ copy(contrib_inference_lib DEPS paddle_inference_api paddle_inference_api_shared
SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h
- ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api.*
+ ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api*
DSTS ${contrib_dst_dir} ${contrib_dst_dir})
list(APPEND inference_deps contrib_inference_lib)
endif()
diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst
index 264506a68ae17d081dd58ef4794bf7723f6d021c..d443c49657b92583e527035f49e74462cf41487d 100644
--- a/doc/fluid/api/layers.rst
+++ b/doc/fluid/api/layers.rst
@@ -1468,6 +1468,14 @@ argmax
.. autofunction:: paddle.fluid.layers.argmax
:noindex:
+.. _api_fluid_layers_argsort:
+
+argsort
+-------
+
+.. autofunction:: paddle.fluid.layers.argsort
+ :noindex:
+
.. _api_fluid_layers_ones:
ones
diff --git a/doc/fluid/design/dist_train/dist_train_nccl2.md b/doc/fluid/design/dist_train/dist_train_nccl2.md
new file mode 100644
index 0000000000000000000000000000000000000000..aa7455ec5de0d46d7c2b0cef3b7ebf4754af3cb1
--- /dev/null
+++ b/doc/fluid/design/dist_train/dist_train_nccl2.md
@@ -0,0 +1,35 @@
+# Distributed Training with NCCL2
+
+We design a pattern that enables training with `ParallelExecutor`,
+using [NCCL2](https://developer.nvidia.com/nccl) as its collective
+communication library.
+
+In `ParallelExecutor` we can use `AllReduce` or `Reduce` and `Broadcast`
+to do multi-GPU training. If we initialize the NCCL2 communicators as
+ranks in a distributed environment, we can simply run the `ParallelExecutor`
+as a distributed program! The only thing that differs from the single-node
+version is that we need to broadcast the NCCL unique ID to all the nodes
+and initialize the communicators with that ID, so the NCCL2 processes
+recognize each other as ranks.
+
+To achieve this, we introduce a new operator, the `gen_nccl_id` op, so we
+are ***not*** bound to running NCCL2 with MPI; it can run on whatever
+platform you like.
+
+It has two running modes:
+
+1. Generate and broadcast mode, which should be used on trainer 0;
+1. Listen and fetch mode, which should be used on trainers other than 0.
+
+In both modes, the op saves the NCCL ID into the current scope as a
+persistable variable. We can then insert this op at the end of fluid's
+"startup program", so that all workers get the same ID with which to
+initialize their NCCL communicator objects.
+
+<img src="src/ncc2_design.png"/>
+
+The above figure shows the general process of distributed training with
+NCCL2. Each trainer has as many communicators as it has GPUs, but the
+ranks must follow the global rank numbering: here we have 8 GPUs in
+total, so `nranks==8`; the ranks are 0 ~ 3 on trainer 0 and 4 ~ 7 on
+trainer 1.
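+
+As a rough illustration (this is not the op's confirmed interface; the
+variable and attribute names below are assumptions), inserting the op into
+the startup program could look like this:
+
+```python
+import paddle.fluid as fluid
+
+startup = fluid.default_startup_program()
+block = startup.global_block()
+
+# a persistable variable that will hold the NCCL unique ID
+nccl_id_var = block.create_var(
+    name="NCCLID", persistable=True,
+    type=fluid.core.VarDesc.VarType.RAW)
+
+block.append_op(
+    type="gen_nccl_id",
+    outputs={"NCCLID": nccl_id_var},
+    attrs={
+        # trainer 0 generates and broadcasts; the others listen and fetch
+        "trainer_id": 0,
+        "trainers": ["192.168.1.1:6174", "192.168.1.2:6174"],  # assumed attr
+    })
+```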
diff --git a/doc/fluid/design/dist_train/distributed_lookup_table_design.md b/doc/fluid/design/dist_train/distributed_lookup_table_design.md
index 988729138926f035750b59eb245dde82502a3ad2..97f890c88e778a59ea475e984ccbc28cf026fc5b 100644
--- a/doc/fluid/design/dist_train/distributed_lookup_table_design.md
+++ b/doc/fluid/design/dist_train/distributed_lookup_table_design.md
@@ -119,6 +119,32 @@ optimization algorithm $f$ runs on the storage service.
- Con: the storage service needs to be able to run the optimization
algorithm.
+## Distributed Sparse Table in Fluid
+
+As an alternative design, we can implement a distributed sparse table in
+Fluid, so we don't need to maintain an external storage component while
+training.
+
+You may need to read Fluid [Distributed Training Architecture](./distributed_architecture.md)
+and [Parameter Server](./parameter_server.md) before going on.
+
+
+<img src="src/fluid_lookup_remote_table.png"/>
+
+To partition a large table across multiple pserver instances:
+1. `DistributeTranspiler` splits the table into small table blocks using a
+partitioning algorithm such as
+[RoundRobin](https://en.wikipedia.org/wiki/Round-robin_scheduling) or
+[Hash](https://en.wikipedia.org/wiki/Hash).
+1. In some cases the range of the input `Ids` is very wide and unpredictable,
+so the sparse table must be able to initialize an id that has not appeared
+before with zeros, or with a uniform or Gaussian random distribution.
+
+For each Trainer's training process:
+1. In the forward pass, instead of the local `lookup_table` op, we use a
+`pre-fetch` op to pre-fetch the parameter blocks from the PServers according
+to the input `Ids`, and then merge the blocks into a parameter `W` (a sketch
+of this step follows below).
+1. In the backward pass, we compute `GRAD@W'` using the pre-fetched `W` and
+send it to the PServer to execute the optimization pass.
+
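+As a plain-Python sketch of the prefetch-and-merge step above (an
+illustration, not Fluid code; `fetch` stands in for the actual RPC call):
+
+```python
+import numpy as np
+
+def prefetch_rows(ids, pservers, fetch):
+    # group ids by owning pserver under a round-robin partition
+    by_server = {}
+    for i in ids:
+        by_server.setdefault(pservers[i % len(pservers)], []).append(i)
+    # pre-fetch each table block, then merge the rows of W back into
+    # the original id order
+    rows = {}
+    for server, block_ids in by_server.items():
+        rows.update(fetch(server, block_ids))  # {id: row of W}
+    return np.stack([rows[i] for i in ids])
+```
+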
## Conclusion
Let us do the "storage service does not optimize" solution first, as a
diff --git a/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..96ca6d48f43bd9f49c6861dab006e2037873db87
Binary files /dev/null and b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle differ
diff --git a/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png
new file mode 100644
index 0000000000000000000000000000000000000000..afa25ab3b4e427bc595a855b12ab966478e01ed0
Binary files /dev/null and b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png differ
diff --git a/doc/fluid/design/dist_train/src/ncc2_design.graffle b/doc/fluid/design/dist_train/src/ncc2_design.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..7d2753bbb03bc28c7a0054bb0aa424deb072ffbf
Binary files /dev/null and b/doc/fluid/design/dist_train/src/ncc2_design.graffle differ
diff --git a/doc/fluid/design/dist_train/src/ncc2_design.png b/doc/fluid/design/dist_train/src/ncc2_design.png
new file mode 100644
index 0000000000000000000000000000000000000000..da0d5ee81f5dfeb4ca1356601b0bb5870456e3d6
Binary files /dev/null and b/doc/fluid/design/dist_train/src/ncc2_design.png differ
diff --git a/doc/fluid/design/multi_devices/kernel_selection.md b/doc/fluid/design/multi_devices/kernel_selection.md
index 967317d5d2eeb818ab14faabca342cc8c4ed717e..4d2aab87b8cf30d03075e96cc4c67070efaf963a 100644
--- a/doc/fluid/design/multi_devices/kernel_selection.md
+++ b/doc/fluid/design/multi_devices/kernel_selection.md
@@ -74,10 +74,10 @@ void OperatorWithKernel::Run(
auto kernel_type_for_var = this->GetKernelTypeForVar(...);
if (kernel_type_for_var.place_ != expected_kernel_key.place_) {
auto* trans_var = new_scope.Var(var_name);
- auto* out = DataTransform(expected_kernel_key,
+ auto* out = TransformData(expected_kernel_key,
kernel_type_for_var,
*tensor_in);
- CopyVariableWithTensor(...);
+ SetTensorToVariable(...);
}
}
diff --git a/doc/v2/design/cluster_train/large_model_dist_train.md b/doc/v2/design/cluster_train/large_model_dist_train.md
index 0c4b5bc24c854b7062d509249bea9c50d42bd5f1..edb0245ea083e791b7f32ac57a330698299fceda 100644
--- a/doc/v2/design/cluster_train/large_model_dist_train.md
+++ b/doc/v2/design/cluster_train/large_model_dist_train.md
@@ -52,7 +52,7 @@ In `trainer_internal.cpp:L93 trainOneBatch`:
When doing actual network forward and backward, at the beginning of each batch, the trainer will try to download one row of data from pserver.
-In `trainer/RemoteParameterUpdater.cpp`: `parameterUpdater_->getParametersRemote();`:
+In `legacy/trainer/RemoteParameterUpdater.cpp`: `parameterUpdater_->getParametersRemote();`:
```c++
if (fullSize) {
diff --git a/doc/v2/design/interface/00.why_plain_c.md b/doc/v2/design/interface/00.why_plain_c.md
index a1443093342c5a3ed698fb6b52a751dfc7cb5319..826ff3141bc2512b525cb44ac0f18b376ce57e92 100644
--- a/doc/v2/design/interface/00.why_plain_c.md
+++ b/doc/v2/design/interface/00.why_plain_c.md
@@ -65,7 +65,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix matrix,
而在CPP里面实现这个C的接口,文件 `paddle_matrix.cpp`
```cpp
-#include "paddle/math/matrix.h"
+#include "paddle/legacy/math/matrix.h"
extern "C"
paddle_error paddle_matrix_shape(paddle_matrix matrix,
uint64_t *width,
diff --git a/doc/v2/design/mkl/mkldnn.md b/doc/v2/design/mkl/mkldnn.md
index bd5bcf6f67168c21cebb046a629b948d1661e75c..4876de0045979be20fa45bdc84d2594516f71c03 100644
--- a/doc/v2/design/mkl/mkldnn.md
+++ b/doc/v2/design/mkl/mkldnn.md
@@ -18,20 +18,20 @@ Figure 1. PaddlePaddle on IA
具体的完成状态可以参见[这里](https://github.com/PaddlePaddle/Paddle/projects/21)。
## Contents
-
-- [Overview](#overview)
-- [Actions](#actions)
- - [CMake](#cmake)
- - [Matrix](#matrix)
- - [Layers](#layers)
- - [Activations](#activations)
- - [Parameters](#parameters)
- - [Gradients](#gradients)
- - [Unit Tests](#unit-tests)
- - [Python API](#python-api)
- - [Benchmarking](#benchmarking)
- - [Others](#others)
-- [Design Concerns](#design-concerns)
+
+- [Overview](#overview)
+- [Actions](#actions)
+ - [CMake](#cmake)
+ - [Matrix](#matrix)
+ - [Layers](#layers)
+ - [Activations](#activations)
+ - [Parameters](#parameters)
+ - [Gradients](#gradients)
+ - [Unit Tests](#unit-tests)
+ - [Python API](#python-api)
+ - [Benchmarking](#benchmarking)
+ - [Others](#others)
+- [Design Concerns](#design-concerns)
## Overview
@@ -218,20 +218,20 @@ if use_mkldnn
我们总结出一些特别需要注意的点:
1. 使用**deviceId_**。为了尽可能少的在父类Layer中添加变量或者函数,
-我们决定使用已有的`deviceId_`变量来区分layer的属性,定义`-2`为`MKLDNNLayer`特有的设备ID。
-2. 重写父类Layer的**init**函数,修改`deviceId_`为`-2`,代表这个layer是用于跑在MKL-DNN的环境下。
+我们决定使用已有的`deviceId_`变量来区分layer的属性,定义`-2`为`MKLDNNLayer`特有的设备ID。
+2. 重写父类Layer的**init**函数,修改`deviceId_`为`-2`,代表这个layer是用于跑在MKL-DNN的环境下。
3. 创建`MKLDNNBase`,定义一些除了layer和memory相关的类和函数。
-包括MKL-DNN会用到`MKLDNNStream`和`CPUEngine`,和未来可能还会用到`FPGAEngine`等。
+包括MKL-DNN会用到`MKLDNNStream`和`CPUEngine`,和未来可能还会用到`FPGAEngine`等。
4. 如果MKL-DNN layer的后面接有cpu device,那么就会使`output_.value`与`extOutVal_`共享内存,
同时数据格式就是`NCHW`,这样下一个cpu device就能拿到正确的数据。
在有普通的CPU layer时, `extOutVal_`和`extOutGrad_`的格式始终是`NCHW`或者`NC`。
## References
1. [MKL small library](https://github.com/01org/mkl-dnn#linking-your-application)是[Intel MKL](https://software.intel.com/en-us/mkl)的一个子集。
-主要包括了深度学习相关的数学原语与操作,一般由MKL-DNN在发布[新版本](https://github.com/01org/mkl-dnn/releases)时一起更新。
+主要包括了深度学习相关的数学原语与操作,一般由MKL-DNN在发布[新版本](https://github.com/01org/mkl-dnn/releases)时一起更新。
2. [MKL-DNN System Requirements](https://github.com/01org/mkl-dnn#system-requirements)。
目前在PaddlePaddle中,仅会在支持AVX2指令集及以上的机器才使用MKL-DNN。
3. [原来的方案](https://github.com/PaddlePaddle/Paddle/pull/3096)会引入**nextLayer**的信息。
-但是在PaddlePaddle中,无论是重构前的layer还是重构后的op,都不会想要知道next layer/op的信息。
+但是在PaddlePaddle中,无论是重构前的layer还是重构后的op,都不会想要知道next layer/op的信息。
4. MKL-DNN的高性能格式与PaddlePaddle原有的`NCHW`不同(PaddlePaddle中的cuDNN部分使用的也是`NCHW`,所以不存在这个问题)。
-所以需要引入一个转换方法,并且只需要在必要的时候转换这种格式,才能更好的发挥MKL-DNN的性能。
+所以需要引入一个转换方法,并且只需要在必要的时候转换这种格式,才能更好的发挥MKL-DNN的性能。
diff --git a/doc/v2/dev/new_layer_cn.rst b/doc/v2/dev/new_layer_cn.rst
index 3115654b2bd87995fa63bb7828fd1b3039aea8cc..e5a14346123d342de0b67757cbbce654bd4180dc 100644
--- a/doc/v2/dev/new_layer_cn.rst
+++ b/doc/v2/dev/new_layer_cn.rst
@@ -58,7 +58,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。
实现C++类
===================
-一个网络层的C++类需要实现初始化,前向和后向。全连接层的实现位于:code:`paddle/gserver/layers/FullyConnectedLayer.h`及:code:`paddle/gserver/layers/FullyConnectedLayer.cpp`。这里我们展示一份简化过的代码。
+一个网络层的C++类需要实现初始化,前向和后向。全连接层的实现位于:code:`paddle/legacy/gserver/layers/FullyConnectedLayer.h`及:code:`paddle/legacy/gserver/layers/FullyConnectedLayer.cpp`。这里我们展示一份简化过的代码。
这个类需要继承 :code:`paddle::Layer` 这个基类,并且需要重写基类中的以下几个虚函数:
@@ -153,7 +153,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。
- 每个层在其 :code:`forward` 函数的开头必须调用 :code:`Layer::forward(passType);` 。
- 之后使用 :code:`reserveOutput(batchSize, size);` 为输出分配内存。由于我们支持训练数据有不同的批次大小,所以这一步是必要的。 :code:`reserveOutput` 会相应地改变输出的尺寸。为了保证效率,如果需要扩大矩阵,我们会重新分配内存;如果需要缩减矩阵,我们会继续使用现有的内存块。
-- 之后使用矩阵运算函数来计算 :math:`\sum_i W_i x + b`。:code:`getInput(i).value` 返回第i个输入矩阵。每个输入都是一个 :math:`batchSize \times dim` 的矩阵,每行表示一个批次中的单个输入。对于我们支持的全部矩阵操作,请参考 :code:`paddle/math/Matrix.h`和:code:`paddle/math/BaseMatrix.h` 。
+- 之后使用矩阵运算函数来计算 :math:`\sum_i W_i x + b`。:code:`getInput(i).value` 返回第i个输入矩阵。每个输入都是一个 :math:`batchSize \times dim` 的矩阵,每行表示一个批次中的单个输入。对于我们支持的全部矩阵操作,请参考 :code:`paddle/legacy/math/Matrix.h`和:code:`paddle/legacy/math/BaseMatrix.h` 。
- 最终,使用 :code:`forwardActivation();` 进行激活操作。这会自动进行网络配置中声明的激活操作。
@@ -262,7 +262,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。
REGISTER_LAYER(fc, FullyConnectedLayer);
}
-若 :code:`cpp` 被放在 :code:`paddle/gserver/layers` 目录下,其会自动被加入编译列表。
+若 :code:`cpp` 被放在 :code:`paddle/legacy/gserver/layers` 目录下,其会自动被加入编译列表。
写梯度检查单元测试
@@ -270,7 +270,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。
写梯度检查单元测试是一个验证新实现的层是否正确的相对简单的办法。梯度检查单元测试通过有限差分法来验证一个层的梯度。首先对输入做一个小的扰动 :math:`\Delta x` ,然后观察到输出的变化为 :math:`\Delta y` ,那么,梯度就可以通过这个方程计算得到 :math:`\frac{\Delta y}{\Delta x }` 。之后,再用这个梯度去和 :code:`backward` 函数得到的梯度去对比,以保证梯度计算的正确性。需要注意的是梯度检查仅仅验证了梯度的计算,并不保证 :code:`forward` 和 :code:`backward` 函数的实现是正确的。你需要一些更复杂的单元测试来保证你实现的网络层是正确的。
-所有网络层的梯度检查单测都位于 :code:`paddle/gserver/tests/test_LayerGrad.cpp` 。我们建议你在写新网络层时把测试代码放入新的文件中。下面列出了全连接层的梯度检查单元测试。它包含以下几步:
+所有网络层的梯度检查单测都位于 :code:`paddle/legacy/gserver/tests/test_LayerGrad.cpp` 。我们建议你在写新网络层时把测试代码放入新的文件中。下面列出了全连接层的梯度检查单元测试。它包含以下几步:
+ 生成网络层配置。网络层配置包含以下几项:
- 偏置参数的大小。(例子中是4096)
@@ -322,7 +322,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。
}
}
-如果你要为了测试而增加新的文件,例如 :code:`paddle/gserver/tests/testFCGrad.cpp` ,你需要把该文件加入 :code:`paddle/gserver/tests/CMakeLists.txt` 中。下面给出了一个例子。当你执行命令 :code:`make tests` 时,所有的单测都会被执行一次。注意,有些层可能需要高精度来保证梯度检查单测正确执行。你需要在配置cmake时将 :code:`WITH_DOUBLE` 设置为 `ON` 。
+如果你要为了测试而增加新的文件,例如 :code:`paddle/legacy/gserver/tests/testFCGrad.cpp` ,你需要把该文件加入 :code:`paddle/legacy/gserver/tests/CMakeLists.txt` 中。下面给出了一个例子。当你执行命令 :code:`make tests` 时,所有的单测都会被执行一次。注意,有些层可能需要高精度来保证梯度检查单测正确执行。你需要在配置cmake时将 :code:`WITH_DOUBLE` 设置为 `ON` 。
.. code-block:: bash
diff --git a/doc/v2/dev/new_layer_en.rst b/doc/v2/dev/new_layer_en.rst
index b05bb45f11eb253dfb87d6283c29ec6689394d22..ad723738801908a5f48343574c204bdbfc97ee08 100644
--- a/doc/v2/dev/new_layer_en.rst
+++ b/doc/v2/dev/new_layer_en.rst
@@ -58,7 +58,7 @@ Finally we can use chain rule to calculate :math:`\frac{\partial z}{\partial x}`
Implement C++ Class
===================
-The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/gserver/layers/FullyConnectedLayer.cpp`. We list simplified version of the code below.
+The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/legacy/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/legacy/gserver/layers/FullyConnectedLayer.cpp`. We list simplified version of the code below.
It needs to derive the base class :code:`paddle::Layer`, and it needs to override the following functions:
@@ -154,7 +154,7 @@ The implementation of the forward part has the following steps.
- Every layer must call :code:`Layer::forward(passType);` at the beginning of its :code:`forward` function.
- Then it allocates memory for the output using :code:`reserveOutput(batchSize, size);`. This step is necessary because we support the batches to have different batch sizes. :code:`reserveOutput` will change the size of the output accordingly. For the sake of efficiency, we will allocate new memory if we want to expand the matrix, but we will reuse the existing memory block if we want to shrink the matrix.
-- Then it computes :math:`\sum_i W_i x + b` using Matrix operations. :code:`getInput(i).value` retrieve the matrix of the i-th input. Each input is a :math:`batchSize \times dim` matrix, where each row represents an single input in a batch. For a complete lists of supported matrix operations, please refer to :code:`paddle/math/Matrix.h` and :code:`paddle/math/BaseMatrix.h`.
+- Then it computes :math:`\sum_i W_i x + b` using Matrix operations. :code:`getInput(i).value` retrieve the matrix of the i-th input. Each input is a :math:`batchSize \times dim` matrix, where each row represents an single input in a batch. For a complete lists of supported matrix operations, please refer to :code:`paddle/legacy/math/Matrix.h` and :code:`paddle/legacy/math/BaseMatrix.h`.
- Finally it applies the activation function using :code:`forwardActivation();`. It will automatically applies the corresponding activation function specifies in the network configuration.
@@ -263,7 +263,7 @@ Finally, you can use :code:`REGISTER_LAYER(fc, FullyConnectedLayer);` to registe
REGISTER_LAYER(fc, FullyConnectedLayer);
}
-If the :code:`cpp` file is put into :code:`paddle/gserver/layers`, it will be automatically added to the compilation list.
+If the :code:`cpp` file is put into :code:`paddle/legacy/gserver/layers`, it will be automatically added to the compilation list.
Write Gradient Check Unit Test
@@ -271,7 +271,7 @@ Write Gradient Check Unit Test
An easy way to verify the correctness of new layer's implementation is to write a gradient check unit test. Gradient check unit test utilizes finite difference method to verify the gradient of a layer. It modifies the input with a small perturbation :math:`\Delta x` and observes the changes of output :math:`\Delta y`, the gradient can be computed as :math:`\frac{\Delta y}{\Delta x }`. This gradient can be compared with the gradient computed by the :code:`backward` function of the layer to ensure the correctness of the gradient computation. Notice that the gradient check only tests the correctness of the gradient computation, it does not necessarily guarantee the correctness of the implementation of the :code:`forward` and :code:`backward` function. You need to write more sophisticated unit tests to make sure your layer is implemented correctly.
-All the gradient check unit tests are located in :code:`paddle/gserver/tests/test_LayerGrad.cpp`. You are recommended to put your test into a new test file if you are planning to write a new layer. The gradient test of the gradient check unit test of the fully connected layer is listed below. It has the following steps.
+All the gradient check unit tests are located in :code:`paddle/legacy/gserver/tests/test_LayerGrad.cpp`. You are recommended to put your test into a new test file if you are planning to write a new layer. The gradient test of the gradient check unit test of the fully connected layer is listed below. It has the following steps.
+ Create layer configuration. A layer configuration can include the following attributes:
- size of the bias parameter. (4096 in our example)
@@ -323,7 +323,7 @@ All the gradient check unit tests are located in :code:`paddle/gserver/tests/tes
}
}
-If you are creating a new file for the test, such as :code:`paddle/gserver/tests/testFCGrad.cpp`, you need to add the file to :code:`paddle/gserver/tests/CMakeLists.txt`. An example is given below. All the unit tests will run when you execute the command :code:`make tests`. Notice that some layers might need high accuracy for the gradient check unit tests to work well. You need to configure :code:`WITH_DOUBLE` to `ON` when configuring cmake.
+If you are creating a new file for the test, such as :code:`paddle/legacy/gserver/tests/testFCGrad.cpp`, you need to add the file to :code:`paddle/legacy/gserver/tests/CMakeLists.txt`. An example is given below. All the unit tests will run when you execute the command :code:`make tests`. Notice that some layers might need high accuracy for the gradient check unit tests to work well. You need to configure :code:`WITH_DOUBLE` to `ON` when configuring cmake.
.. code-block:: bash
@@ -339,7 +339,7 @@ If you are creating a new file for the test, such as :code:`paddle/gserver/tests
Implement Python Wrapper
========================
-Implementing Python wrapper allows us to use the added layer in configuration files. All the Python wrappers are in file :code:`python/paddle/trainer/config_parser.py`. An example of the Python wrapper for fully connected layer is listed below. It has the following steps:
+Implementing Python wrapper allows us to use the added layer in configuration files. All the Python wrappers are in file :code:`python/paddle/legacy/trainer/config_parser.py`. An example of the Python wrapper for fully connected layer is listed below. It has the following steps:
- Use :code:`@config_layer('fc')` at the decorator for all the Python wrapper class. :code:`fc` is the identifier of the layer.
- Implements :code:`__init__` constructor function.
diff --git a/doc/v2/faq/parameter/index_cn.rst b/doc/v2/faq/parameter/index_cn.rst
index 1fa4b3e1311d2007ccba98fde9ff94300ea42c16..987e8cf088be4ee8daa7c28fdc855506cbfd31c7 100644
--- a/doc/v2/faq/parameter/index_cn.rst
+++ b/doc/v2/faq/parameter/index_cn.rst
@@ -196,6 +196,6 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数
obj="process",
args={"src_dict_path": src_dict_path})
-完整源码可参考 `sequence_recurrent `_ 示例。
+完整源码可参考 `sequence_recurrent `_ 示例。
diff --git a/doc/v2/howto/capi/compile_paddle_lib_cn.md b/doc/v2/howto/capi/compile_paddle_lib_cn.md
index e223fd33a8420abcdfdad53d1cfc5ed160a1b37e..2c87e9afc6911526cd51d6c691f262960accc9e8 100644
--- a/doc/v2/howto/capi/compile_paddle_lib_cn.md
+++ b/doc/v2/howto/capi/compile_paddle_lib_cn.md
@@ -18,7 +18,7 @@
| cpu_avx_openblas |
-暂无 |
+paddle.tgz |
| cpu_noavx_openblas |
@@ -35,7 +35,12 @@
| cuda8.0_cudnn7_avx_mkl |
paddle.tgz |
-
+
+
+| cuda9.0_cudnn7_avx_mkl |
+paddle.tgz |
+
+
### 从源码编译
diff --git a/doc/v2/howto/capi/compile_paddle_lib_en.md b/doc/v2/howto/capi/compile_paddle_lib_en.md
index 6212a3081116d988630706e83d2349dd200b73ab..3fa8a18a9fbea21b494c416e6b938990fbb68337 100644
--- a/doc/v2/howto/capi/compile_paddle_lib_en.md
+++ b/doc/v2/howto/capi/compile_paddle_lib_en.md
@@ -17,7 +17,7 @@
| cpu_avx_openblas |
-- |
+paddle.tgz |
| cpu_noavx_openblas |
@@ -34,7 +34,12 @@
| cuda8.0_cudnn7_avx_mkl |
paddle.tgz |
-
+
+
+| cuda9.0_cudnn7_avx_mkl |
+paddle.tgz |
+
+
### From source
diff --git a/doc/v2/howto/optimization/gpu_profiling_cn.rst b/doc/v2/howto/optimization/gpu_profiling_cn.rst
index 25bcaccb6975bc21fba2e8c5843da15c69948d72..f2396716bddd4810fa77c738d41f5482aa6d6055 100644
--- a/doc/v2/howto/optimization/gpu_profiling_cn.rst
+++ b/doc/v2/howto/optimization/gpu_profiling_cn.rst
@@ -50,12 +50,12 @@ GPU则还需要高并行性,才能发挥其全部能力。这正是它们速
**nvprof** 是Nvidia性能分析工具, **nvvp** 则是带GUI的Nvidia可视化性能分析工具。
在这个教程中,我们主要会介绍nvprof和nvvp。
-:code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate
+:code:`test_GpuProfiler` from :code:`paddle/legacy/math/tests` directory will be used to evaluate
above profilers.
-:code:`paddle/math/test` 目录中的 :code:`test_GpuProfiler` 就是用于展示上述分析工具的用法。
+:code:`paddle/legacy/math/test` 目录中的 :code:`test_GpuProfiler` 就是用于展示上述分析工具的用法。
-.. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp
+.. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 137-151
:linenos:
@@ -83,7 +83,7 @@ program crashes when CPU version of PaddlePaddle invokes them.
1. 加入 :code:`REGISTER_TIMER_INFO` 和 :code:`printAllStatus` 函数(如高亮部分)。
- .. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp
+ .. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 137-151
:emphasize-lines: 8-12,14
@@ -101,8 +101,8 @@ program crashes when CPU version of PaddlePaddle invokes them.
.. code-block:: bash
:emphasize-lines: 1,12-15
- > ./paddle/math/tests/test_GpuProfiler
- I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/math/tests/test_GpuProfiler
+ > ./paddle/legacy/math/tests/test_GpuProfiler
+ I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/legacy/math/tests/test_GpuProfiler
I1117 11:13:42.845065 2522362816 Util.cpp:130] Calling runInitFunctions
I1117 11:13:42.845208 2522362816 Util.cpp:143] Call runInitFunctions done.
[==========] Running 1 test from 1 test case.
@@ -130,7 +130,7 @@ nvprof 工具
1. 将 :code:`REGISTER_GPU_PROFILER` 函数加到代码中(参考强调部分)。
- .. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp
+ .. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 137-151
:emphasize-lines: 6-7
@@ -147,13 +147,13 @@ nvprof 工具
.. code-block:: bash
- nvprof ./paddle/math/tests/test_GpuProfiler
+ nvprof ./paddle/legacy/math/tests/test_GpuProfiler
然后,您就能获得如下的分析结果:
.. code-block:: bash
- ==78544== Profiling application: ./paddle/math/tests/test_GpuProfiler
+ ==78544== Profiling application: ./paddle/legacy/math/tests/test_GpuProfiler
==78544== Profiling result:
Time(%) Time Calls Avg Min Max Name
27.60% 9.6305ms 5 1.9261ms 3.4560us 6.4035ms [CUDA memcpy HtoD]
diff --git a/doc/v2/howto/optimization/gpu_profiling_en.rst b/doc/v2/howto/optimization/gpu_profiling_en.rst
index 50adb7da24906515cb5977db565e9f8a76599fef..6e439be9bba8935cdd65f1c131cfd3725530ec0e 100644
--- a/doc/v2/howto/optimization/gpu_profiling_en.rst
+++ b/doc/v2/howto/optimization/gpu_profiling_en.rst
@@ -51,10 +51,10 @@ For general GPU profiling, a bunch of tools are provided from both NVIDIA and th
**nvprof** is Nvidia profiler and **nvvp** is (GUI based) Nvidia visual profiler.
In this tutorial, we will focus on nvprof and nvvp.
-:code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate
+:code:`test_GpuProfiler` from :code:`paddle/legacy/math/tests` directory will be used to evaluate
above profilers.
-.. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp
+.. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 137-151
:linenos:
@@ -80,7 +80,7 @@ As a simple example, consider the following:
1. Add :code:`REGISTER_TIMER_INFO` and :code:`printAllStatus` functions (see the emphasize-lines).
- .. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp
+ .. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 137-151
:emphasize-lines: 8-12,14
@@ -98,8 +98,8 @@ As a simple example, consider the following:
.. code-block:: bash
:emphasize-lines: 1,12-15
- > ./paddle/math/tests/test_GpuProfiler
- I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/math/tests/test_GpuProfiler
+ > ./paddle/legacy/math/tests/test_GpuProfiler
+ I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/legacy/math/tests/test_GpuProfiler
I1117 11:13:42.845065 2522362816 Util.cpp:130] Calling runInitFunctions
I1117 11:13:42.845208 2522362816 Util.cpp:143] Call runInitFunctions done.
[==========] Running 1 test from 1 test case.
@@ -127,7 +127,7 @@ To use this command line profiler **nvprof**, you can simply issue the following
1. Add :code:`REGISTER_GPU_PROFILER` function (see the emphasize-lines).
- .. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp
+ .. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 137-151
:emphasize-lines: 6-7
@@ -144,13 +144,13 @@ To use this command line profiler **nvprof**, you can simply issue the following
.. code-block:: bash
- nvprof ./paddle/math/tests/test_GpuProfiler
+ nvprof ./paddle/legacy/math/tests/test_GpuProfiler
Then, you can get the following profiling result:
.. code-block:: bash
- ==78544== Profiling application: ./paddle/math/tests/test_GpuProfiler
+ ==78544== Profiling application: ./paddle/legacy/math/tests/test_GpuProfiler
==78544== Profiling result:
Time(%) Time Calls Avg Min Max Name
27.60% 9.6305ms 5 1.9261ms 3.4560us 6.4035ms [CUDA memcpy HtoD]
diff --git a/doc/v2/howto/rnn/hrnn_rnn_api_compare_cn.rst b/doc/v2/howto/rnn/hrnn_rnn_api_compare_cn.rst
index 67c7b774e9c476a3035037a421c84ebf17a31b09..9d6d417075485dceb1ee71f527b408aa6a6638ea 100644
--- a/doc/v2/howto/rnn/hrnn_rnn_api_compare_cn.rst
+++ b/doc/v2/howto/rnn/hrnn_rnn_api_compare_cn.rst
@@ -4,7 +4,7 @@
单双层RNN API对比介绍
#####################
-本文以PaddlePaddle的双层RNN单元测试为示例,用多对效果完全相同的、分别使用单双层RNN作为网络配置的模型,来讲解如何使用双层RNN。本文中所有的例子,都只是介绍双层RNN的API接口,并不是使用双层RNN解决实际的问题。如果想要了解双层RNN在具体问题中的使用,请参考\ :ref:`algo_hrnn_demo`\ 。本文中示例所使用的单元测试文件是\ `test_RecurrentGradientMachine.cpp `_\ 。
+本文以PaddlePaddle的双层RNN单元测试为示例,用多对效果完全相同的、分别使用单双层RNN作为网络配置的模型,来讲解如何使用双层RNN。本文中所有的例子,都只是介绍双层RNN的API接口,并不是使用双层RNN解决实际的问题。如果想要了解双层RNN在具体问题中的使用,请参考\ :ref:`algo_hrnn_demo`\ 。本文中示例所使用的单元测试文件是\ `test_RecurrentGradientMachine.cpp `_\ 。
示例1:双层RNN,子序列间无Memory
================================
@@ -13,8 +13,8 @@
在本示例中,单层RNN和双层RNN的网络配置,都是将每一句分好词后的句子,使用LSTM作为encoder,压缩成一个向量。区别是RNN使用两层序列模型,将多句话看成一个整体同时使用encoder压缩。二者语意上完全一致。这组语义相同的示例配置如下:
-* 单层RNN\: `sequence_layer_group.conf `_
-* 双层RNN\: `sequence_nest_layer_group.conf `_
+* 单层RNN\: `sequence_layer_group.conf `_
+* 双层RNN\: `sequence_nest_layer_group.conf `_
读取双层序列数据
@@ -24,18 +24,18 @@
- 本例中的原始数据一共有10个样本。每个样本由两部分组成,一个label(此处都为2)和一个已经分词后的句子。这个数据也被单层RNN网络直接使用。
-.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/Sequence/tour_train_wdseg
:language: text
- 双层序列数据一共有4个样本。 每个样本间用空行分开,整体数据和原始数据完全一样。但于双层序列的LSTM来说,第一个样本同时encode两条数据成两个向量。这四条数据同时处理的句子数量为\ :code:`[2, 3, 2, 3]`\ 。
-.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg.nest
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest
:language: text
-其次,对于两种不同的输入数据类型,不同DataProvider对比如下(`sequenceGen.py `_)\:
+其次,对于两种不同的输入数据类型,不同DataProvider对比如下(`sequenceGen.py `_)\:
-.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequenceGen.py
:language: python
:lines: 21-39
:linenos:
@@ -47,7 +47,7 @@
- words是原始数据中的每一句话,所对应的词表index数组。它是integer_value_sequence类型的,即整数数组。words即为这个数据中的单层时间序列。
- label是原始数据中对于每一句话的分类标签,它是integer_value类型的。
-.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequenceGen.py
:language: python
:lines: 42-71
:linenos:
@@ -64,7 +64,7 @@
首先,我们看一下单层RNN的配置。代码中9-15行(高亮部分)即为单层RNN序列的使用代码。这里使用了PaddlePaddle预定义好的RNN处理函数。在这个函数中,RNN对于每一个时间步通过了一个LSTM网络。
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_layer_group.conf
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_layer_group.conf
:language: python
:lines: 38-63
:linenos:
@@ -85,7 +85,7 @@
* 至此,\ :code:`lstm_last`\ 便和单层RNN配置中的\ :code:`lstm_last`\ 具有相同的结果了。
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_layer_group.conf
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_layer_group.conf
:language: python
:lines: 38-64
:linenos:
@@ -107,7 +107,7 @@
- 单层RNN:过了一个很简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全链接。
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn.conf
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_rnn.conf
:language: python
:lines: 36-48
@@ -116,7 +116,7 @@
- 内层inner_step的recurrent_group和单层序列的几乎一样。除了boot_layer=outer_mem,表示将外层的outer_mem作为内层memory的初始状态。外层outer_step中,outer_mem是一个子句的最后一个向量,即整个双层group是将前一个子句的最后一个向量,作为下一个子句memory的初始状态。
- 从输入数据上看,单双层序列的句子是一样的,只是双层序列将其又做了子序列划分。因此双层序列的配置中,必须将前一个子句的最后一个元素,作为boot_layer传给下一个子句的memory,才能保证和单层序列的配置中“每个时间步都用了上一个时间步的输出结果”一致。
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn.conf
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_rnn.conf
:language: python
:lines: 39-66
@@ -134,7 +134,7 @@
**输入不等长** 是指recurrent_group的多个输入序列,在每个时间步的子序列长度可以不相等。但序列输出时,需要指定与某一个输入的序列信息是一致的。使用\ :red:`targetInlink`\ 可以指定哪一个输入和输出序列信息一致,默认指定第一个输入。
-示例3的配置分别为\ `单层不等长RNN `_\ 和\ `双层不等长RNN `_\ 。
+示例3的配置分别为\ `单层不等长RNN `_\ 和\ `双层不等长RNN `_\ 。
示例3对于单层RNN和双层RNN数据完全相同。
@@ -152,14 +152,14 @@
* 单层RNN\:
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
:language: python
:lines: 42-59
:linenos:
* 双层RNN\ \:
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
:language: python
:lines: 41-80
:linenos:
diff --git a/doc/v2/howto/rnn/hrnn_rnn_api_compare_en.rst b/doc/v2/howto/rnn/hrnn_rnn_api_compare_en.rst
index ae997f0805db5b01a34867c9e8b188c931721920..a4485f7b5edf21871444801230ab1ee191b1137b 100644
--- a/doc/v2/howto/rnn/hrnn_rnn_api_compare_en.rst
+++ b/doc/v2/howto/rnn/hrnn_rnn_api_compare_en.rst
@@ -4,7 +4,7 @@
API comparision between RNN and hierarchical RNN
#####################
-This article takes PaddlePaddle's hierarchical RNN unit test as an example. We will use several examples to illestrate the usage of single-layer and hierarchical RNNs. Each example has two model configurations, one for single-layer, and the other for hierarchical RNN. Although the implementations are different, both the two model configurations' effects are the same. All of the examples in this article only describe the API interface of the hierarchical RNN, while we do not use this hierarchical RNN to solve practical problems. If you want to understand the use of hierarchical RNN in specific issues, please refer to \ :ref:`algo_hrnn_demo`\ 。The unit test file used in this article's example is \ `test_RecurrentGradientMachine.cpp `_\ 。
+This article takes PaddlePaddle's hierarchical RNN unit test as an example. We will use several examples to illustrate the usage of single-layer and hierarchical RNNs. Each example has two model configurations, one for the single-layer RNN and the other for the hierarchical RNN. Although the implementations differ, the two model configurations have the same effect. All of the examples in this article only describe the API interface of the hierarchical RNN; we do not use the hierarchical RNN to solve practical problems. If you want to understand the use of hierarchical RNNs in specific problems, please refer to \ :ref:`algo_hrnn_demo`\ . The unit test file used in this article's example is \ `test_RecurrentGradientMachine.cpp `_\ .
Example 1:Hierarchical RNN without Memory between subsequences
================================
@@ -13,8 +13,8 @@ The classical case in the hierarchical RNN is to perform sequence operations on
In this example, the network configuration of single-layer RNNs and hierarchical RNNs are all to use LSTM as en encoder to compress a word-segmented sentence into a vector. The difference is that, RNN uses a hierarchical RNN model, treating multiple sentences as a whole to use encoder to compress simultaneously. They are completely consistent in their semantic meanings. This pair of semantically identical example configurations is as follows:
-* RNN\: `sequence_layer_group.conf `_
-* Hierarchical RNN\: `sequence_nest_layer_group.conf `_
+* RNN\: `sequence_layer_group.conf `_
+* Hierarchical RNN\: `sequence_nest_layer_group.conf `_
Reading hierarchical sequence data
@@ -24,18 +24,18 @@ Firstly, the original data in this example is as follows \:
- The original data in this example has 10 samples. Each of the sample includes two components: a lable(all 2 here), and a word-segmented sentence. This data is used by single RNN as well.
-.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/Sequence/tour_train_wdseg
:language: text
- The data for hierarchical RNN has 4 samples. Every sample is seperated by a blank line, while the content of the data is the same as the original data. But as for hierarchical LSTM, the first sample will encode two sentences into two vectors simultaneously. The sentence count dealed simultaneously by this 4 samples are \ :code:`[2, 3, 2, 3]`\ .
-.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg.nest
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest
:language: text
-Secondly, as for these two types of different input data formats, the contrast of different DataProviders are as follows (`sequenceGen.py `_)\:
+Secondly, for these two different input data formats, the corresponding DataProviders compare as follows (`sequenceGen.py `_)\:
-.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequenceGen.py
:language: python
:lines: 21-39
:linenos:
@@ -47,7 +47,7 @@ Secondly, as for these two types of different input data formats, the contrast o
- "words" is a list of word table indices corresponding to each word in the sentence in the original data. Its data type is integer_value_sequence, that is integer list. So, "words" is a singler-layer time series in the data.
- "label" is the categorical label of each sentence, whose data type is integer_value.
-.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequenceGen.py
:language: python
:lines: 42-71
:linenos:
@@ -64,7 +64,7 @@ Model configuration
Firstly, let's look at the configuration of single-layer RNN. The hightlighted part of line 9 to line 15 is the usage of single-layer RNN. Here we use the pre-defined RNN process function in PaddlePaddle. In this function, for each time step, RNN passes through an LSTM network.
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_layer_group.conf
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_layer_group.conf
:language: python
:lines: 38-63
:linenos:
@@ -85,7 +85,7 @@ Secondly, let's look at the model configuration of hierarchical RNN which has th
* Till now, \ :code:`lstm_last`\ has the same result as \ :code:`lstm_last`\ in single-layer RNN configuration.
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_layer_group.conf
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_layer_group.conf
:language: python
:lines: 38-64
:linenos:
@@ -107,7 +107,7 @@ We select the different parts between single-layer RNN and hierarchical RNN conf
- single-layer RNN:passes through a simple recurrent_group. For each time step, the current input y and the last time step's output rnn_state pass through a fully-connected layer.
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn.conf
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_rnn.conf
:language: python
:lines: 36-48
@@ -116,7 +116,7 @@ We select the different parts between single-layer RNN and hierarchical RNN conf
- The recurrent_group of inner layer's inner_step is nearly the same as single-layer sequence, except for the case of boot_layer=outer_mem, which means using the outer layer's outer_mem as the initial state for the inner layer's memory. In the outer layer's out_step, outer_mem is the last vector of a subsequence, that is, the whole hierarchical group uses the last vector of the previous subsequence as the initial state for the next subsequence's memory.
- From the aspect of the input data, sentences from single-layer and hierarchical RNN are the same. The only difference is that, hierarchical RNN disassembes the sequence into subsequences. So in the hierarchical RNN configuration, we must use the last element of the previous subsequence as a boot_layer for the memory of the next subsequence, so that it makes no difference with "every time step uses the output of last time step" in the sigle-layer RNN configuration.
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn.conf
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_rnn.conf
:language: python
:lines: 39-66
@@ -134,7 +134,7 @@ Example 3:hierarchical RNN with unequal length inputs
**unequal length inputs** means in the multiple input sequences of recurrent_group, the lengths of subsequences can be unequal. But the output of the sequence, needs to be consistent with one of the input sequences. Using \ :red:`targetInlink`\ can help you specify which of the input sequences and the output sequence can be consistent, by default is the first input.
-The configurations of Example 3 are \ `sequence_rnn_multi_unequalength_inputs `_ \ and \ `sequence_nest_rnn_multi_unequalength_inputs `_\ .
+The configurations of Example 3 are \ `sequence_rnn_multi_unequalength_inputs `_ \ and \ `sequence_nest_rnn_multi_unequalength_inputs `_\ .
The data for the configurations of Example 3's single-layer RNN and hierarchical RNN are exactly the same.
@@ -152,14 +152,14 @@ Similar to Example 2's configuration, Example 3's configuration uses single-laye
* single-layer RNN\:
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
:language: python
:lines: 42-59
:linenos:
* hierarchical RNN\ \:
-.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
+.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
:language: python
:lines: 41-80
:linenos:
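To make the boot_layer mechanism described above concrete, here is a minimal, hedged sketch in the v2-era trainer-config style that these test configs use. It is not the shipped sequence_nest_rnn.conf; the layer names, `hidden_dim`, and the `outer_step`/`inner_step` helpers are illustrative assumptions.

```python
# Sketch only: assumes the pre-Fluid paddle.trainer_config_helpers API.
from paddle.trainer_config_helpers import *

hidden_dim = 8  # illustrative size

def outer_step(x):
    # Memory bound to the last vector of the previous subsequence.
    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(y):
        inner_mem = memory(name="inner_rnn_state",
                           size=hidden_dim,
                           boot_layer=outer_mem)  # the key line from the text
        return fc_layer(input=[y, inner_mem],
                        size=hidden_dim,
                        name="inner_rnn_state")

    inner_rnn = recurrent_group(step=inner_step, input=x)
    # Naming this layer "outer_rnn_state" is what feeds the subsequence's
    # last vector into the next subsequence's boot memory.
    last_seq(input=inner_rnn, name="outer_rnn_state")
    return inner_rnn
```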
diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go
index f17577997bc94b08f3e296c4d6e35682ca3c0e57..eba0c47e195a80fc298f0fdd78c8d6345e963be8 100644
--- a/go/pserver/optimizer.go
+++ b/go/pserver/optimizer.go
@@ -16,7 +16,7 @@ package pserver
// #cgo CFLAGS: -I ../../
// #cgo LDFLAGS: ${SRCDIR}/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
-// #include "paddle/optimizer/optimizer.h"
+// #include "paddle/legacy/optimizer/optimizer.h"
// #include
// #include
import "C"
diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index d722eec1892206ac44c49e7a12d92be0c54df8c0..6653244507742b33d9524a7a0e4a5b2b575d358a 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -1,24 +1,24 @@
if(NOT WITH_FLUID_ONLY)
- add_subdirectory(cuda)
- add_subdirectory(function)
- add_subdirectory(utils)
- add_subdirectory(math)
- add_subdirectory(gserver)
- add_subdirectory(parameter)
+ add_subdirectory(legacy/cuda)
+ add_subdirectory(legacy/function)
+ add_subdirectory(legacy/utils)
+ add_subdirectory(legacy/math)
+ add_subdirectory(legacy/gserver)
+ add_subdirectory(legacy/parameter)
if(MOBILE_INFERENCE)
- add_subdirectory(capi)
+ add_subdirectory(legacy/capi)
else()
- add_subdirectory(pserver)
- add_subdirectory(trainer)
+ add_subdirectory(legacy/pserver)
+ add_subdirectory(legacy/trainer)
add_subdirectory(scripts)
if(WITH_C_API)
- add_subdirectory(capi)
+ add_subdirectory(legacy/capi)
endif()
if(WITH_SWIG_PY)
- add_subdirectory(api)
+ add_subdirectory(legacy/api)
endif()
endif()
endif()
diff --git a/paddle/api/Arguments.cpp b/paddle/api/Arguments.cpp
deleted file mode 100644
index 62d6a574d55d2748635879a21cbbaa474f070cff..0000000000000000000000000000000000000000
--- a/paddle/api/Arguments.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "PaddleAPI.h"
-#include "PaddleAPIPrivate.h"
-
-#include "paddle/parameter/Argument.h"
-
-size_t Arguments::getSlotNum() const { return m->outputs.size(); }
-
-Arguments* Arguments::createArguments(size_t slotNum) {
- auto args = new Arguments();
- args->m->outputs.resize(slotNum);
- return args;
-}
-
-void Arguments::resize(size_t slotNum) { m->outputs.resize(slotNum); }
-
-Arguments::Arguments() : m(new ArgumentsPrivate()) {}
-
-Arguments::~Arguments() { delete m; }
-
-Arguments* Arguments::createByPaddleArgumentVector(void* ptr) {
-  auto p = (std::vector<paddle::Argument>*)(ptr);
- auto args = new Arguments();
- args->m->outputs = *p;
- return args;
-}
-
-Arguments* Arguments::createByPaddleArgument(const void* ptr) {
- auto p = (paddle::Argument*)(ptr);
- auto args = new Arguments();
- args->m->outputs.push_back(*p);
- return args;
-}
-
-Matrix* Arguments::getSlotValue(size_t idx) const throw(RangeError) {
- auto& a = m->getArg(idx);
- return Matrix::createByPaddleMatrixPtr(&a.value);
-}
-
-Matrix* Arguments::getSlotGrad(size_t idx) const throw(RangeError) {
- auto& a = m->getArg(idx);
- return Matrix::createByPaddleMatrixPtr(&a.grad);
-}
-
-IVector* Arguments::getSlotIds(size_t idx) const throw(RangeError) {
- auto& a = m->getArg(idx);
- return IVector::createByPaddleVectorPtr(&a.ids);
-}
-
-Matrix* Arguments::getSlotIn(size_t idx) const throw(RangeError) {
- auto& a = m->getArg(idx);
- return Matrix::createByPaddleMatrixPtr(&a.in);
-}
-
-void Arguments::setSlotValue(size_t idx, Matrix* mat) throw(RangeError) {
- auto& a = m->getArg(idx);
- a.value = m->cast(mat->getSharedPtr());
-}
-
-void Arguments::setSlotGrad(size_t idx, Matrix* mat) throw(RangeError) {
- auto& a = m->getArg(idx);
- a.grad = m->cast(mat->getSharedPtr());
-}
-
-void Arguments::setSlotIn(size_t idx, Matrix* mat) throw(RangeError) {
- auto& a = m->getArg(idx);
- a.in = m->cast(mat->getSharedPtr());
-}
-
-void Arguments::setSlotIds(size_t idx, IVector* vec) throw(RangeError) {
- auto& a = m->getArg(idx);
- auto& v = m->cast(vec->getSharedPtr());
- a.ids = v;
-}
-
-template <typename T>
-static inline void doCopyFromSafely(std::shared_ptr<T>& dest,
-                                    std::shared_ptr<T>& src) {
- if (src) {
- if (dest) {
- dest->copyFrom(*src);
- } else {
- dest = src;
- }
- }
-}
-
-IVector* Arguments::getSlotSequenceStartPositions(size_t idx) const
- throw(RangeError) {
- auto& a = m->getArg(idx);
- if (a.sequenceStartPositions) {
- return IVector::createByPaddleVectorPtr(
- &a.sequenceStartPositions->getMutableVector(false));
- } else {
- return nullptr;
- }
-}
-
-IVector* Arguments::getSlotSubSequenceStartPositions(size_t idx) const
- throw(RangeError) {
- auto& a = m->getArg(idx);
- if (a.subSequenceStartPositions) {
- return IVector::createByPaddleVectorPtr(
- &a.subSequenceStartPositions->getMutableVector(false));
- } else {
- return nullptr;
- }
-}
-
-void Arguments::setSlotSequenceStartPositions(size_t idx,
- IVector* vec) throw(RangeError) {
- auto& a = m->getArg(idx);
- auto& v = m->cast(vec->getSharedPtr());
-  a.sequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v);
-}
-
-void Arguments::setSlotSubSequenceStartPositions(
- size_t idx, IVector* vec) throw(RangeError) {
- auto& a = m->getArg(idx);
- auto& v = m->cast(vec->getSharedPtr());
-  a.subSequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v);
-}
-
-IVector* Arguments::getSlotSequenceDim(size_t idx) const throw(RangeError) {
- auto& a = m->getArg(idx);
- return IVector::createByPaddleVectorPtr(&a.cpuSequenceDims);
-}
-
-void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) {
- auto& a = m->getArg(idx);
- a.cpuSequenceDims = m->cast(vec->getSharedPtr());
-}
-
-float Arguments::sum() const { return paddle::Argument::sum(m->outputs); }
-
-int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) {
- auto& a = m->getArg(idx);
- return a.getBatchSize();
-}
-
-void Arguments::setSlotFrameHeight(size_t idx, size_t h) throw(RangeError) {
- auto& a = m->getArg(idx);
- a.setFrameHeight(h);
-}
-
-void Arguments::setSlotFrameWidth(size_t idx, size_t w) throw(RangeError) {
- auto& a = m->getArg(idx);
- a.setFrameWidth(w);
-}
-
-size_t Arguments::getSlotFrameHeight(size_t idx) const throw(RangeError) {
- auto& a = m->getArg(idx);
- return a.getFrameHeight();
-}
-
-size_t Arguments::getSlotFrameWidth(size_t idx) const throw(RangeError) {
- auto& a = m->getArg(idx);
- return a.getFrameWidth();
-}
-
-void* Arguments::getInternalArgumentsPtr() const { return &m->outputs; }
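Since Arguments.cpp is removed here without an in-tree replacement hunk, a short reminder of what the binding did may help reviewers. The sketch below is hedged Python against the old py_paddle SWIG module; the `swig_paddle` name and the method signatures follow the headers in this patch, while the flag strings and sizes are illustrative assumptions.

```python
# Hedged sketch of the pre-Fluid py_paddle API that this file implemented.
from py_paddle import swig_paddle

# Paddle.i maps (int argc, char** argv) to a Python list of strings;
# a dummy argv[0] mimics a C main() for the flag parser.
swig_paddle.initPaddle(["py_paddle", "--use_gpu=0"])

args = swig_paddle.Arguments.createArguments(1)         # one input slot
mat = swig_paddle.Matrix.createDense([0.1] * 8, 2, 4)   # 2x4, data copied in
args.setSlotValue(0, mat)                               # slot 0 holds the value

print(args.getSlotNum())     # -> 1
print(args.getBatchSize(0))  # -> 2 (height of the slot's value matrix)
```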
diff --git a/paddle/api/ConfigParser.cpp b/paddle/api/ConfigParser.cpp
deleted file mode 100644
index d362a1e7cf3c8cd05b8c85cfaf8dbbee8b827d4b..0000000000000000000000000000000000000000
--- a/paddle/api/ConfigParser.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "PaddleAPI.h"
-#include "PaddleAPIPrivate.h"
-#include "paddle/trainer/Trainer.h"
-
-struct ParameterConfigPrivate {
- paddle::ParameterPtr parameter;
- paddle::ParameterConfig config;
-
- inline paddle::ParameterConfig* getConfigPtr() {
- if (parameter != nullptr) {
- auto& conf = parameter->getConfig();
-      return const_cast<paddle::ParameterConfig*>(&conf);
- } else {
- return &config;
- }
- }
-};
-
-TrainerConfig::TrainerConfig() : m(new TrainerConfigPrivate()) {}
-
-TrainerConfig::~TrainerConfig() { delete m; }
-
-TrainerConfig* TrainerConfig::createFromTrainerConfigFile(
- const std::string& confPath) {
- LOG(INFO) << "load trainer config from " << confPath;
-  auto conf = std::make_shared<paddle::TrainerConfigHelper>(confPath);
- auto retv = new TrainerConfig();
- retv->m->conf = conf;
- return retv;
-}
-
-TrainerConfig* TrainerConfig::createFromProtoString(const std::string& str) {
- auto retv = new TrainerConfig();
- paddle::TrainerConfig trainerConfigProto;
-  auto conf = std::make_shared<paddle::TrainerConfigHelper>(trainerConfigProto);
- CHECK(conf->getMutableConfig().ParseFromString(str));
- retv->m->conf = conf;
- return retv;
-}
-
-ModelConfig::ModelConfig() : m(new ModelConfigPrivate()) {}
-
-ModelConfig::~ModelConfig() { delete m; }
-
-ModelConfig* TrainerConfig::getModelConfig() const {
- auto retv = new ModelConfig();
- retv->m->conf = m->conf;
- return retv;
-}
-
-ParameterConfig::ParameterConfig() : m(new ParameterConfigPrivate()) {}
-
-ParameterConfig::~ParameterConfig() { delete m; }
-
-ParameterConfig* ParameterConfig::createParameterConfigFromParameterSharedPtr(
- void* ptr) {
- auto& p = *(paddle::ParameterPtr*)(ptr);
- if (p != nullptr) {
- auto conf = new ParameterConfig();
- conf->m->parameter = p;
- return conf;
- } else {
- return nullptr;
- }
-}
-
-ParameterConfig* ParameterConfig::createParameterConfigFromParameterPtr(
- void* ptr) {
- auto& p = *(paddle::Parameter*)(ptr);
- auto conf = new ParameterConfig();
- conf->m->config = p.getConfig();
- return conf;
-}
-
-std::string ParameterConfig::toProtoString() const {
- return m->getConfigPtr()->SerializeAsString();
-}
-
-void* ParameterConfig::getRawPtr() { return m->getConfigPtr(); }
-
-OptimizationConfig::OptimizationConfig() : m(new OptimizationConfigPrivate()) {}
-
-OptimizationConfig::~OptimizationConfig() { delete m; }
-
-std::string OptimizationConfig::toProtoString() {
- return m->getConfig().SerializeAsString();
-}
-
-OptimizationConfig* TrainerConfig::getOptimizationConfig() const {
- auto opt_config = new OptimizationConfig();
- opt_config->m->trainer_config = m->conf;
- return opt_config;
-}
-
-OptimizationConfig* OptimizationConfig::createFromProtoString(
- const std::string& str) {
- auto conf = new OptimizationConfig();
- conf->m->config.ParseFromString(str);
- return conf;
-}
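For context on the removed ConfigParser.cpp: it backed the config-loading entry points of the SWIG API. A hedged Python sketch, assuming the old py_paddle module; the file name "trainer_config.conf" is a hypothetical path:

```python
from py_paddle import swig_paddle

swig_paddle.initPaddle(["py_paddle", "--use_gpu=0"])

# "trainer_config.conf" is a hypothetical v2-style config file path.
conf = swig_paddle.TrainerConfig.createFromTrainerConfigFile("trainer_config.conf")

model_conf = conf.getModelConfig()        # consumed by GradientMachine below
opt_conf = conf.getOptimizationConfig()   # serialized protobuf underneath
print(len(opt_conf.toProtoString()))      # opaque proto bytes
```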
diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
deleted file mode 100644
index 0d9ad30de9c1f3f8f58c856a748abdc050ff8740..0000000000000000000000000000000000000000
--- a/paddle/api/GradientMachine.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "PaddleAPI.h"
-#include "PaddleAPIPrivate.h"
-
-#include "Internal.h"
-#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
-
-std::vector<int> GradientMachine::defaultParamTypes = {
- PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM};
-
-GradientMachine::GradientMachine() : m(new GradientMachinePrivate()) {}
-
-GradientMachine::~GradientMachine() { delete m; }
-
-GradientMachine* GradientMachine::createFromPaddleModelPtr(
- const void* confPtr,
- GradientMatchineCreateMode mode,
-    const std::vector<int>& types) {
-  auto& conf = *(const paddle::ModelConfig*)(confPtr);
-  std::vector<paddle::ParameterType> realTypes;
-  staticCastVector(&realTypes, types);
-  auto machineRawPtr = paddle::GradientMachine::create(conf, mode, realTypes);
-  auto machinePtr = std::shared_ptr<paddle::GradientMachine>(machineRawPtr);
- if (machinePtr != nullptr) {
- auto machine = new GradientMachine();
- machine->m->machine = machinePtr;
- return machine;
- } else {
- return nullptr;
- }
-}
-
-GradientMachine* GradientMachine::createByConfigProtoStr(
- const std::string& protoStr,
- GradientMatchineCreateMode mode,
-    const std::vector<int>& types) {
- paddle::ModelConfig conf;
- conf.ParseFromString(protoStr);
- if (conf.IsInitialized()) {
- return GradientMachine::createFromPaddleModelPtr(&conf, mode, types);
- } else {
- return nullptr;
- }
-}
-
-GradientMachine* GradientMachine::createByModelConfig(
- ModelConfig* conf,
- GradientMatchineCreateMode mode,
-    const std::vector<int>& types) {
- auto confPtr = &conf->m->conf->getModelConfig();
- return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types);
-}
-
-void GradientMachine::start() { m->machine->start(); }
-
-void GradientMachine::finish() { m->machine->finish(); }
-
-void GradientMachine::onPassEnd() { m->machine->onPassEnd(); }
-
-void GradientMachine::prefetch(const Arguments& inArgs) {
-  auto& in =
-      m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
- m->machine->prefetch(in);
-}
-
-void GradientMachine::forward(const Arguments& inArgs,
- Arguments* outArgs,
- PassType passType) {
-  auto& in =
-      m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
-  auto& out = m->cast<std::vector<paddle::Argument>>(
-      outArgs->getInternalArgumentsPtr());
- paddle::PassType pt = (paddle::PassType)(passType);
- m->machine->forward(in, &out, pt);
-}
-
-UpdateCallback::~UpdateCallback() {}
-
-void UpdateCallback::apply(Parameter* p) {
- // UNUSED(p);
-}
-
-class UpdateCallbackWrapper {
- public:
- explicit UpdateCallbackWrapper(const UpdateCallback& callback)
-      : callback(const_cast<UpdateCallback&>(callback)) {}
-
- void operator()(paddle::Parameter* param) {
- auto p = Parameter::createFromRawPtr(¶m);
- // @TODO Use Stack variable instead.
- callback.apply(p);
- delete p;
- }
-
- private:
- UpdateCallback& callback;
-};
-
-void GradientMachine::backward(const UpdateCallback& callback) {
- m->machine->backward(UpdateCallbackWrapper(callback));
-}
-
-void GradientMachine::forwardBackward(const Arguments& inArgs,
- Arguments* outArgs,
- PassType passType,
- const UpdateCallback& callback) {
-  auto& in =
-      m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
-  auto& out = m->cast<std::vector<paddle::Argument>>(
-      outArgs->getInternalArgumentsPtr());
- paddle::PassType pt = (paddle::PassType)(passType);
- m->machine->forwardBackward(in, &out, pt, UpdateCallbackWrapper(callback));
-}
-
-void GradientMachine::loadParameters(const std::string& path) {
- m->machine->loadParameters(path);
-}
-
-size_t GradientMachine::getParameterSize() const {
- return m->machine->getParameters().size();
-}
-
-Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
- auto params = m->machine->getParameters();
- if (i < params.size()) {
- return Parameter::createFromSharedPtr(&m->machine->getParameters()[i]);
- } else {
- throw RangeError();
- }
-}
-
-size_t GradientMachine::getNonStaticParameterSize() const {
- return m->machine->getNonStaticParameters().size();
-}
-
-Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) {
- auto params = m->machine->getNonStaticParameters();
- if (i < params.size()) {
- return Parameter::createFromSharedPtr(
- &m->machine->getNonStaticParameters()[i]);
- } else {
- throw RangeError();
- }
-}
-
-void GradientMachine::randParameters() { m->machine->randParameters(); }
-
-Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
- throw(UnsupportError) {
- auto nn = m->machine;
- if (nn) {
- auto arg = nn->getLayerOutput(layerName);
- return Arguments::createByPaddleArgument(&arg);
- } else {
- throw UnsupportError();
- }
-}
-
-SequenceGenerator* GradientMachine::asSequenceGenerator(
-    const std::vector<std::string>& dict,
- size_t begin_id,
- size_t end_id,
- size_t max_length,
- size_t beam_size) {
- SequenceGenerator* r =
- SequenceGenerator::createByGradientMachineSharedPtr(&m->machine);
- r->setDict(dict);
- r->setBos(begin_id);
- r->setEos(end_id);
- r->setMaxLength(max_length);
- r->setBeamSize(beam_size);
- return r;
-}
-
-Evaluator* GradientMachine::makeEvaluator() {
- auto ev = new Evaluator();
- ev->m->rawPtr = m->machine->makeEvaluator();
- return ev;
-}
-
-void GradientMachine::eval(Evaluator* evaluator) {
- m->machine->eval(evaluator->m->rawPtr);
-}
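As with the other deleted bindings, here is a hedged Python sketch of how GradientMachine was driven from py_paddle. It reuses `model_conf` and `mat` from the sketches above; `PASS_TEST` comes from the enumeration wrapper the header imports.

```python
machine = swig_paddle.GradientMachine.createByModelConfig(
    model_conf, swig_paddle.CREATE_MODE_TESTING)
machine.randParameters()                 # or machine.loadParameters(path)

in_args = swig_paddle.Arguments.createArguments(1)
in_args.setSlotValue(0, mat)             # `mat` as in the Arguments sketch
out_args = swig_paddle.Arguments.createArguments(0)  # may be zero-length

machine.forward(in_args, out_args, swig_paddle.PASS_TEST)
prediction = out_args.getSlotValue(0)    # a swig_paddle.Matrix
```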
diff --git a/paddle/api/Matrix.cpp b/paddle/api/Matrix.cpp
deleted file mode 100644
index 8282b4629dc08a7fcd9b52cbc3492ac10d8ed55c..0000000000000000000000000000000000000000
--- a/paddle/api/Matrix.cpp
+++ /dev/null
@@ -1,317 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/math/Matrix.h"
-#include <cstring>
-#include <iostream>
-#include "PaddleAPI.h"
-#include "paddle/math/CpuSparseMatrix.h"
-#include "paddle/math/SparseMatrix.h"
-
-struct MatrixPrivate {
-  std::shared_ptr<paddle::Matrix> mat;
-};
-
-Matrix::Matrix() : m(new MatrixPrivate()) {}
-
-Matrix* Matrix::createByPaddleMatrixPtr(void* sharedPtr) {
-  auto* mat = reinterpret_cast<paddle::MatrixPtr*>(sharedPtr);
- if ((*mat) != nullptr) {
- auto m = new Matrix();
- m->m->mat = *mat;
- return m;
- } else {
- return nullptr;
- }
-}
-
-Matrix* Matrix::createZero(size_t height, size_t width, bool useGpu) {
- auto m = new Matrix();
- m->m->mat = paddle::Matrix::create(height, width, useGpu);
- m->m->mat->zero();
- return m;
-}
-
-Matrix* Matrix::createDense(const std::vector& data,
- size_t height,
- size_t width,
- bool useGpu) {
- auto m = new Matrix();
- m->m->mat = paddle::Matrix::create(height, width, useGpu);
- m->m->mat->copyFrom(data.data(), data.size());
- return m;
-}
-
-Matrix* Matrix::createDenseFromNumpy(float* data,
- int dim1,
- int dim2,
- bool copy,
- bool useGpu) throw(UnsupportError) {
- if (useGpu) {
- /// Gpu mode only supports copy=True
- if (!copy) {
- throw UnsupportError("Gpu mode only supports copy=True");
- }
- return Matrix::createGpuDenseFromNumpy(data, dim1, dim2);
- } else {
- return Matrix::createCpuDenseFromNumpy(data, dim1, dim2, copy);
- }
-}
-
-Matrix* Matrix::createCpuDenseFromNumpy(float* data,
- int dim1,
- int dim2,
- bool copy) {
- auto m = new Matrix();
- if (copy) {
- m->m->mat = paddle::Matrix::create(dim1, dim2);
- m->m->mat->copyFrom(data, dim1 * dim2);
- } else {
- m->m->mat = paddle::Matrix::create(data, dim1, dim2, false);
- }
- return m;
-}
-
-Matrix* Matrix::createGpuDenseFromNumpy(float* data, int dim1, int dim2) {
- auto m = new Matrix();
- m->m->mat = paddle::Matrix::create(dim1, dim2, false, true);
- m->m->mat->copyFrom(data, dim1 * dim2);
- return m;
-}
-
-Matrix* Matrix::createSparse(size_t height,
- size_t width,
- size_t nnz,
- bool isNonVal,
- bool isTrans,
- bool useGpu) {
- auto m = new Matrix();
- m->m->mat = paddle::Matrix::createSparseMatrix(
- height,
- width,
- nnz,
- isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE,
- isTrans,
- useGpu);
- return m;
-}
-
-Matrix::~Matrix() { delete m; }
-
-size_t Matrix::getHeight() const { return m->mat->getHeight(); }
-
-size_t Matrix::getWidth() const { return m->mat->getWidth(); }
-
-float Matrix::get(size_t x, size_t y) const throw(RangeError) {
- if (x > this->getWidth() || y > this->getHeight()) {
- RangeError e;
- throw e;
- }
- return m->mat->getElement(x, y);
-}
-
-void Matrix::set(size_t x, size_t y, float val) throw(RangeError,
- UnsupportError) {
- if (x > this->getWidth() || y > this->getHeight()) {
- RangeError e;
- throw e;
- }
- auto rawMat = m->mat.get();
-  if (auto cDenseMat = dynamic_cast<paddle::CpuMatrix*>(rawMat)) {
- *(cDenseMat->getData() + x + y * cDenseMat->getWidth()) = val;
- } else {
- UnsupportError e;
- throw e;
- }
-}
-
-bool Matrix::isSparse() const {
- auto raw_mat = m->mat.get();
-  return dynamic_cast<paddle::CpuSparseMatrix*>(raw_mat) != nullptr ||
-         dynamic_cast<paddle::GpuSparseMatrix*>(raw_mat) != nullptr;
-}
-
-SparseValueType Matrix::getSparseValueType() const throw(UnsupportError) {
-  auto cpuSparseMat =
-      std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat);
- if (cpuSparseMat != nullptr) {
- return (SparseValueType)cpuSparseMat->getValueType();
- } else {
-    auto gpuSparseMat =
-        std::dynamic_pointer_cast<paddle::GpuSparseMatrix>(m->mat);
- if (gpuSparseMat != nullptr) {
- return (SparseValueType)gpuSparseMat->getValueType();
- } else {
- UnsupportError e;
- throw e;
- }
- }
-}
-
-SparseFormatType Matrix::getSparseFormat() const throw(UnsupportError) {
-  auto cpuSparseMat =
-      std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat);
- if (cpuSparseMat != nullptr) {
- return (SparseFormatType)cpuSparseMat->getFormat();
- } else {
-    auto gpuSparseMat =
-        std::dynamic_pointer_cast<paddle::GpuSparseMatrix>(m->mat);
- if (gpuSparseMat != nullptr) {
- return SPARSE_CSR;
- } else {
- UnsupportError e;
- throw e;
- }
- }
-}
-
-IntArray Matrix::getSparseRowCols(size_t i) const
- throw(UnsupportError, RangeError) {
-  auto cpuSparseMat =
-      std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat);
- if (cpuSparseMat != nullptr &&
- cpuSparseMat->getFormat() == paddle::SPARSE_CSR) {
- if (i < cpuSparseMat->getHeight()) {
- // cpuSparseMat->print(std::cout);
- size_t len = cpuSparseMat->getColNum(i);
- return IntArray(cpuSparseMat->getRowCols(i), len);
- } else {
- RangeError e;
- throw e;
- }
- } else {
- UnsupportError e;
- throw e;
- }
-}
-
-IntWithFloatArray Matrix::getSparseRowColsVal(size_t i) const
- throw(UnsupportError, RangeError) {
-  auto cpuSparseMat =
-      std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat);
- if (cpuSparseMat != nullptr &&
- cpuSparseMat->getValueType() == paddle::FLOAT_VALUE) {
- if (i < cpuSparseMat->getHeight()) {
- return IntWithFloatArray(cpuSparseMat->getRowValues(i),
- cpuSparseMat->getRowCols(i),
- cpuSparseMat->getColNum(i));
- } else {
- RangeError e;
- throw e;
- }
- } else {
- UnsupportError e;
- throw e;
- }
-}
-
-FloatArray Matrix::getData() const {
- auto rawMat = m->mat.get();
-  if (dynamic_cast<paddle::GpuMemoryHandle*>(rawMat->getMemoryHandle().get())) {
- // is gpu. then copy data
- float* data = rawMat->getData();
- size_t len = rawMat->getElementCnt();
- float* cpuData = new float[len];
- hl_memcpy_device2host(cpuData, data, len * sizeof(float));
- FloatArray ret_val(cpuData, len);
- ret_val.needFree = true;
- return ret_val;
- } else {
- FloatArray ret_val(rawMat->getData(), rawMat->getElementCnt());
- return ret_val;
- }
-}
-
-void Matrix::sparseCopyFrom(
-    const std::vector<int>& rows,
-    const std::vector<int>& cols,
-    const std::vector<float>& vals) throw(UnsupportError) {
-  auto cpuSparseMat =
-      std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat);
-  if (cpuSparseMat != nullptr) {
-    cpuSparseMat->copyFrom(const_cast<std::vector<int>&>(rows),
-                           const_cast<std::vector<int>&>(cols),
-                           const_cast<std::vector<float>&>(vals));
-  } else {
-    UnsupportError e;
-    throw e;
-  }
-}
-
-void Matrix::copyToNumpyMat(float** view_m_data,
-                            int* dim1,
-                            int* dim2) throw(UnsupportError) {
-  if (this->isSparse()) {
-    throw UnsupportError();
-  } else {
- *dim1 = m->mat->getHeight();
- *dim2 = m->mat->getWidth();
- *view_m_data = new float[(*dim1) * (*dim2)];
-    if (auto cpuMat = dynamic_cast<paddle::CpuMatrix*>(m->mat.get())) {
- auto src = cpuMat->getData();
- auto dest = *view_m_data;
- std::memcpy(dest, src, sizeof(paddle::real) * (*dim1) * (*dim2));
-    } else if (auto gpuMat = dynamic_cast<paddle::GpuMatrix*>(m->mat.get())) {
- auto src = gpuMat->getData();
- auto dest = *view_m_data;
- hl_memcpy_device2host(
- dest, src, sizeof(paddle::real) * (*dim1) * (*dim2));
- } else {
- LOG(WARNING) << "Unexpected Situation";
- throw UnsupportError();
- }
- }
-}
-
-void Matrix::copyFromNumpyMat(float* data,
- int dim1,
- int dim2) throw(UnsupportError, RangeError) {
- if (isSparse()) {
- throw UnsupportError();
- } else {
- if (this->getHeight() == (size_t)dim1 && this->getWidth() == (size_t)dim2) {
- if (m->mat->getData() != data) {
- m->mat->copyFrom(data, dim1 * dim2);
- }
- } else {
- throw RangeError();
- }
- }
-}
-
-bool Matrix::isGpu() const {
- auto rawPtr = m->mat.get();
-  return dynamic_cast<paddle::GpuMatrix*>(rawPtr) != nullptr ||
-         dynamic_cast<paddle::GpuSparseMatrix*>(rawPtr) != nullptr;
-}
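A hedged Python sketch of the Matrix surface this file provided, including the numpy typemaps declared in Paddle.i. Index values and array contents are illustrative.

```python
import numpy as np
from py_paddle import swig_paddle

swig_paddle.initPaddle(["py_paddle", "--use_gpu=0"])

m = swig_paddle.Matrix.createZero(3, 4)   # CPU dense, zero-filled
m.set(1, 1, 0.5)                          # only CPU dense supports set()
print(m.get(1, 1))                        # -> 0.5

# numpy.i collapses (float* data, int dim1, int dim2) into one array arg,
# so only the numpy array and the copy flag are passed from Python.
arr = np.ones((3, 4), dtype="float32")
m2 = swig_paddle.Matrix.createCpuDenseFromNumpy(arr, True)  # copy=True
print(m2.getHeight(), m2.getWidth())      # -> 3 4
```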
diff --git a/paddle/api/Paddle.i b/paddle/api/Paddle.i
deleted file mode 100644
index 3237e73745dca58bed923b20851f0f0039a3487c..0000000000000000000000000000000000000000
--- a/paddle/api/Paddle.i
+++ /dev/null
@@ -1,202 +0,0 @@
-%module(directors="1") swig_paddle
-%include "std_string.i"
-%{
-#define SWIG_FILE_WITH_INIT
-#include "api/PaddleAPI.h"
-%}
-
-%include "exception.i"
-%typemap(throws) UnsupportError %{
- SWIG_exception(SWIG_RuntimeError, $1.what());
- SWIG_fail;
-%}
-
-%include "std_vector.i"
-%include "std_pair.i"
-#ifdef SWIGPYTHON
-%include "numpy.i"
-#endif
-
-%init %{
-#ifdef SWIGPYTHON
-import_array();
-#endif
-%}
-
-
-namespace std {
-%template(vector_int) vector<int>;
-%template(vector_uint) vector<unsigned int>;
-%template(vector_float) vector<float>;
-%template(vector_string) vector<string>;
-%template(vector_vec_star) vector<Vector*>;
-}
-#ifdef SWIGPYTHON
-%typemap(in) (int argc, char** argv) {
- int i = 0;
- if (!PyList_Check($input)) {
- PyErr_SetString(PyExc_ValueError, "Expecting a list");
- return NULL;
- }
- $1 = PyList_Size($input);
- $2 = (char **) malloc(($1+1)*sizeof(char *));
- for (i = 0; i < $1; i++) {
- PyObject *s = PyList_GetItem($input,i);
- if (!PyString_Check(s)) {
- free($2);
- PyErr_SetString(PyExc_ValueError, "List items must be strings");
- return NULL;
- }
- $2[i] = PyString_AsString(s);
- }
- $2[i] = 0;
-}
-%typemap(freearg) (int argc, char** argv) {
- if ($2) free($2);
-}
-
-%typemap(out) FloatArray {
- $result = PyList_New($1.length);
- for (size_t i=0; i<$1.length; ++i) {
- PyList_SetItem($result, i, PyFloat_FromDouble($1.buf[i]));
- }
- if($1.needFree) {
- delete [] $1.buf;
- }
-}
-
-%typemap(out) IntArray {
- $result = PyList_New($1.length);
- for (size_t i=0; i<$1.length; ++i) {
- PyList_SetItem($result, i, PyInt_FromLong($1.buf[i]));
- }
- if ($1.needFree) {
- delete [] $1.buf;
- }
-}
-
-%typemap(out) IntWithFloatArray {
- $result = PyList_New($1.length);
- for (size_t i=0; i<$1.length; ++i) {
- PyList_SetItem($result, i, PyTuple_Pack(2,
- PyInt_FromLong($1.idxBuf[i]),
- PyFloat_FromDouble($1.valBuf[i])
- ));
- }
- if ($1.needFree) {
- delete [] $1.idxBuf;
- delete [] $1.valBuf;
- }
-}
-
-
-%rename(__getitem__) IVector::get;
-%rename(__setitem__) IVector::set;
-%rename(__len__) IVector::getSize;
-%rename(__getitem__) Vector::get;
-%rename(__setitem__) Vector::set;
-%rename(__len__) Vector::getSize;
-%rename(__len__) Parameter::getSize;
-%rename(__call__) ParameterTraverseCallback::apply;
-%rename(__repr__) Evaluator::toString;
-
-%apply (float* INPLACE_ARRAY2, int DIM1, int DIM2) {
- (float* data, int dim1, int dim2)
-}
-
-%apply (float** ARGOUTVIEW_ARRAY2, int* DIM1, int* DIM2) {
- (float** view_data, int* dim1, int* dim2)
-}
-
-%apply (float** ARGOUTVIEWM_ARRAY2, int* DIM1, int* DIM2) {
- (float** view_m_data, int* dim1, int* dim2)
-}
-
-%apply (int** ARGOUTVIEWM_ARRAY1, int* DIM1) {
- (int** view_m_data, int* dim1)
-}
-
-%apply (int* INPLACE_ARRAY1, int DIM1) {
- (int* data, int dim)
-}
-
-%apply (int** ARGOUTVIEW_ARRAY1, int* DIM1) {
- (int** view_data, int* dim1)
-}
-
-%apply (float* INPLACE_ARRAY1, int DIM1) {
- (float* data, int dim)
-}
-
-%apply (float** ARGOUTVIEW_ARRAY1, int* DIM1) {
- (float** view_data, int* dim1)
-}
-
-%apply (float** ARGOUTVIEWM_ARRAY1, int* DIM1) {
- (float** view_m_data, int* dim1)
-}
-
-#endif
-// The functions below internally create objects by "new", so SWIG should
-// handle GC for them. The following are hints for SWIG's GC handling.
-%newobject Matrix::createZero;
-%newobject Matrix::createSparse;
-%newobject Matrix::createDense;
-%newobject Matrix::createDenseFromNumpy;
-%newobject Matrix::createCpuDenseFromNumpy;
-%newobject Matrix::createGpuDenseFromNumpy;
-%newobject Vector::createZero;
-%newobject Vector::create;
-%newobject Vector::createVectorFromNumpy;
-%newobject Vector::createCpuVectorFromNumpy;
-%newobject Vector::createGpuVectorFromNumpy;
-%newobject IVector::createZero;
-%newobject IVector::create;
-%newobject IVector::createVectorFromNumpy;
-%newobject IVector::createCpuVectorFromNumpy;
-%newobject IVector::createGpuVectorFromNumpy;
-%newobject Trainer::createByCommandLine;
-%newobject Trainer::getForwardOutput;
-%newobject Trainer::getLayerOutput;
-%newobject Arguments::getSlotValue;
-%newobject Arguments::getSlotIds;
-%newobject Arguments::getSlotIn;
-%newobject Arguments::getSlotSequenceStartPositions;
-%newobject Arguments::getSlotSequenceDim;
-%newobject Arguments::createArguments;
-%newobject GradientMachine::createByConfigProtoStr;
-%newobject GradientMachine::createByModelConfig;
-%newobject GradientMachine::asSequenceGenerator;
-%newobject GradientMachine::getParameter;
-%newobject GradientMachine::getLayerOutput;
-%newobject GradientMachine::makeEvaluator;
-%newobject TrainerConfig::createFromTrainerConfigFile;
-%newobject TrainerConfig::getModelConfig;
-%newobject TrainerConfig::getOptimizationConfig;
-%newobject Parameter::getBuf;
-%newobject Parameter::getConfig;
-%newobject ParameterOptimizer::create;
-%newobject ParameterOptimizer::needSpecialTraversal;
-%newobject ParameterUpdater::createLocalUpdater;
-%newobject ParameterUpdater::createRemoteUpdater;
-%newobject ParameterUpdater::createNewRemoteUpdater;
-
-%feature("director") UpdateCallback;
-%feature("autodoc", 1); // To generate method stub, for code hint in ide
-
-// Ignore many private class, and method cannot be handled by swig.
-%ignore MatrixPrivate;
-%ignore TrainerPrivate;
-%ignore IVector::operator[];
-%ignore ArgumentsPrivate;
-%ignore GradientMachinePrivate;
-%ignore TrainerConfigPrivate;
-%ignore ModelConfigPrivate;
-%ignore ParameterPrivate;
-%ignore SequenceGeneratorPrivate;
-%ignore VectorPrivate;
-%ignore ParameterConfigPrivate;
-%ignore OptimizationConfigPrivate;
-%ignore ParameterTraverseCallbackPrivate;
-%include "utils/GlobalConstants.h"
-%include "api/PaddleAPI.h"
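The %feature("director") line above is what allowed Python subclasses to override the C++ UpdateCallback virtual, as the header's own doc comment shows. A hedged sketch (`machine` comes from the GradientMachine sketch earlier in this patch):

```python
from py_paddle import swig_paddle

# %feature("director") lets a Python subclass override the C++ virtual.
class PrintingCallback(swig_paddle.UpdateCallback):
    def __init__(self):
        swig_paddle.UpdateCallback.__init__(self)

    def apply(self, param):
        assert isinstance(param, swig_paddle.Parameter)
        print("updated parameter:", param.getName())

machine.backward(PrintingCallback())
```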
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
deleted file mode 100644
index 7866122006a996cbe5201c661cab9c81aa82a219..0000000000000000000000000000000000000000
--- a/paddle/api/PaddleAPI.h
+++ /dev/null
@@ -1,1054 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <stddef.h>
-#include <stdint.h>
-#include <stdexcept>
-#include <string>
-#include <vector>
-#include "paddle/gserver/gradientmachines/GradientMachine.h"
-#include "paddle/utils/Common.h"
-#include "paddle/utils/GlobalConstants.h"
-
-/// Import PaddlePaddle's enumeration into global namespace.
-using namespace paddle::enumeration_wrapper; // NOLINT
-
-/**
- * @brief Initialize paddle.
- *
- * In python, this method should be invoked as
- * @code
- * import sys
- * import paddle
- * paddle.initPaddle(sys.argv)
- * or you can change arguments as any list of str.
- * @endcode
- */
-void initPaddle(int argc, char** argv);
-
-/// Return FLAGS_use_gpu
-bool isUsingGpu();
-
-/// Set the Flags_use_gpu to the given parameter
-void setUseGpu(bool useGpu);
-
-/// Return true if this py_paddle is compiled in GPU Version
-bool isGpuVersion();
-
-/// Return FLAGS_trainer_count
-int getTrainerCount();
-
-/// The Error of IO Operation. Such as file not found, etc.
-class IOError {};
-
-/// Out of range error
-class RangeError {};
-
-/// Not support Error, such as access GPU memory directly, etc.
-class UnsupportError : public std::runtime_error {
- public:
- UnsupportError() : std::runtime_error(" ") {}
- explicit UnsupportError(const std::string& message)
- : std::runtime_error(message) {}
-};
-
-/// This type will map to python's list of float.
-struct FloatArray {
- const float* buf;
- const size_t length;
- bool needFree; // true if the buf is dynamic alloced.
- FloatArray(const float* b, const size_t l);
-};
-
-/// This type will map to python's list of int
-struct IntArray {
- const int* buf;
- const size_t length;
- bool needFree;
- IntArray(const int* b, const size_t l, bool f = false);
-};
-
-/// This type will map to python's list of (int, float)
-struct IntWithFloatArray {
- const float* valBuf;
- const int* idxBuf;
- const size_t length;
- bool needFree;
- IntWithFloatArray(const float* v, const int* i, size_t l, bool f = false);
-};
-
-enum SparseValueType { SPARSE_NON_VALUE = 0, SPARSE_VALUE = 1 };
-
-enum SparseFormatType { SPARSE_CSR = 0, SPARSE_CSC = 1 };
-
-/**
- * In Python, -1UL is hard to write. So define a const value used by python
- * side.
- */
-const size_t NO_SPARSE_ID = -1UL;
-
-struct MatrixPrivate;
-class Matrix {
- Matrix(); // User Cannot Create Matrix.
- DISABLE_COPY(Matrix);
- static Matrix* createByPaddleMatrixPtr(void* sharedPtr);
-
- public:
- virtual ~Matrix();
-
- /**
- * Create A Matrix with height,width, which is filled by zero.
- */
- static Matrix* createZero(size_t height,
- size_t width,
- bool useGpu = isUsingGpu());
-
- /**
- * Create Sparse Matrix.
- *
-   * After creating the sparse matrix, sparseCopyFrom can be used to fill it.
- *
- * @param nnz Number of non zero values.
- *
- * @note the default sparse type is SPARSE_CSR.
- */
- static Matrix* createSparse(size_t height,
- size_t width,
- size_t nnz,
- bool isNonVal = true,
- bool trans = false,
- bool useGpu = isUsingGpu());
-
- /**
- * Create Dense Matrix.
- *
- * @param data list of float should be passed in python.
- * @note the value will be copy into a new matrix.
- */
-  static Matrix* createDense(const std::vector<float>& data,
- size_t height,
- size_t width,
- bool useGpu = isUsingGpu());
-
- static Matrix* createDenseFromNumpy(
- float* data,
- int dim1,
- int dim2,
- bool copy = true,
- bool useGpu = isUsingGpu()) throw(UnsupportError);
-
- /**
- * Create Cpu Dense Matrix from numpy matrix, dtype=float32
- *
- * @param data a numpy matrix.
- * @param dim1 dimension of data.
- * @param dim2 dimension of data.
- * @param copy true if copy into a new matrix, false will create
- * matrix inplace. copy = false should be used with extreme
- * care because Matrix will share the memory with the given
- * numpy array. If the numpy array object is no longer valid,
- * the memory space will not be usable.
- */
- static Matrix* createCpuDenseFromNumpy(float* data,
- int dim1,
- int dim2,
- bool copy = true);
-
- /// Create Gpu Dense Matrix from numpy matrix, dtype=float32
- static Matrix* createGpuDenseFromNumpy(float* data, int dim1, int dim2);
-
- /**
- * Cast to numpy matrix.
- *
-   * @note This method takes no parameters in Python.
- * @note This method in python will return a numpy matrix, not void.
- * @note Only CpuDenseMatrix is supported.
- *
- * Example:
- * @code
- * import paddle
- * m = paddle.Matrix.createZero(10,2)
- * numpy_mat = m.toNumpyMat()
- * @endcode
- */
- void toNumpyMatInplace(float** view_data,
- int* dim1,
- int* dim2) throw(UnsupportError);
-
- /// Copy To numpy mat.
- void copyToNumpyMat(float** view_m_data,
- int* dim1,
- int* dim2) throw(UnsupportError);
-
- /// Copy From Numpy Mat
- void copyFromNumpyMat(float* data, int dim1, int dim2) throw(UnsupportError,
- RangeError);
-
- /// return true if this matrix is sparse.
- bool isSparse() const;
-
- SparseValueType getSparseValueType() const throw(UnsupportError);
-
- SparseFormatType getSparseFormat() const throw(UnsupportError);
-
- IntArray getSparseRowCols(size_t i) const throw(UnsupportError, RangeError);
-
- IntWithFloatArray getSparseRowColsVal(size_t i) const
- throw(UnsupportError, RangeError);
-
- size_t getHeight() const;
-
- size_t getWidth() const;
-
- float get(size_t x, size_t y) const throw(RangeError);
-
- void set(size_t x, size_t y, float val) throw(RangeError, UnsupportError);
-
- /// return type is list of float
- FloatArray getData() const;
-
- /**
- * Copy from rows, cols, values.
- *
- * if sparse_nonvalue, the values should be []
- */
-  void sparseCopyFrom(const std::vector<int>& rows,
-                      const std::vector<int>& cols,
-                      const std::vector<float>& values =
-                          std::vector<float>()) throw(UnsupportError);
-
- bool isGpu() const;
-
- private:
- void* getSharedPtr() const;
-
- MatrixPrivate* m;
- friend class Trainer;
- friend class GradientMachine;
- friend class Arguments;
-};
-
-struct VectorPrivate;
-class Vector {
- DISABLE_COPY(Vector);
- Vector();
- static Vector* createByPaddleVectorPtr(void* ptr);
-
- void* getSharedPtr();
-
- public:
- ~Vector();
-
- /// Create Vector filled with zero.
- static Vector* createZero(size_t sz, bool useGpu = isUsingGpu());
-
- /**
- * Create Vector from list of float.
- *
- * It will create a new vector, and copy data into it.
- */
-  static Vector* create(const std::vector<float>& data,
- bool useGpu = isUsingGpu());
-
- static Vector* createVectorFromNumpy(
- float* data,
- int dim,
- bool copy = true,
- bool useGpu = isUsingGpu()) throw(UnsupportError);
- /**
- * Create Cpu Vector from numpy array, which dtype=float32
- *
- * If copy is false, it will create vector inplace.
- */
- static Vector* createCpuVectorFromNumpy(float* data,
- int dim,
- bool copy = true);
-
- /// Create Gpu Vector from numpy array, which dtype=float32
- static Vector* createGpuVectorFromNumpy(float* data, int dim);
-
- /**
- * copy from another vector
- * throw(RangeError) if size of src vector is different from size of this
- * vector
- */
- void copyFrom(Vector* src) throw(RangeError);
-
- /// Cast to numpy array inplace.
- void toNumpyArrayInplace(float** view_data, int* dim1) throw(UnsupportError);
-
- /// Copy to numpy array.
- void copyToNumpyArray(float** view_m_data, int* dim1);
-
- /// Copy from numpy array.
- void copyFromNumpyArray(float* data, int dim);
-
- /// __getitem__ in python
- float get(const size_t idx) const throw(RangeError, UnsupportError);
-
- /// __setitem__ in python
- void set(const size_t idx, float val) throw(RangeError, UnsupportError);
-
- /// Return is GPU vector or not.
- bool isGpu() const;
-
- /// Return a list of float, the memory is alloced and copied.
- FloatArray getData() const;
-
- /// __len__ in python
- size_t getSize() const;
-
- private:
- VectorPrivate* m;
-
- private:
- friend class Parameter;
- friend class ParameterOptimizer;
- friend struct ParameterTraverseCallbackPrivate;
-};
-
-struct IVectorPrivate;
-class IVector {
- IVector();
- DISABLE_COPY(IVector);
- static IVector* createByPaddleVectorPtr(void* ptr);
-
- public:
- /// Create IVector filled with zero
- static IVector* createZero(size_t sz, bool useGpu = isUsingGpu());
-
- /**
- * Create IVector from list of int.
- * It will create a new vector, and copy data into it.
- */
-  static IVector* create(const std::vector<int>& data,
- bool useGpu = isUsingGpu());
-
- static IVector* createVectorFromNumpy(
- int* data,
- int dim,
- bool copy = true,
- bool useGpu = isUsingGpu()) throw(UnsupportError);
-
- /**
- * Create Cpu IVector from numpy array, which dtype=int32
- *
- * If copy is false, it will create vector inplace
- */
- static IVector* createCpuVectorFromNumpy(int* data,
- int dim,
- bool copy = true);
- /**
- * Create Gpu IVector from numpy array, which dtype=int32
- */
- static IVector* createGpuVectorFromNumpy(int* data, int dim);
-
- /// Cast to numpy array inplace.
- void toNumpyArrayInplace(int** view_data, int* dim1) throw(UnsupportError);
-
- /// Copy to numpy array.
- void copyToNumpyArray(int** view_m_data, int* dim1);
-
- /// Copy from numpy array.
- void copyFromNumpyArray(int* data, int dim);
-
- virtual ~IVector();
-
- /// Return a list of int, the memory is alloced and copied.
- IntArray getData() const;
-
- /// This method will map to python [] method.
- int& operator[](const size_t idx) throw(RangeError, UnsupportError);
-
- const int& operator[](const size_t idx) const
- throw(RangeError, UnsupportError);
-
- inline int get(const size_t idx) const throw(RangeError, UnsupportError) {
- return (*this)[idx];
- }
-
- inline void set(const size_t idx, int val) throw(RangeError, UnsupportError) {
- (*this)[idx] = val;
- }
-
- /// Return true if it is gpu vector.
- bool isGpu() const;
-
- /// This method will map to python __len__();
- size_t getSize() const;
-
- private:
- void* getSharedPtr() const;
-
- friend class Arguments;
- IVectorPrivate* m;
-};
-
-struct ArgumentsPrivate;
-
-/// The Arguments is actual a std::vector in paddle.
-class Arguments {
- private:
- Arguments(); // Internal Create.
- DISABLE_COPY(Arguments);
-
- public:
- /**
- * Create a arguments with size.
- * Note that it can be zero.
- */
- static Arguments* createArguments(size_t slotNum);
-
- void resize(size_t slotNum);
-
- virtual ~Arguments();
-
- /**
-   * Return the slot number that the arguments contain.
- *
- * It is actually the vector's size
- */
- size_t getSlotNum() const;
-
- /**
- * The get functions of Arguments
- *
- * the param idx is the slot id
- */
- Matrix* getSlotValue(size_t idx) const throw(RangeError);
- Matrix* getSlotGrad(size_t idx) const throw(RangeError);
- IVector* getSlotIds(size_t idx) const throw(RangeError);
- Matrix* getSlotIn(size_t idx) const throw(RangeError);
- IVector* getSlotSequenceStartPositions(size_t idx) const throw(RangeError);
- IVector* getSlotSubSequenceStartPositions(size_t idx) const throw(RangeError);
- IVector* getSlotSequenceDim(size_t idx) const throw(RangeError);
- // End Of get functions of Arguments
-
- int64_t getBatchSize(size_t idx = 0) const throw(RangeError);
-
- /**
- * The set functions of Arguments.
- *
- * The param idx is the slot id.
- * The other param is the input Matrix or vector.
- */
- void setSlotValue(size_t idx, Matrix* mat) throw(RangeError);
- void setSlotGrad(size_t idx, Matrix* mat) throw(RangeError);
- void setSlotIn(size_t idx, Matrix* mat) throw(RangeError);
- void setSlotIds(size_t idx, IVector* vec) throw(RangeError);
- void setSlotSequenceStartPositions(size_t idx,
- IVector* vec) throw(RangeError);
- void setSlotSubSequenceStartPositions(size_t idx,
- IVector* vec) throw(RangeError);
- void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError);
-
- /**
- * Set the frame height of the idx-th Argument.
- *
-   * @param idx The index of which Argument.
-   * @param h The height value.
- */
- void setSlotFrameHeight(size_t idx, size_t h) throw(RangeError);
-
-  /**
-   * Set the frame width of the idx-th Argument.
-   *
-   * @param idx The index of which Argument.
-   * @param w The width value.
-   */
-  void setSlotFrameWidth(size_t idx, size_t w) throw(RangeError);
-
- size_t getSlotFrameHeight(size_t idx = 0) const throw(RangeError);
- size_t getSlotFrameWidth(size_t idx = 0) const throw(RangeError);
-
- float sum() const;
-
- private:
- static Arguments* createByPaddleArgumentVector(void* ptr);
- static Arguments* createByPaddleArgument(const void* ptr);
- void* getInternalArgumentsPtr() const;
-
- private:
- ArgumentsPrivate* m;
- friend class Trainer;
- friend class GradientMachine;
- friend class SequenceGenerator;
-};
-
-enum GradientMatchineCreateMode {
- CREATE_MODE_NORMAL = paddle::GradientMachine::kNormal,
- CREATE_MODE_SGD_SPARSE_CPU_TRAINING =
- paddle::GradientMachine::kSgdSparseCpuTraining,
- CREATE_MODE_TESTING = paddle::GradientMachine::kTesting
-};
-
-struct ParameterConfigPrivate;
-class ParameterConfig {
- DISABLE_COPY(ParameterConfig);
- ParameterConfig();
-
- /**
- * Internal methods
- */
- static ParameterConfig* createParameterConfigFromParameterSharedPtr(
- void* ptr);
- static ParameterConfig* createParameterConfigFromParameterPtr(void* ptr);
- void* getRawPtr();
-
- public:
- ~ParameterConfig();
-
- /**
- * return proto buf string.
- */
- std::string toProtoString() const;
-
- private:
- ParameterConfigPrivate* m;
-
- private:
- friend class Parameter;
- friend class ParameterOptimizer;
- friend struct ParameterTraverseCallbackPrivate;
-};
-
-struct OptimizationConfigPrivate;
-class OptimizationConfig {
- DISABLE_COPY(OptimizationConfig);
- OptimizationConfig();
-
- public:
- static OptimizationConfig* createFromProtoString(const std::string& str);
- ~OptimizationConfig();
-
- /**
- * return protobuf string.
- */
- std::string toProtoString();
-
- private:
- OptimizationConfigPrivate* m;
-
- friend class TrainerConfig;
- friend class ParameterOptimizer;
- friend class ParameterUpdater;
- friend class Trainer;
-};
-
-struct ParameterPrivate;
-class Parameter {
- private:
- Parameter();
- DISABLE_COPY(Parameter);
-
- public:
- virtual ~Parameter();
-
- /**
- * get parameter name
- */
- std::string getName() const;
-
- /**
- * get buf in Parameter
- */
- Vector* getBuf(ParameterType type);
-
- /**
- * get id
- */
- size_t getID() const;
-
- ParameterConfig* getConfig();
- void setValueUpdated();
-
- bool save(const std::string& filename) const;
-
- bool load(const std::string& filename) const;
-
- size_t getSize() const;
-
- private:
- static Parameter* createFromRawPtr(void* ptr);
- static Parameter* createFromSharedPtr(void* ptr);
-
- private:
- ParameterPrivate* m;
- friend class UpdateCallbackWrapper;
- friend class GradientMachine;
- friend class ParameterUpdater;
-};
-
-struct ModelConfigPrivate;
-/**
- * You can only get model config from TrainerConfig.
- *
- * It is used by GradientMachine.
- */
-class ModelConfig {
- private:
- ModelConfig();
- DISABLE_COPY(ModelConfig);
-
- public:
- virtual ~ModelConfig();
-
- private:
- ModelConfigPrivate* m;
- friend class TrainerConfig;
- friend struct TrainerConfigPrivate;
- friend class GradientMachine;
-};
-
-struct TrainerConfigPrivate;
-/**
- * To get TrainerConfig from file.
- *
- * It is used by GradientMachine.
- */
-class TrainerConfig {
- private:
- TrainerConfig();
- DISABLE_COPY(TrainerConfig);
-
- public:
- virtual ~TrainerConfig();
-
- static TrainerConfig* createFromTrainerConfigFile(
- const std::string& configPath);
- static TrainerConfig* createFromProtoString(const std::string& str);
-
- ModelConfig* getModelConfig() const;
-
- OptimizationConfig* getOptimizationConfig() const;
-
- private:
- TrainerConfigPrivate* m;
- friend class Trainer;
-};
-
-/**
- * The callback in backward.
- *
- * You can inherit this class in python.
- *
- * @code
- * class UpdateCallbackInPython(paddle.UpdateCallback):
- * def __init__(self):
- * paddle.UpdateCallback.__init__(self)
- *
- * def apply(self, param):
- * assert isinstance(param, paddle.Parameter)
- * @endcode
- */
-class UpdateCallback {
- public:
- virtual ~UpdateCallback();
- virtual void apply(Parameter* p);
-};
-
-struct ParameterTraverseCallbackPrivate;
-class ParameterTraverseCallback {
- DISABLE_COPY(ParameterTraverseCallback);
- ParameterTraverseCallback();
-
- public:
- ~ParameterTraverseCallback();
-
-  void apply(const std::vector<Vector*>& vecs,
- const ParameterConfig& config,
- size_t sparseId);
-
- private:
- ParameterTraverseCallbackPrivate* m;
- friend class ParameterOptimizer;
-};
-
-/**
- * The ParameterOptimizer Wrapper Class.
- *
- * Basically same as common/ParameterOptimizer.h
- */
-struct ParameterOptimizerPrivate;
-class ParameterOptimizer {
- DISABLE_COPY(ParameterOptimizer);
- ParameterOptimizer();
-
- public:
- static ParameterOptimizer* create(OptimizationConfig* config);
-
- ~ParameterOptimizer();
-
- void init(size_t numRows, const ParameterConfig* config);
-
- void startPass();
-
- void finishPass();
-
- void startBatch(size_t numSamplesProcessed);
-
- void finishBatch();
-
-  void update(const std::vector<Vector*>& vecs,
- const ParameterConfig& conf,
- size_t sparseId = NO_SPARSE_ID);
-
-  std::vector<int> getParameterTypes() const;
-
- ParameterTraverseCallback* needSpecialTraversal(
- const ParameterConfig& config) const;
-
- private:
- ParameterOptimizerPrivate* m;
-};
-
-class SequenceGenerator;
-class Evaluator;
-struct GradientMachinePrivate;
-class GradientMachine {
- private:
- GradientMachine();
- DISABLE_COPY(GradientMachine);
-
- public:
- virtual ~GradientMachine();
-
- /**
- * Create By ProtoStr.
- *
- * The ProtoStr can be generate by python's protobuf code.
- */
- static GradientMachine* createByConfigProtoStr(
- const std::string& protoStr,
- GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
-      const std::vector<int>& parameterTypes = defaultParamTypes);
-
- /**
- * Create by ModelConfig object.
- *
- * To get ModelConfig, you can get TrainerConfig from config file, then get
- * model config by TrainerConfig
- */
- static GradientMachine* createByModelConfig(
- ModelConfig* conf,
- GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
-      const std::vector<int>& parameterTypes = defaultParamTypes);
-
- /**
- * @brief finish
- */
- void finish();
-
- void start();
-
- /**
- * Prefetch row ids of sparse parameter.
- */
- void prefetch(const Arguments& inArgs);
-
- /**
- * Do some thing when train pass ended.
- */
- void onPassEnd();
-
- /**
- * The forward stage of GradientMachine.
- *
-   * @note the outArgs could be zero-length arguments.
-   * @note THIS METHOD IS VERY USEFUL FOR PREDICTING FROM A TRAINED MODEL.
- */
- void forward(const Arguments& inArgs, Arguments* outArgs, PassType passType);
-
- /**
- * The backward stage of GradientMachine.
- *
- * @note Currently the ParameterUpdater is not wrapped in SWIG, so backward
- * cannot actually train a network. But you can write a update callback to
- * change the parameter or implement a ParameterUpdater in python side.
- */
- void backward(const UpdateCallback& callback = UpdateCallback());
-
- /**
- * Combine forward/backward
- */
- void forwardBackward(const Arguments& inArgs,
- Arguments* outArgs,
- PassType passType,
- const UpdateCallback& callback = UpdateCallback());
-
- void loadParameters(const std::string& path);
-
- size_t getParameterSize() const;
- Parameter* getParameter(size_t i) throw(RangeError);
-
- size_t getNonStaticParameterSize() const;
- Parameter* getNonStaticParameter(size_t i) throw(RangeError);
-
- void randParameters();
-
- Arguments* getLayerOutput(const std::string& layerName) const
- throw(UnsupportError);
-
- /**
- * Create a sequence generator.
- *
-   * @note It is just like paddle_gen_sequence.
- */
- SequenceGenerator* asSequenceGenerator(
-      const std::vector<std::string>& dict = std::vector<std::string>(),
- size_t begin_id = 0UL,
- size_t end_id = 0UL,
- size_t max_length = 100UL,
- size_t beam_size = -1UL);
-
- Evaluator* makeEvaluator();
-
- void eval(Evaluator* evaluator);
-
- private:
- GradientMachinePrivate* m;
-
- static GradientMachine* createFromPaddleModelPtr(
- const void* confPtr,
- GradientMatchineCreateMode mode,
-      const std::vector<int>& types);
-
- // Not to use c++ 11 init-list, so we use static var as function default arg.
-  static std::vector<int> defaultParamTypes;
- friend class Trainer;
- friend class ParameterUpdater;
-};
-
-struct ParameterUpdaterPrivate;
-class ParameterUpdater {
- private:
- ParameterUpdater();
-
- public:
- static ParameterUpdater* createLocalUpdater(OptimizationConfig* config);
- static ParameterUpdater* createRemoteUpdater(OptimizationConfig* config,
- int passCount,
- bool useSparseUpdater);
- static ParameterUpdater* createNewRemoteUpdater(
- OptimizationConfig* config,
- const std::string pserverSpec,
- const bool useEtcd) throw(UnsupportError);
- ~ParameterUpdater();
-
- /**
- * @brief initialize Parameter Updater by GradientMachine.
- * @param gm
- */
- void init(const GradientMachine& gm);
-
- /**
- * @brief begin of a training/testing of one pass.
- */
- void startPass();
-
- /**
-   * @brief end of a training/testing of one pass.
- */
- void finishPass();
-
- /**
- * @brief begin of a training/testing of one batch.
- * @param data batch's size
- * @return PassType, mostly will be training.
- */
- PassType startBatch(size_t batchSize);
-
- /**
-   * @brief end of a training/testing of one batch.
- * @param cost current batch cost.
- */
- void finishBatch(float cost);
-
- /**
- * @brief update a parameter (by local optimizer or by cluster pserver)
- * @param param
- */
- void update(Parameter* param);
-
- /**
-   * @brief only get required sparse rows by default.
- * @param fullSize: get full matrix parameter if *fullSize* set
- * @param apply: get PARAMETER_APPLY on pserver if *apply* set
- */
- void getParametersRemote(bool fullSize = false, bool apply = false);
-
- /**
- * @brief restore the average parameter.
- * @note It is only used in AverageOptimizer. Restore will get the current
- * PARAMETER_VALUE back.
- */
- void restore();
-
- /**
- * @brief apply. Store the average parameter.
- * @note It is only used in AverageOptimizer. Apply will store the current
-   * PARAMETER_VALUE to a buffer, calculate the current average parameter, and save
- * it to PARAMETER_VALUE.
- */
- void apply();
-
- /**
-   * @brief catchUpWith. Regularization is delayed in many situations
-   * (pserver, local sparse). Catching up means applying the delayed
-   * regularization to all parameters.
- */
- void catchUpWith();
-
- private:
- ParameterUpdaterPrivate* m;
-};
-
-struct EvaluatorPrivate;
-class Evaluator {
- private:
- Evaluator();
- DISABLE_COPY(Evaluator);
-
- public:
- ~Evaluator();
-
- /**
- * @brief begin an evaluate stage.
- */
- void start();
-
- /**
- * @brief end an evaluate stage.
- */
- void finish();
-
- /**
-   * @brief toString will get an evaluation result.
- *
- * __repr__ method in python
- */
- std::string toString();
-
-  std::vector<std::string> getNames() const;
-
- double getValue(const std::string name) const;
-
- private:
- EvaluatorPrivate* m;
-
- friend class GradientMachine;
-};
-
-struct TrainerPrivate;
-class Trainer {
- private:
- TrainerPrivate* m;
- Trainer();
- Trainer(TrainerConfig* optConfig, GradientMachine* gm);
- DISABLE_COPY(Trainer);
-
- public:
- virtual ~Trainer();
-
- /// Create A Trainer By TrainerConfig. using paddle command line.
- static Trainer* createByCommandLine() throw(IOError);
-
- static Trainer* create(TrainerConfig* optConfig,
- GradientMachine* gm) throw(IOError);
-
- /// Start training
- void startTrain();
-
- /// Finish training
- void finishTrain();
-
- /// Start a pass.
- void startTrainPass();
-
- /// Finish a pass
- void finishTrainPass();
-
- /**
- * Train one batch,
- *
- * @return true if all batch finished.
- */
- bool trainOneBatch(size_t batchSize);
-
- void trainOneDataBatch(size_t batchSize, const Arguments& args);
-
- void startTestPeriod();
- void testOneDataBatch(size_t batchSize, const Arguments& args);
- void finishTestPeriod();
-
- void forwardOneBatch(size_t batchSize);
-
- Arguments* getForwardOutput();
-
- Arguments* getLayerOutput(const std::string& layerName) const;
-};
-
-/// the N-Best results generated from one input sequence.
-class ISequenceResults {
- public:
- virtual ~ISequenceResults();
-
- /// Number of result.
- virtual size_t getSize() const = 0;
-
- /**
- * Get sentence from dictionary.
- *
- * @param id the index of result.
-   * @param split if true, the returned sentence will be split with ' '
-   *              between words. Default is false.
- */
- virtual std::string getSentence(size_t id, bool split = false) const
- throw(RangeError) = 0;
-  virtual std::vector<int> getSequence(size_t id) const throw(RangeError) = 0;
- virtual float getScore(size_t id) const throw(RangeError) = 0;
-};
-
-struct SequenceGeneratorPrivate;
-class SequenceGenerator {
- DISABLE_COPY(SequenceGenerator);
- SequenceGenerator();
-
- public:
- virtual ~SequenceGenerator();
-
- /**
- * Generate Sequence by input.
- *
- * @note The inArgs is just one sequence of data.
- * @note The return will get a N-best generate result by inArgs.
- * Sort by score.
- */
- ISequenceResults* generateSequence(const Arguments& inArgs) const;
-
-  void setDict(const std::vector<std::string>& dict);
- void setBos(size_t bos);
- void setEos(size_t eos);
- void setMaxLength(size_t maxlength);
- void setBeamSize(size_t beamSize);
-
- private:
- static SequenceGenerator* createByGradientMachineSharedPtr(void* ptr);
- friend class GradientMachine;
-
- private:
- SequenceGeneratorPrivate* m;
-};
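The SequenceGenerator declared at the end of this header rounds out the deleted API. A hedged usage sketch; `machine` and `in_args` come from the GradientMachine sketch earlier, and the word list is an illustrative assumption:

```python
# Method names follow PaddleAPI.h as deleted in this patch.
word_list = ["<s>", "<e>", "hello", "world"]   # illustrative dictionary
gen = machine.asSequenceGenerator(word_list, 0, 1, 100, 5)
# positional args: dict, begin_id (BOS), end_id (EOS), max_length, beam_size

results = gen.generateSequence(in_args)        # N-best, sorted by score
for i in range(results.getSize()):
    print(results.getScore(i), results.getSentence(i, True))
```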
diff --git a/paddle/api/PaddleAPIPrivate.h b/paddle/api/PaddleAPIPrivate.h
deleted file mode 100644
index e141fcd761d7db2d3836a6343700ac4a7ca80c16..0000000000000000000000000000000000000000
--- a/paddle/api/PaddleAPIPrivate.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-#include <memory>
-#include "PaddleAPI.h"
-#include "paddle/gserver/evaluators/Evaluator.h"
-#include "paddle/gserver/gradientmachines/GradientMachine.h"
-#include "paddle/parameter/ParameterUpdaterBase.h"
-#include "paddle/trainer/TrainerConfigHelper.h"
-
-struct GradientMachinePrivate {
- std::shared_ptr<paddle::GradientMachine> machine;
-
- template <typename T>
- inline T& cast(void* ptr) {
- return *(T*)(ptr);
- }
-};
-
-struct OptimizationConfigPrivate {
- std::shared_ptr<paddle::TrainerConfigHelper> trainer_config;
- paddle::OptimizationConfig config;
-
- const paddle::OptimizationConfig& getConfig() {
- if (trainer_config != nullptr) {
- return trainer_config->getOptConfig();
- } else {
- return config;
- }
- }
-};
-
-struct TrainerConfigPrivate {
- std::shared_ptr<paddle::TrainerConfigHelper> conf;
- TrainerConfigPrivate() {}
-};
-
-struct ModelConfigPrivate {
- std::shared_ptr<paddle::TrainerConfigHelper> conf;
-};
-
-struct ArgumentsPrivate {
- std::vector<paddle::Argument> outputs;
-
- inline paddle::Argument& getArg(size_t idx) throw(RangeError) {
- if (idx < outputs.size()) {
- return outputs[idx];
- } else {
- RangeError e;
- throw e;
- }
- }
-
- template <typename T>
- std::shared_ptr<T>& cast(void* rawPtr) const {
- return *(std::shared_ptr<T>*)(rawPtr);
- }
-};
-
-struct ParameterUpdaterPrivate {
- std::unique_ptr<paddle::ParameterUpdater> updater;
-};
-
-struct ParameterPrivate {
- std::shared_ptr<paddle::Parameter> sharedPtr;
- paddle::Parameter* rawPtr; // rawPtr is only used in ParameterUpdater;
- // in other situations sharedPtr should
- // contain the value.
-
- ParameterPrivate() : sharedPtr(nullptr), rawPtr(nullptr) {}
-
- paddle::Parameter* getPtr() {
- if (sharedPtr) {
- return sharedPtr.get();
- } else {
- return rawPtr;
- }
- }
-};
-
-struct EvaluatorPrivate {
- paddle::Evaluator* rawPtr;
-
- EvaluatorPrivate() : rawPtr(nullptr) {}
- ~EvaluatorPrivate() { delete rawPtr; }
-};
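Every class in this API hides its paddle internals behind one of the `*Private` structs above, reached through a raw `m` pointer: the pimpl idiom, which keeps paddle headers out of the Swig-visible surface. A self-contained illustration of the same convention; `Counter` and its members are hypothetical names.

```cpp
// counter.h equivalent -- the public surface exposes no implementation types.
struct CounterPrivate;  // defined only in the implementation file
class Counter {
 public:
  Counter();
  ~Counter();
  void add(int v);
  int value() const;

 private:
  CounterPrivate* m;  // same convention as ParameterUpdaterPrivate, etc.
};

// counter.cpp equivalent -- implementation details stay out of the header.
struct CounterPrivate {
  int sum = 0;
};
Counter::Counter() : m(new CounterPrivate()) {}
Counter::~Counter() { delete m; }
void Counter::add(int v) { m->sum += v; }
int Counter::value() const { return m->sum; }
```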
diff --git a/paddle/api/Parameter.cpp b/paddle/api/Parameter.cpp
deleted file mode 100644
index 589d22e74e742de2595a9efd17412ddc55159230..0000000000000000000000000000000000000000
--- a/paddle/api/Parameter.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/parameter/Parameter.h"
-#include "PaddleAPI.h"
-#include "PaddleAPIPrivate.h"
-
-Parameter::Parameter() : m(new ParameterPrivate()) {}
-
-Parameter::~Parameter() { delete m; }
-
-Parameter* Parameter::createFromRawPtr(void* ptr) {
- auto p = new Parameter();
- p->m->rawPtr = *static_cast<paddle::Parameter**>(ptr);
- return p;
-}
-
-Parameter* Parameter::createFromSharedPtr(void* ptr) {
- auto& p = *(paddle::ParameterPtr*)(ptr);
- if (p == nullptr) {
- return nullptr;
- } else {
- auto retParam = new Parameter();
- retParam->m->sharedPtr = p;
- return retParam;
- }
-}
-
-std::string Parameter::getName() const { return m->getPtr()->getName(); }
-
-Vector* Parameter::getBuf(ParameterType type) {
- auto buf = m->getPtr()->getBuf(type);
- return Vector::createByPaddleVectorPtr(&buf);
-}
-
-ParameterConfig* Parameter::getConfig() {
- if (m->sharedPtr) {
- return ParameterConfig::createParameterConfigFromParameterSharedPtr(
- &m->sharedPtr);
- } else {
- return ParameterConfig::createParameterConfigFromParameterPtr(m->rawPtr);
- }
-}
-
-size_t Parameter::getID() const { return m->getPtr()->getID(); }
-
-void Parameter::setValueUpdated() { m->getPtr()->setValueUpdated(); }
-
-bool Parameter::save(const std::string& filename) const {
- return m->getPtr()->save(filename);
-}
-
-bool Parameter::load(const std::string& filename) const {
- return m->getPtr()->load(filename);
-}
-
-size_t Parameter::getSize() const { return m->getPtr()->getSize(); }
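`ParameterPrivate::getPtr()` (in PaddleAPIPrivate.h above) unifies two ownership modes: a shared_ptr when the parameter is shared, or a borrowed raw pointer when a ParameterUpdater owns it. The same fallback pattern in isolation; `MaybeOwned` is a hypothetical name.

```cpp
#include <cassert>
#include <memory>

// Mirrors ParameterPrivate: hold either shared ownership or a borrowed
// raw pointer, and fall back from one to the other on access.
template <typename T>
struct MaybeOwned {
  std::shared_ptr<T> shared;  // set when ownership is shared with us
  T* raw = nullptr;           // set when someone else owns the object

  T* get() const { return shared ? shared.get() : raw; }
};

int main() {
  MaybeOwned<int> a;
  a.shared = std::make_shared<int>(42);  // owning mode
  int borrowed = 7;
  MaybeOwned<int> b;
  b.raw = &borrowed;                     // borrowing mode
  assert(*a.get() == 42);
  assert(*b.get() == 7);
  return 0;
}
```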
diff --git a/paddle/api/ParameterOptimizer.cpp b/paddle/api/ParameterOptimizer.cpp
deleted file mode 100644
index d4620be3e6f26cdd4caffffac712e4ef936b222a..0000000000000000000000000000000000000000
--- a/paddle/api/ParameterOptimizer.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/parameter/ParameterOptimizer.h"
-#include <algorithm>
-#include "Internal.h"
-#include "PaddleAPI.h"
-#include "PaddleAPIPrivate.h"
-
-struct ParameterOptimizerPrivate {
- std::unique_ptr<paddle::ParameterOptimizer> optimizer;
-};
-
-struct ParameterTraverseCallbackPrivate {
- paddle::ParameterOptimizer::TraverseCallback callback;
-
- ParameterTraverseCallbackPrivate() {}
-
- ParameterTraverseCallbackPrivate(
- const paddle::ParameterOptimizer::TraverseCallback& callback)
- : callback(callback) {}
-
- void apply(const std::vector<Vector*>& vecs,
- const ParameterConfig& conf,
- size_t sparseId) {
- std::vector<paddle::VectorPtr> real_vecs;
- real_vecs.resize(vecs.size());
- std::transform(vecs.begin(), vecs.end(), real_vecs.begin(), [](Vector* v) {
- if (v) {
- return *(paddle::VectorPtr*)(v->getSharedPtr());
- } else {
- return paddle::VectorPtr();
- }
- });
-
- paddle::ParameterConfig& real_conf =
- *(paddle::ParameterConfig*)(const_cast<ParameterConfig&>(conf)
- .getRawPtr());
- callback(real_vecs.data(), real_conf, sparseId);
- }
-};
-
-ParameterOptimizer::ParameterOptimizer() : m(new ParameterOptimizerPrivate()) {}
-
-ParameterOptimizer::~ParameterOptimizer() { delete m; }
-
-ParameterOptimizer* ParameterOptimizer::create(OptimizationConfig* config) {
- CHECK(config != nullptr);
- auto retOptimizer = new ParameterOptimizer();
- retOptimizer->m->optimizer.reset(
- paddle::ParameterOptimizer::create(config->m->getConfig(), false));
- return retOptimizer;
-}
-
-void ParameterOptimizer::init(size_t numRows, const ParameterConfig* config) {
- auto& conf = *(paddle::ParameterConfig*)(const_cast<ParameterConfig*>(config)
- ->getRawPtr());
- m->optimizer->init(numRows, &conf);
-}
-
-void ParameterOptimizer::startPass() { m->optimizer->startPass(); }
-
-void ParameterOptimizer::finishPass() { m->optimizer->finishPass(); }
-
-void ParameterOptimizer::startBatch(size_t numSamplesProcessed) {
- constexpr size_t high_1 = 1UL << (sizeof(size_t) * 8 - 1);
- CHECK_EQ(numSamplesProcessed & high_1, 0UL); // Safely cast.
- m->optimizer->startBatch((int64_t)numSamplesProcessed);
-}
-
-void ParameterOptimizer::finishBatch() { m->optimizer->finishBatch(); }
-
-void ParameterOptimizer::update(const std::vector<Vector*>& vecs,
- const ParameterConfig& conf,
- size_t sparseId) {
- ParameterTraverseCallbackPrivate invoker(
- [&](const paddle::VectorPtr _vecs[],
- const paddle::ParameterConfig& config,
- size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); });
- invoker.apply(vecs, conf, sparseId);
-}
-
-std::vector<int> ParameterOptimizer::getParameterTypes() const {
- std::vector<int> returnValue;
- staticCastVector(&returnValue, m->optimizer->getParameterTypes());
- return returnValue;
-}
-
-ParameterTraverseCallback::ParameterTraverseCallback()
- : m(new ParameterTraverseCallbackPrivate()) {}
-
-ParameterTraverseCallback::~ParameterTraverseCallback() { delete m; }
-
-void ParameterTraverseCallback::apply(const std::vector<Vector*>& vecs,
- const ParameterConfig& conf,
- size_t sparseId) {
- m->apply(vecs, conf, sparseId);
-}
-
-ParameterTraverseCallback* ParameterOptimizer::needSpecialTraversal(
- const ParameterConfig& config) const {
- auto& param_config =
- *(paddle::ParameterConfig*)const_cast<ParameterConfig&>(config)
- .getRawPtr();
- auto callback = m->optimizer->needSpecialTraversal(param_config);
- if (callback) {
- auto retCallback = new ParameterTraverseCallback();
- retCallback->m->callback = callback;
- return retCallback;
- } else {
- return nullptr;
- }
-}
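`startBatch()` above guards the `size_t` to `int64_t` narrowing by requiring the top bit to be clear. The same check in isolation; `fitsInInt64` is a hypothetical helper name.

```cpp
#include <cassert>
#include <cstddef>

// Same guard as ParameterOptimizer::startBatch: a size_t is safe to
// reinterpret as int64_t only if its highest bit is clear.
bool fitsInInt64(size_t n) {
  const size_t high_1 = size_t(1) << (sizeof(size_t) * 8 - 1);
  return (n & high_1) == 0;
}

int main() {
  assert(fitsInInt64(123456));       // ordinary sample counters pass
  assert(!fitsInInt64(~size_t(0)));  // all bits set: top bit is 1
  return 0;
}
```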
diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp
deleted file mode 100644
index 63c000c959f67dc682190b73bac24640ca8d0682..0000000000000000000000000000000000000000
--- a/paddle/api/ParameterUpdater.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "PaddleAPI.h"
-
-#include "PaddleAPIPrivate.h"
-#ifndef PADDLE_WITHOUT_GOLANG
-#include "paddle/trainer/NewRemoteParameterUpdater.h"
-#endif
-#include "paddle/trainer/RemoteParameterUpdater.h"
-#include "paddle/trainer/ThreadParameterUpdater.h"
-
-ParameterUpdater::ParameterUpdater() : m(new ParameterUpdaterPrivate()) {}
-
-ParameterUpdater *ParameterUpdater::createLocalUpdater(
- OptimizationConfig *config) {
- auto updater = new ParameterUpdater();
- updater->m->updater.reset(
- new paddle::SgdThreadUpdater(config->m->getConfig()));
- return updater;
-}
-
-ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
- OptimizationConfig *config,
- const std::string pserverSpec,
- const bool useEtcd) throw(UnsupportError) {
-#ifndef PADDLE_WITHOUT_GOLANG
- auto updater = new ParameterUpdater();
- updater->m->updater.reset(new paddle::NewRemoteParameterUpdater(
- config->m->getConfig(), pserverSpec, useEtcd));
- return updater;
-#else
- throw UnsupportError("not compiled with WITH_GOLANG");
-#endif
-}
-
-ParameterUpdater *ParameterUpdater::createRemoteUpdater(
- OptimizationConfig *config, int passCount, bool useSparseUpdater) {
- auto updater = new ParameterUpdater();
- auto remoteUpdater = new paddle::RemoteParameterUpdater(
- config->m->getConfig(), passCount, nullptr);
- if (useSparseUpdater) {
- std::unique_ptr<paddle::RemoteParameterUpdater> remoteUpdaterPtr(remoteUpdater);
- auto sparseRemoteUpdater =
- new paddle::SparseRemoteParameterUpdaterComposite(
- config->m->getConfig(),
- passCount,
- false,
- std::move(remoteUpdaterPtr));
- updater->m->updater.reset(sparseRemoteUpdater);
- } else {
- updater->m->updater.reset(remoteUpdater);
- }
- return updater;
-}
-
-ParameterUpdater::~ParameterUpdater() { delete m; }
-
-void ParameterUpdater::init(const GradientMachine &gm) {
- m->updater->init(gm.m->machine->getNonStaticParameters());
-}
-
-void ParameterUpdater::startPass() { m->updater->startPass(); }
-
-void ParameterUpdater::finishPass() { m->updater->finishPass(); }
-
-PassType ParameterUpdater::startBatch(size_t batchSize) {
- return m->updater->startBatch((int64_t)batchSize);
-}
-
-void ParameterUpdater::finishBatch(float cost) {
- m->updater->finishBatch(cost);
-}
-
-void ParameterUpdater::update(Parameter *param) {
- auto paddleParam = param->m->getPtr();
- m->updater->update(paddleParam);
-}
-
-void ParameterUpdater::getParametersRemote(bool fullSize, bool apply) {
- m->updater->getParametersRemote(fullSize, apply);
-}
-
-void ParameterUpdater::restore() { m->updater->restore(); }
-
-void ParameterUpdater::apply() { m->updater->apply(); }
-
-void ParameterUpdater::catchUpWith() { m->updater->catchUpWith(); }
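The updater methods above assume a strict call nesting: `init` once, then `startPass`/`finishPass` per pass, with `startBatch`/`update`/`finishBatch` inside. A hedged call-order sketch; every argument is a caller-supplied placeholder.

```cpp
#include <vector>
#include "PaddleAPI.h"

// Hedged sketch of the nesting implied by the implementation above.
void runOnePass(ParameterUpdater* updater, GradientMachine* gm,
                const std::vector<Parameter*>& params,
                size_t batchSize, int numBatches, float cost) {
  updater->init(*gm);
  updater->startPass();
  for (int i = 0; i < numBatches; ++i) {
    updater->startBatch(batchSize);
    // ... forward/backward pass on gm would go here ...
    for (Parameter* p : params) updater->update(p);
    updater->finishBatch(cost);
  }
  updater->finishPass();
}
```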
diff --git a/paddle/api/SequenceGenerator.cpp b/paddle/api/SequenceGenerator.cpp
deleted file mode 100644
index 1446c3084238859a759669f3a32c7efde67dcc2b..0000000000000000000000000000000000000000
--- a/paddle/api/SequenceGenerator.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <algorithm>
-#include <iostream>
-#include <iterator>
-#include <sstream>
-#include "PaddleAPI.h"
-#include "paddle/gserver/gradientmachines/GradientMachine.h"
-#include "paddle/parameter/Argument.h"
-#include "paddle/utils/Flags.h"
-
-// used to represent a partial sequence
-struct Path {
- std::vector<int> ids;
- float logProb;
- paddle::MachineState machineState;
-
- Path() { logProb = 0; }
-
- Path(std::vector<int>& ids, float logProb, paddle::MachineState& machineState)
- : ids(ids), logProb(logProb), machineState(machineState) {}
-
- bool operator<(const Path& other) const { return (logProb > other.logProb); }
-};
-
-// Return the top k (k == beam_size) optimal paths using beam search. The
-// last element of inArgs is the feedback Argument. gradMachine has a
-// MaxIdLayer as output, so outArgs stores the top k labels and their
-// probabilities per position.
-static void findNBest(paddle::GradientMachine* gradMachine,
- std::vector<paddle::Argument>& inArgs,
- std::vector<Path>& finalPaths,
- size_t bos_id,
- size_t eos_id,
- size_t max_length) {
- std::vector<Path> paths;
- Path emptyPath;
- paths.push_back(emptyPath);
- finalPaths.clear();
- gradMachine->resetState();
- paddle::Argument feedback = inArgs.back();
- feedback.ids->setElement(0, (int)(bos_id));
- float minFinalPathLogProb = 0;
- size_t beam = 0;
- int id;
- std::vector<paddle::Argument> outArgs;
- while (true) { // iterate over each generated word
- std::vector<Path> newPaths;
- paddle::MachineState machineState;
- for (size_t j = 0; j < paths.size(); j++) {
- Path& path = paths[j];
- if (path.machineState.size() > 0) {
- gradMachine->setState(path.machineState);
- feedback.ids->setElement(0, path.ids.back());
- }
- gradMachine->forward(inArgs, &outArgs, paddle::PASS_TEST);
- gradMachine->getState(machineState);
- beam = outArgs[0].ids->getSize();
- for (size_t k = 0; k < beam; k++) {
- id = outArgs[0].ids->getElement(k);
- float prob = outArgs[0].in->getElement(0, k);
- std::vector<int> nids(path.ids);
- nids.push_back(id);
- float newLogProb = path.logProb + log(prob);
- Path newPath(nids, newLogProb, machineState);
- if (id == (int)eos_id || nids.size() >= max_length) {
- finalPaths.push_back(newPath);
- if (minFinalPathLogProb > newPath.logProb) {
- minFinalPathLogProb = newPath.logProb;
- }
- } else {
- newPaths.push_back(newPath);
- }
- }
- }
-
- if (newPaths.size() == 0) {
- break;
- }
- std::nth_element(newPaths.begin(),
- newPaths.begin() + std::min(beam, newPaths.size()),
- newPaths.end());
- if (newPaths.size() > beam) {
- newPaths.resize(beam);
- }
- // pathA < pathB means pathA.logProb > pathB.logProb
- float maxPathLogProb =
- std::min_element(newPaths.begin(), newPaths.end())->logProb;
- if (finalPaths.size() >= beam && minFinalPathLogProb >= maxPathLogProb) {
- break;
- }
- paths = newPaths;
- } // end while
-
- std::partial_sort(finalPaths.begin(),
- finalPaths.begin() + std::min(beam, finalPaths.size()),
- finalPaths.end());
- if (finalPaths.size() > beam) {
- finalPaths.resize(beam);
- }
-}
-
-struct SequenceGeneratorPrivate {
- std::shared_ptr<paddle::GradientMachine> machine;
- std::shared_ptr<std::vector<std::string>> dict;
- size_t beginPos;
- size_t endPos;
- size_t maxLength;
-
- paddle::Argument feedback;
-
- template <typename T>
- inline T& cast(void* ptr) {
- return *(T*)(ptr);
- }
-
- inline void findNBest(std::vector<paddle::Argument>& inArgs,
- std::vector<Path>& path) {
- ::findNBest(machine.get(), inArgs, path, beginPos, endPos, maxLength);
- }
-
- SequenceGeneratorPrivate()
- : dict(std::make_shared<std::vector<std::string>>()),
- beginPos(0UL),
- endPos(0UL),
- maxLength(0UL),
- feedback(__create_feedback__()) {}
-
- private:
- static paddle::Argument __create_feedback__() {
- paddle::Argument feedback;
- feedback.ids = paddle::IVector::create(/* size= */ 1, FLAGS_use_gpu);
-
- feedback.sequenceStartPositions =
- paddle::ICpuGpuVector::create(/* size= */ 2, /* useGpu= */ false);
- feedback.sequenceStartPositions->getMutableData(false)[0] = 0;
- feedback.sequenceStartPositions->getMutableData(false)[1] = 1;
- return feedback;
- }
-};
-
-SequenceGenerator::SequenceGenerator() : m(new SequenceGeneratorPrivate()) {}
-
-SequenceGenerator::~SequenceGenerator() { delete m; }
-
-class PathSequenceResults : public ISequenceResults {
- // ISequenceResults interface
- public:
- PathSequenceResults(const std::shared_ptr<std::vector<Path>>& path,
- const std::shared_ptr<std::vector<std::string>>& dict)
- : path_(path), dict_(dict) {}
-
- size_t getSize() const { return path_->size(); }
- std::string getSentence(size_t id, bool split) const throw(RangeError) {
- if (id < getSize()) {
- Path& p = (*path_)[id];
- std::ostringstream sout;
- std::transform(p.ids.begin(),
- p.ids.end(),
- std::ostream_iterator<std::string>(sout, split ? " " : ""),
- [&](int id) { return (*dict_)[id]; });
- return sout.str();
- } else {
- RangeError e;
- throw e;
- }
- }
- std::vector<int> getSequence(size_t id) const throw(RangeError) {
- if (id < getSize()) {
- Path& p = (*path_)[id];
- return p.ids;
- } else {
- RangeError e;
- throw e;
- }
- }
- float getScore(size_t id) const throw(RangeError) {
- if (id < getSize()) {
- Path& p = (*path_)[id];
- return p.logProb;
- } else {
- RangeError e;
- throw e;
- }
- }
-
- private:
- std::shared_ptr<std::vector<Path>> path_;
- std::shared_ptr<std::vector<std::string>> dict_;
-};
-
-ISequenceResults* SequenceGenerator::generateSequence(
- const Arguments& inArgs) const {
- auto& in_args =
- m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
- for (auto& arg : in_args) {
- arg.sequenceStartPositions = m->feedback.sequenceStartPositions;
- }
- in_args.push_back(m->feedback);
- auto path = std::make_shared<std::vector<Path>>();
- m->findNBest(in_args, *path);
- return new PathSequenceResults(path, m->dict);
-}
-
-SequenceGenerator* SequenceGenerator::createByGradientMachineSharedPtr(
- void* ptr) {
- SequenceGenerator* r = new SequenceGenerator();
- r->m->machine = r->m->cast<std::shared_ptr<paddle::GradientMachine>>(ptr);
- return r;
-}
-
-void SequenceGenerator::setDict(const std::vector<std::string>& dict) {
- *m->dict = dict;
-}
-
-void SequenceGenerator::setBos(size_t bos) { m->beginPos = bos; }
-
-void SequenceGenerator::setEos(size_t eos) { m->endPos = eos; }
-
-void SequenceGenerator::setMaxLength(size_t maxLength) {
- m->maxLength = maxLength;
-}
-
-void SequenceGenerator::setBeamSize(size_t beamSize) {
- if (beamSize != -1UL) {
- FLAGS_beam_size = beamSize;
- }
-}
-
-ISequenceResults::~ISequenceResults() {}
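`findNBest` above is a beam search over accumulated log-probabilities: expand each live path by the candidate words, then prune back to `beam` paths by score. A minimal standalone restatement of that pruning rule, with a fixed toy distribution standing in for the gradient machine.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Toy beam search: every path is extended by every candidate id, scored by
// accumulated log-probability, and pruned to the best `beam` paths, the
// same rule findNBest applies with std::nth_element.
struct Path {
  std::vector<int> ids;
  float logProb = 0;
  bool operator<(const Path& o) const { return logProb > o.logProb; }
};

int main() {
  const float probs[3] = {0.6f, 0.3f, 0.1f};  // stand-in for model output
  const size_t beam = 2, steps = 3;
  std::vector<Path> paths(1);  // start from the empty path
  for (size_t s = 0; s < steps; ++s) {
    std::vector<Path> next;
    for (const Path& p : paths)
      for (int id = 0; id < 3; ++id) {
        Path np = p;
        np.ids.push_back(id);
        np.logProb += std::log(probs[id]);
        next.push_back(np);
      }
    std::sort(next.begin(), next.end());  // best (highest logProb) first
    if (next.size() > beam) next.resize(beam);
    paths = next;
  }
  for (const Path& p : paths) std::printf("score=%f\n", p.logProb);
  return 0;
}
```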
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
deleted file mode 100644
index 795460b65051b4ec0d9772d2503f123c4a6ea3d0..0000000000000000000000000000000000000000
--- a/paddle/api/Trainer.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "PaddleAPI.h"
-#include "PaddleAPIPrivate.h"
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
-#include "paddle/trainer/ParamUtil.h"
-#include "paddle/trainer/Trainer.h"
-#include "paddle/trainer/TrainerInternal.h"
-#include "paddle/utils/Flags.h"
-
-using paddle::real;
-
-DECLARE_string(config);
-DECLARE_string(init_model_path);
-DECLARE_int32(start_pass);
-
-struct TrainerPrivate : public paddle::Trainer {
- bool _trainOneBatch(size_t batchSize);
- bool forwardOneBatch(size_t batchSize);
- void forwardOneDataBatch(const std::vector<paddle::Argument>& inArgs);
- void setBatchSize(size_t batchSize);
- std::vector<paddle::Argument>& getForwardOutput();
-
- void startTestPeriod();
- void finishTestPeriod();
- void testOneDataBatch(const paddle::DataBatch& dataBatch);
- TrainerPrivate() : paddle::Trainer() {}
-};
-
-Trainer::Trainer() : m(new TrainerPrivate()) {
- auto conf = paddle::TrainerConfigHelper::createFromFlags();
- if (conf != nullptr) {
- m->init(conf);
- }
-}
-
-Trainer::~Trainer() { delete m; }
-
-Trainer* Trainer::createByCommandLine() throw(IOError) {
- auto retv = new Trainer();
- if (retv->m->getConfig().IsInitialized()) {
- return retv;
- } else {
- throw IOError();
- }
-}
-
-Trainer::Trainer(TrainerConfig* config, GradientMachine* gm)
- : m(new TrainerPrivate()) {
- m->init(config->m->conf, /* testing= */ false, gm ? gm->m->machine : nullptr);
-}
-
-Trainer* Trainer::create(TrainerConfig* config,
- GradientMachine* gm) throw(IOError) {
- auto retv = new Trainer(config, gm);
- if (retv->m->getConfig().IsInitialized()) {
- return retv;
- } else {
- retv->m->getConfig().CheckInitialized();
- throw IOError();
- }
-}
-
-void Trainer::startTrain() { m->startTrain(); }
-
-void Trainer::finishTrain() { m->finishTrain(); }
-
-void Trainer::startTrainPass() { m->startTrainPass(); }
-
-void Trainer::finishTrainPass() { m->finishTrainPass(); }
-
-void Trainer::trainOneDataBatch(size_t batchSize, const Arguments& inArgs) {
- paddle::DataBatch dataBatch;
- dataBatch.getStreams() = inArgs.m->outputs;
- dataBatch.setSize(batchSize);
- m->trainOneDataBatch(dataBatch);
-}
-
-bool Trainer::trainOneBatch(size_t batchSize) {
- return m->_trainOneBatch(batchSize);
-}
-
-bool TrainerPrivate::_trainOneBatch(size_t batchSize) {
- paddle::DataBatch dataBatch;
- CHECK(dataProvider_) << "data_provider is not specified";
- int num = dataProvider_->getNextBatch(batchSize, &dataBatch);
- if (num == 0) {
- return false;
- }
- trainOneDataBatch(dataBatch);
- return false;
-}
-
-void TrainerPrivate::startTestPeriod() {
- if (!tester_) {
- createTester();
- }
- tester_->startTestPeriod();
-}
-
-void Trainer::startTestPeriod() { m->startTestPeriod(); }
-
-void TrainerPrivate::testOneDataBatch(const paddle::DataBatch& dataBatch) {
- tester_->testOneDataBatch(dataBatch, &forwardOutput_);
-}
-
-void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) {
- paddle::DataBatch dataBatch;
- dataBatch.getStreams() = args.m->outputs;
- dataBatch.setSize(batchSize);
- m->testOneDataBatch(dataBatch);
-}
-
-void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); }
-void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
-
-Arguments* Trainer::getLayerOutput(const std::string& layerName) const {
- auto nn = this->m->getGradientMachine();
- CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
- auto arg = nn->getLayerOutput(layerName);
- return Arguments::createByPaddleArgument(&arg);
-}
-
-void Trainer::forwardOneBatch(size_t batchSize) {
- m->forwardOneBatch(batchSize);
-}
-
-bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
- CHECK(dataProvider_) << "data_provider is not specified";
- paddle::DataBatch dataBatch;
- int num = dataProvider_->getNextBatch(batchSize, &dataBatch);
- if (num == 0) {
- return false;
- }
-
- forwardOneDataBatch(dataBatch.getStreams());
- return true;
-}
-
-void TrainerPrivate::forwardOneDataBatch(
- const std::vector<paddle::Argument>& inArgs) {
- std::vector<paddle::Argument>& outArgs = forwardOutput_;
-
- if (config_->getOptConfig().use_sparse_remote_updater()) {
- trainerInternal_.getGradientMachine()->prefetch(inArgs);
- trainerInternal_.getParameterUpdater()->getParametersRemote();
- }
- trainerInternal_.getGradientMachine()->forward(
- inArgs, &outArgs, paddle::PASS_TEST);
-}
-
-Arguments* Trainer::getForwardOutput() {
- return Arguments::createByPaddleArgumentVector(&m->getForwardOutput());
-}
-
-std::vector<paddle::Argument>& TrainerPrivate::getForwardOutput() {
- return forwardOutput_;
-}
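The test-period methods above likewise assume a fixed order: `startTestPeriod`, then any number of `testOneDataBatch` calls, then `finishTestPeriod`. A hedged sketch; the batches are placeholders prepared by the caller.

```cpp
#include <vector>
#include "PaddleAPI.h"

// Hedged sketch of the test-period protocol implemented above.
void evaluateBatches(Trainer* trainer,
                     const std::vector<Arguments*>& batches,
                     size_t batchSize) {
  trainer->startTestPeriod();
  for (Arguments* args : batches) {
    trainer->testOneDataBatch(batchSize, *args);
  }
  trainer->finishTestPeriod();
}
```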
diff --git a/paddle/api/Util.cpp b/paddle/api/Util.cpp
deleted file mode 100644
index 618e87e96459674302d8b468c3ac410e8d3af6a8..0000000000000000000000000000000000000000
--- a/paddle/api/Util.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "PaddleAPI.h"
-
-#include "paddle/parameter/Parameter.h"
-#include "paddle/utils/Common.h"
-#include "paddle/utils/Flags.h"
-#include "paddle/utils/PythonUtil.h"
-#include "paddle/utils/Util.h"
-
-#include <algorithm>
-#include <iostream>
-#include <iterator>
-
-void initPaddle(int argc, char** argv) {
- paddle::initMain(argc, argv);
- paddle::initPython(argc, argv);
- feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
-}
-
-FloatArray::FloatArray(const float* b, const size_t l)
- : buf(b), length(l), needFree(false) {}
-
-IntArray::IntArray(const int* b, const size_t l, bool f)
- : buf(b), length(l), needFree(f) {}
-
-IntWithFloatArray::IntWithFloatArray(const float* v,
- const int* i,
- size_t l,
- bool f)
- : valBuf(v), idxBuf(i), length(l), needFree(f) {}
-
-bool isUsingGpu() { return FLAGS_use_gpu; }
-
-void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }
-
-bool isGpuVersion() {
-#ifndef PADDLE_WITH_CUDA
- return false;
-#else
- return true;
-#endif
-}
-
-int getTrainerCount() { return FLAGS_trainer_count; }
-
-static_assert(NUM_PARAMETER_TYPES == paddle::NUM_PARAMETER_TYPES,
- "The Parameter Type should be same in core/api and core/common");
diff --git a/paddle/api/Vector.cpp b/paddle/api/Vector.cpp
deleted file mode 100644
index e2a7b974ca78ae3e6e0e66c206a40c8811126b53..0000000000000000000000000000000000000000
--- a/paddle/api/Vector.cpp
+++ /dev/null
@@ -1,304 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "PaddleAPI.h"
-
-#include "paddle/math/Vector.h"
-
-#include <cstring>
-
-struct IVectorPrivate {
- paddle::IVectorPtr vec;
-};
-
-IVector::IVector() : m(new IVectorPrivate()) {}
-
-IVector* IVector::createZero(size_t sz, bool useGpu) {
- auto v = new IVector();
- v->m->vec = paddle::IVector::create(sz, useGpu);
- v->m->vec->zeroMem();
- return v;
-}
-
-IVector* IVector::create(const std::vector<int>& data, bool useGpu) {
- auto v = new IVector();
- v->m->vec = paddle::IVector::create(data.size(), useGpu);
- v->m->vec->copyFrom(data.data(), data.size());
- return v;
-}
-
-IVector* IVector::createVectorFromNumpy(int* data,
- int dim,
- bool copy,
- bool useGpu) throw(UnsupportError) {
- if (useGpu) {
- /// in GPU mode, only copy=true is supported
- if (!copy) {
- throw UnsupportError("Gpu mode only supports copy=True");
- }
- return IVector::createGpuVectorFromNumpy(data, dim);
- } else {
- return IVector::createCpuVectorFromNumpy(data, dim, copy);
- }
-}
-
-IVector* IVector::createCpuVectorFromNumpy(int* data, int dim, bool copy) {
- auto v = new IVector();
- if (copy) {
- v->m->vec = paddle::IVector::create(dim, false);
- v->m->vec->copyFrom(data, dim);
- } else {
- v->m->vec = paddle::IVector::create(data, dim, false);
- }
- return v;
-}
-
-IVector* IVector::createGpuVectorFromNumpy(int* data, int dim) {
- auto v = new IVector();
- v->m->vec = paddle::IVector::create(dim, true);
- v->m->vec->copyFrom(data, dim);
- return v;
-}
-
-bool IVector::isGpu() const {
- return dynamic_cast<paddle::GpuIVector*>(m->vec.get()) != nullptr;
-}
-
-IntArray IVector::getData() const {
- if (this->isGpu()) {
- int* src = m->vec->getData();
- size_t len = m->vec->getSize();
- int* dest = new int[len];
- hl_memcpy_device2host(dest, src, len * sizeof(int));
- return IntArray(dest, len, true);
- } else {
- return IntArray(m->vec->getData(), m->vec->getSize());
- }
-}
-
-int& IVector::operator[](const size_t idx) throw(RangeError, UnsupportError) {
- if (this->isGpu()) {
- UnsupportError e;
- throw e;
- } else {
- if (idx >= m->vec->getSize()) {
- RangeError e;
- throw e;
- }
- }
- return m->vec->getData()[idx];
-}
-
-const int& IVector::operator[](const size_t idx) const
- throw(RangeError, UnsupportError) {
- return (*const_cast<IVector*>(this))[idx];
-}
-
-IVector* IVector::createByPaddleVectorPtr(void* ptr) {
- auto* p = (paddle::IVectorPtr*)ptr;
- if ((*p) != nullptr) {
- IVector* vec = new IVector();
- vec->m->vec = *p;
- return vec;
- } else {
- return nullptr;
- }
-}
-
-IVector::~IVector() { delete m; }
-
-void* IVector::getSharedPtr() const { return &m->vec; }
-
-size_t IVector::getSize() const { return m->vec->getSize(); }
-
-void IVector::toNumpyArrayInplace(int** data, int* dim1) throw(UnsupportError) {
- auto v = std::dynamic_pointer_cast<paddle::CpuIVector>(m->vec);
- if (v) {
- *data = v->getData();
- *dim1 = v->getSize();
- } else {
- throw UnsupportError();
- }
-}
-
-void IVector::copyToNumpyArray(int** view_m_data, int* dim1) {
- *dim1 = m->vec->getSize();
- *view_m_data = new int[*dim1];
- if (auto cpuVec = dynamic_cast<paddle::CpuIVector*>(m->vec.get())) {
- std::memcpy(*view_m_data, cpuVec->getData(), sizeof(int) * (*dim1));
- } else if (auto gpuVec = dynamic_cast<paddle::GpuIVector*>(m->vec.get())) {
- hl_memcpy_device2host(
- *view_m_data, gpuVec->getData(), sizeof(int) * (*dim1));
- } else {
- LOG(INFO) << "Unexpected situation";
- }
-}
-
-void IVector::copyFromNumpyArray(int* data, int dim) {
- m->vec->resize(dim);
- m->vec->copyFrom(data, dim);
-}
-
-struct VectorPrivate {
- paddle::VectorPtr vec;
-
- void safeAccessData(const size_t idx,
- const std::function<void(float&)>& func) const
- throw(RangeError, UnsupportError) {
- auto cpuVec = std::dynamic_pointer_cast<paddle::CpuVector>(vec);
- if (cpuVec != nullptr) {
- if (idx < vec->getSize()) {
- func(vec->getData()[idx]);
- } else {
- throw RangeError();
- }
- } else {
- throw UnsupportError();
- }
- }
-};
-
-Vector::Vector() : m(new VectorPrivate()) {}
-
-Vector::~Vector() { delete m; }
-
-Vector* Vector::createZero(size_t sz, bool useGpu) {
- auto retVec = new Vector();
- retVec->m->vec = paddle::Vector::create(sz, useGpu);
- retVec->m->vec->zero();
- return retVec;
-}
-
-Vector* Vector::create(const std::vector<float>& data, bool useGpu) {
- auto retVec = new Vector();
- retVec->m->vec = paddle::Vector::create(data.size(), useGpu);
- retVec->m->vec->copyFrom(data.data(), data.size());
- return retVec;
-}
-
-Vector* Vector::createByPaddleVectorPtr(void* ptr) {
- auto& v = *(paddle::VectorPtr*)(ptr);
- if (v == nullptr) {
- return nullptr;
- } else {
- auto retVec = new Vector();
- retVec->m->vec = v;
- return retVec;
- }
-}
-
-Vector* Vector::createVectorFromNumpy(float* data,
- int dim,
- bool copy,
- bool useGpu) throw(UnsupportError) {
- if (useGpu) {
- /// in GPU mode, only copy=True is supported
- if (!copy) {
- throw UnsupportError("Gpu mode only supports copy=True");
- }
- return Vector::createGpuVectorFromNumpy(data, dim);
- } else {
- return Vector::createCpuVectorFromNumpy(data, dim, copy);
- }
-}
-
-Vector* Vector::createCpuVectorFromNumpy(float* data, int dim, bool copy) {
- CHECK_GT(dim, 0);
- auto retVec = new Vector();
- if (copy) {
- retVec->m->vec = paddle::Vector::create((size_t)dim, false);
- retVec->m->vec->copyFrom(data, dim);
- } else {
- retVec->m->vec = paddle::Vector::create(data, (size_t)dim, false);
- }
- return retVec;
-}
-
-Vector* Vector::createGpuVectorFromNumpy(float* data, int dim) {
- CHECK_GT(dim, 0);
- auto retVec = new Vector();
- retVec->m->vec = paddle::Vector::create((size_t)dim, true);
- retVec->m->vec->copyFrom(data, (size_t)dim);
- return retVec;
-}
-
-void Vector::toNumpyArrayInplace(float** view_data,
- int* dim1) throw(UnsupportError) {
- auto v = std::dynamic_pointer_cast<paddle::CpuVector>(m->vec);
- if (v != nullptr) {
- *view_data = v->getData();
- *dim1 = (int)v->getSize();
- } else {
- throw UnsupportError();
- }
-}
-
-void Vector::copyToNumpyArray(float** view_m_data, int* dim1) {
- *dim1 = m->vec->getSize();
- *view_m_data = new float[*dim1];
- if (auto cpuVec = dynamic_cast<paddle::CpuVector*>(m->vec.get())) {
- std::memcpy(*view_m_data, cpuVec->getData(), sizeof(float) * (*dim1));
- } else if (auto gpuVec = dynamic_cast<paddle::GpuVector*>(m->vec.get())) {
- hl_memcpy_device2host(
- *view_m_data, gpuVec->getData(), sizeof(float) * (*dim1));
- } else {
- LOG(INFO) << "Unexpected situation";
- }
-}
-
-void Vector::copyFromNumpyArray(float* data, int dim) {
- m->vec->resize(dim);
- m->vec->copyFrom(data, dim);
-}
-
-FloatArray Vector::getData() const {
- if (this->isGpu()) {
- float* src = m->vec->getData();
- size_t len = m->vec->getSize();
- float* dest = new float[len];
- hl_memcpy_device2host(dest, src, len * sizeof(float));
- FloatArray ret_val(dest, len);
- ret_val.needFree = true;
- return ret_val;
- } else {
- FloatArray ret_val(m->vec->getData(), m->vec->getSize());
- return ret_val;
- }
-}
-
-void Vector::copyFrom(Vector* src) throw(RangeError) {
- if (src->m->vec->getSize() != m->vec->getSize()) {
- throw RangeError();
- }
- m->vec->copyFrom(*src->m->vec);
-}
-
-bool Vector::isGpu() const {
- return std::dynamic_pointer_cast<paddle::GpuVector>(m->vec) != nullptr;
-}
-
-float Vector::get(const size_t idx) const throw(RangeError, UnsupportError) {
- float r;
- m->safeAccessData(idx, [&](float& o) { r = o; });
- return r;
-}
-
-void Vector::set(const size_t idx, float val) throw(RangeError,
- UnsupportError) {
- m->safeAccessData(idx, [&](float& o) { o = val; });
-}
-
-size_t Vector::getSize() const { return m->vec->getSize(); }
-
-void* Vector::getSharedPtr() { return &m->vec; }
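`createVectorFromNumpy` above allows zero-copy views only on CPU; GPU vectors always copy. A small sketch of that rule; `wrapBuffer` is a hypothetical helper and the float buffer stands in for a numpy array.

```cpp
#include "PaddleAPI.h"

// Hedged sketch: CPU vectors may alias the caller's buffer (copy=false),
// GPU vectors must copy, mirroring createVectorFromNumpy above.
Vector* wrapBuffer(float* data, int dim, bool useGpu) {
  if (useGpu) {
    return Vector::createVectorFromNumpy(data, dim, /* copy= */ true, true);
  }
  // Zero-copy view: the caller must keep `data` alive while the Vector lives.
  return Vector::createVectorFromNumpy(data, dim, /* copy= */ false, false);
}
```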
diff --git a/paddle/capi/Main.cpp b/paddle/capi/Main.cpp
deleted file mode 100644
index 0a289dede65406facf1f1cba584f4330f2569214..0000000000000000000000000000000000000000
--- a/paddle/capi/Main.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <fenv.h>
-#include <stdlib.h>
-#include <string.h>
-#include <vector>
-#include "capi_private.h"
-#include "main.h"
-#include "paddle/trainer/TrainerConfigHelper.h"
-#include "paddle/utils/Excepts.h"
-#include "paddle/utils/PythonUtil.h"
-
-static void initPaddle(int argc, char** argv) {
- paddle::initMain(argc, argv);
- paddle::initPython(argc, argv);
-}
-
-extern "C" {
-paddle_error paddle_init(int argc, char** argv) {
- static bool isInit = false;
- if (isInit) return kPD_NO_ERROR;
-
- std::vector<char*> realArgv;
- realArgv.reserve(argc + 1);
- realArgv.push_back(strdup(""));
- for (int i = 0; i < argc; ++i) {
- realArgv.push_back(argv[i]);
- }
- initPaddle(argc + 1, realArgv.data());
- free(realArgv[0]);
- isInit = true;
- return kPD_NO_ERROR;
-}
-
-paddle_error paddle_init_thread() {
- if (FLAGS_use_gpu) {
- hl_init(FLAGS_gpu_id);
- }
- return kPD_NO_ERROR;
-}
-}
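`paddle_init` above takes argv-style flags and latches a static flag so repeated initialization is a no-op. A hedged usage sketch; `startPaddle` is a hypothetical wrapper.

```cpp
#include "capi.h"  // C-API entry points, as included by the tests below

// Hedged sketch: flags are passed argv-style, and a second paddle_init
// call returns immediately because of the static isInit latch above.
int startPaddle(void) {
  char flag[] = "--use_gpu=false";
  char* argv[] = {flag};
  if (paddle_init(1, argv) != kPD_NO_ERROR) return 1;
  (void)paddle_init(1, argv);  // safe: already initialized, no-op
  return 0;
}
```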
diff --git a/paddle/capi/capi_private.h b/paddle/capi/capi_private.h
deleted file mode 100644
index 3332f42a4a6e57fed6ddb20cf7d759d67e7240b5..0000000000000000000000000000000000000000
--- a/paddle/capi/capi_private.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "capi.h"
-#include "paddle/gserver/gradientmachines/GradientMachine.h"
-#include "paddle/math/Matrix.h"
-#include "paddle/math/Vector.h"
-#include "paddle/parameter/Argument.h"
-#pragma once
-
-namespace paddle {
-namespace capi {
-
-enum CType { kIVECTOR = 0, kMATRIX, kARGUMENTS, kGRADIENT_MACHINE };
-
-#define STRUCT_HEADER CType type;
-
-struct CHeader {
- STRUCT_HEADER
-};
-
-struct CIVector {
- STRUCT_HEADER
- IVectorPtr vec;
-
- CIVector() : type(kIVECTOR) {}
-};
-
-struct CMatrix {
- STRUCT_HEADER
- MatrixPtr mat;
-
- CMatrix() : type(kMATRIX) {}
-};
-
-struct CArguments {
- STRUCT_HEADER
- std::vector<paddle::Argument> args;
-
- CArguments() : type(kARGUMENTS) {}
-
- template <typename T>
- paddle_error accessSeqPos(uint64_t ID, uint32_t nestedLevel, T callback) {
- if (ID >= args.size()) return kPD_OUT_OF_RANGE;
- switch (nestedLevel) {
- case 0:
- callback(args[ID].sequenceStartPositions);
- break;
- case 1:
- callback(args[ID].subSequenceStartPositions);
- break;
- default:
- return kPD_OUT_OF_RANGE;
- }
- return kPD_NO_ERROR;
- }
-};
-
-struct CGradientMachine {
- STRUCT_HEADER
- paddle::GradientMachinePtr machine;
-
- CGradientMachine() : type(kGRADIENT_MACHINE) {}
-};
-
-template <typename T>
-inline T* cast(void* ptr) {
- return reinterpret_cast<T*>(ptr);
-}
-} // namespace capi
-} // namespace paddle
diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp
deleted file mode 100644
index 8c3f504e5a2d807c0cc664af486ebab4a82ddec3..0000000000000000000000000000000000000000
--- a/paddle/capi/gradient_machine.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "gradient_machine.h"
-#include "capi_private.h"
-#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
-
-#define cast(v) paddle::capi::cast<paddle::capi::CGradientMachine>(v)
-
-enum GradientMatchineCreateMode {
- CREATE_MODE_NORMAL = 0,
- CREATE_MODE_TESTING = 4
-};
-
-namespace paddle {
-
-class MyNeuralNetwork : public NeuralNetwork {
- public:
- MyNeuralNetwork(const std::string& name, NeuralNetwork* network)
- : NeuralNetwork(name, network) {}
-};
-
-NeuralNetwork* newCustomNerualNetwork(const std::string& name,
- NeuralNetwork* network) {
- return new MyNeuralNetwork(name, network);
-}
-} // namespace paddle
-
-extern "C" {
-paddle_error paddle_gradient_machine_create_for_inference(
- paddle_gradient_machine* machine, void* modelConfigProtobuf, int size) {
- if (modelConfigProtobuf == nullptr) return kPD_NULLPTR;
- paddle::ModelConfig config;
- if (!config.ParseFromArray(modelConfigProtobuf, size) ||
- !config.IsInitialized()) {
- return kPD_PROTOBUF_ERROR;
- }
-
- auto ptr = new paddle::capi::CGradientMachine();
- ptr->machine.reset(paddle::GradientMachine::create(
- config, CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE}));
- *machine = ptr;
- return kPD_NO_ERROR;
-}
-
-paddle_error paddle_gradient_machine_create_for_inference_with_parameters(
- paddle_gradient_machine* machine, void* mergedModel, uint64_t size) {
- if (mergedModel == nullptr) return kPD_NULLPTR;
- std::istringstream is(std::string(static_cast<char*>(mergedModel), size));
- int64_t modelConfigSize = 0;
- is.read((char*)(&modelConfigSize), sizeof(modelConfigSize));
- std::string modelConfigProtobuf;
- modelConfigProtobuf.resize(modelConfigSize);
- is.read(&modelConfigProtobuf[0], modelConfigSize);
- paddle::TrainerConfig config;
- paddle::ModelConfig modelConfig;
- if (!config.ParseFromString(modelConfigProtobuf) || !config.IsInitialized()) {
- if (!modelConfig.ParseFromString(modelConfigProtobuf) ||
- !modelConfig.IsInitialized()) {
- return kPD_PROTOBUF_ERROR;
- }
- } else {
- modelConfig = config.model_config();
- }
- auto ptr = new paddle::capi::CGradientMachine();
- ptr->machine.reset(paddle::GradientMachine::create(
- modelConfig, CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE}));
- std::vector<paddle::ParameterPtr>& parameters = ptr->machine->getParameters();
- for (auto& para : parameters) {
- para->load(is);
- }
-
- *machine = ptr;
- return kPD_NO_ERROR;
-}
-
-paddle_error paddle_gradient_machine_destroy(paddle_gradient_machine machine) {
- delete cast(machine);
- return kPD_NO_ERROR;
-}
-
-paddle_error paddle_gradient_machine_load_parameter_from_disk(
- paddle_gradient_machine machine, const char* path) {
- auto m = cast(machine);
- if (m == nullptr || path == nullptr || m->machine == nullptr)
- return kPD_NULLPTR;
- m->machine->loadParameters(path);
- return kPD_NO_ERROR;
-}
-
-paddle_error paddle_gradient_machine_forward(paddle_gradient_machine machine,
- paddle_arguments inArgs,
- paddle_arguments outArgs,
- bool isTrain) {
- auto m = cast(machine);
- auto in = paddle::capi::cast(inArgs);
- auto out = paddle::capi::cast(outArgs);
- if (m == nullptr || in == nullptr || out == nullptr || m->machine == nullptr)
- return kPD_NULLPTR;
- m->machine->forward(
- in->args, &out->args, isTrain ? paddle::PASS_TRAIN : paddle::PASS_TEST);
- return kPD_NO_ERROR;
-}
-
-paddle_error paddle_gradient_machine_create_shared_param(
- paddle_gradient_machine origin,
- void* modelConfigProtobuf,
- int size,
- paddle_gradient_machine* slave) {
- auto o = cast(origin);
- if (origin == nullptr || slave == nullptr || o->machine == nullptr) {
- return kPD_NULLPTR;
- }
- paddle::ModelConfig config;
- if (!config.ParseFromArray(modelConfigProtobuf, size) ||
- !config.IsInitialized()) {
- return kPD_PROTOBUF_ERROR;
- }
-
- std::unique_ptr<paddle::capi::CGradientMachine> ptr(
- new paddle::capi::CGradientMachine());
- auto nn = paddle::NeuralNetwork::create(config);
- nn->init(config,
- [&o](int paramId, paddle::Parameter* param) {
- auto p = o->machine->getParameters()[paramId];
- param->enableSharedType(paddle::PARAMETER_VALUE,
- p->getBuf(paddle::PARAMETER_VALUE));
- },
- {paddle::PARAMETER_VALUE},
- false);
- ptr->machine.reset(nn);
- *slave = ptr.release();
- return kPD_NO_ERROR;
-}
-}
-
-paddle_error paddle_gradient_machine_randomize_param(
- paddle_gradient_machine machine) {
- auto m = cast(machine);
- if (m == nullptr || m->machine == nullptr) return kPD_NULLPTR;
- m->machine->randParameters();
- return kPD_NO_ERROR;
-}
-
-paddle_error paddle_gradient_machine_get_layer_output(
- paddle_gradient_machine machine,
- const char* layerName,
- paddle_arguments args) {
- auto m = cast(machine);
- auto out = paddle::capi::cast(args);
- if (m == nullptr || layerName == nullptr || out == nullptr ||
- m->machine == nullptr) {
- return kPD_NULLPTR;
- }
-
- auto layerOutput = m->machine->getLayerOutput(layerName);
- out->args.push_back(layerOutput);
- return kPD_NO_ERROR;
-}
-
-paddle_error paddle_gradient_machine_release_layer_output(
- paddle_gradient_machine machine) {
- auto m = cast(machine);
- if (m == nullptr || m->machine == nullptr) {
- return kPD_NULLPTR;
- }
- m->machine->releaseOutput();
- return kPD_NO_ERROR;
-}
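`paddle_gradient_machine_create_for_inference_with_parameters` above parses a merged model blob: an int64 config size, then the serialized config protobuf, then raw parameter data read back via `para->load(is)`. A hedged sketch of a writer that produces this layout; the function name and its inputs are hypothetical placeholders for real serialized data.

```cpp
#include <cstdint>
#include <fstream>
#include <string>

// Hedged sketch of the merged-model layout parsed above:
//   [int64 configSize][configSize bytes of protobuf][parameter blobs...]
void writeMergedModel(const std::string& path,
                      const std::string& configProto,
                      const std::string& paramBlob) {
  std::ofstream os(path, std::ios::binary);
  int64_t configSize = static_cast<int64_t>(configProto.size());
  os.write(reinterpret_cast<const char*>(&configSize), sizeof(configSize));
  os.write(configProto.data(), configProto.size());
  os.write(paramBlob.data(), paramBlob.size());  // read back via para->load(is)
}
```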
diff --git a/paddle/capi/tests/test_Arguments.cpp b/paddle/capi/tests/test_Arguments.cpp
deleted file mode 100644
index bb08adf716bfd6e3c88747616e538e9da89a0e25..0000000000000000000000000000000000000000
--- a/paddle/capi/tests/test_Arguments.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <functional>
-#include "capi.h"
-#include "gtest/gtest.h"
-#include "paddle/utils/ThreadLocal.h"
-
-static std::vector<paddle_real> randomBuffer(size_t bufSize) {
- auto& eng = paddle::ThreadLocalRandomEngine::get();
- std::uniform_real_distribution<paddle_real> dist(-1.0, 1.0);
- std::vector<paddle_real> retv;
- retv.reserve(bufSize);
- for (size_t i = 0; i < bufSize; ++i) {
- retv.push_back(dist(eng));
- }
- return retv;
-}
-
-TEST(CAPIArguments, create) {
- //! TODO(yuyang18): Test GPU Code.
- paddle_arguments args = paddle_arguments_create_none();
- uint64_t size;
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_size(args, &size));
- ASSERT_EQ(0UL, size);
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(args));
-}
-
-TEST(CAPIArguments, value) {
- paddle_arguments args = paddle_arguments_create_none();
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_resize(args, 1));
-
- paddle_matrix mat = paddle_matrix_create(128, 64, false);
- for (size_t i = 0; i < 128; ++i) {
- std::vector sampleBuf = randomBuffer(64);
- paddle_matrix_set_row(mat, i, sampleBuf.data());
- }
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_set_value(args, 0, mat));
-
- paddle_matrix val = paddle_matrix_create_none();
-
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_value(args, 0, val));
-
- for (size_t i = 0; i < 128; ++i) {
- paddle_real* row1;
- paddle_real* row2;
-
- ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(mat, i, &row1));
- ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(val, i, &row2));
- ASSERT_EQ(row1, row2);
- }
-
- paddle_ivector ivec = paddle_ivector_create_none();
- ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(ivec));
- ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(val));
- ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(mat));
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(args));
-}
-
-TEST(CAPIArguments, ids) {
- paddle_arguments args = paddle_arguments_create_none();
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_resize(args, 1));
-
- paddle_ivector ivec;
- int array[3] = {1, 2, 3};
- ivec = paddle_ivector_create(array, 3, true, false);
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_set_ids(args, 0, ivec));
-
- paddle_ivector val = paddle_ivector_create_none();
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_ids(args, 0, val));
- ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(ivec));
- ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(val));
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(args));
-}
-
-template <typename T1, typename T2>
-void testSequenceHelper(T1 setter, T2 getter) {
- paddle_arguments args = paddle_arguments_create_none();
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_resize(args, 1));
-
- paddle_ivector ivec;
- int array[3] = {1, 2, 3};
- ivec = paddle_ivector_create(array, 3, true, false);
- ASSERT_EQ(kPD_NO_ERROR, setter(args, 0, ivec));
-
- paddle_ivector val = paddle_ivector_create_none();
- ASSERT_EQ(kPD_NO_ERROR, getter(args, 0, val));
- uint64_t size;
- ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_get_size(val, &size));
-
- int* rawBuf;
- ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_get(val, &rawBuf));
- for (size_t i = 0; i < size; ++i) {
- ASSERT_EQ(array[i], rawBuf[i]);
- }
-
- ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(ivec));
- ASSERT_EQ(kPD_NO_ERROR, paddle_ivector_destroy(val));
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(args));
-}
-
-TEST(CAPIArguments, Sequence) {
- auto testSequence = [](uint32_t nestedLevel) {
- testSequenceHelper(std::bind(paddle_arguments_set_sequence_start_pos,
- std::placeholders::_1,
- std::placeholders::_2,
- nestedLevel,
- std::placeholders::_3),
- std::bind(paddle_arguments_get_sequence_start_pos,
- std::placeholders::_1,
- std::placeholders::_2,
- nestedLevel,
- std::placeholders::_3));
- };
- for (uint32_t i = 0; i < 2; ++i) { // test seq and sub-seq.
- testSequence(i);
- }
-}
diff --git a/paddle/capi/tests/test_GradientMachine.cpp b/paddle/capi/tests/test_GradientMachine.cpp
deleted file mode 100644
index 73b9e477b2a2749250e878cf2174dcf4cc599be1..0000000000000000000000000000000000000000
--- a/paddle/capi/tests/test_GradientMachine.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <gtest/gtest.h>
-#include <paddle/gserver/gradientmachines/GradientMachine.h>
-#include <paddle/trainer/TrainerConfigHelper.h>
-#include <stdlib.h>
-#include <string.h>
-#include <type_traits>
-#include "capi.h"
-#include "paddle/utils/ThreadLocal.h"
-
-static std::vector<paddle_real> randomBuffer(size_t bufSize) {
- auto& eng = paddle::ThreadLocalRandomEngine::get();
- std::uniform_real_distribution<paddle_real> dist(-1.0, 1.0);
- std::vector<paddle_real> retv;
- retv.reserve(bufSize);
- for (size_t i = 0; i < bufSize; ++i) {
- retv.push_back(dist(eng));
- }
- return retv;
-}
-
-TEST(GradientMachine, testPredict) {
- //! TODO(yuyang18): Test GPU Code.
- paddle::TrainerConfigHelper config("./test_predict_network.py");
- std::string buffer;
- ASSERT_TRUE(config.getModelConfig().SerializeToString(&buffer));
- paddle_gradient_machine machine;
-
- ASSERT_EQ(kPD_NO_ERROR,
- paddle_gradient_machine_create_for_inference(
- &machine, &buffer[0], (int)buffer.size()));
- std::unique_ptr<paddle::GradientMachine> gm(
- paddle::GradientMachine::create(config.getModelConfig()));
- ASSERT_NE(nullptr, gm);
- gm->randParameters();
- gm->saveParameters("./");
-
- ASSERT_EQ(kPD_NO_ERROR,
- paddle_gradient_machine_load_parameter_from_disk(machine, "./"));
-
- paddle_gradient_machine machineSlave;
- ASSERT_EQ(kPD_NO_ERROR,
- paddle_gradient_machine_create_shared_param(
- machine, &buffer[0], (int)buffer.size(), &machineSlave));
- std::swap(machineSlave, machine);
- paddle_arguments outArgs = paddle_arguments_create_none();
-
- paddle_arguments inArgs = paddle_arguments_create_none();
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_resize(inArgs, 1));
- paddle_matrix mat = paddle_matrix_create(1, 100, false);
- static_assert(std::is_same<paddle_real, paddle::real>::value, "");
-
- auto data = randomBuffer(100);
- paddle_real* rowPtr;
- ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(mat, 0, &rowPtr));
- memcpy(rowPtr, data.data(), data.size() * sizeof(paddle_real));
-
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_set_value(inArgs, 0, mat));
- ASSERT_EQ(kPD_NO_ERROR,
- paddle_gradient_machine_forward(machine, inArgs, outArgs, false));
-
- uint64_t sz;
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_size(outArgs, &sz));
- ASSERT_EQ(1UL, sz);
-
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_get_value(outArgs, 0, mat));
- std::vector<paddle::Argument> paddleInArgs;
- std::vector<paddle::Argument> paddleOutArgs;
- paddleInArgs.resize(1);
- paddleInArgs[0].value =
- paddle::Matrix::create(data.data(), 1, 100, false, false);
-
- gm->forward(paddleInArgs, &paddleOutArgs, paddle::PASS_TEST);
-
- auto matPaddle = paddleOutArgs[0].value;
-
- uint64_t height, width;
- ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_shape(mat, &height, &width));
- ASSERT_EQ(matPaddle->getHeight(), height);
- ASSERT_EQ(matPaddle->getWidth(), width);
-
- ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_get_row(mat, 0, &rowPtr));
- for (size_t i = 0; i < width; ++i) {
- ASSERT_NEAR(matPaddle->getData()[i], rowPtr[i], 1e-5);
- }
-
- ASSERT_EQ(kPD_NO_ERROR, paddle_matrix_destroy(mat));
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(inArgs));
- ASSERT_EQ(kPD_NO_ERROR, paddle_arguments_destroy(outArgs));
- std::swap(machineSlave, machine);
- ASSERT_EQ(kPD_NO_ERROR, paddle_gradient_machine_destroy(machineSlave));
- ASSERT_EQ(kPD_NO_ERROR, paddle_gradient_machine_destroy(machine));
-}
-
-int main(int argc, char** argv) {
- testing::InitGoogleTest(&argc, argv);
- std::vector<char*> argvs;
- argvs.push_back(strdup("--use_gpu=false"));
- paddle_init((int)argvs.size(), argvs.data());
- for (auto each : argvs) {
- free(each);
- }
- return RUN_ALL_TESTS();
-}
diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt
index 2cd6ab2bbf042bced41957193a0269f477eb10d0..a8bbb4eb8081420ae0bbaf761bd27303c0d043cb 100644
--- a/paddle/contrib/inference/CMakeLists.txt
+++ b/paddle/contrib/inference/CMakeLists.txt
@@ -46,6 +46,10 @@ cc_library(paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
+cc_library(paddle_inference_api_shared SHARED
+ SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
+ DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
+
cc_test(test_paddle_inference_api
SRCS test_paddle_inference_api.cc
DEPS paddle_inference_api)
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h
index ba266b608da342fb71faf05d02ddf74330e21e98..f9ec6f55449fc46b4a44b9563980cb5f8e80a951 100644
--- a/paddle/contrib/inference/paddle_inference_api_impl.h
+++ b/paddle/contrib/inference/paddle_inference_api_impl.h
@@ -22,9 +22,9 @@
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/framework/ddim.h"
-#include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
+#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
diff --git a/paddle/cuda/include/hl_base.h b/paddle/cuda/include/hl_base.h
deleted file mode 100644
index 77f5d82dbe2cad183491033736bac85961b6d320..0000000000000000000000000000000000000000
--- a/paddle/cuda/include/hl_base.h
+++ /dev/null
@@ -1,250 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <cstddef>
-
-#ifdef PADDLE_TYPE_DOUBLE
-#define HL_FLOAT_MAX 3.40282347e+38F
-#define HL_FLOAT_MIN 1.17549435e-38F
-using real = double;
-#else
-#define HL_FLOAT_MAX 1.7976931348623157e+308
-#define HL_FLOAT_MIN 2.2250738585072014e-308
-using real = float;
-#endif
-
-/**
- * The maximum input value for exp, used to avoid overflow problems.
- * Currently only used by the tanh function.
- */
-#define EXP_MAX_INPUT 40.0
-
-/**
- * @brief DIVUP(x, y) is similar to ceil(x / y).
- * @note For CUDA, DIVUP will be used to specify
- * the size of blockDim.
- */
-#ifndef DIVUP
-#define DIVUP(x, y) (((x) + (y)-1) / (y))
-#endif
-
-/**
- * HPPL is an internal high performance parallel computing library
- * for high-level neural network routines, which can support many
- * heterogeneous compute architectures, such as GPU, FPGA, etc.
- */
-
-/**
- * @brief HPPL CUDA Stream.
- *
- * @note Each thread can use HPPL_STREAM_* after calling hl_init.
- * HPPL_STREAM_DEFAULT is HPPL default stream.
- */
-typedef enum {
- HPPL_STREAM_DEFAULT = 0, /* Thread Default Stream*/
- HPPL_STREAM_1 = 1,
- HPPL_STREAM_2 = 2,
- HPPL_STREAM_3 = 3,
- HPPL_STREAM_4 = 4,
- HPPL_THREAD_STREAM_1 = 5,
- HPPL_THREAD_STREAM_2 = 6,
- HPPL_THREAD_STREAM_3 = 7,
- HPPL_THREAD_STREAM_4 = 8,
- HPPL_STREAM_END
-} hl_stream_t;
-
-/**
- * @brief HPPL activation mode.
- */
-typedef enum {
- HL_ACTIVATION_SIGMOID = 0,
- HL_ACTIVATION_RELU = 1,
- HL_ACTIVATION_TANH = 2,
- HL_ACTIVATION_LINEAR = 3,
- HL_ACTIVATION_END
-} hl_activation_mode_t;
-
-/**
- * @brief Transpose type.
- */
-typedef enum {
- HPPL_OP_N = 0, /* non transpose */
- HPPL_OP_T = 1, /* transpose */
- HPPL_OP_END
-} hl_trans_op_t;
-
-/**
- * @brief Lstm value.
- *
- * @param gateValue input value.
- * @param prevStateValue previous state value.
- * @param stateValue state value.
- * @param stateActiveValue state active value.
- * @param outputValue output value.
- */
-typedef struct {
- real *gateValue;
- real *prevStateValue;
- real *stateValue;
- real *stateActiveValue;
- real *outputValue;
- real *checkIg;
- real *checkFg;
- real *checkOg;
-} hl_lstm_value;
-
-/**
- * @brief Lstm gradient.
- *
- * @param gateGrad input gradient.
- * @param prevStateGrad previous state gradient.
- * @param stateGrad state gradient.
- * @param stateActiveGrad state active gradient.
- * @param outputGrad output gradient.
- */
-typedef struct {
- real *gateGrad;
- real *prevStateGrad;
- real *stateGrad;
- real *stateActiveGrad;
- real *outputGrad;
- real *checkIgGrad;
- real *checkFgGrad;
- real *checkOgGrad;
-} hl_lstm_grad;
-
-/**
- * @brief Gru value.
- *
- * @param gateWeight gate weight (updateGate + resetGate).
- * @param stateWeight frame state weight.
- * @param gateValue gate value results.
- * @param resetOutputValue resetOutput value.
- * @param outputValue output value.
- * @param prevOutValue previous output value.
- *
- */
-typedef struct {
- real *gateWeight;
- real *stateWeight;
- real *gateValue;
- real *resetOutputValue;
- real *outputValue;
- real *prevOutValue;
-} hl_gru_value;
-
-/**
- * @brief Gru gradient.
- *
- * @param gateWeightGrad gate weight gradient.
- * @param stateWeightGrad frame state weight gradient.
- * @param gateGrad gate gradient results.
- * @param resetOutputGrad resetOutput gradient.
- * @param outputGrad output gradient.
- * @param prevOutGrad previous output gradient.
- */
-typedef struct {
- real *gateWeightGrad;
- real *stateWeightGrad;
- real *gateGrad;
- real *resetOutputGrad;
- real *outputGrad;
- real *prevOutGrad;
-} hl_gru_grad;
-
-/**
- * @brief Sparse matrix value type.
- */
-typedef enum {
- HL_NO_VALUE = 0, /* matrix values only 0 or 1 */
- HL_FLOAT_VALUE = 1,
- HL_VALUE_END
-} hl_matrix_value_t;
-
-/**
- * @brief HPPL matrix format.
- */
-typedef enum {
- HL_SPARSE_CSR = 0,
- HL_SPARSE_CSC = 1,
- HL_SPARSE_END
-} hl_matrix_format_t;
-
-typedef struct _hl_matrix_s *hl_matrix_s;
-
-/**
- * @brief HPPL sparse matrix.
- *
- * @param matrix sparse matrix.
- * @param format matrix format.
- * @param type the type of matrix values.
- * @param rows matrix rows.
- * @param cols matrix columns.
- * @param nnz nonzero values of sparse matrix.
- */
-typedef struct {
- hl_matrix_s matrix;
- hl_matrix_format_t format;
- hl_matrix_value_t type;
- int rows;
- int cols;
- size_t nnz;
-} _hl_sparse_matrix_s, *hl_sparse_matrix_s;
-
-#ifdef __NVCC__
-
-#include <cuda_runtime.h>
-#include "paddle/cuda/include/hl_cuda.h"
-#include "paddle/utils/Logging.h"
-
-extern __thread bool g_sync_flag;
-extern __thread cudaStream_t default_stream;
-#define STREAM_DEFAULT default_stream
-
-/**
- * @brief Check cuda kernel execution.
- * @param msg error string
- */
-#define CHECK_SYNC(msg) \
- if (true == g_sync_flag) { \
- hl_stream_synchronize(HPPL_STREAM_DEFAULT); \
- cudaError_t err = (cudaError_t)hl_get_device_last_error(); \
- CHECK_EQ(cudaSuccess, err) \
- << "[" << msg << "] " \
- << "CUDA error: " << hl_get_device_error_string((size_t)err); \
- }
-
-// __shfl has been deprecated as of CUDA 9.0.
-#if CUDA_VERSION < 9000
-template <typename T>
-__forceinline__ __device__ T __shfl_down_sync(unsigned, T val, int delta) {
- return __shfl_down(val, delta);
-}
-
-template <typename T>
-__forceinline__ __device__ T
-__shfl_sync(unsigned, T val, int src_line, int width) {
- return __shfl(val, src_line, width);
-}
-
-#define CREATE_SHFL_MASK(mask, predicate) mask = 0u;
-#else
-#define FULL_WARP_MASK 0xFFFFFFFF
-#define CREATE_SHFL_MASK(mask, predicate) \
- mask = __ballot_sync(FULL_WARP_MASK, (predicate))
-#endif
-
-#endif // __NVCC__
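Beyond the type and struct definitions, hl_base.h supplied the CUDA-version compatibility shims for warp shuffles: on toolkits older than 9.0 the *_sync intrinsics are emulated with the deprecated __shfl variants and CREATE_SHFL_MASK expands to a zero mask, while on 9.0+ it builds a real ballot mask. A short sketch of kernel code written against those shims, portable across both toolkits (warp_reduce_sum is illustrative, not from the tree):

  // Sums val across a warp: compiles unchanged on CUDA 8 (fallback shims
  // above) and on CUDA 9+ (native *_sync intrinsics).
  __device__ real warp_reduce_sum(real val, bool lane_active) {
    unsigned mask = 0u;
    CREATE_SHFL_MASK(mask, lane_active);  // zero mask before CUDA 9
    for (int offset = 16; offset > 0; offset >>= 1)
      val += __shfl_down_sync(mask, val, offset);
    return val;
  }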
diff --git a/paddle/cuda/include/hl_gpu_gru.cuh b/paddle/cuda/include/hl_gpu_gru.cuh
deleted file mode 100644
index 9fcad2c3bc2fa255e3d7cd3e7940a32fd286751b..0000000000000000000000000000000000000000
--- a/paddle/cuda/include/hl_gpu_gru.cuh
+++ /dev/null
@@ -1,393 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-
-#ifndef HL_GPU_GRU_CUH_
-#define HL_GPU_GRU_CUH_
-
-#ifdef __NVCC__
-
-#include "paddle/utils/Logging.h"
-
-/*
- * threads(framePerBlock, batchPerBlock)
- * grid(frameBlocks, batchBlocks)
- */
-template <class OpResetOutput, bool isBatch>
-__global__ void KeGruForwardResetOutput(OpResetOutput opResetOutput,
- real *gateValue,
- real *resetOutputValue,
- real *prevOutputValue,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_gate) {
- const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
- if (frameIdx >= frameSize) return;
-
- int batchIdx = 0;
- if (isBatch) {
- batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
- if (batchIdx >= batchSize) return;
- gateValue += batchIdx * 3 * frameSize;
- resetOutputValue += batchIdx * frameSize;
- }
-
- real rPrevOut = 0;
- real rValueResetOutput;
- real rValueUpdateGate = gateValue[frameIdx + frameSize * 0];
- real rValueResetGate = gateValue[frameIdx + frameSize * 1];
-
- if (prevOutputValue) {
- if (isBatch) prevOutputValue += batchIdx * frameSize;
- rPrevOut = prevOutputValue[frameIdx];
- }
-
- opResetOutput(rValueUpdateGate,
- rValueResetGate,
- rPrevOut,
- rValueResetOutput,
- hppl::gpu::forward[active_gate]);
-
- gateValue[frameIdx + frameSize * 0] = rValueUpdateGate;
- gateValue[frameIdx + frameSize * 1] = rValueResetGate;
- resetOutputValue[frameIdx] = rValueResetOutput;
-}
-
-/*
- * threads(framePerBlock, batchPerBlock)
- * grid(frameBlocks, batchBlocks)
- */
-template <class OpFinalOutput, bool isBatch>
-__global__ void KeGruForwardFinalOutput(OpFinalOutput opFinalOutput,
- real *gateValue,
- real *prevOutputValue,
- real *outputValue,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_node) {
- const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
- if (frameIdx >= frameSize) return;
- int batchIdx = 0;
- if (isBatch) {
- batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
- if (batchIdx >= batchSize) return;
- gateValue += batchIdx * 3 * frameSize;
- outputValue += batchIdx * frameSize;
- }
-
- real rOutput;
- real rPrevOut = 0;
- real rValueUpdateGate = gateValue[frameIdx + frameSize * 0];
- real rValueFrameState = gateValue[frameIdx + frameSize * 2];
-
- if (prevOutputValue) {
- if (isBatch) prevOutputValue += batchIdx * frameSize;
- rPrevOut = prevOutputValue[frameIdx];
- }
-
- opFinalOutput(rValueUpdateGate,
- rValueFrameState,
- rPrevOut,
- rOutput,
- hppl::gpu::forward[active_node]);
-
- gateValue[frameIdx + frameSize * 2] = rValueFrameState;
- outputValue[frameIdx] = rOutput;
-}
-
-template <class OpResetOutput, class OpFinalOutput>
-void hl_gpu_gru_forward(OpResetOutput opResetOutput,
- OpFinalOutput opFinalOutput,
- hl_gru_value value,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_node,
- hl_activation_mode_t active_gate) {
- dim3 threads;
- dim3 grid;
- if (batchSize == 1) {
- int framePerBlock = frameSize <= 1024 ? frameSize : 1024;
- int frameBlocks = (frameSize + 1024 - 1) / 1024;
- threads = dim3(framePerBlock, 1);
- grid = dim3(frameBlocks, 1);
- } else {
- threads = dim3(32, 32);
- grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
- }
-
- if (value.prevOutValue) {
- hl_matrix_mul(value.prevOutValue, HPPL_OP_N,
- value.gateWeight, HPPL_OP_N,
- value.gateValue,
- batchSize, 2*frameSize, frameSize,
- /*alpha = */ 1, /*beta = */ 1,
- frameSize, 2* frameSize, 3*frameSize);
- }
-
- if (batchSize == 1) {
- KeGruForwardResetOutput<OpResetOutput, /* isBatch= */ false>
- <<<grid, threads, 0, STREAM_DEFAULT>>>(opResetOutput,
- value.gateValue, value.resetOutputValue, value.prevOutValue,
- frameSize, batchSize, active_gate);
- } else {
- KeGruForwardResetOutput<OpResetOutput, /* isBatch= */ true>
- <<<grid, threads, 0, STREAM_DEFAULT>>>(opResetOutput,
- value.gateValue, value.resetOutputValue, value.prevOutValue,
- frameSize, batchSize, active_gate);
- }
-
- if (value.prevOutValue) {
- hl_matrix_mul(value.resetOutputValue, HPPL_OP_N,
- value.stateWeight, HPPL_OP_N,
- value.gateValue + 2*frameSize,
- batchSize, frameSize, frameSize,
- /*alpha = */ 1, /*beta = */ 1,
- frameSize, frameSize, 3*frameSize);
- }
-
- if (batchSize == 1) {
- KeGruForwardFinalOutput<OpFinalOutput, /* isBatch= */ false>
- <<<grid, threads, 0, STREAM_DEFAULT>>>(opFinalOutput,
- value.gateValue, value.prevOutValue, value.outputValue,
- frameSize, batchSize, active_node);
- } else {
- KeGruForwardFinalOutput<OpFinalOutput, /* isBatch= */ true>
- <<<grid, threads, 0, STREAM_DEFAULT>>>(opFinalOutput,
- value.gateValue, value.prevOutValue, value.outputValue,
- frameSize, batchSize, active_node);
- }
-
- CHECK_SYNC("hl_gpu_gru_forward failed");
-}
-
-/*
- * threads(framePerBlock, batchPerBlock)
- * grid(frameBlocks, batchBlocks)
- */
-template <class OpStateGrad, bool isBatch>
-__global__ void KeGruBackwardStateGrad(OpStateGrad opStateGrad,
- real *gateValue,
- real *gateGrad,
- real *prevOutValue,
- real *prevOutGrad,
- real *outputGrad,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_node) {
- const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
- if (frameIdx >= frameSize) return;
- int batchIdx = 0;
- if (isBatch) {
- batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
- if (batchIdx >= batchSize) return;
- gateValue += batchIdx * 3 * frameSize;
- gateGrad += batchIdx * 3 * frameSize;
- outputGrad += batchIdx * frameSize;
- }
-
- real rUpdateGateGrad;
- real rFrameStateGrad;
- real rPrevOutValue = 0;
- real rPrevOutGrad = 0;
- real rUpdateGateValue = gateValue[frameIdx + frameSize * 0];
- real rFrameStateValue = gateValue[frameIdx + frameSize * 2];
- real rOutGrad = outputGrad[frameIdx];
-
- if (prevOutValue && prevOutGrad) {
- if (isBatch) prevOutValue += batchIdx * frameSize;
- rPrevOutValue = prevOutValue[frameIdx];
-
- if (isBatch) prevOutGrad += batchIdx * frameSize;
- rPrevOutGrad = prevOutGrad[frameIdx];
- }
-
- opStateGrad(rUpdateGateValue,
- rUpdateGateGrad,
- rFrameStateValue,
- rFrameStateGrad,
- rPrevOutValue,
- rPrevOutGrad,
- rOutGrad,
- hppl::gpu::backward[active_node]);
-
- gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad;
- gateGrad[frameIdx + frameSize * 2] = rFrameStateGrad;
- if (prevOutGrad) {
- prevOutGrad[frameIdx] = rPrevOutGrad;
- }
-}
-
-/*
- * threads(framePerBlock, batchPerBlock)
- * grid(frameBlocks, batchBlocks)
- */
-template <class OpResetGrad, bool isBatch>
-__global__ void KeGruBackwardResetGrad(OpResetGrad opResetGrad,
- real *gateValue,
- real *gateGrad,
- real *prevOutValue,
- real *prevOutGrad,
- real *resetOutputGrad,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_gate) {
- const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
- if (frameIdx >= frameSize) return;
- int batchIdx = 0;
- if (isBatch) {
- batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
- if (batchIdx >= batchSize) return;
- gateValue += batchIdx * 3 * frameSize;
- gateGrad += batchIdx * 3 * frameSize;
- resetOutputGrad += batchIdx * frameSize;
- }
-
- real rResetGateGrad;
- real rPrevOutValue = 0;
- real rPrevOutGrad = 0;
- real rResetOutputGrad = 0;
- real rUpdateGateValue = gateValue[frameIdx + frameSize * 0];
- real rUpdateGateGrad = gateGrad[frameIdx + frameSize * 0];
- real rResetGateValue = gateValue[frameIdx + frameSize * 1];
-
- if (prevOutValue && prevOutGrad) {
- if (isBatch) prevOutValue += batchIdx * frameSize;
- if (isBatch) prevOutGrad += batchIdx * frameSize;
- rPrevOutValue = prevOutValue[frameIdx];
- rPrevOutGrad = prevOutGrad[frameIdx];
- rResetOutputGrad = resetOutputGrad[frameIdx];
- }
-
- opResetGrad(rUpdateGateValue,
- rUpdateGateGrad,
- rResetGateValue,
- rResetGateGrad,
- rPrevOutValue,
- rPrevOutGrad,
- rResetOutputGrad,
- hppl::gpu::backward[active_gate]);
-
- gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad;
- gateGrad[frameIdx + frameSize * 1] = rResetGateGrad;
- if (prevOutGrad) {
- prevOutGrad[frameIdx] = rPrevOutGrad;
- }
-}
-
-template <class OpStateGrad, class OpResetGrad>
-void hl_gpu_gru_backward(OpStateGrad opStateGrad,
- OpResetGrad opResetGrad,
- hl_gru_value value,
- hl_gru_grad grad,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_node,
- hl_activation_mode_t active_gate) {
- dim3 threads;
- dim3 grid;
- if (batchSize == 1) {
- int framePerBlock = frameSize <= 1024 ? frameSize : 1024;
- int frameBlocks = (frameSize + 1024 - 1) / 1024;
- threads = dim3(framePerBlock, 1);
- grid = dim3(frameBlocks, 1);
- } else {
- threads = dim3(32, 32);
- grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
- }
-
- if (batchSize == 1) {
- KeGruBackwardStateGrad<OpStateGrad, /* isBatch= */ false>
- <<<grid, threads, 0, STREAM_DEFAULT>>>(opStateGrad,
- value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad,
- grad.outputGrad, frameSize, batchSize, active_node);
- } else {
- KeGruBackwardStateGrad<OpStateGrad, /* isBatch= */ true>
- <<<grid, threads, 0, STREAM_DEFAULT>>>(opStateGrad,
- value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad,
- grad.outputGrad, frameSize, batchSize, active_node);
- }
-
- if (value.prevOutValue && grad.prevOutGrad) {
- hl_matrix_mul(grad.gateGrad + 2*frameSize, HPPL_OP_N,
- value.stateWeight, HPPL_OP_T,
- grad.resetOutputGrad,
- batchSize, frameSize, frameSize,
- /*alpha = */ 1, /*beta = */ 0,
- 3*frameSize, frameSize, frameSize);
- if (grad.stateWeightGrad) {
- hl_matrix_mul(value.resetOutputValue, HPPL_OP_T,
- grad.gateGrad + 2*frameSize, HPPL_OP_N,
- grad.stateWeightGrad,
- frameSize, frameSize, batchSize,
- /*alpha = */ 1, /*beta = */ 1,
- frameSize, 3*frameSize, frameSize);
- }
- }
-
- if (batchSize == 1) {
- KeGruBackwardResetGrad<OpResetGrad, /* isBatch= */ false>
- <<<grid, threads, 0, STREAM_DEFAULT>>>(opResetGrad,
- value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad,
- grad.resetOutputGrad, frameSize, batchSize, active_gate);
- } else {
- KeGruBackwardResetGrad<OpResetGrad, /* isBatch= */ true>
- <<<grid, threads, 0, STREAM_DEFAULT>>>(opResetGrad,
- value.gateValue, grad.gateGrad, value.prevOutValue, grad.prevOutGrad,
- grad.resetOutputGrad, frameSize, batchSize, active_gate);
- }
-
- if (grad.prevOutGrad && value.prevOutValue) {
- hl_matrix_mul(grad.gateGrad, HPPL_OP_N,
- value.gateWeight, HPPL_OP_T,
- grad.prevOutGrad,
- batchSize, frameSize, 2*frameSize,
- /*alpha = */ 1, /*beta = */ 1,
- 3*frameSize, 2*frameSize, frameSize);
- if (grad.gateWeightGrad) {
- hl_matrix_mul(value.prevOutValue, HPPL_OP_T,
- grad.gateGrad, HPPL_OP_N,
- grad.gateWeightGrad,
- frameSize, 2*frameSize, batchSize,
- /*alpha = */ 1, /*beta = */ 1,
- frameSize, 3*frameSize, 2*frameSize);
- }
- }
-
- CHECK_SYNC("hl_gpu_gru_backward failed");
-}
-
-#else
-
-template <class OpResetOutput, class OpFinalOutput>
-void hl_gpu_gru_forward(OpResetOutput opResetOutput,
- OpFinalOutput opFinalOutput,
- hl_gru_value value,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_node,
- hl_activation_mode_t active_gate) {}
-
-template <class OpStateGrad, class OpResetGrad>
-void hl_gpu_gru_backward(OpStateGrad opStateGrad,
- OpResetGrad opResetGrad,
- hl_gru_value value,
- hl_gru_grad grad,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_node,
- hl_activation_mode_t active_gate) {}
-
-#endif
-
-#endif /* HL_GPU_GRU_CUH_ */
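The deleted GRU header sizes every launch with one policy: for batchSize == 1 a 1-D launch over frames with blocks capped at 1024 threads, otherwise a 2-D 32x32 tile over frames x batches, with the isBatch template flag selecting the per-batch pointer offsets inside the kernels. A host-side sketch of just that dimension policy (gru_launch_dims is illustrative; dim3 assumes compilation as CUDA):

  struct GruLaunchDims { dim3 threads; dim3 grid; };

  GruLaunchDims gru_launch_dims(int frameSize, int batchSize) {
    GruLaunchDims d;
    if (batchSize == 1) {
      d.threads = dim3(frameSize <= 1024 ? frameSize : 1024, 1);  // block cap
      d.grid = dim3((frameSize + 1023) / 1024, 1);                // DIVUP
    } else {
      d.threads = dim3(32, 32);                                   // frame x batch
      d.grid = dim3((frameSize + 31) / 32, (batchSize + 31) / 32);
    }
    return d;
  }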
diff --git a/paddle/cuda/include/hl_gpu_lstm.cuh b/paddle/cuda/include/hl_gpu_lstm.cuh
deleted file mode 100644
index 92517a44d2353a42d905708fc9aa98727a13a9e9..0000000000000000000000000000000000000000
--- a/paddle/cuda/include/hl_gpu_lstm.cuh
+++ /dev/null
@@ -1,300 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-
-#ifndef HL_GPU_LSTM_CUH_
-#define HL_GPU_LSTM_CUH_
-
-#ifdef __NVCC__
-
-#include "paddle/utils/Logging.h"
-#include "hl_device_functions.cuh"
-
-/*
- * threads(framePerBlock, batchPerBlock)
- * grid(frameBlocks, batchBlocks)
- */
-template <class Op, bool isBatch>
-__global__ void KeLstmForward(Op op,
- hl_lstm_value value,
- int frameSize,
- int batchSize,
- hl_activation_mode_t active_node,
- hl_activation_mode_t active_gate,
- hl_activation_mode_t active_state) {
- const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
- if (frameIdx >= frameSize) return;
-
- int batchIdx = 0;
- if (isBatch) {
- batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
- if (batchIdx >= batchSize) return;
- value.gateValue += batchIdx * frameSize * 4;
- value.outputValue += batchIdx * frameSize;
- value.stateValue += batchIdx * frameSize;
- value.stateActiveValue += batchIdx * frameSize;
- }
-
- real rState;
- real rPrevState = 0;
- real rStateAtv;
- real rOut;
- real rValueIn;
- real rValueIg;
- real rValueFg;
- real rValueOg;
- real rCheckI = value.checkIg[frameIdx];
- real rCheckF = value.checkFg[frameIdx];
- real rCheckO = value.checkOg[frameIdx];
-
- rValueIn = value.gateValue[frameIdx];
- rValueIg = value.gateValue[frameIdx + frameSize];
- rValueFg = value.gateValue[frameIdx + frameSize * 2];
- rValueOg = value.gateValue[frameIdx + frameSize * 3];
-
- if (value.prevStateValue) {
- if (isBatch) value.prevStateValue += batchIdx * frameSize;
- rPrevState = value.prevStateValue[frameIdx];
- }
-
- op(rValueIn,
- rValueIg,
- rValueFg,
- rValueOg,
- rPrevState,
- rState,
- rStateAtv,
- rOut,
- rCheckI,
- rCheckF,
- rCheckO,
- hppl::gpu::forward[active_node],
- hppl::gpu::forward[active_gate],
- hppl::gpu::forward[active_state]);
-
- value.gateValue[frameIdx] = rValueIn;
- value.gateValue[frameIdx + frameSize] = rValueIg;
- value.gateValue[frameIdx + frameSize * 2] = rValueFg;
- value.gateValue[frameIdx + frameSize * 3] = rValueOg;
-
- value.stateValue[frameIdx] = rState;
- value.stateActiveValue[frameIdx] = rStateAtv;
- value.outputValue[frameIdx] = rOut;
-}
-
-/*
- * threads(framePerBlock, batchPerBlock)
- * grid(frameBlocks, batchBlocks)
- */
-template <class Op, bool isBatch>