diff --git a/.clang-format b/.clang-format
index 6bbd46d0ff956517991d4faad3f2c026487f412b..9ba433b17362424973626470d930356c2173dd84 100644
--- a/.clang-format
+++ b/.clang-format
@@ -13,8 +13,6 @@
# The document of clang-format is
# http://clang.llvm.org/docs/ClangFormat.html
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
---
Language: Cpp
BasedOnStyle: Google
@@ -22,8 +20,9 @@ IndentWidth: 2
TabWidth: 2
ContinuationIndentWidth: 4
AccessModifierOffset: -2 # The private/protected/public has no indent in class
-PointerAlignment: Left # int* p/int& p, not int *p/int &p
Standard: Cpp11
AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
...
diff --git a/.gitignore b/.gitignore
index 65ba217de37c82287829eef105066aba86d69651..ee8489c1d71bd050b9a1d9358a664d2294165292 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@ build/
.vscode
.idea
.project
+.cproject
.pydevproject
+Makefile
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90c25e435083d78ad4c123999a588aaf9092f719
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+- repo: https://github.com/Lucas-C/pre-commit-hooks.git
+ sha: c25201a00e6b0514370501050cf2a8538ac12270
+ hooks:
+ - id: remove-crlf
+- repo: https://github.com/reyoung/mirrors-yapf.git
+ sha: v0.13.2
+ hooks:
+ - id: yapf
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
+ hooks:
+ - id: check-added-large-files
+ - id: check-merge-conflict
+ - id: check-symlinks
+ - id: detect-private-key
+ - id: end-of-file-fixer
+- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
+ sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
+ hooks:
+ - id: clang-formater
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000000000000000000000000000000000000..4741fb4f3bbc6681088cf9e960321e7b857a93a8
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
diff --git a/.travis.yml b/.travis.yml
index 7812ac02837895a32fcad36158814268e93a4da8..ffe3bc193b49eb3b3318cbbc7f1c3d86dc205c14 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,10 +38,19 @@ addons:
- curl
- lcov
- graphviz
+ - swig
before_install:
+ - |
+ if [ ${JOB} == "BUILD_AND_TEST" ]; then
+ if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
+ then
+ echo "Only markdown docs were updated, stopping build process."
+ exit
+ fi
+ fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- - pip install wheel protobuf sphinx breathe recommonmark
+ - pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy
script:
- paddle/scripts/travis/main.sh
notifications:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 282e3e199ef440092550deec906019bc44bc73bd..090ac9e188422099cc4270b87064b5590e7b620c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0)
-set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b3)
+set(PADDLE_MINOR_VERSION 9)
+set(PADDLE_PATCH_VERSION 0a0)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
@@ -95,11 +95,24 @@ if(NOT WITH_GPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
+ if(${CUDA_VERSION_MAJOR} GREATER 6)
+ if(COMPILER_SUPPORT_CXX11)
+ LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
+ endif()
+ endif()
+
# TODO(yuyang18): Change it to remove std=c++11 in cuda compile.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
+ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")
+
+ if(WITH_AVX)
+ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
+ else(WITH_AVX)
+ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
+ endif(WITH_AVX)
if(WITH_DSO)
set(CUDA_LIBRARIES "")
@@ -123,11 +136,11 @@ if(NOT WITH_TIMER)
endif(NOT WITH_TIMER)
if(WITH_AVX)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX)
if(WITH_PYTHON)
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
index b70d66dc259afbad0243895fbc2a57ad5c071488..6b2614b1011081a5e0e03a53fec2012bc7b81333 100644
--- a/ISSUE_TEMPLATE.md
+++ b/ISSUE_TEMPLATE.md
@@ -7,7 +7,7 @@ Before submitting the issue, look over the following criteria before handing you
- [ ] Was there a similar issue submitted or resolved before ? You could search issue in the github.
- [ ] Did you retrieve your issue from widespread search engines ?
- [ ] Is my description of the issue clear enough to reproduce this problem?
- * If some errors occured, we need details about `how do you run your code?`, `what system do you use?`, `Are you using GPU or not?`, etc.
+ * If some errors occurred, we need details about `how do you run your code?`, `what system do you use?`, `Are you using GPU or not?`, etc.
* If you use an recording [asciinema](https://asciinema.org/) to show what you are doing to make it happen, that's awesome! We could help you solve the problem more quickly.
- [ ] Is my description of the issue use the github markdown correctly?
* Please use the proper markdown syntaxes for styling all forms of writing, e.g, source code, error information, etc.
diff --git a/README.md b/README.md
index 66767d7ff8e4acf8ef246f7e0129a66e64486727..8a8e15841586ae6a01bb93e94f6074189f556f5a 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,13 @@
# PaddlePaddle
-[](https://travis-ci.org/baidu/Paddle)
-[](https://coveralls.io/github/baidu/Paddle?branch=develop)
-[](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-[](LICENSE)
+[](https://travis-ci.org/PaddlePaddle/Paddle)
+[](http://www.paddlepaddle.org/)
+[](http://www.paddlepaddle.org/cn/index.html)
+[](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
+[](https://github.com/PaddlePaddle/Paddle/releases)
+[](LICENSE)
+
Welcome to the PaddlePaddle GitHub.
@@ -14,7 +17,7 @@ developed by Baidu scientists and engineers for the purpose of applying deep
learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle.
-Please refer to our [release log](https://github.com/baidu/Paddle/releases) to track the latest feature of PaddlePaddle.
+Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
## Features
@@ -26,15 +29,15 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
connection.
- **Efficiency**
-
+
In order to unleash the power of heterogeneous computing resource,
optimization occurs at different levels of PaddlePaddle, including
computing, memory, architecture and communication. The following are some
examples:
- Optimized math operations through SSE/AVX intrinsics, BLAS libraries
- (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
- - Highly optimized recurrent networks which can handle **variable-length**
+ (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
+ - Highly optimized recurrent networks which can handle **variable-length**
sequence without padding.
- Optimized local and distributed training for models with high dimensional
sparse data.
@@ -57,41 +60,39 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
## Installation
Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
-pre-built packages (**docker image**, **deb package**) or
+pre-built packages (**docker image**, **deb package**) or
directly build on **Linux** and **Mac OS X** from the source code.
-
+
## Documentation
Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en)
You can follow the quick start tutorial to learn how use PaddlePaddle
step-by-step.
-
+
- [Example and Demo](http://paddlepaddle.org/doc/demo/)
We provide five demos, including: image classification, sentiment analysis,
- sequence to sequence model, recommendation, semantic role labeling.
-
+ sequence to sequence model, recommendation, semantic role labeling.
+
- [Distributed Training](http://paddlepaddle.org/doc/cluster)
This system supports training deep learning models on multiple machines
with data parallelism.
-
+
- [Python API](http://paddlepaddle.org/doc/ui/)
PaddlePaddle supports using either Python interface or C++ to build your
system. We also use SWIG to wrap C++ source code to create a user friendly
interface for Python. You can also use SWIG to create interface for your
favorite programming language.
-
+
- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html)
We sincerely appreciate your interest and contributions. If you would like to
- contribute, please read the contribution guide.
+ contribute, please read the contribution guide.
- [Source Code Documents](http://paddlepaddle.org/doc/source/)
## Ask Questions
-Please join the [**gitter chat**](https://gitter.im/PaddlePaddle/Deep_Learning) or send email to
-**paddle-dev@baidu.com** to ask questions and talk about methods and models.
-Framework development discussions and
-bug reports are collected on [Issues](https://github.com/baidu/paddle/issues).
+
+You are welcome to submit questions and bug reports as [GitHub Issues](https://github.com/PaddlePaddle/Paddle/issues).
## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
diff --git a/cmake/FindAVX.cmake b/cmake/FindAVX.cmake
index f6103c6e667e8a8f6b8998d8eb467235fb49cb19..d380c996dfa95f0caa2b9cd9daa0ac9141e51fe0 100644
--- a/cmake/FindAVX.cmake
+++ b/cmake/FindAVX.cmake
@@ -3,36 +3,55 @@
INCLUDE(CheckCXXSourceRuns)
-SET(FIND_AVX_10)
-SET(FIND_AVX_20)
-SET(AVX_FLAGS)
-SET(AVX_FOUND)
-
-# Check AVX 2
-SET(CMAKE_REQUIRED_FLAGS)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- SET(CMAKE_REQUIRED_FLAGS "-mavx2")
-ELSEIF(MSVC AND NOT CMAKE_CL_64) # reserve for WINDOWS
- SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
+ set(MMX_FLAG "-mmmx")
+ set(SSE2_FLAG "-msse2")
+ set(SSE3_FLAG "-msse3")
+ SET(AVX_FLAG "-mavx")
+ SET(AVX2_FLAG "-mavx2")
+ELSEIF(MSVC)
+ set(MMX_FLAG "/arch:MMX")
+ set(SSE2_FLAG "/arch:SSE2")
+ set(SSE3_FLAG "/arch:SSE3")
+ SET(AVX_FLAG "/arch:AVX")
+ SET(AVX2_FLAG "/arch:AVX2")
ENDIF()
+# Check MMX
+set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
+#include <mmintrin.h>
int main()
{
- __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
- __m256i result = _mm256_abs_epi32 (a);
+ _mm_setzero_si64();
return 0;
-}" FIND_AVX_20)
+}" MMX_FOUND)
-# Check AVX
-SET(CMAKE_REQUIRED_FLAGS)
-IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- SET(CMAKE_REQUIRED_FLAGS "-mavx")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)
- SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
-endif()
+# Check SSE2
+set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <emmintrin.h>
+int main()
+{
+ _mm_setzero_si128();
+ return 0;
+}" SSE2_FOUND)
+# Check SSE3
+set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <pmmintrin.h>
+int main()
+{
+ __m128d a = _mm_set1_pd(6.28);
+ __m128d b = _mm_set1_pd(3.14);
+ __m128d result = _mm_addsub_pd(a, b);
+ result = _mm_movedup_pd(result);
+ return 0;
+}" SSE3_FOUND)
+
+# Check AVX
+set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
@@ -41,25 +60,17 @@ int main()
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
-}" FIND_AVX_10)
-
-IF(${FIND_AVX_20})
- IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
- ELSEIF(MSVC)
- SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
- ENDIF()
-ENDIF()
+}" AVX_FOUND)
-IF(${FIND_AVX_10})
- IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
- ELSEIF(MSVC)
- SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
- ENDIF()
-ENDIF()
+# Check AVX 2
+set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+ __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+ __m256i result = _mm256_abs_epi32 (a);
+ return 0;
+}" AVX2_FOUND)
-IF(${FIND_AVX_10})
- SET(AVX_FOUND TRUE)
- MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
-ENDIF()
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
diff --git a/cmake/util.cmake b/cmake/util.cmake
index 3f78cd08c390550790b7145c412de32351873e4e..a8282f07184c34f77d506ed7ef40206fbbd55b41 100644
--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@@ -188,14 +188,6 @@ macro(add_simple_unittest TARGET_NAME)
add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp)
endmacro()
-macro(add_paddle_culib TARGET_NAME)
- set(NVCC_FLAG ${CUDA_NVCC_FLAGS})
- set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--use_fast_math)
- cuda_add_library(${TARGET_NAME} STATIC ${ARGN})
- set(CUDA_NVCC_FLAGS ${NVCC_FLAG})
-endmacro()
-
-
# Creates C resources file from files in given resource file
function(create_resources res_file output)
# Create empty output file
diff --git a/demo/image_classification/.gitignore b/demo/image_classification/.gitignore
index 76961dd1436f859f85f75ff9ed7d3fefdec83dc4..6a05b8f6632db0977fceade8b48a89b9f7f6e6cc 100644
--- a/demo/image_classification/.gitignore
+++ b/demo/image_classification/.gitignore
@@ -5,3 +5,5 @@ plot.png
train.log
image_provider_copy_1.py
*pyc
+train.list
+test.list
diff --git a/demo/image_classification/data/download_cifar.sh b/demo/image_classification/data/download_cifar.sh
old mode 100644
new mode 100755
diff --git a/demo/image_classification/data/process_cifar.py b/demo/image_classification/data/process_cifar.py
index b766118eb00737c7a196ed85850b3cebd690b0d0..b235010e4ece377beffaaa1b9247a77d7a96b712 100644
--- a/demo/image_classification/data/process_cifar.py
+++ b/demo/image_classification/data/process_cifar.py
@@ -16,7 +16,6 @@ import numpy as np
import sys
import os
import PIL.Image as Image
-
"""
Usage: python process_cifar input_dir output_dir
"""
@@ -30,6 +29,7 @@ def mkdir_not_exist(path):
if not os.path.exists(path):
os.mkdir(path)
+
def create_dir_structure(output_dir):
"""
Create the directory structure for the directory.
@@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
mkdir_not_exist(os.path.join(output_dir, "train"))
mkdir_not_exist(os.path.join(output_dir, "test"))
-def convert_batch(batch_path, label_set, label_map,
- output_dir, data_split):
+
+def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
"""
Convert CIFAR batch to the structure of Paddle format.
batch_path: the batch to be converted.
@@ -67,11 +67,23 @@ if __name__ == '__main__':
output_dir = sys.argv[2]
num_batch = 5
create_dir_structure(output_dir)
- label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
- 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+ label_map = {
+ 0: "airplane",
+ 1: "automobile",
+ 2: "bird",
+ 3: "cat",
+ 4: "deer",
+ 5: "dog",
+ 6: "frog",
+ 7: "horse",
+ 8: "ship",
+ 9: "truck"
+ }
labels = {}
for i in range(1, num_batch + 1):
- convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
- label_map, output_dir, "train")
- convert_batch(os.path.join(input_dir, "test_batch"), {},
- label_map, output_dir, "test")
\ No newline at end of file
+ convert_batch(
+ os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
+ output_dir, "train")
+ convert_batch(
+ os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
+ "test")
diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py
index 9e2f8b8949b39b930680e6d84758133eed566881..28bf1bb02c1f08b2e8ec9acd38f0a8594b05ab66 100644
--- a/demo/image_classification/image_provider.py
+++ b/demo/image_classification/image_provider.py
@@ -46,36 +46,41 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.img_mean = image_util.load_meta(settings.meta_path,
settings.mean_img_size,
- settings.img_size,
- settings.color)
+ settings.img_size, settings.color)
settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = [
dense_vector(settings.img_raw_size), # image feature
- integer_value(settings.num_classes)] # labels
+ integer_value(settings.num_classes)
+ ] # labels
settings.logger.info('DataProvider Initialization finished')
-@provider(init_hook=hook)
-def processData(settings, file_name):
+@provider(init_hook=hook, min_pool_size=0)
+def processData(settings, file_list):
"""
The main function for loading data.
Load the batch, iterate all the images and labels in this batch.
- file_name: the batch file name.
+ file_list: the batch file list.
"""
- data = cPickle.load(io.open(file_name, 'rb'))
- indexes = list(range(len(data['images'])))
- if settings.is_train:
- random.shuffle(indexes)
- for i in indexes:
- if settings.use_jpeg == 1:
- img = image_util.decode_jpeg(data['images'][i])
- else:
- img = data['images'][i]
- img_feat = image_util.preprocess_img(img, settings.img_mean,
- settings.img_size, settings.is_train,
- settings.color)
- label = data['labels'][i]
- yield img_feat.tolist(), int(label)
+ with open(file_list, 'r') as fdata:
+ lines = [line.strip() for line in fdata]
+ random.shuffle(lines)
+ for file_name in lines:
+ with io.open(file_name.strip(), 'rb') as file:
+ data = cPickle.load(file)
+ indexes = list(range(len(data['images'])))
+ if settings.is_train:
+ random.shuffle(indexes)
+ for i in indexes:
+ if settings.use_jpeg == 1:
+ img = image_util.decode_jpeg(data['images'][i])
+ else:
+ img = data['images'][i]
+ img_feat = image_util.preprocess_img(
+ img, settings.img_mean, settings.img_size,
+ settings.is_train, settings.color)
+ label = data['labels'][i]
+ yield img_feat.astype('float32'), int(label)
diff --git a/demo/image_classification/image_util.py b/demo/image_classification/image_util.py
index c545d16aafbc741bce25f9469e7f67de5b88fa8c..b5c6431c06f77cef5c31ca844a8427eebaea2fce 100644
--- a/demo/image_classification/image_util.py
+++ b/demo/image_classification/image_util.py
@@ -16,17 +16,20 @@ import numpy as np
from PIL import Image
from cStringIO import StringIO
+
def resize_image(img, target_size):
"""
Resize an image so that the shorter edge has length target_size.
img: the input image to be resized.
target_size: the target resized image size.
"""
- percent = (target_size/float(min(img.size[0], img.size[1])))
- resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent))
+ percent = (target_size / float(min(img.size[0], img.size[1])))
+ resized_size = int(round(img.size[0] * percent)), int(
+ round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS)
return img
+
def flip(im):
"""
Return the flipped image.
@@ -38,6 +41,7 @@ def flip(im):
else:
return im[:, ::-1]
+
def crop_img(im, inner_size, color=True, test=True):
"""
Return cropped image.
@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
If True, crop the center of images.
"""
if color:
- height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2])
+ height, width = max(inner_size, im.shape[1]), max(inner_size,
+ im.shape[2])
padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2]
- padded_im[:, startY: endY, startX: endX] = im
+ padded_im[:, startY:endY, startX:endX] = im
else:
im = im.astype('float32')
- height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1])
+ height, width = max(inner_size, im.shape[0]), max(inner_size,
+ im.shape[1])
padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1]
- padded_im[startY: endY, startX: endX] = im
+ padded_im[startY:endY, startX:endX] = im
if test:
startY = (height - inner_size) / 2
startX = (width - inner_size) / 2
@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size
if color:
- pic = padded_im[:, startY: endY, startX: endX]
+ pic = padded_im[:, startY:endY, startX:endX]
else:
- pic = padded_im[startY: endY, startX: endX]
+ pic = padded_im[startY:endY, startX:endX]
if (not test) and (np.random.randint(2) == 0):
pic = flip(pic)
return pic
+
def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1))
return np_array
+
def preprocess_img(im, img_mean, crop_size, is_train, color=True):
"""
Does data augmentation for images.
@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
pic -= img_mean
return pic.flatten()
+
def load_meta(meta_path, mean_img_size, crop_size, color=True):
"""
Return the loaded meta file.
@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2
if color:
- assert(mean_img_size * mean_img_size * 3 == mean.shape[0])
+ assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size)
- mean = mean[:, border: border + crop_size,
- border: border + crop_size].astype('float32')
+ mean = mean[:, border:border + crop_size, border:border +
+ crop_size].astype('float32')
else:
- assert(mean_img_size * mean_img_size == mean.shape[0])
+ assert (mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size)
- mean = mean[border: border + crop_size,
- border: border + crop_size].astype('float32')
+ mean = mean[border:border + crop_size, border:border +
+ crop_size].astype('float32')
return mean
+
def load_image(img_path, is_color=True):
"""
Load image and return.
@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
img.load()
return img
+
def oversample(img, crop_dims):
"""
image : iterable of (H x W x K) ndarrays
@@ -152,50 +163,53 @@ def oversample(img, crop_dims):
for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1
- crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
- -crop_dims / 2.0,
- crop_dims / 2.0
- ])
+ crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
+ [-crop_dims / 2.0, crop_dims / 2.0])
crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops
- crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1],
- im_shape[-1]), dtype=np.float32)
+ crops = np.empty(
+ (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
+ dtype=np.float32)
ix = 0
for im in img:
for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1
- crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors
+ crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors
return crops
+
class ImageTransformer:
- def __init__(self, transpose = None,
- channel_swap = None, mean = None, is_color = True):
+ def __init__(self,
+ transpose=None,
+ channel_swap=None,
+ mean=None,
+ is_color=True):
self.transpose = transpose
self.channel_swap = None
self.mean = None
- self.is_color = is_color
+ self.is_color = is_color
- def set_transpose(self, order):
+ def set_transpose(self, order):
if self.is_color:
- assert 3 == len(order)
+ assert 3 == len(order)
self.transpose = order
- def set_channel_swap(self, order):
+ def set_channel_swap(self, order):
if self.is_color:
- assert 3 == len(order)
+ assert 3 == len(order)
self.channel_swap = order
def set_mean(self, mean):
# mean value, may be one value per channel
if mean.ndim == 1:
- mean = mean[:, np.newaxis, np.newaxis]
- else:
+ mean = mean[:, np.newaxis, np.newaxis]
+ else:
# elementwise mean
if self.is_color:
assert len(mean.shape) == 3
- self.mean = mean
+ self.mean = mean
def transformer(self, data):
if self.transpose is not None:
diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py
index 5d9e93265867389ca6d2aa26e48fcfa08561e6ae..6a47bd5851c99635dd7d3f1d5df67dd081ca4584 100755
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import os,sys
+import os, sys
import numpy as np
import logging
from PIL import Image
@@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
class ImageClassifier():
def __init__(self,
train_conf,
@@ -58,18 +60,19 @@ class ImageClassifier():
self.oversample = oversample
self.is_color = is_color
- self.transformer = image_util.ImageTransformer(is_color = is_color)
- self.transformer.set_transpose((2,0,1))
+ self.transformer = image_util.ImageTransformer(is_color=is_color)
+ self.transformer.set_transpose((2, 0, 1))
self.mean_file = mean_file
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
- self.transformer.set_mean(mean) # mean pixel
+ self.transformer.set_mean(mean) # mean pixel
gpu = 1 if use_gpu else 0
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
@@ -90,14 +93,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
- input = np.zeros((1, image.shape[0], image.shape[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
- input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
@@ -133,22 +136,24 @@ class ImageClassifier():
lab = np.argsort(-prob)
logging.info("Label of %s is: %d", image, lab[0])
+
if __name__ == '__main__':
- image_size=32
- crop_size=32
- multi_crop=True
- config="vgg_16_cifar.py"
- output_layer="__fc_layer_1__"
- mean_path="data/cifar-out/batches/batches.meta"
- model_path=sys.argv[1]
- image=sys.argv[2]
- use_gpu=bool(int(sys.argv[3]))
-
- obj = ImageClassifier(train_conf=config,
- model_dir=model_path,
- resize_dim=image_size,
- crop_dim=crop_size,
- mean_file=mean_path,
- use_gpu=use_gpu,
- oversample=multi_crop)
+ image_size = 32
+ crop_size = 32
+ multi_crop = True
+ config = "vgg_16_cifar.py"
+ output_layer = "__fc_layer_1__"
+ mean_path = "data/cifar-out/batches/batches.meta"
+ model_path = sys.argv[1]
+ image = sys.argv[2]
+ use_gpu = bool(int(sys.argv[3]))
+
+ obj = ImageClassifier(
+ train_conf=config,
+ model_dir=model_path,
+ resize_dim=image_size,
+ crop_dim=crop_size,
+ mean_file=mean_path,
+ use_gpu=use_gpu,
+ oversample=multi_crop)
obj.predict(image, output_layer)
diff --git a/demo/image_classification/preprocess.py b/demo/image_classification/preprocess.py
index 0286a5d7e9dc8d0f546b18b1ed846c9452cdbe4b..10b9c1691b5e51273c73a975545cd36f3822e901 100755
--- a/demo/image_classification/preprocess.py
+++ b/demo/image_classification/preprocess.py
@@ -19,22 +19,36 @@ from optparse import OptionParser
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
- parser.add_option("-i", "--input", action="store",
- dest="input", help="Input data directory.")
- parser.add_option("-s", "--size", action="store",
- dest="size", help="Processed image size.")
- parser.add_option("-c", "--color", action="store",
- dest="color", help="whether to use color images.")
+ parser.add_option(
+ "-i",
+ "--input",
+ action="store",
+ dest="input",
+ help="Input data directory.")
+ parser.add_option(
+ "-s",
+ "--size",
+ action="store",
+ dest="size",
+ help="Processed image size.")
+ parser.add_option(
+ "-c",
+ "--color",
+ action="store",
+ dest="color",
+ help="whether to use color images.")
return parser.parse_args()
+
if __name__ == '__main__':
- options, args = option_parser()
- data_dir = options.input
- processed_image_size = int(options.size)
- color = options.color == "1"
- data_creator = ImageClassificationDatasetCreater(data_dir,
- processed_image_size,
- color)
- data_creator.num_per_batch = 1000
- data_creator.overwrite = True
- data_creator.create_batches()
+ options, args = option_parser()
+ data_dir = options.input
+ processed_image_size = int(options.size)
+ color = options.color == "1"
+ data_creator = ImageClassificationDatasetCreater(
+ data_dir, processed_image_size, color)
+ data_creator.train_list_name = "train.txt"
+ data_creator.test_list_name = "test.txt"
+ data_creator.num_per_batch = 1000
+ data_creator.overwrite = True
+ data_creator.create_batches()
diff --git a/demo/image_classification/preprocess.sh b/demo/image_classification/preprocess.sh
index dfe3eb95d1ab8b2114fcf5e0f461ea0efb7cc1e5..e3e86ff10675c0622867af2eb0d26c87f4bc2db5 100755
--- a/demo/image_classification/preprocess.sh
+++ b/demo/image_classification/preprocess.sh
@@ -17,3 +17,6 @@ set -e
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
+
+echo "data/cifar-out/batches/train.txt" > train.list
+echo "data/cifar-out/batches/test.txt" > test.list
diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py
index e8b8af4bd313d0738aafab8da93fc510e40cc3d6..58ceff5fc2f46cac9997b6d8af2b0db0c43e0c75 100755
--- a/demo/image_classification/vgg_16_cifar.py
+++ b/demo/image_classification/vgg_16_cifar.py
@@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
- data_dir='data/cifar-out/batches/'
- meta_path=data_dir+'batches.meta'
-
- args = {'meta':meta_path,'mean_img_size': 32,
- 'img_size': 32,'num_classes': 10,
- 'use_jpeg': 1,'color': "color"}
-
- define_py_data_sources2(train_list=data_dir+"train.list",
- test_list=data_dir+'test.list',
- module='image_provider',
- obj='processData',
- args=args)
+ data_dir = 'data/cifar-out/batches/'
+ meta_path = data_dir + 'batches.meta'
+
+ args = {
+ 'meta': meta_path,
+ 'mean_img_size': 32,
+ 'img_size': 32,
+ 'num_classes': 10,
+ 'use_jpeg': 1,
+ 'color': "color"
+ }
+
+ define_py_data_sources2(
+ train_list="train.list",
+ test_list="train.list",
+ module='image_provider',
+ obj='processData',
+ args=args)
######################Algorithm Configuration #############
settings(
- batch_size = 128,
- learning_rate = 0.1 / 128.0,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * 128)
-)
+ batch_size=128,
+ learning_rate=0.1 / 128.0,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
-data_size=3*32*32
-label_size=10
-img = data_layer(name='image',
- size=data_size)
+data_size = 3 * 32 * 32
+label_size = 10
+img = data_layer(name='image', size=data_size)
# small_vgg is predefined in trainer_config_helpers.networks
-predict = small_vgg(input_image=img,
- num_channels=3,
- num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
diff --git a/demo/introduction/README.md b/demo/introduction/README.md
index bebf1d090d98691199ede55736dfe5b964a8fd42..0614a7afe645677ef0b65a17ea05f1dcfa45214f 100644
--- a/demo/introduction/README.md
+++ b/demo/introduction/README.md
@@ -1,4 +1,3 @@
This folder contains scripts used in PaddlePaddle introduction.
- use `bash train.sh` to train a simple linear regression model
- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
-
diff --git a/demo/introduction/dataprovider.py b/demo/introduction/dataprovider.py
index be8c0bc89156cf843d9b08276b52f92a4d8c9706..8515022e18dc6bbf055e6db3121568acf1df1c55 100644
--- a/demo/introduction/dataprovider.py
+++ b/demo/introduction/dataprovider.py
@@ -15,10 +15,10 @@
from paddle.trainer.PyDataProvider2 import *
import random
+
# define data types of input: 2 real numbers
-@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
+@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
- yield [x], [2*x+0.3]
-
+ yield [x], [2 * x + 0.3]
diff --git a/demo/introduction/evaluate_model.py b/demo/introduction/evaluate_model.py
index 8cfb843c42105757b0f63c4a00d034b47a37a0bb..ca4a1872731abde90e72cb167929b3d9e2e1ebf4 100755
--- a/demo/introduction/evaluate_model.py
+++ b/demo/introduction/evaluate_model.py
@@ -23,14 +23,17 @@ Usage:
import numpy as np
import os
+
def load(file_name):
with open(file_name, 'rb') as f:
- f.read(16) # skip header for float type.
+ f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
+
def main():
print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
- load('output/pass-00029/b'))
+ load('output/pass-00029/b'))
+
if __name__ == '__main__':
main()
diff --git a/demo/introduction/trainer_config.py b/demo/introduction/trainer_config.py
index 3e3df5583282a4335ddea7b1cb30a84052d0adca..7c838c1a8f5b3cb6ac732197c85cd7c728eb013f 100644
--- a/demo/introduction/trainer_config.py
+++ b/demo/introduction/trainer_config.py
@@ -16,9 +16,14 @@ from paddle.trainer_config_helpers import *
# 1. read data. Suppose you saved above python code as dataprovider.py
data_file = 'empty.list'
-with open(data_file, 'w') as f: f.writelines(' ')
-define_py_data_sources2(train_list=data_file, test_list=None,
- module='dataprovider', obj='process',args={})
+with open(data_file, 'w') as f:
+ f.writelines(' ')
+define_py_data_sources2(
+ train_list=data_file,
+ test_list=None,
+ module='dataprovider',
+ obj='process',
+ args={})
# 2. learning algorithm
settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
@@ -26,7 +31,11 @@ settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
# 3. Network configuration
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
-y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+y_predict = fc_layer(
+ input=x,
+ param_attr=ParamAttr(name='w'),
+ size=1,
+ act=LinearActivation(),
+ bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
outputs(cost)
-
diff --git a/demo/mnist/data/generate_list.py b/demo/mnist/data/generate_list.py
index 1b929048b4d82b5e9d80585b6d0180f2e92200ce..d880721f94c68bbbc1740f82872462efdb368fa2 100644
--- a/demo/mnist/data/generate_list.py
+++ b/demo/mnist/data/generate_list.py
@@ -13,9 +13,9 @@
# limitations under the License.
o = open("./" + "train.list", "w")
-o.write("./data/raw_data/train" +"\n")
+o.write("./data/raw_data/train" + "\n")
o.close()
o = open("./" + "test.list", "w")
-o.write("./data/raw_data/t10k" +"\n")
-o.close()
\ No newline at end of file
+o.write("./data/raw_data/t10k" + "\n")
+o.close()
diff --git a/demo/mnist/data/get_mnist_data.sh b/demo/mnist/data/get_mnist_data.sh
index 9099b5ab6fb85d86d346a7ad819538fbd013c6ff..5a2e34026d4fe7f8315d4f5453bec7c4ee4f6885 100755
--- a/demo/mnist/data/get_mnist_data.sh
+++ b/demo/mnist/data/get_mnist_data.sh
@@ -19,4 +19,3 @@ done
cd $DIR
rm -f *.list
python generate_list.py
-
diff --git a/demo/mnist/mnist_provider.py b/demo/mnist/mnist_provider.py
index 32af29730a7365df1a98fe54a2edf8850ee93e8d..6df4676da3bdc2e6949cc911fa3720cb51ddc568 100644
--- a/demo/mnist/mnist_provider.py
+++ b/demo/mnist/mnist_provider.py
@@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import *
# Define a py data provider
-@provider(input_types={
- 'pixel': dense_vector(28 * 28),
- 'label': integer_value(10)
-})
+@provider(
+ input_types={'pixel': dense_vector(28 * 28),
+ 'label': integer_value(10)})
def process(settings, filename): # settings is not used currently.
imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte"
diff --git a/demo/mnist/vgg_16_mnist.py b/demo/mnist/vgg_16_mnist.py
index 45a45bb061aa781231a944bb82ebfbc6b0dc9618..f9e89bc588abacd98a8f5fc82a00fae6bb2de10e 100644
--- a/demo/mnist/vgg_16_mnist.py
+++ b/demo/mnist/vgg_16_mnist.py
@@ -18,32 +18,29 @@ is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
-
if not is_predict:
- data_dir='./data/'
- define_py_data_sources2(train_list= data_dir + 'train.list',
- test_list= data_dir + 'test.list',
- module='mnist_provider',
- obj='process')
+ data_dir = './data/'
+ define_py_data_sources2(
+ train_list=data_dir + 'train.list',
+ test_list=data_dir + 'test.list',
+ module='mnist_provider',
+ obj='process')
######################Algorithm Configuration #############
settings(
- batch_size = 128,
- learning_rate = 0.1 / 128.0,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * 128)
-)
+ batch_size=128,
+ learning_rate=0.1 / 128.0,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
-data_size=1*28*28
-label_size=10
+data_size = 1 * 28 * 28
+label_size = 10
img = data_layer(name='pixel', size=data_size)
# small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img,
- num_channels=1,
- num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
diff --git a/demo/model_zoo/embedding/extract_para.py b/demo/model_zoo/embedding/extract_para.py
index 17067792fc38d0d25bd28dc35bfb1b88ad5020cd..47e06fae9caa9c3d9e0d6eb2e3f6633a776c5b1d 100755
--- a/demo/model_zoo/embedding/extract_para.py
+++ b/demo/model_zoo/embedding/extract_para.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python extract_para.py --preModel PREMODEL --preDict PREDICT \
@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser
import struct
+
def get_row_index(preDict, usrDict):
"""
Get the row positions for all words in user dictionary from pre-trained dictionary.
@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
pos.append(index[word])
return pos
-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
+
+def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
+ paraDim):
"""
Extract desired parameters from a pretrained embedding model based on user dictionary
"""
@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
print "extract parameters finish, total", len(rowIndex), "lines"
fi.close()
+
def main():
"""
Main entry for running paraconvert.py
@@ -78,19 +81,33 @@ def main():
"python %prog --preModel PREMODEL --preDict PREDICT" \
" --usrModel USRMODEL --usrDict USRDICT -d DIM"
parser = OptionParser(usage)
- parser.add_option("--preModel", action="store", dest="preModel",
- help="the name of pretrained embedding model")
- parser.add_option("--preDict", action="store", dest="preDict",
- help="the name of pretrained dictionary")
- parser.add_option("--usrModel", action="store", dest="usrModel",
- help="the name of output usr embedding model")
- parser.add_option("--usrDict", action="store", dest="usrDict",
- help="the name of user specified dictionary")
- parser.add_option("-d", action="store", dest="dim",
- help="dimension of parameter")
+ parser.add_option(
+ "--preModel",
+ action="store",
+ dest="preModel",
+ help="the name of pretrained embedding model")
+ parser.add_option(
+ "--preDict",
+ action="store",
+ dest="preDict",
+ help="the name of pretrained dictionary")
+ parser.add_option(
+ "--usrModel",
+ action="store",
+ dest="usrModel",
+ help="the name of output usr embedding model")
+ parser.add_option(
+ "--usrDict",
+ action="store",
+ dest="usrDict",
+ help="the name of user specified dictionary")
+ parser.add_option(
+ "-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args()
- extract_parameters_by_usrDict(options.preModel, options.preDict,
- options.usrModel, options.usrDict, int(options.dim))
+ extract_parameters_by_usrDict(options.preModel, options.preDict,
+ options.usrModel, options.usrDict,
+ int(options.dim))
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/embedding/paraconvert.py b/demo/model_zoo/embedding/paraconvert.py
index 523412303617a38035392e4bb99f8ce119be8ac8..54155eff8e26b16ff5303d8d279e81b4bf8a90f4 100755
--- a/demo/model_zoo/embedding/paraconvert.py
+++ b/demo/model_zoo/embedding/paraconvert.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser
import struct
+
def binary2text(input, output, paraDim):
"""
Convert a binary parameter file of embedding model to be a text file.
@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim):
fo.close()
print "binary2text finish, total", line, "lines"
+
def get_para_count(input):
"""
Compute the total number of embedding parameters in input text file.
input: the name of input text file
"""
- numRows = 1
+ numRows = 1
paraDim = 0
with open(input) as f:
line = f.readline()
@@ -90,6 +91,7 @@ def get_para_count(input):
numRows += 1
return numRows * paraDim
+
def text2binary(input, output, paddle_head=True):
"""
Convert a text parameter file of embedding model to be a binary file.
@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
fo.close()
print "text2binary finish, total", count, "lines"
+
def main():
"""
Main entry for running paraconvert.py
@@ -131,21 +134,26 @@ def main():
"python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
"python %prog --t2b -i INPUT -o OUTPUT"
parser = OptionParser(usage)
- parser.add_option("--b2t", action="store_true",
- help="convert parameter file of embedding model from binary to text")
- parser.add_option("--t2b", action="store_true",
- help="convert parameter file of embedding model from text to binary")
- parser.add_option("-i", action="store", dest="input",
- help="input parameter file name")
- parser.add_option("-o", action="store", dest="output",
- help="output parameter file name")
- parser.add_option("-d", action="store", dest="dim",
- help="dimension of parameter")
+ parser.add_option(
+ "--b2t",
+ action="store_true",
+ help="convert parameter file of embedding model from binary to text")
+ parser.add_option(
+ "--t2b",
+ action="store_true",
+ help="convert parameter file of embedding model from text to binary")
+ parser.add_option(
+ "-i", action="store", dest="input", help="input parameter file name")
+ parser.add_option(
+ "-o", action="store", dest="output", help="output parameter file name")
+ parser.add_option(
+ "-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args()
if options.b2t:
binary2text(options.input, options.output, options.dim)
if options.t2b:
text2binary(options.input, options.output)
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py
index 06d471722f8059804a59e6823bebccff85a8d542..7855126edcfec20de251e5bc08c08c7aab8f7a8e 100755
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
class ImageClassifier():
- def __init__(self, train_conf, model_dir=None,
- resize_dim=256, crop_dim=224,
+ def __init__(self,
+ train_conf,
+ model_dir=None,
+ resize_dim=256,
+ crop_dim=224,
use_gpu=True,
mean_file=None,
output_layer=None,
- oversample=False, is_color=True):
+ oversample=False,
+ is_color=True):
"""
train_conf: network configure.
model_dir: string, directory of model.
@@ -62,24 +68,25 @@ class ImageClassifier():
assert isinstance(self.output_layer, basestring)
self.output_layer = self.output_layer.split(",")
- self.transformer = image_util.ImageTransformer(is_color = is_color)
- self.transformer.set_transpose((2,0,1))
- self.transformer.set_channel_swap((2,1,0))
+ self.transformer = image_util.ImageTransformer(is_color=is_color)
+ self.transformer.set_transpose((2, 0, 1))
+ self.transformer.set_channel_swap((2, 1, 0))
self.mean_file = mean_file
if self.mean_file is not None:
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
- self.transformer.set_mean(mean) # mean pixel
+ self.transformer.set_mean(mean) # mean pixel
else:
# if you use three mean value, set like:
# this three mean value is calculated from ImageNet.
- self.transformer.set_mean(np.array([103.939,116.779,123.68]))
+ self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
@@ -105,14 +112,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
- input = np.zeros((1, image.shape[0], image.shape[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
- input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
@@ -172,7 +179,7 @@ class ImageClassifier():
logging.info("Label of %s is: %d", image, lab[0])
return results
- def extract(self, data_file, output_dir, batch_size = 10000):
+ def extract(self, data_file, output_dir, batch_size=10000):
"""
extract and save features of output layers, which are
specify in Outputs() in network configure.
@@ -197,7 +204,7 @@ class ImageClassifier():
image_feature[file_name] = feature
sample_num += 1
if sample_num == batch_size:
- batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+ batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num)
batch_num += 1
@@ -206,7 +213,7 @@ class ImageClassifier():
if idx % 1000 == 0:
logging.info('%d/%d, %s', idx, len(image_files), file_name)
if sample_num > 0:
- batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+ batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num)
logging.info('Done: make image feature batch')
@@ -215,38 +222,64 @@ class ImageClassifier():
of = open(file, 'wb')
cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
+
def option_parser():
"""
Main entry for predciting
"""
usage = "%prog -c config -i data_list -w model_dir [options]"
parser = OptionParser(usage="usage: %s" % usage)
- parser.add_option("-j", "--job",
- action="store", dest="job_type",
- help="job type: predict, extract\
+ parser.add_option(
+ "-j",
+ "--job",
+ action="store",
+ dest="job_type",
+ help="job type: predict, extract\
predict: predicting,\
extract: extract features")
- parser.add_option("-c", "--conf",
- action="store", dest="train_conf",
- help="network config")
- parser.add_option("-i", "--data",
- action="store", dest="data_file",
- help="image list")
- parser.add_option("-w", "--model",
- action="store", dest="model_path",
- default=None, help="model path")
- parser.add_option("-g", "--use_gpu", action="store",
- dest="use_gpu", default=True,
- help="Whether to use gpu mode.")
- parser.add_option("-o", "--output_dir",
- action="store", dest="output_dir",
- default="output", help="output path")
- parser.add_option("-m", "--mean", action="store",
- dest="mean", default=None,
- help="mean file.")
- parser.add_option("-p", "--multi_crop", action="store_true",
- dest="multi_crop", default=False,
- help="Wether to use multiple crops on image.")
+ parser.add_option(
+ "-c",
+ "--conf",
+ action="store",
+ dest="train_conf",
+ help="network config")
+ parser.add_option(
+ "-i", "--data", action="store", dest="data_file", help="image list")
+ parser.add_option(
+ "-w",
+ "--model",
+ action="store",
+ dest="model_path",
+ default=None,
+ help="model path")
+ parser.add_option(
+ "-g",
+ "--use_gpu",
+ action="store",
+ dest="use_gpu",
+ default=True,
+ help="Whether to use gpu mode.")
+ parser.add_option(
+ "-o",
+ "--output_dir",
+ action="store",
+ dest="output_dir",
+ default="output",
+ help="output path")
+ parser.add_option(
+ "-m",
+ "--mean",
+ action="store",
+ dest="mean",
+ default=None,
+ help="mean file.")
+ parser.add_option(
+ "-p",
+ "--multi_crop",
+ action="store_true",
+ dest="multi_crop",
+ default=False,
+ help="Wether to use multiple crops on image.")
parser.add_option("-l", "--output_layer", action="store",
dest="output_layer", default=None,
help="--job=extract, specify layers to extract "\
@@ -254,24 +287,26 @@ def option_parser():
"classification probability, output in resnet.py.")
return parser.parse_args()
+
def main():
"""
1. parse input arguments.
2. predicting or extract features according job type.
"""
options, args = option_parser()
- obj = ImageClassifier(options.train_conf,
- options.model_path,
- use_gpu=options.use_gpu,
- mean_file=options.mean,
- output_layer=options.output_layer,
- oversample=options.multi_crop)
+ obj = ImageClassifier(
+ options.train_conf,
+ options.model_path,
+ use_gpu=options.use_gpu,
+ mean_file=options.mean,
+ output_layer=options.output_layer,
+ oversample=options.multi_crop)
if options.job_type == "predict":
obj.predict(options.data_file)
elif options.job_type == "extract":
- obj.extract(options.data_file,
- options.output_dir)
+ obj.extract(options.data_file, options.output_dir)
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/resnet/example/__init__.py b/demo/model_zoo/resnet/example/__init__.py
index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644
--- a/demo/model_zoo/resnet/example/__init__.py
+++ b/demo/model_zoo/resnet/example/__init__.py
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
diff --git a/demo/model_zoo/resnet/example/image_list_provider.py b/demo/model_zoo/resnet/example/image_list_provider.py
index ee457e1fffc7ed8629dc6bde63a8047818c0ff9d..9e415f76a53326c5809b7a8c508701c519ab443b 100644
--- a/demo/model_zoo/resnet/example/image_list_provider.py
+++ b/demo/model_zoo/resnet/example/image_list_provider.py
@@ -16,8 +16,7 @@ from paddle.utils.image_util import *
from paddle.trainer.PyDataProvider2 import *
-def hook(settings, image_size, crop_size, color, file_list,
- is_train, **kwargs):
+def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
"""
Description: Init with a list of data file
file_list is the name list of input files.
@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list,
sz = settings.crop_size * settings.crop_size
settings.img_mean = np.zeros(sz * 3, dtype=np.single)
for idx, value in enumerate(settings.mean_value):
- settings.img_mean[idx * sz: (idx + 1) * sz] = value
+ settings.img_mean[idx * sz:(idx + 1) * sz] = value
settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
settings.crop_size)
@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list,
settings.input_types = [
dense_vector(settings.img_input_size), # image feature
- integer_value(1)] # labels
+ integer_value(1)
+ ] # labels
settings.logger.info('Image short side: %s', settings.img_size)
settings.logger.info('Crop size: %s', settings.crop_size)
@@ -97,9 +97,6 @@ def processData(settings, file_list):
# swap channel
if settings.is_swap_channel:
img = img[settings.swap_channel, :, :]
- img_feat = preprocess_img(img,
- settings.img_mean,
- settings.crop_size,
- settings.is_train,
- settings.color)
+ img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
+ settings.is_train, settings.color)
yield img_feat.tolist(), int(lab.strip())
diff --git a/demo/model_zoo/resnet/load_feature.py b/demo/model_zoo/resnet/load_feature.py
index ee4930b7a17f7f21ceeba8db253eed64416ebf10..b0948b75fd0ac9a3fa89070aed04d523ce286f4e 100644
--- a/demo/model_zoo/resnet/load_feature.py
+++ b/demo/model_zoo/resnet/load_feature.py
@@ -17,9 +17,11 @@ import sys
import cPickle
import logging
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
def load_feature_c(file):
"""
Load feature extracted by C++ interface.
@@ -30,14 +32,15 @@ def load_feature_c(file):
f = open(file, 'r')
for line in f:
sample = []
- for slot in line.strip().split(";"):
- fea = [float(val) for val in slot.strip().split()]
+ for slot in line.strip().split(";"):
+ fea = [float(val) for val in slot.strip().split()]
if fea:
sample.append(fea)
features.append(sample)
f.close()
return features
+
def load_feature_py(feature_dir):
"""
Load feature extracted by python interface.
@@ -54,6 +57,7 @@ def load_feature_py(feature_dir):
logging.info('Load feature file %s', file_name)
return features
+
if __name__ == '__main__':
- print load_feature_py(sys.argv[1])
+ print load_feature_py(sys.argv[1])
#print load_feature_c(sys.argv[1])
diff --git a/demo/model_zoo/resnet/resnet.py b/demo/model_zoo/resnet/resnet.py
index 483e308ac804e13ca249ef4e47e9e9b00770ce1b..015b74cd484596039b9fcf010576ca340d044db7 100644
--- a/demo/model_zoo/resnet/resnet.py
+++ b/demo/model_zoo/resnet/resnet.py
@@ -13,7 +13,6 @@
# limitations under the License.
from paddle.trainer_config_helpers import *
-
"""
paper: https://arxiv.org/abs/1512.03385
"""
@@ -28,15 +27,19 @@ if not is_predict and data_provider:
# mean.meta size : 3 x 224 x 224.
# If you use three mean value, set like:
# "mean_value:103.939,116.779,123.68;"
- args={
+ args = {
'mean_meta': "model/mean_meta_224/mean.meta",
- 'image_size': 224, 'crop_size': 224,
- 'color': True,'swap_channel:': [2, 1, 0]}
- define_py_data_sources2(train_list,
- 'example/test.list',
- module="example.image_list_provider",
- obj="processData",
- args=args)
+ 'image_size': 224,
+ 'crop_size': 224,
+ 'color': True,
+ 'swap_channel:': [2, 1, 0]
+ }
+ define_py_data_sources2(
+ train_list,
+ 'example/test.list',
+ module="example.image_list_provider",
+ obj="processData",
+ args=args)
batch_size = 1
learning_rate = 0.1 / batch_size
@@ -54,12 +57,16 @@ Settings(
learning_method='momentum',
learning_rate_decay_a=0.5,
learning_rate_decay_b=1200000 * 10,
- learning_rate_schedule="discexp",
-)
+ learning_rate_schedule="discexp", )
-def conv_bn_layer(name, input, filter_size, num_filters,
- stride, padding, channels=None,
+def conv_bn_layer(name,
+ input,
+ filter_size,
+ num_filters,
+ stride,
+ padding,
+ channels=None,
active_type=ReluActivation()):
"""
A wrapper for conv layer with batch normalization layers.
@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters,
conv layer has no activation.
"""
- tmp = img_conv_layer(name=name + "_conv",
- input=input,
- filter_size=filter_size,
- num_channels=channels,
- num_filters=num_filters,
- stride=stride,
- padding=padding,
- act=LinearActivation(),
- bias_attr=False)
- return batch_norm_layer(name=name + "_bn",
- input=tmp,
- act=active_type,
- use_global_stats=is_test)
+ tmp = img_conv_layer(
+ name=name + "_conv",
+ input=input,
+ filter_size=filter_size,
+ num_channels=channels,
+ num_filters=num_filters,
+ stride=stride,
+ padding=padding,
+ act=LinearActivation(),
+ bias_attr=False)
+ return batch_norm_layer(
+ name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
def bottleneck_block(name, input, num_filters1, num_filters2):
@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2):
Last conv_bn_layer has no activation.
Addto layer has activation of relu.
"""
- last_name = conv_bn_layer(name=name + '_branch2a',
- input=input,
- filter_size=1,
- num_filters=num_filters1,
- stride=1,
- padding=0)
- last_name = conv_bn_layer(name=name + '_branch2b',
- input=last_name,
- filter_size=3,
- num_filters=num_filters1,
- stride=1,
- padding=1)
- last_name = conv_bn_layer(name=name + '_branch2c',
- input=last_name,
- filter_size=1,
- num_filters=num_filters2,
- stride=1,
- padding=0,
- active_type=LinearActivation())
-
- return addto_layer(name=name + "_addto",
- input=[input, last_name],
- act=ReluActivation())
+ last_name = conv_bn_layer(
+ name=name + '_branch2a',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters1,
+ stride=1,
+ padding=0)
+ last_name = conv_bn_layer(
+ name=name + '_branch2b',
+ input=last_name,
+ filter_size=3,
+ num_filters=num_filters1,
+ stride=1,
+ padding=1)
+ last_name = conv_bn_layer(
+ name=name + '_branch2c',
+ input=last_name,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=1,
+ padding=0,
+ active_type=LinearActivation())
+
+ return addto_layer(
+ name=name + "_addto", input=[input, last_name], act=ReluActivation())
def mid_projection(name, input, num_filters1, num_filters2, stride=2):
@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2):
branch2x: bottleneck building block, shortcuts are identity.
"""
# stride = 2
- branch1 = conv_bn_layer(name=name + '_branch1',
- input=input,
- filter_size=1,
- num_filters=num_filters2,
- stride=stride,
- padding=0,
- active_type=LinearActivation())
-
- last_name = conv_bn_layer(name=name + '_branch2a',
- input=input,
- filter_size=1,
- num_filters=num_filters1,
- stride=stride,
- padding=0)
- last_name = conv_bn_layer(name=name + '_branch2b',
- input=last_name,
- filter_size=3,
- num_filters=num_filters1,
- stride=1,
- padding=1)
-
- last_name = conv_bn_layer(name=name + '_branch2c',
- input=last_name,
- filter_size=1,
- num_filters=num_filters2,
- stride=1,
- padding=0,
- active_type=LinearActivation())
-
- return addto_layer(name=name + "_addto",
- input=[branch1, last_name],
- act=ReluActivation())
+ branch1 = conv_bn_layer(
+ name=name + '_branch1',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=stride,
+ padding=0,
+ active_type=LinearActivation())
+
+ last_name = conv_bn_layer(
+ name=name + '_branch2a',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters1,
+ stride=stride,
+ padding=0)
+ last_name = conv_bn_layer(
+ name=name + '_branch2b',
+ input=last_name,
+ filter_size=3,
+ num_filters=num_filters1,
+ stride=1,
+ padding=1)
+
+ last_name = conv_bn_layer(
+ name=name + '_branch2c',
+ input=last_name,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=1,
+ padding=0,
+ active_type=LinearActivation())
+
+ return addto_layer(
+ name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
@@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
# For ImageNet
# conv1: 112x112
img = data_layer(name='input', size=224 * 224 * 3)
- tmp = conv_bn_layer("conv1", img,
- filter_size=7,
- channels=3,
- num_filters=64,
- stride=2,
- padding=3)
+ tmp = conv_bn_layer(
+ "conv1",
+ img,
+ filter_size=7,
+ channels=3,
+ num_filters=64,
+ stride=2,
+ padding=3)
tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
# conv2_x: 56x56
- tmp = mid_projection(name="res2_1",
- input=tmp,
- num_filters1=64,
- num_filters2=256,
- stride=1)
+ tmp = mid_projection(
+ name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
for i in xrange(2, res2_num + 1, 1):
- tmp = bottleneck_block(name="res2_" + str(i),
- input=tmp,
- num_filters1=64,
- num_filters2=256)
+ tmp = bottleneck_block(
+ name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
# conv3_x: 28x28
- tmp = mid_projection(name="res3_1",
- input=tmp,
- num_filters1=128,
- num_filters2=512)
+ tmp = mid_projection(
+ name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
for i in xrange(2, res3_num + 1, 1):
- tmp = bottleneck_block(name="res3_" + str(i),
- input=tmp, num_filters1=128,
- num_filters2=512)
+ tmp = bottleneck_block(
+ name="res3_" + str(i),
+ input=tmp,
+ num_filters1=128,
+ num_filters2=512)
# conv4_x: 14x14
- tmp = mid_projection(name="res4_1", input=tmp,
- num_filters1=256, num_filters2=1024)
+ tmp = mid_projection(
+ name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
for i in xrange(2, res4_num + 1, 1):
- tmp = bottleneck_block(name="res4_" + str(i),
- input=tmp,
- num_filters1=256,
- num_filters2=1024)
+ tmp = bottleneck_block(
+ name="res4_" + str(i),
+ input=tmp,
+ num_filters1=256,
+ num_filters2=1024)
# conv5_x: 7x7
- tmp = mid_projection(name="res5_1", input=tmp,
- num_filters1=512, num_filters2=2048)
+ tmp = mid_projection(
+ name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
for i in xrange(2, res5_num + 1, 1):
- tmp = bottleneck_block(name="res5_" + str(i),
- input=tmp, num_filters1=512,
- num_filters2=2048)
-
- tmp = img_pool_layer(name='avgpool',
- input=tmp,
- pool_size=7,
- stride=1,
- pool_type=AvgPooling())
-
- output = fc_layer(name='output',
- input=tmp,
- size=1000,
- act=SoftmaxActivation())
+ tmp = bottleneck_block(
+ name="res5_" + str(i),
+ input=tmp,
+ num_filters1=512,
+ num_filters2=2048)
+
+ tmp = img_pool_layer(
+ name='avgpool',
+ input=tmp,
+ pool_size=7,
+ stride=1,
+ pool_type=AvgPooling())
+
+ output = fc_layer(
+ name='output', input=tmp, size=1000, act=SoftmaxActivation())
if not is_predict:
- classification_cost(input=output, label=data_layer(name='label',
- size=1))
+ classification_cost(
+ input=output, label=data_layer(
+ name='label', size=1))
def res_net_50():
diff --git a/demo/quick_start/api_train.py b/demo/quick_start/api_train.py
index 5ae19b8d26534a9521a6da7630796edce36780e7..66cbb856484d231613a0026be129a7bc3a7cfdf5 100644
--- a/demo/quick_start/api_train.py
+++ b/demo/quick_start/api_train.py
@@ -22,27 +22,32 @@ from py_paddle import DataProviderConverter
from paddle.trainer.PyDataProvider2 \
import integer_value, integer_value_sequence, sparse_binary_vector
+
def parse_arguments():
parser = argparse.ArgumentParser()
- parser.add_argument("--train_data",
- type=str, required=False, help="train data file")
+ parser.add_argument(
+ "--train_data", type=str, required=False, help="train data file")
parser.add_argument("--test_data", type=str, help="test data file")
- parser.add_argument("--config",
- type=str, required=True, help="config file name")
+ parser.add_argument(
+ "--config", type=str, required=True, help="config file name")
parser.add_argument("--dict_file", required=True, help="dictionary file")
- parser.add_argument("--seq",
- default=1, type=int,
- help="whether use sequence training")
- parser.add_argument("--use_gpu", default=0, type=int,
- help="whether use GPU for training")
- parser.add_argument("--trainer_count", default=1, type=int,
- help="Number of threads for training")
- parser.add_argument("--num_passes", default=5, type=int,
- help="Number of training passes")
+ parser.add_argument(
+        "--seq", default=1, type=int, help="whether to use sequence training")
+    parser.add_argument(
+        "--use_gpu",
+        default=0,
+        type=int,
+        help="whether to use GPU for training")
+ parser.add_argument(
+ "--trainer_count",
+ default=1,
+ type=int,
+ help="Number of threads for training")
+ parser.add_argument(
+ "--num_passes", default=5, type=int, help="Number of training passes")
return parser.parse_args()
+
UNK_IDX = 0
+
def load_data(file_name, word_dict):
with open(file_name, 'r') as f:
for line in f:
@@ -51,6 +56,7 @@ def load_data(file_name, word_dict):
word_slot = [word_dict.get(w, UNK_IDX) for w in words]
yield word_slot, int(label)
+
def load_dict(dict_file):
word_dict = dict()
with open(dict_file, 'r') as f:
@@ -59,6 +65,7 @@ def load_dict(dict_file):
word_dict[w] = i
return word_dict
+
def main():
options = parse_arguments()
api.initPaddle("--use_gpu=%s" % options.use_gpu,
@@ -86,9 +93,9 @@ def main():
# create a data converter which converts data to PaddlePaddle
# internal format
input_types = [
- integer_value_sequence(len(word_dict)) if options.seq
- else sparse_binary_vector(len(word_dict)),
- integer_value(2)]
+ integer_value_sequence(len(word_dict)) if options.seq else
+ sparse_binary_vector(len(word_dict)), integer_value(2)
+ ]
converter = DataProviderConverter(input_types)
batch_size = trainer_config.opt_config.batch_size
@@ -102,7 +109,7 @@ def main():
trainer.trainOneDataBatch(size, converter(batch))
trainer.finishTrainPass()
if test_dataset:
- trainer.startTestPeriod();
+ trainer.startTestPeriod()
for pos in xrange(0, len(test_dataset), batch_size):
batch = itertools.islice(test_dataset, pos, pos + batch_size)
size = min(batch_size, len(test_dataset) - pos)
@@ -110,5 +117,6 @@ def main():
trainer.finishTestPeriod()
trainer.finishTrain()
+
if __name__ == '__main__':
main()
diff --git a/demo/quick_start/dataprovider_bow.py b/demo/quick_start/dataprovider_bow.py
index f8cde189cf87d73aec05da4b34e064cddecff56b..a5156a2d40cc04c02e50d676045ae6da8937ba01 100644
--- a/demo/quick_start/dataprovider_bow.py
+++ b/demo/quick_start/dataprovider_bow.py
@@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import *
# id of the word not in dictionary
UNK_IDX = 0
+
# initializer is called by the framework during initialization.
# It allows the user to describe the data types and setup the
# necessary data structure for later use.
@@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs):
# The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative)
- integer_value(2)]
+ integer_value(2)
+ ]
+
# Delaring a data provider. It has an initializer 'data_initialzer'.
# It will cache the generated data of the first pass in memory, so that
@@ -69,9 +72,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
- settings.input_types = [
- sparse_binary_vector(len(dictionary))
- ]
+ settings.input_types = [sparse_binary_vector(len(dictionary))]
+
# Declaring a data provider for prediction. The difference with process
# is that label is not generated.
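For reference, the initializer/provider pattern these comments describe looks
roughly like the following minimal sketch, assembled only from pieces visible
in the hunks above (the "label<TAB>text" line format and the local names are
illustrative assumptions, not part of this patch):

from paddle.trainer.PyDataProvider2 import *

UNK_IDX = 0  # id used for words that are not in the dictionary


def initializer(settings, dictionary, **kwargs):
    # The init hook stores the dictionary and declares the input types:
    # a sparse bag-of-words vector plus a two-class integer label.
    settings.word_dict = dictionary
    settings.input_types = [
        sparse_binary_vector(len(dictionary)),
        integer_value(2)
    ]


@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
    # Data generated in the first pass is cached in memory, as described
    # in the comment above; each line is assumed to be "label<TAB>text".
    with open(file_name, 'r') as f:
        for line in f:
            label, comment = line.strip().split('\t')
            words = comment.split()
            word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words]
            yield word_vector, int(label)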
diff --git a/demo/quick_start/dataprovider_emb.py b/demo/quick_start/dataprovider_emb.py
index f5632d5f3f8bd8bb83b12198e7450b239eb1f7f6..286f3f5c82081f1a6e02a26023969790792a78a3 100755
--- a/demo/quick_start/dataprovider_emb.py
+++ b/demo/quick_start/dataprovider_emb.py
@@ -24,7 +24,8 @@ def initializer(settings, dictionary, **kwargs):
# The value of the integers range from 0 to len(dictrionary)-1
integer_value_sequence(len(dictionary)),
# Define the second input for label id
- integer_value(2)]
+ integer_value(2)
+ ]
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
@@ -40,7 +41,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
- integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE)
+ integer_value(
+ len(dictionary), seq_type=SequenceType.SEQUENCE)
]
diff --git a/demo/quick_start/preprocess.py b/demo/quick_start/preprocess.py
index 69fdbe44b5245bc2855847a1507e6eaed517eb96..d87fad632a7429f7d9682badabe4c72ca127354f 100755
--- a/demo/quick_start/preprocess.py
+++ b/demo/quick_start/preprocess.py
@@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
1. (remove HTML before or not)tokensizing
2. pos sample : rating score 5; neg sample: rating score 1-2.
@@ -35,7 +34,8 @@ import multiprocessing
batch_size = 5000
word_count = {}
-num_tokenize = max(1, multiprocessing.cpu_count() - 2) # parse + tokenize + save
+num_tokenize = max(1,
+ multiprocessing.cpu_count() - 2) # parse + tokenize + save
max_queue_size = 8
parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)
diff --git a/demo/quick_start/preprocess.sh b/demo/quick_start/preprocess.sh
index fe2acbbd74898fa3d12ddee3271658043c43e32e..c9190e2dd2ef754bf3c7287006322b52493dc3a0 100755
--- a/demo/quick_start/preprocess.sh
+++ b/demo/quick_start/preprocess.sh
@@ -21,14 +21,21 @@
set -e
export LC_ALL=C
+UNAME_STR=`uname`
+
+if [ ${UNAME_STR} == 'Linux' ]; then
+ SHUF_PROG='shuf'
+else
+ SHUF_PROG='gshuf'
+fi
mkdir -p data/tmp
python preprocess.py -i data/reviews_Electronics_5.json.gz
# uniq and shuffle
cd data/tmp
echo 'uniq and shuffle...'
-cat pos_*|sort|uniq|shuf> pos.shuffed
-cat neg_*|sort|uniq|shuf> neg.shuffed
+cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
+cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
min_len=`sed -n '$=' neg.shuffed`
test_num=$((min_len/10))
@@ -42,8 +49,8 @@ head -n$train_num neg.shuffed >train.neg
tail -n$test_num pos.shuffed >test.pos
tail -n$test_num neg.shuffed >test.neg
-cat train.pos train.neg|shuf>../train.txt
-cat test.pos test.neg|shuf>../test.txt
+cat train.pos train.neg | ${SHUF_PROG} >../train.txt
+cat test.pos test.neg | ${SHUF_PROG} >../test.txt
cd -
echo 'data/train.txt' > data/train.list
diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh
index 49806292a4ec5bd4194ccb6f6a638b6b2b4f37ed..b3c471608c3248bfc714d5e44dd927f25dd23ea0 100755
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -20,6 +20,7 @@ cfg=trainer_config.lr.py
#cfg=trainer_config.lstm.py
#cfg=trainer_config.bidi-lstm.py
#cfg=trainer_config.db-lstm.py
+#cfg=trainer_config.resnet-lstm.py
paddle train \
--config=$cfg \
--save_dir=./output \
diff --git a/demo/quick_start/trainer_config.bidi-lstm.py b/demo/quick_start/trainer_config.bidi-lstm.py
index 3be3d373422714c6b40e530cf112f9106b85d20b..51deaf31f94681b6b61f98f798cef14a65ec92cb 100644
--- a/demo/quick_start/trainer_config.bidi-lstm.py
+++ b/demo/quick_start/trainer_config.bidi-lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,19 +40,17 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
bi_lstm = bidirectional_lstm(input=emb, size=128)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-output = fc_layer(input=dropout, size=2,
- bias_attr=bias_attr,
- act=SoftmaxActivation())
+output = fc_layer(
+ input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
diff --git a/demo/quick_start/trainer_config.cnn.py b/demo/quick_start/trainer_config.cnn.py
index 253ec0aee26cf42226d79726a75aad6c61c77565..388efa75f903e0c7c803c99cd50d73a004133a67 100644
--- a/demo/quick_start/trainer_config.cnn.py
+++ b/demo/quick_start/trainer_config.cnn.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,8 +40,7 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128)
diff --git a/demo/quick_start/trainer_config.db-lstm.py b/demo/quick_start/trainer_config.db-lstm.py
index b35bdf5a61b4731cadb5eb992796c5e885efbd7e..02bc898d881efbd3bfaed95d45cd9e70ed046746 100644
--- a/demo/quick_start/trainer_config.db-lstm.py
+++ b/demo/quick_start/trainer_config.db-lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,10 +40,9 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
@@ -52,17 +52,18 @@ lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
input_layers = [hidden_0, lstm_0]
-for i in range(1,8):
+for i in range(1, 8):
fc = fc_layer(input=input_layers, size=128)
- lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1),
- reverse=(i % 2) == 1,)
+ lstm = lstmemory(
+ input=fc,
+ layer_attr=ExtraAttr(drop_rate=0.1),
+ reverse=(i % 2) == 1, )
input_layers = [fc, lstm]
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_last, size=2,
- bias_attr=bias_attr,
- act=SoftmaxActivation())
+output = fc_layer(
+ input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
diff --git a/demo/quick_start/trainer_config.emb.py b/demo/quick_start/trainer_config.emb.py
index 34dd7b96f2f142159472b98a09fd0092fac15e43..8fd18a7aac704e62b137845edb46cce5bc373285 100644
--- a/demo/quick_start/trainer_config.emb.py
+++ b/demo/quick_start/trainer_config.emb.py
@@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
- batch_size=batch_size,
- learning_rate=2e-3,
- learning_method=AdamOptimizer()
-)
+ batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer())
data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128)
diff --git a/demo/quick_start/trainer_config.lr.py b/demo/quick_start/trainer_config.lr.py
index c6059947f30b32975d72155150de095ade01aa9d..b9c9441baac28a8a8f6078065b75664819d6cd04 100644
--- a/demo/quick_start/trainer_config.lr.py
+++ b/demo/quick_start/trainer_config.lr.py
@@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict'
# We need to use different process for training and prediction.
# For training, the input data includes both word IDs and labels.
# For prediction, the input data only includs word Ids.
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_bow",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_bow",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -44,8 +45,7 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
# Define the data for text features. The size of the data layer is the number
# of words in the dictionary.
diff --git a/demo/quick_start/trainer_config.lstm.py b/demo/quick_start/trainer_config.lstm.py
index b412a9cbd914dc7abd70b93bbe250759552ee071..8821e02d9bd4a0d06b8afa99df8e0fac3e2fcefe 100644
--- a/demo/quick_start/trainer_config.lstm.py
+++ b/demo/quick_start/trainer_config.lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,17 +40,14 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
-
+ gradient_clipping_threshold=25)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
-lstm = simple_lstm(input=emb, size=128,
- lstm_cell_attr=ExtraAttr(drop_rate=0.25))
+lstm = simple_lstm(
+ input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25))
lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_max, size=2,
- act=SoftmaxActivation())
+output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
diff --git a/demo/quick_start/trainer_config.resnet-lstm.py b/demo/quick_start/trainer_config.resnet-lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..91e1581c386eb880d481b7352c4d21f3a5ef5c9a
--- /dev/null
+++ b/demo/quick_start/trainer_config.resnet-lstm.py
@@ -0,0 +1,94 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This configuration is a demonstration of how to implement the stacked LSTM
+with residual connections, i.e. an LSTM layer takes the sum of the hidden states
+and inputs of the previous LSTM layer instead of only the hidden states.
+This architecture is from:
+Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
+Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
+Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
+Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
+George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa,
+Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016.
+Google's Neural Machine Translation System: Bridging the Gap between Human and
+Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf
+Different from the architecture described in the paper, we use a stack of
+single-direction LSTM layers as the first layer instead of a bi-directional
+LSTM. Also, since this is demo code, we stack 4 layers instead of 8 to reduce
+computation time.
+"""
+
+from paddle.trainer_config_helpers import *
+
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+ for i, line in enumerate(f):
+ w = line.strip().split()[0]
+ word_dict[w] = i
+
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+ batch_size=batch_size,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25
+)
+
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
+
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+lstm = simple_lstm(input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+
+previous_input, previous_hidden_state = emb, lstm
+
+for i in range(3):
+ # The input to the current layer is the sum of the hidden state
+ # and input of the previous layer.
+ current_input = addto_layer(input=[previous_input, previous_hidden_state])
+ hidden_state = simple_lstm(input=current_input, size=128,
+ lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+ previous_input, previous_hidden_state = current_input, hidden_state
+
+lstm = previous_hidden_state
+
+lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+output = fc_layer(input=lstm_last, size=2,
+ bias_attr=bias_attr,
+ act=SoftmaxActivation())
+
+
+if is_predict:
+ maxid = maxid_layer(output)
+ outputs([maxid, output])
+else:
+ label = data_layer(name="label", size=2)
+ cls = classification_cost(input=output, label=label)
+ outputs(cls)
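The residual wiring that the docstring above describes can be read directly
off the loop in this config; as a minimal sketch (the layer count, size and
drop rate are just the demo values used above), each LSTM consumes the sum of
the previous layer's input and hidden state:

# x_0 = emb, h_0 = LSTM_0(x_0)
# x_i = x_{i-1} + h_{i-1},  h_i = LSTM_i(x_i)   for i = 1..3
previous_input, previous_hidden_state = emb, lstm
for i in range(3):
    current_input = addto_layer(input=[previous_input, previous_hidden_state])
    hidden_state = simple_lstm(
        input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
    previous_input, previous_hidden_state = current_input, hidden_state
# The topmost hidden state is then max-pooled over time and classified.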
diff --git a/demo/recommendation/common_utils.py b/demo/recommendation/common_utils.py
index a5f00b3ef9ca00b42b8e31ddd6cfeca3580152b0..613e36b496e47edbc0eabd8f15a0abdcb50f6424 100755
--- a/demo/recommendation/common_utils.py
+++ b/demo/recommendation/common_utils.py
@@ -21,8 +21,9 @@ def meta_to_header(meta, name):
yield integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
- yield integer_value(len(each_meta['dict']),
- seq_type=SequenceType.SEQUENCE if is_seq
- else SequenceType.NO_SEQUENCE)
+ yield integer_value(
+ len(each_meta['dict']),
+ seq_type=SequenceType.SEQUENCE
+ if is_seq else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense':
yield dense_vector(len(each_meta['dict']))
diff --git a/demo/recommendation/data/config.json b/demo/recommendation/data/config.json
index 71a9dd7be6bd10e177dfb443a94b719c3816d833..f26e74ce47bb7843a571e6033f051c046b31f054 100644
--- a/demo/recommendation/data/config.json
+++ b/demo/recommendation/data/config.json
@@ -14,4 +14,3 @@
"fields": ["id", "title", "genres"]
}
}
-
diff --git a/demo/recommendation/data/config_generator.py b/demo/recommendation/data/config_generator.py
index 29f38082693ad890ac4dfa302399663af8dbd27b..fa605458300f81da6772d88cfbad413e4dcf97fe 100644
--- a/demo/recommendation/data/config_generator.py
+++ b/demo/recommendation/data/config_generator.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
config_generator.py
@@ -29,10 +28,7 @@ import json
import docopt
import copy
-DEFAULT_FILE = {
- "type": "split",
- "delimiter": ","
-}
+DEFAULT_FILE = {"type": "split", "delimiter": ","}
DEFAULT_FIELD = {
"id": {
@@ -107,19 +103,16 @@ def main(filename, fmt):
field = copy.deepcopy(DEFAULT_FIELD[field_key])
field['pos'] = pos
fields.append(field)
- obj[k] = {
- "file": file_dict,
- "fields": fields
- }
- meta = {
- "meta": obj
- }
+ obj[k] = {"file": file_dict, "fields": fields}
+ meta = {"meta": obj}
# print meta
if fmt == 'json':
+
def formatter(x):
import json
return json.dumps(x, indent=2)
elif fmt == 'yaml':
+
def formatter(x):
import yaml
return yaml.safe_dump(x, default_flow_style=False)
diff --git a/demo/recommendation/data/meta_generator.py b/demo/recommendation/data/meta_generator.py
index 8d1a33d02aea112e51f1d43bedc06fdcee1186f5..593c863670d5eb5d684adf643ff745f3914b656b 100644
--- a/demo/recommendation/data/meta_generator.py
+++ b/demo/recommendation/data/meta_generator.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Preprocess Movielens dataset, to get movie/user object.
@@ -66,8 +65,8 @@ class SortedIDGenerator(object):
self.__key_set__.add(key)
def finish_scan(self, compare=None, key=None, reverse=False):
- self.__key_set__ = sorted(list(self.__key_set__), cmp=compare,
- key=key, reverse=reverse)
+ self.__key_set__ = sorted(
+ list(self.__key_set__), cmp=compare, key=key, reverse=reverse)
self.dict = dict()
for idx, each_key in enumerate(self.__key_set__):
self.dict[each_key] = idx
@@ -207,11 +206,10 @@ class EmbeddingFieldParser(object):
self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict(
self.seq_type == EmbeddingFieldParser.SEQUENCE)
elif config['dict']['type'] == 'split':
- self.dict = SplitEmbeddingDict(
- config['dict'].get('delimiter', ','))
+ self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ','))
elif config['dict']['type'] == 'whole_content':
- self.dict = EmbeddingFieldParser.WholeContentDict(
- config['dict']['sort'])
+ self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][
+ 'sort'])
else:
print config
assert False
@@ -333,8 +331,8 @@ class ContentExtractorFactory(object):
return PositionContentExtractor(config['pos'])
else:
extra_args = config['regex']
- return RegexPositionContentExtractor(pos=config['pos'],
- **extra_args)
+ return RegexPositionContentExtractor(
+ pos=config['pos'], **extra_args)
class MetaFile(object):
@@ -364,9 +362,10 @@ class MetaFile(object):
metas = map(lambda x: x.meta_field(), field_parsers)
# print metas
- key_index = filter(lambda x: x is not None, map(
- lambda (idx, meta): idx if 'is_key' in meta and meta['is_key']
- else None, enumerate(metas)))[0]
+ key_index = filter(
+ lambda x: x is not None,
+ map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None,
+ enumerate(metas)))[0]
key_map = []
for i in range(min(key_index, len(metas))):
@@ -374,12 +373,7 @@ class MetaFile(object):
for i in range(key_index + 1, len(metas)):
key_map.append(i)
- obj = {
- '__meta__': {
- 'raw_meta': metas,
- 'feature_map': key_map
- }
- }
+ obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}}
for each_block in reader.read():
idx = field_parsers[key_index].parse(each_block)
diff --git a/demo/recommendation/data/split.py b/demo/recommendation/data/split.py
index ff1f7fab7befdb5bdfa39fd0f1753e6804e82d8f..8dd0cbd32af6074439e98dac024c5fed76cd52b2 100644
--- a/demo/recommendation/data/split.py
+++ b/demo/recommendation/data/split.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Separate movielens 1m dataset to train/test file.
diff --git a/demo/recommendation/dataprovider.py b/demo/recommendation/dataprovider.py
index 454467f40b44bb526d143934c4a7350d41e54c0e..ff3932be03f1e4a1fc1d0bdb189ab7fe1fbbeca0 100755
--- a/demo/recommendation/dataprovider.py
+++ b/demo/recommendation/dataprovider.py
@@ -15,6 +15,7 @@
from paddle.trainer.PyDataProvider2 import *
import common_utils # parse
+
def hook(settings, meta, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store
@@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs):
settings.input_types = headers
settings.meta = meta
+
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
with open(filename, 'r') as f:
diff --git a/demo/recommendation/prediction.py b/demo/recommendation/prediction.py
index f8044a3195ec25bc2fa7c9079e4977f971059352..e2a202cfd1a476046d7e1d1896b87d72c4906ff2 100755
--- a/demo/recommendation/prediction.py
+++ b/demo/recommendation/prediction.py
@@ -28,7 +28,8 @@ if __name__ == '__main__':
model_path = sys.argv[1]
swig_paddle.initPaddle('--use_gpu=0')
conf = parse_config("trainer_config.py", "is_predict=1")
- network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine)
network.loadParameters(model_path)
with open('./data/meta.bin', 'rb') as f:
@@ -39,11 +40,12 @@ if __name__ == '__main__':
while True:
movie_id = int(raw_input("Input movie_id: "))
user_id = int(raw_input("Input user_id: "))
- movie_meta = meta['movie'][movie_id] # Query Data From Meta.
+ movie_meta = meta['movie'][movie_id] # Query Data From Meta.
user_meta = meta['user'][user_id]
data = [movie_id - 1]
data.extend(movie_meta)
data.append(user_id - 1)
data.extend(user_meta)
- print "Prediction Score is %.2f" % ((network.forwardTest(
- cvt.convert([data]))[0]['value'][0][0] + 5) / 2)
+ print "Prediction Score is %.2f" % (
+ (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5)
+ / 2)
diff --git a/demo/recommendation/trainer_config.py b/demo/recommendation/trainer_config.py
index 624c22ec969dc98808863ad53573b9633f1791ac..cec340b0b65a841029a1c0538d9881bb38f026ff 100755
--- a/demo/recommendation/trainer_config.py
+++ b/demo/recommendation/trainer_config.py
@@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f:
# load meta file
meta = pickle.load(f)
-settings(batch_size=1600, learning_rate=1e-3,
- learning_method=RMSPropOptimizer())
+settings(
+ batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer())
def construct_feature(name):
@@ -59,11 +59,10 @@ def construct_feature(name):
slot_name = each_meta.get('name', '%s_id' % name)
if type_name == 'id':
slot_dim = each_meta['max']
- embedding = embedding_layer(input=data_layer(slot_name,
- size=slot_dim),
- size=256)
- fusion.append(fc_layer(input=embedding,
- size=256))
+ embedding = embedding_layer(
+ input=data_layer(
+ slot_name, size=slot_dim), size=256)
+ fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
slot_dim = len(each_meta['dict'])
@@ -71,17 +70,14 @@ def construct_feature(name):
embedding = embedding_layer(input=din, size=256)
if is_seq:
fusion.append(
- text_conv_pool(input=embedding, context_len=5,
- hidden_size=256))
+ text_conv_pool(
+ input=embedding, context_len=5, hidden_size=256))
else:
- fusion.append(fc_layer(input=embedding,
- size=256))
+ fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'one_hot_dense':
slot_dim = len(each_meta['dict'])
- hidden = fc_layer(input=data_layer(slot_name, slot_dim),
- size=256)
- fusion.append(fc_layer(input=hidden,
- size=256))
+ hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256)
+ fusion.append(fc_layer(input=hidden, size=256))
return fc_layer(name="%s_fusion" % name, input=fusion, size=256)
@@ -90,10 +86,16 @@ movie_feature = construct_feature("movie")
user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict:
- outputs(regression_cost(input=similarity,
- label=data_layer('rating', size=1)))
-
- define_py_data_sources2('data/train.list', 'data/test.list', module='dataprovider',
- obj='process', args={'meta': meta})
+ outputs(
+ regression_cost(
+ input=similarity, label=data_layer(
+ 'rating', size=1)))
+
+ define_py_data_sources2(
+ 'data/train.list',
+ 'data/test.list',
+ module='dataprovider',
+ obj='process',
+ args={'meta': meta})
else:
outputs(similarity)
diff --git a/demo/semantic_role_labeling/data/extract_dict_feature.py b/demo/semantic_role_labeling/data/extract_dict_feature.py
index 2982e54c665b41400aab0a893ff3c76335404988..daca5f01cf2b3bd231bf530f17ec760272ce93e0 100644
--- a/demo/semantic_role_labeling/data/extract_dict_feature.py
+++ b/demo/semantic_role_labeling/data/extract_dict_feature.py
@@ -17,24 +17,15 @@ import os
from optparse import OptionParser
-def extract_dict_features(pair_file, feature_file, src_dict_file,
- tgt_dict_file):
- src_dict = set()
- tgt_dict = set()
-
- with open(pair_file) as fin, open(feature_file, 'w') as feature_out, open(
- src_dict_file, 'w') as src_dict_out, open(tgt_dict_file,
- 'w') as tgt_dict_out:
+def extract_dict_features(pair_file, feature_file):
+
+ with open(pair_file) as fin, open(feature_file, 'w') as feature_out:
for line in fin:
- sentence, labels = line.strip().split('\t')
+ sentence, predicate, labels = line.strip().split('\t')
sentence_list = sentence.split()
labels_list = labels.split()
- src_dict.update(sentence_list)
- tgt_dict.update(labels_list)
-
verb_index = labels_list.index('B-V')
- verb_feature = sentence_list[verb_index]
mark = [0] * len(labels_list)
if verb_index > 0:
@@ -42,47 +33,50 @@ def extract_dict_features(pair_file, feature_file, src_dict_file,
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
- ctx_n1_feature = ctx_n1
+
+ if verb_index > 1:
+ mark[verb_index - 2] = 1
+ ctx_n2 = sentence_list[verb_index - 2]
+ else:
+ ctx_n2 = 'bos'
mark[verb_index] = 1
- ctx_0_feature = sentence_list[verb_index]
+ ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 2:
mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
- ctx_p1_feature = ctx_p1
+
+ if verb_index < len(labels_list) - 3:
+ mark[verb_index + 2] = 1
+ ctx_p2 = sentence_list[verb_index + 2]
+ else:
+ ctx_p2 = 'eos'
+
feature_str = sentence + '\t' \
- + verb_feature + '\t' \
- + ctx_n1_feature + '\t' \
- + ctx_0_feature + '\t' \
- + ctx_p1_feature + '\t' \
+ + predicate + '\t' \
+ + ctx_n2 + '\t' \
+ + ctx_n1 + '\t' \
+ + ctx_0 + '\t' \
+ + ctx_p1 + '\t' \
+ + ctx_p2 + '\t' \
+ ' '.join([str(i) for i in mark]) + '\t' \
+ labels
feature_out.write(feature_str + '\n')
- src_dict_out.write('\n')
- src_dict_out.write('\n'.join(list(src_dict)))
-
- tgt_dict_out.write('\n'.join(list(tgt_dict)))
if __name__ == '__main__':
- usage = '-p pair_file -f feature_file -s source dictionary -t target dictionary '
+ usage = '-p pair_file -f feature_file'
parser = OptionParser(usage)
parser.add_option('-p', dest='pair_file', help='the pair file')
- parser.add_option(
- '-f', dest='feature_file', help='the file to store feature')
- parser.add_option(
- '-s', dest='src_dict', help='the file to store source dictionary')
- parser.add_option(
- '-t', dest='tgt_dict', help='the file to store target dictionary')
+ parser.add_option('-f', dest='feature_file', help='the feature file')
(options, args) = parser.parse_args()
- extract_dict_features(options.pair_file, options.feature_file,
- options.src_dict, options.tgt_dict)
+ extract_dict_features(options.pair_file, options.feature_file)
diff --git a/demo/semantic_role_labeling/data/extract_pairs.py b/demo/semantic_role_labeling/data/extract_pairs.py
index 4d1bef8f958a62be9941d474a0b67542dcc5cfab..86ab00ce41723169de035a841d9e129a1b9e82a3 100644
--- a/demo/semantic_role_labeling/data/extract_pairs.py
+++ b/demo/semantic_role_labeling/data/extract_pairs.py
@@ -51,7 +51,7 @@ def read_sentences(words_file):
for line in fin:
line = line.strip()
if line == '':
- sentences.append(s.lower())
+ sentences.append(s)
s = ''
else:
s += line + ' '
@@ -64,6 +64,11 @@ def transform_labels(sentences, labels):
if len(labels[i]) == 1:
continue
else:
+ verb_list = []
+ for x in labels[i][0]:
+ if x !='-':
+ verb_list.append(x)
+
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
current_tag = 'O'
@@ -88,8 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True
else:
print 'error:', ll
-
- sen_lab_pair.append((sentences[i], label_seq))
+ sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq))
return sen_lab_pair
@@ -97,9 +101,9 @@ def write_file(sen_lab_pair, output_file):
with open(output_file, 'w') as fout:
for x in sen_lab_pair:
sentence = x[0]
- label_seq = ' '.join(x[1])
- assert len(sentence.split()) == len(x[1])
- fout.write(sentence + '\t' + label_seq + '\n')
+ label_seq = ' '.join(x[2])
+ assert len(sentence.split()) == len(x[2])
+ fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n')
if __name__ == '__main__':
diff --git a/demo/semantic_role_labeling/data/get_data.sh b/demo/semantic_role_labeling/data/get_data.sh
index 268c0995e27006ec62f38bdda9b0a0994dab096c..55e33f4685627ed483aa6642c518a33558091531 100644
--- a/demo/semantic_role_labeling/data/get_data.sh
+++ b/demo/semantic_role_labeling/data/get_data.sh
@@ -14,6 +14,10 @@
# limitations under the License.
set -e
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
+wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/verbDict.txt --no-check-certificate
+wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/targetDict.txt --no-check-certificate
+wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/wordDict.txt --no-check-certificate
+wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/emb --no-check-certificate
tar -xzvf conll05st-tests.tar.gz
rm conll05st-tests.tar.gz
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .
@@ -22,4 +26,4 @@ gunzip test.wsj.words.gz
gunzip test.wsj.props.gz
python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair
-python extract_dict_feature.py -p test.wsj.seq_pair -f feature -s src.dict -t tgt.dict
+python extract_dict_feature.py -p test.wsj.seq_pair -f feature
diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py
index 2ef25c42c1794c410fe85fd497a6ed9d2295dca9..d4c137ef42c4e2ec609f3e6f809363e602dfd8dd 100644
--- a/demo/semantic_role_labeling/dataprovider.py
+++ b/demo/semantic_role_labeling/dataprovider.py
@@ -17,41 +17,51 @@ from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
-def hook(settings, word_dict, label_dict, **kwargs):
+def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
+ settings.predicate_dict = predicate_dict
+
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
+ integer_value_sequence(len(predicate_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
- integer_value_sequence(2),
- integer_value_sequence(len(label_dict))]
+ integer_value_sequence(len(word_dict)), integer_value_sequence(2),
+ integer_value_sequence(len(label_dict))
+ ]
-@provider(init_hook=hook)
-def process(obj, file_name):
+def get_batch_size(yield_data):
+    return len(yield_data[0])
+
+
+@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
+ can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
- sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = \
+ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
-
+
words = sentence.split()
sen_len = len(words)
- word_slot = [obj.word_dict.get(w, UNK_IDX) for w in words]
+ word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [obj.word_dict.get(predicate, UNK_IDX)] * sen_len
- ctx_n1_slot = [obj.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
- ctx_0_slot = [obj.word_dict.get(ctx_0, UNK_IDX)] * sen_len
- ctx_p1_slot = [obj.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+ predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
+ ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
+ ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
+ ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
+ ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+ ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
- label_slot = [obj.label_dict.get(w) for w in label_list]
-
- yield word_slot, predicate_slot, ctx_n1_slot, \
- ctx_0_slot, ctx_p1_slot, mark_slot, label_slot
+ label_slot = [settings.label_dict.get(w) for w in label_list]
+ yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
+ ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
diff --git a/demo/semantic_role_labeling/db_lstm.py b/demo/semantic_role_labeling/db_lstm.py
index 364460afbe31caf42cd4f0836eba75e444b3f5b8..54ceff0e724220cc9ea96b9e0ec6844947a8343e 100644
--- a/demo/semantic_role_labeling/db_lstm.py
+++ b/demo/semantic_role_labeling/db_lstm.py
@@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
import math
import os
import sys
from paddle.trainer_config_helpers import *
#file paths
-word_dict_file = './data/src.dict'
-label_dict_file = './data/tgt.dict'
+word_dict_file = './data/wordDict.txt'
+label_dict_file = './data/targetDict.txt'
+predicate_file = './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
@@ -31,8 +31,10 @@ if not is_predict:
#load dictionaries
word_dict = dict()
label_dict = dict()
+ predicate_dict = dict()
with open(word_dict_file, 'r') as f_word, \
- open(label_dict_file, 'r') as f_label:
+ open(label_dict_file, 'r') as f_label, \
+ open(predicate_file, 'r') as f_pre:
for i, line in enumerate(f_word):
w = line.strip()
word_dict[w] = i
@@ -41,8 +43,13 @@ if not is_predict:
w = line.strip()
label_dict[w] = i
+ for i, line in enumerate(f_pre):
+ w = line.strip()
+ predicate_dict[w] = i
+
+
if is_test:
- train_list_file = None
+ train_list_file = None
#define data provider
define_py_data_sources2(
@@ -51,91 +58,157 @@ if not is_predict:
module='dataprovider',
obj='process',
args={'word_dict': word_dict,
- 'label_dict': label_dict})
+ 'label_dict': label_dict,
+ 'predicate_dict': predicate_dict })
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
+ pred_len = len(predicate_dict)
else:
word_dict_len = get_config_arg('dict_len', int)
label_dict_len = get_config_arg('label_len', int)
+ pred_len = get_config_arg('pred_len', int)
+############################## Hyper-parameters ##################################
mark_dict_len = 2
word_dim = 32
mark_dim = 5
-hidden_dim = 128
+hidden_dim = 512
depth = 8
-emb_lr = 1e-2
-fc_lr = 1e-2
-lstm_lr = 2e-2
+
+
+
+########################### Optimizer #######################################
+
settings(
batch_size=150,
- learning_method=AdamOptimizer(),
- learning_rate=1e-3,
+ learning_method=MomentumOptimizer(momentum=0),
+ learning_rate=2e-2,
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25)
+ is_async=False,
+ model_average=ModelAverage(average_window=0.5,
+ max_average_window=10000),
+
+)
-#6 features
+
+
+
+####################################### network ##############################
+#8 features and 1 target
word = data_layer(name='word_data', size=word_dict_len)
-predicate = data_layer(name='verb_data', size=word_dict_len)
+predicate = data_layer(name='verb_data', size=pred_len)
+
+ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len)
ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len)
ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len)
ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
+ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
+
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
-ptt = ParameterAttribute(name='src_emb', learning_rate=emb_lr)
-layer_attr = ExtraLayerAttribute(drop_rate=0.5)
-fc_para_attr = ParameterAttribute(learning_rate=fc_lr)
-lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=lstm_lr)
-para_attr = [fc_para_attr, lstm_para_attr]
-word_embedding = embedding_layer(size=word_dim, input=word, param_attr=ptt)
-predicate_embedding = embedding_layer(
- size=word_dim, input=predicate, param_attr=ptt)
-ctx_n1_embedding = embedding_layer(size=word_dim, input=ctx_n1, param_attr=ptt)
-ctx_0_embedding = embedding_layer(size=word_dim, input=ctx_0, param_attr=ptt)
-ctx_p1_embedding = embedding_layer(size=word_dim, input=ctx_p1, param_attr=ptt)
-mark_embedding = embedding_layer(size=mark_dim, input=mark)
+default_std = 1 / math.sqrt(hidden_dim) / 3.0
+
+emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
+std_0 = ParameterAttribute(initial_std=0.)
+std_default = ParameterAttribute(initial_std=default_std)
+
+predicate_embedding = embedding_layer(
+    size=word_dim, input=predicate,
+    param_attr=ParameterAttribute(name='vemb', initial_std=default_std))
+mark_embedding = embedding_layer(
+    name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
+
+word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
+emb_layers = [
+    embedding_layer(size=word_dim, input=x, param_attr=emb_para)
+    for x in word_input
+]
+emb_layers.append(predicate_embedding)
+emb_layers.append(mark_embedding)
hidden_0 = mixed_layer(
+ name='hidden0',
size=hidden_dim,
- input=[
- full_matrix_projection(input=word_embedding),
- full_matrix_projection(input=predicate_embedding),
- full_matrix_projection(input=ctx_n1_embedding),
- full_matrix_projection(input=ctx_0_embedding),
- full_matrix_projection(input=ctx_p1_embedding),
- full_matrix_projection(input=mark_embedding),
- ])
+ bias_attr=std_default,
+ input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ])
+
-lstm_0 = lstmemory(input=hidden_0, layer_attr=layer_attr)
+mix_hidden_lr = 1e-3
+lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
+hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr)
+
+lstm_0 = lstmemory(name='lstm0',
+ input=hidden_0,
+ act=ReluActivation(),
+ gate_act=SigmoidActivation(),
+ state_act=SigmoidActivation(),
+ bias_attr=std_0,
+ param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
+
for i in range(1, depth):
- fc = fc_layer(input=input_tmp, size=hidden_dim, param_attr=para_attr)
+ mix_hidden = mixed_layer(name='hidden'+str(i),
+ size=hidden_dim,
+ bias_attr=std_default,
+ input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
+ full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
+ ]
+ )
+
+ lstm = lstmemory(name='lstm'+str(i),
+ input=mix_hidden,
+ act=ReluActivation(),
+ gate_act=SigmoidActivation(),
+ state_act=SigmoidActivation(),
+ reverse=((i % 2)==1),
+ bias_attr=std_0,
+ param_attr=lstm_para_attr)
+
+ input_tmp = [mix_hidden, lstm]
+
+feature_out = mixed_layer(name='output',
+ size=label_dict_len,
+ bias_attr=std_default,
+ input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
+ full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
+ ],
+ )
- lstm = lstmemory(
- input=fc,
- act=ReluActivation(),
- reverse=(i % 2) == 1,
- layer_attr=layer_attr)
- input_tmp = [fc, lstm]
-prob = fc_layer(
- input=input_tmp,
- size=label_dict_len,
- act=SoftmaxActivation(),
- param_attr=para_attr)
if not is_predict:
- cls = classification_cost(input=prob, label=target)
- outputs(cls)
+ crf_l = crf_layer( name = 'crf',
+ size = label_dict_len,
+ input = feature_out,
+ label = target,
+ param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr)
+
+ )
+
+
+ crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
+ size = label_dict_len,
+ input = feature_out,
+ label = target,
+ param_attr=ParameterAttribute(name='crfw')
+ )
+
+
+ eval = sum_evaluator(input=crf_dec_l)
+
+ outputs(crf_l)
+
else:
- outputs(prob)
+ crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
+ size = label_dict_len,
+ input = feature_out,
+ param_attr=ParameterAttribute(name='crfw')
+ )
+
+ outputs(crf_dec_l)
+
diff --git a/demo/semantic_role_labeling/predict.py b/demo/semantic_role_labeling/predict.py
index 9a27112828e449174e3da79dc7db9fed20bfed6f..2761814e1811e701122e0be4850526c5b290c457 100644
--- a/demo/semantic_role_labeling/predict.py
+++ b/demo/semantic_role_labeling/predict.py
@@ -26,7 +26,7 @@ UNK_IDX = 0
class Prediction():
- def __init__(self, train_conf, dict_file, model_dir, label_file):
+ def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@@ -35,16 +35,19 @@ class Prediction():
self.dict = {}
self.labels = {}
+ self.predicate_dict={}
self.labels_reverse = {}
- self.load_dict_label(dict_file, label_file)
+ self.load_dict_label(dict_file, label_file, predicate_dict_file)
len_dict = len(self.dict)
len_label = len(self.labels)
+ len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf,
- 'dict_len=' + str(len_dict) +
+ 'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) +
+ ',pred_len=' + str(len_pred) +
',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
@@ -52,15 +55,21 @@ class Prediction():
slots = [
integer_value_sequence(len_dict),
+ integer_value_sequence(len_pred),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
+ integer_value_sequence(len_dict),
integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
- def load_dict_label(self, dict_file, label_file):
+ def load_dict_label(self, dict_file, label_file, predicate_dict_file):
"""
Load dictionary from self.dict_file.
"""
@@ -71,52 +80,55 @@ class Prediction():
self.labels[line.strip()] = line_count
self.labels_reverse[line_count] = line.strip()
+ for line_count, line in enumerate(open(predicate_dict_file, 'r')):
+ self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
"""
with open(data_file, 'r') as fdata:
for line in fdata:
- sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = line.strip(
+ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip(
).split('\t')
words = sentence.split()
sen_len = len(words)
-
+
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [self.dict.get(predicate, UNK_IDX)] * sen_len
+ predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len
+ ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len
+ ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
+
+ yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
+ ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot
- yield word_slot, predicate_slot, ctx_n1_slot, \
- ctx_0_slot, ctx_p1_slot, mark_slot
-
- def predict(self, data_file):
+ def predict(self, data_file, output_file):
"""
data_file: file name of input data.
"""
input = self.converter(self.get_data(data_file))
output = self.network.forwardTest(input)
- prob = output[0]["value"]
- lab = list(np.argsort(-prob)[:, 0])
+ lab = output[0]["id"].tolist()
- with open(data_file, 'r') as fin, open('predict.res', 'w') as fout:
+ with open(data_file, 'r') as fin, open(output_file, 'w') as fout:
index = 0
for line in fin:
sen = line.split('\t')[0]
len_sen = len(sen.split())
line_labels = lab[index:index + len_sen]
index += len_sen
- fout.write(sen + '\t' + ' '.join([self.labels_reverse[
- i] for i in line_labels]) + '\n')
+ fout.write(sen + '\t' + ' '.join(
+ [self.labels_reverse[i] for i in line_labels]) + '\n')
def option_parser():
- usage = ("python predict.py -c config -w model_dir "
- "-d word dictionary -l label_file -i input_file")
+ usage = ("python predict.py -c config -w model_dir "
+ "-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
@@ -137,6 +149,13 @@ def option_parser():
dest="label_file",
default=None,
help="label file")
+ parser.add_option(
+ "-p",
+ "--predict_dict_file",
+ action="store",
+ dest="predict_dict_file",
+ default=None,
+ help="predict_dict_file")
parser.add_option(
"-i",
"--data",
@@ -150,6 +169,14 @@ def option_parser():
dest="model_path",
default=None,
help="model path")
+
+ parser.add_option(
+ "-o",
+ "--output_file",
+ action="store",
+ dest="output_file",
+ default=None,
+ help="output file")
return parser.parse_args()
@@ -160,10 +187,12 @@ def main():
dict_file = options.dict_file
model_path = options.model_path
label_file = options.label_file
+ predict_dict_file = options.predict_dict_file
+ output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
- predict = Prediction(train_conf, dict_file, model_path, label_file)
- predict.predict(data_file)
+    predict = Prediction(train_conf, dict_file, model_path, label_file,
+                         predict_dict_file)
+    predict.predict(data_file, output_file)
if __name__ == '__main__':
diff --git a/demo/semantic_role_labeling/predict.sh b/demo/semantic_role_labeling/predict.sh
index a545b9a5d591b41bdbd54905cbbffc410abc8fb0..d0acdb0bd093974485475cf796c6d41ac7899135 100644
--- a/demo/semantic_role_labeling/predict.sh
+++ b/demo/semantic_role_labeling/predict.sh
@@ -26,15 +26,18 @@ LOG=`get_best_pass $log`
LOG=(${LOG})
best_model_path="output/pass-${LOG[1]}"
-
config_file=db_lstm.py
-dict_file=./data/src.dict
-label_file=./data/tgt.dict
+dict_file=./data/wordDict.txt
+label_file=./data/targetDict.txt
+predicate_dict_file=./data/verbDict.txt
input_file=./data/feature
+output_file=predict.res
python predict.py \
-c $config_file \
-w $best_model_path \
-l $label_file \
+ -p $predicate_dict_file \
-d $dict_file \
- -i $input_file
+ -i $input_file \
+ -o $output_file
diff --git a/demo/semantic_role_labeling/test.sh b/demo/semantic_role_labeling/test.sh
index 804f722e5b8e9ee5b54c778c54f7833f5e6c4de0..c4ab44f5ca08aefd18f2851a1410aa08563925a9 100644
--- a/demo/semantic_role_labeling/test.sh
+++ b/demo/semantic_role_labeling/test.sh
@@ -36,5 +36,5 @@ paddle train \
--job=test \
--use_gpu=false \
--config_args=is_test=1 \
+ --test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'
-
diff --git a/demo/semantic_role_labeling/train.sh b/demo/semantic_role_labeling/train.sh
index 94c7b6f31df3b5e5e059d6e1323ae0c0bec74753..420768bb2b4ebed7b135a49c5eee5e5538426ae1 100644
--- a/demo/semantic_role_labeling/train.sh
+++ b/demo/semantic_role_labeling/train.sh
@@ -16,12 +16,14 @@
set -e
paddle train \
--config=./db_lstm.py \
+ --use_gpu=0 \
+ --log_period=5000 \
+ --trainer_count=1 \
+ --show_parameter_stats_period=5000 \
--save_dir=./output \
- --trainer_count=4 \
- --log_period=10 \
- --num_passes=500 \
- --use_gpu=false \
- --show_parameter_stats_period=10 \
+ --num_passes=10000 \
+ --average_test_period=10000000 \
+ --init_model_path=./data \
+ --load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
-2>&1 | tee 'train.log'
-
+ 2>&1 | tee 'train.log'
diff --git a/demo/sentiment/data/get_imdb.sh b/demo/sentiment/data/get_imdb.sh
index 41523927afe75428ef1151cef8184ede14eea9a7..28fa86232d89964b3f1680080239cf8a4ebefa9a 100755
--- a/demo/sentiment/data/get_imdb.sh
+++ b/demo/sentiment/data/get_imdb.sh
@@ -38,11 +38,11 @@ unzip master.zip
mkdir -p imdb/train
mkdir -p imdb/test
-cp -r aclImdb/train/pos/ imdb/train/
-cp -r aclImdb/train/neg/ imdb/train/
+cp -r aclImdb/train/pos/ imdb/train/pos
+cp -r aclImdb/train/neg/ imdb/train/neg
-cp -r aclImdb/test/pos/ imdb/test/
-cp -r aclImdb/test/neg/ imdb/test/
+cp -r aclImdb/test/pos/ imdb/test/pos
+cp -r aclImdb/test/neg/ imdb/test/neg
#remove compressed package
rm aclImdb_v1.tar.gz
diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py
index 9a9fd81f030cb1d2a10a5000fd1d12810d12112b..53e3d1d20df92b8815347bd8937064871f326b3f 100755
--- a/demo/sentiment/dataprovider.py
+++ b/demo/sentiment/dataprovider.py
@@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import *
def hook(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
- integer_value_sequence(len(settings.word_dict)),
- integer_value(2)]
+ integer_value_sequence(len(settings.word_dict)), integer_value(2)
+ ]
settings.logger.info('dict len : %d' % (len(settings.word_dict)))
@@ -29,6 +29,7 @@ def process(settings, file_name):
label, comment = line.strip().split('\t\t')
label = int(label)
words = comment.split()
- word_slot = [settings.word_dict[w] for w in words if w in
- settings.word_dict]
+ word_slot = [
+ settings.word_dict[w] for w in words if w in settings.word_dict
+ ]
yield word_slot, label
diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py
index 7d0baeabbba68b2a160463364d05cd865bf0314f..bc0f6f31264294034ed38309f7fda370865b2845 100755
--- a/demo/sentiment/predict.py
+++ b/demo/sentiment/predict.py
@@ -18,14 +18,14 @@ from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
-
"""
Usage: run following command to show help message.
python predict.py -h
"""
+
class SentimentPrediction():
- def __init__(self, train_conf, dict_file, model_dir=None, label_file = None):
+ def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@@ -44,7 +44,8 @@ class SentimentPrediction():
self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1")
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
self.network.loadParameters(self.model_dir)
input_types = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(input_types)
@@ -61,7 +62,7 @@ class SentimentPrediction():
"""
Load label.
"""
- self.label={}
+ self.label = {}
for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0]
@@ -72,7 +73,9 @@ class SentimentPrediction():
with open(data_file, 'r') as fdata:
for line in fdata:
words = line.strip().split()
- word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
+ word_slot = [
+ self.word_dict[w] for w in words if w in self.word_dict
+ ]
if not word_slot:
print "all words are not in dictionary: %s", line
continue
@@ -89,25 +92,48 @@ class SentimentPrediction():
if self.label is None:
print("%s: predicting label is %d" % (data_file, lab[0][0]))
else:
- print("%s: predicting label is %s" % (data_file, self.label[lab[0][0]]))
+ print("%s: predicting label is %s" %
+ (data_file, self.label[lab[0][0]]))
+
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage)
- parser.add_option("-n", "--tconf", action="store",
- dest="train_conf", help="network config")
- parser.add_option("-d", "--dict", action="store",
- dest="dict_file",help="dictionary file")
- parser.add_option("-b", "--label", action="store",
- dest="label", default=None,
- help="dictionary file")
- parser.add_option("-i", "--data", action="store",
- dest="data", help="data file to predict")
- parser.add_option("-w", "--model", action="store",
- dest="model_path", default=None,
- help="model path")
+ parser.add_option(
+ "-n",
+ "--tconf",
+ action="store",
+ dest="train_conf",
+ help="network config")
+ parser.add_option(
+ "-d",
+ "--dict",
+ action="store",
+ dest="dict_file",
+ help="dictionary file")
+ parser.add_option(
+ "-b",
+ "--label",
+ action="store",
+ dest="label",
+ default=None,
+ help="dictionary file")
+ parser.add_option(
+ "-i",
+ "--data",
+ action="store",
+ dest="data",
+ help="data file to predict")
+ parser.add_option(
+ "-w",
+ "--model",
+ action="store",
+ dest="model_path",
+ default=None,
+ help="model path")
return parser.parse_args()
+
def main():
options, args = option_parser()
train_conf = options.train_conf
@@ -119,5 +145,6 @@ def main():
predict = SentimentPrediction(train_conf, dict_file, model_path, label)
predict.predict(data)
+
if __name__ == '__main__':
main()
diff --git a/demo/sentiment/preprocess.py b/demo/sentiment/preprocess.py
index 49b53d500a1bf816bde9c9675b251be8e9a68ae9..7146e95d751c4de649e204fab724085994dfa4d3 100755
--- a/demo/sentiment/preprocess.py
+++ b/demo/sentiment/preprocess.py
@@ -22,13 +22,13 @@ from os.path import join as join_path
from optparse import OptionParser
from paddle.utils.preprocess_util import *
-
"""
Usage: run following command to show help message.
python preprocess.py -h
"""
-def save_dict(dict, filename, is_reverse = True):
+
+def save_dict(dict, filename, is_reverse=True):
"""
Save dictionary into file.
dict: input dictionary.
@@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True):
f = open(filename, 'w')
for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
reverse=is_reverse):
- f.write('%s\t%s\n'%(k, v))
+ f.write('%s\t%s\n' % (k, v))
f.close()
+
def tokenize(sentences):
"""
Use tokenizer.perl to tokenize input sentences.
@@ -58,6 +59,7 @@ def tokenize(sentences):
toks = tok_text.split('\n')[:-1]
return toks
+
def read_lines(path):
"""
path: String, file path.
@@ -71,12 +73,17 @@ def read_lines(path):
seqs.append(line)
return seqs
+
class SentimentDataSetCreate():
"""
A class to process data for sentiment analysis task.
"""
- def __init__(self, data_path, output_path,
- use_okenizer = True, multi_lines = False):
+
+ def __init__(self,
+ data_path,
+ output_path,
+ use_okenizer=True,
+ multi_lines=False):
"""
data_path: string, traing and testing dataset path
output_path: string, output path, store processed dataset
@@ -164,23 +171,17 @@ class SentimentDataSetCreate():
# Preprocess train data.
train_data, train_lab_set = self.data_list(self.train_dir)
print "processing train set..."
- file_lists = self.save_data(train_data,
- "train",
- self.batch_size,
- True,
- True)
+ file_lists = self.save_data(train_data, "train", self.batch_size, True,
+ True)
save_list(file_lists, self.train_list)
# If have test data path, preprocess test data.
if os.path.exists(self.test_dir):
test_data, test_lab_set = self.data_list(self.test_dir)
- assert(train_lab_set == test_lab_set)
+ assert (train_lab_set == test_lab_set)
print "processing test set..."
- file_lists = self.save_data(test_data,
- "test",
- self.batch_size,
- False,
- self.dict_with_test)
+ file_lists = self.save_data(test_data, "test", self.batch_size,
+ False, self.dict_with_test)
save_list(file_lists, self.test_list)
# save labels set.
@@ -191,7 +192,9 @@ class SentimentDataSetCreate():
save_dict(self.word_count, self.dict_file, True)
self.dict_size = len(self.word_count)
- def save_data(self, data, prefix = "",
+ def save_data(self,
+ data,
+ prefix="",
batch_size=50000,
is_shuffle=False,
build_dict=False):
@@ -205,7 +208,8 @@ class SentimentDataSetCreate():
return: list of batch names
"""
if is_shuffle and self.multi_lines:
- return self.save_data_multi_lines(data, prefix, batch_size, build_dict)
+ return self.save_data_multi_lines(data, prefix, batch_size,
+ build_dict)
if is_shuffle:
random.shuffle(data)
@@ -213,7 +217,7 @@ class SentimentDataSetCreate():
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
- "%s_part_%03d" %(prefix, i))
+ "%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, len(data))
# read a batch of data
@@ -246,7 +250,9 @@ class SentimentDataSetCreate():
data_list = tokenize(data_list)
return label_list, data_list
- def save_data_multi_lines(self, data, prefix = "",
+ def save_data_multi_lines(self,
+ data,
+ prefix="",
batch_size=50000,
build_dict=False):
"""
@@ -274,14 +280,14 @@ class SentimentDataSetCreate():
self.create_dict(data_list)
length = len(label_list)
- perm_list = np.array([ i for i in xrange(length) ])
+ perm_list = np.array([i for i in xrange(length)])
random.shuffle(perm_list)
num_batches = int(math.ceil(length / float(batch_size)))
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
- "%s_part_%03d" %(prefix, i))
+ "%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, length)
sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
@@ -304,35 +310,50 @@ class SentimentDataSetCreate():
f.write('%s\t\t%s\n' % (lab, seq))
f.close()
+
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
- parser.add_option("-i", "--data", action="store",
- dest="input", help="Input data directory.")
- parser.add_option("-o", "--output", action="store",
- dest="output", default=None,
- help="Output directory.")
- parser.add_option("-t", "--tokenizer", action="store",
- dest="use_tokenizer", default=True,
- help="Whether to use tokenizer.")
+ parser.add_option(
+ "-i",
+ "--data",
+ action="store",
+ dest="input",
+ help="Input data directory.")
+ parser.add_option(
+ "-o",
+ "--output",
+ action="store",
+ dest="output",
+ default=None,
+ help="Output directory.")
+ parser.add_option(
+ "-t",
+ "--tokenizer",
+ action="store",
+ dest="use_tokenizer",
+ default=True,
+ help="Whether to use tokenizer.")
parser.add_option("-m", "--multi_lines", action="store",
dest="multi_lines", default=False,
help="If input text files have multi lines and they "\
"need to be shuffled, you should set -m True,")
return parser.parse_args()
+
def main():
options, args = option_parser()
- data_dir=options.input
- output_dir=options.output
- use_tokenizer=options.use_tokenizer
- multi_lines=options.multi_lines
+ data_dir = options.input
+ output_dir = options.output
+ use_tokenizer = options.use_tokenizer
+ multi_lines = options.multi_lines
if output_dir is None:
outname = os.path.basename(options.input)
output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
- data_creator = SentimentDataSetCreate(data_dir, output_dir,
- use_tokenizer, multi_lines)
+ data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
+ multi_lines)
data_creator.create_dataset()
+
if __name__ == '__main__':
main()
diff --git a/demo/sentiment/sentiment_net.py b/demo/sentiment/sentiment_net.py
index 31e585edcaa111898c950ad016d3996fae15a7db..ff6a3624a404cb52d5d7ac0934fedba0d489dc22 100644
--- a/demo/sentiment/sentiment_net.py
+++ b/demo/sentiment/sentiment_net.py
@@ -47,10 +47,12 @@ def sentiment_data(data_dir=None,
for i, line in enumerate(open(dict_file, 'r')):
word_dict[line.split('\t')[0]] = i
- define_py_data_sources2(train_list, test_list,
- module="dataprovider",
- obj="process",
- args={'dictionary': word_dict})
+ define_py_data_sources2(
+ train_list,
+ test_list,
+ module="dataprovider",
+ obj="process",
+ args={'dictionary': word_dict})
return dict_dim, class_dim
@@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim,
emb = embedding_layer(input=data, size=emb_dim)
bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
- output = fc_layer(input=dropout, size=class_dim,
- act=SoftmaxActivation())
+ output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
if not is_predict:
lbl = data_layer("label", 1)
@@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim,
data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim)
- fc1 = fc_layer(input=emb, size=hid_dim, act=linear,
- bias_attr=bias_attr)
- lstm1 = lstmemory(input=fc1, act=relu, bias_attr=bias_attr,
- layer_attr=layer_attr)
+ fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
+ lstm1 = lstmemory(
+ input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
- fc = fc_layer(input=inputs, size=hid_dim, act=linear,
- param_attr=para_attr, bias_attr=bias_attr)
- lstm = lstmemory(input=fc, reverse=(i % 2) == 0, act=relu,
- bias_attr=bias_attr, layer_attr=layer_attr)
+ fc = fc_layer(
+ input=inputs,
+ size=hid_dim,
+ act=linear,
+ param_attr=para_attr,
+ bias_attr=bias_attr)
+ lstm = lstmemory(
+ input=fc,
+ reverse=(i % 2) == 0,
+ act=relu,
+ bias_attr=bias_attr,
+ layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling())
lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling())
- output = fc_layer(input=[fc_last, lstm_last], size=class_dim,
- act=SoftmaxActivation(),
- bias_attr=bias_attr, param_attr=para_attr)
+ output = fc_layer(
+ input=[fc_last, lstm_last],
+ size=class_dim,
+ act=SoftmaxActivation(),
+ bias_attr=bias_attr,
+ param_attr=para_attr)
if is_predict:
outputs(output)
else:
- outputs(
- classification_cost(input=output, label=data_layer('label', 1)))
+ outputs(classification_cost(input=output, label=data_layer('label', 1)))
diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py
index db24182a8d7359786bd1f3b2083892cf846605d1..114a9138ebfef054c7d3ba99b4a510a452f8f2cd 100644
--- a/demo/sentiment/trainer_config.py
+++ b/demo/sentiment/trainer_config.py
@@ -20,20 +20,20 @@ is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False)
-data_dir = "./data/pre-imdb"
+data_dir = "./data/pre-imdb"
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config #####################
settings(
- batch_size=128,
- learning_rate=2e-3,
- learning_method=AdamOptimizer(),
- regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ batch_size=128,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ average_window=0.5,
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25)
#################### Network Config ######################
-stacked_lstm_net(dict_dim, class_dim=class_dim,
- stacked_num=3, is_predict=is_predict)
+stacked_lstm_net(
+ dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict)
# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
diff --git a/demo/seqToseq/dataprovider.py b/demo/seqToseq/dataprovider.py
index df19db109ed223c7515c3ebf2cb1918f41163930..c5da1b7685f47fda337921c7c60ac1497b9e48bb 100755
--- a/demo/seqToseq/dataprovider.py
+++ b/demo/seqToseq/dataprovider.py
@@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs):
if settings.job_mode:
settings.trg_dict = trg_dict
settings.slots = [
- integer_value_sequence(len(settings.src_dict)),
- integer_value_sequence(len(settings.trg_dict)),
+ integer_value_sequence(len(settings.src_dict)),
+ integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict))
]
settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
else:
settings.slots = [
- integer_value_sequence(len(settings.src_dict)),
+ integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(open(file_list[0], "r").readlines()))
]
@@ -62,8 +62,7 @@ def process(settings, file_name):
if settings.job_mode:
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
- trg_ids = [settings.trg_dict.get(w, UNK_IDX)
- for w in trg_words]
+ trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
diff --git a/demo/seqToseq/preprocess.py b/demo/seqToseq/preprocess.py
index 5efb17a664b9a2525972c29b9b5700b483b8c07e..bd1c51b1514b790ec385d48f49197b3e0285e736 100755
--- a/demo/seqToseq/preprocess.py
+++ b/demo/seqToseq/preprocess.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python preprocess.py -i INPUT [-d DICTSIZE] [-m]
@@ -24,12 +23,13 @@ Options:
-m --mergeDict merge source and target dictionary
"""
import os
-import sys
+import sys
import string
from optparse import OptionParser
from paddle.utils.preprocess_util import save_list, DatasetCreater
+
class SeqToSeqDatasetCreater(DatasetCreater):
"""
A class to process data for sequence to sequence application.
@@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater):
if not os.path.exists(output):
os.system(cmd + '> ' + output)
- def build_dict(self, file_path, dict_path, dict_size = -1):
+ def build_dict(self, file_path, dict_path, dict_size=-1):
"""
Create the dictionary for the file, Note that
1. Valid characters include all printable characters
@@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater):
for word in words:
if word not in dictory:
dictory[word] = 1
- else:
+ else:
dictory[word] += 1
output = open(dict_path, "w+")
output.write('\n\n\n')
count = 3
- for key, value in sorted(dictory.items(), key = lambda d:d[1], reverse = True):
+ for key, value in sorted(
+ dictory.items(), key=lambda d: d[1], reverse=True):
output.write(key + "\n")
count += 1
if count == dict_size:
break
self.dict_size = count
-
- def create_dataset(self, dict_size = -1, mergeDict = False,
- suffixes = ['.src', '.trg']):
+
+ def create_dataset(self,
+ dict_size=-1,
+ mergeDict=False,
+ suffixes=['.src', '.trg']):
"""
Create seqToseq dataset
"""
@@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater):
# checkout dataset should be parallel corpora
suffix_len = len(suffixes[0])
for dataset in dataset_list:
- file_list = os.listdir(dataset)
- if len(file_list) % 2 == 1:
- raise RuntimeError("dataset should be parallel corpora")
- file_list.sort()
- for i in range(0, len(file_list), 2):
- if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
- raise RuntimeError("source and target file name should be equal")
+ file_list = os.listdir(dataset)
+ if len(file_list) % 2 == 1:
+ raise RuntimeError("dataset should be parallel corpora")
+ file_list.sort()
+ for i in range(0, len(file_list), 2):
+ if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
+ raise RuntimeError(
+ "source and target file name should be equal")
# cat all the files with the same suffix in dataset
for suffix in suffixes:
@@ -155,16 +159,18 @@ class SeqToSeqDatasetCreater(DatasetCreater):
list = ['train.list', 'test.list', 'gen.list']
for dataset in dataset_list:
outname = os.path.basename(dataset)
- self.concat_file(dataset, outname + suffixes[0],
+ self.concat_file(dataset, outname + suffixes[0],
outname + suffixes[1], dir_list[id], outname)
- save_list([os.path.join(dir_list[id], outname)],
+ save_list([os.path.join(dir_list[id], outname)],
os.path.join(self.output_path, list[id]))
id += 1
# build dictionary for train data
dict = ['src.dict', 'trg.dict']
- dict_path = [os.path.join(self.output_path, dict[0]),
- os.path.join(self.output_path, dict[1])]
+ dict_path = [
+ os.path.join(self.output_path, dict[0]),
+ os.path.join(self.output_path, dict[1])
+ ]
if mergeDict:
outname = os.path.join(train_dir, train_dataset.split('/')[-1])
print 'build src dictionary for train data'
@@ -173,22 +179,30 @@ class SeqToSeqDatasetCreater(DatasetCreater):
os.system('cp ' + dict_path[0] + ' ' + dict_path[1])
else:
outname = os.path.join(train_dataset, self.train_dir_name)
- for id in range(0,2):
+ for id in range(0, 2):
suffix = suffixes[id]
print 'build ' + suffix[1:] + ' dictionary for train data'
self.build_dict(outname + suffix, dict_path[id], dict_size)
print 'dictionary size is', self.dict_size
+
def main():
usage = "usage: \n" \
"python %prog -i INPUT [-d DICTSIZE] [-m]"
parser = OptionParser(usage)
- parser.add_option("-i", action="store", dest="input",
- help="input original dataset path")
- parser.add_option("-d", action="store", dest="dictsize",
- help="specified word count of dictionary")
- parser.add_option("-m", "--mergeDict", action="store_true", dest="mergeDict",
- help="merge source and target dictionary")
+ parser.add_option(
+ "-i", action="store", dest="input", help="input original dataset path")
+ parser.add_option(
+ "-d",
+ action="store",
+ dest="dictsize",
+ help="specified word count of dictionary")
+ parser.add_option(
+ "-m",
+ "--mergeDict",
+ action="store_true",
+ dest="mergeDict",
+ help="merge source and target dictionary")
(options, args) = parser.parse_args()
if options.input[-1] == os.path.sep:
options.input = options.input[:-1]
@@ -200,5 +214,6 @@ def main():
data_creator = SeqToSeqDatasetCreater(options.input, output_path)
data_creator.create_dataset(dictsize, options.mergeDict)
+
if __name__ == "__main__":
- main();
+ main()
diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py
index edd6ad3f739b6cefc24d235be55c7a8f541e1ab7..ad5e3339c1461de06732eb62aca9e8323eea707b 100644
--- a/demo/seqToseq/seqToseq_net.py
+++ b/demo/seqToseq/seqToseq_net.py
@@ -50,16 +50,21 @@ def seq_to_seq_data(data_dir,
trg_dict = None
else:
train_list = os.path.join(data_dir, train_list)
- test_list = os.path.join(data_dir,test_list)
+ test_list = os.path.join(data_dir, test_list)
- define_py_data_sources2(train_list, test_list,
- module = "dataprovider",
- obj = "process",
- args = {"src_dict": src_dict,
- "trg_dict": trg_dict})
+ define_py_data_sources2(
+ train_list,
+ test_list,
+ module="dataprovider",
+ obj="process",
+ args={"src_dict": src_dict,
+ "trg_dict": trg_dict})
- return {"src_dict_path": src_lang_dict, "trg_dict_path": trg_lang_dict,
- "gen_result": gen_result}
+ return {
+ "src_dict_path": src_lang_dict,
+ "trg_dict_path": trg_lang_dict,
+ "gen_result": gen_result
+ }
def gru_encoder_decoder(data_conf,
@@ -90,51 +95,55 @@ def gru_encoder_decoder(data_conf,
size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding'))
src_forward = simple_gru(input=src_embedding, size=encoder_size)
- src_backward = simple_gru(input=src_embedding,
- size=encoder_size,
- reverse=True)
+ src_backward = simple_gru(
+ input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = concat_layer(input=[src_forward, src_backward])
with mixed_layer(size=decoder_size) as encoded_proj:
encoded_proj += full_matrix_projection(input=encoded_vector)
backward_first = first_seq(input=src_backward)
- with mixed_layer(size=decoder_size,
- act=TanhActivation(), ) as decoder_boot:
+ with mixed_layer(
+ size=decoder_size,
+ act=TanhActivation(), ) as decoder_boot:
decoder_boot += full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
- decoder_mem = memory(name='gru_decoder',
- size=decoder_size,
- boot_layer=decoder_boot)
+ decoder_mem = memory(
+ name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
- context = simple_attention(encoded_sequence=enc_vec,
- encoded_proj=enc_proj,
- decoder_state=decoder_mem, )
+ context = simple_attention(
+ encoded_sequence=enc_vec,
+ encoded_proj=enc_proj,
+ decoder_state=decoder_mem, )
with mixed_layer(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += full_matrix_projection(input=context)
decoder_inputs += full_matrix_projection(input=current_word)
- gru_step = gru_step_layer(name='gru_decoder',
- input=decoder_inputs,
- output_mem=decoder_mem,
- size=decoder_size)
+ gru_step = gru_step_layer(
+ name='gru_decoder',
+ input=decoder_inputs,
+ output_mem=decoder_mem,
+ size=decoder_size)
- with mixed_layer(size=target_dict_dim,
- bias_attr=True,
- act=SoftmaxActivation()) as out:
+ with mixed_layer(
+ size=target_dict_dim, bias_attr=True,
+ act=SoftmaxActivation()) as out:
out += full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
- group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
- StaticInput(input=encoded_proj,is_seq=True)]
+ group_inputs = [
+ StaticInput(
+ input=encoded_vector, is_seq=True), StaticInput(
+ input=encoded_proj, is_seq=True)
+ ]
if not is_generating:
trg_embedding = embedding_layer(
- input=data_layer(name='target_language_word',
- size=target_dict_dim),
+ input=data_layer(
+ name='target_language_word', size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
@@ -144,12 +153,12 @@ def gru_encoder_decoder(data_conf,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
- decoder = recurrent_group(name=decoder_group_name,
- step=gru_decoder_with_attention,
- input=group_inputs)
+ decoder = recurrent_group(
+ name=decoder_group_name,
+ step=gru_decoder_with_attention,
+ input=group_inputs)
- lbl = data_layer(name='target_language_next_word',
- size=target_dict_dim)
+ lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
cost = classification_cost(input=decoder, label=lbl)
outputs(cost)
else:
@@ -168,16 +177,19 @@ def gru_encoder_decoder(data_conf,
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
- beam_gen = beam_search(name=decoder_group_name,
- step=gru_decoder_with_attention,
- input=group_inputs,
- bos_id=0,
- eos_id=1,
- beam_size=beam_size,
- max_length=max_length)
-
- seqtext_printer_evaluator(input=beam_gen,
- id_input=data_layer(name="sent_id", size=1),
- dict_file=trg_dict_path,
- result_file=gen_trans_file)
+ beam_gen = beam_search(
+ name=decoder_group_name,
+ step=gru_decoder_with_attention,
+ input=group_inputs,
+ bos_id=0,
+ eos_id=1,
+ beam_size=beam_size,
+ max_length=max_length)
+
+ seqtext_printer_evaluator(
+ input=beam_gen,
+ id_input=data_layer(
+ name="sent_id", size=1),
+ dict_file=trg_dict_path,
+ result_file=gen_trans_file)
outputs(beam_gen)
diff --git a/demo/sequence_tagging/dataprovider.py b/demo/sequence_tagging/dataprovider.py
index 6f412d6834be6d02397821215b1317353cd5df18..37dcb7aa17c0abd197ef2f3121bf8be6c54375c2 100644
--- a/demo/sequence_tagging/dataprovider.py
+++ b/demo/sequence_tagging/dataprovider.py
@@ -17,8 +17,7 @@ import gzip
import logging
logging.basicConfig(
- format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s',
-)
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
logger = logging.getLogger('paddle')
logger.setLevel(logging.INFO)
@@ -32,59 +31,58 @@ num_original_columns = 3
# [[-1,0], [0,0]] means previous token at column 0 and current token at
# column 0 are combined as one feature.
patterns = [
- [[-2,0]],
- [[-1,0]],
- [[0,0]],
- [[1,0]],
- [[2,0]],
-
- [[-1,0], [0,0]],
- [[0,0], [1,0]],
-
- [[-2,1]],
- [[-1,1]],
- [[0,1]],
- [[1,1]],
- [[2,1]],
- [[-2,1], [-1,1]],
- [[-1,1], [0,1]],
- [[0,1], [1,1]],
- [[1,1], [2,1]],
-
- [[-2,1], [-1,1], [0,1]],
- [[-1,1], [0,1], [1,1]],
- [[0,1], [1,1], [2,1]],
+ [[-2, 0]],
+ [[-1, 0]],
+ [[0, 0]],
+ [[1, 0]],
+ [[2, 0]],
+ [[-1, 0], [0, 0]],
+ [[0, 0], [1, 0]],
+ [[-2, 1]],
+ [[-1, 1]],
+ [[0, 1]],
+ [[1, 1]],
+ [[2, 1]],
+ [[-2, 1], [-1, 1]],
+ [[-1, 1], [0, 1]],
+ [[0, 1], [1, 1]],
+ [[1, 1], [2, 1]],
+ [[-2, 1], [-1, 1], [0, 1]],
+ [[-1, 1], [0, 1], [1, 1]],
+ [[0, 1], [1, 1], [2, 1]],
]
dict_label = {
- 'B-ADJP': 0,
- 'I-ADJP': 1,
- 'B-ADVP': 2,
- 'I-ADVP': 3,
- 'B-CONJP': 4,
- 'I-CONJP': 5,
- 'B-INTJ': 6,
- 'I-INTJ': 7,
- 'B-LST': 8,
- 'I-LST': 9,
- 'B-NP': 10,
- 'I-NP': 11,
- 'B-PP': 12,
- 'I-PP': 13,
- 'B-PRT': 14,
- 'I-PRT': 15,
- 'B-SBAR': 16,
- 'I-SBAR': 17,
- 'B-UCP': 18,
- 'I-UCP': 19,
- 'B-VP': 20,
- 'I-VP': 21,
- 'O': 22
+ 'B-ADJP': 0,
+ 'I-ADJP': 1,
+ 'B-ADVP': 2,
+ 'I-ADVP': 3,
+ 'B-CONJP': 4,
+ 'I-CONJP': 5,
+ 'B-INTJ': 6,
+ 'I-INTJ': 7,
+ 'B-LST': 8,
+ 'I-LST': 9,
+ 'B-NP': 10,
+ 'I-NP': 11,
+ 'B-PP': 12,
+ 'I-PP': 13,
+ 'B-PRT': 14,
+ 'I-PRT': 15,
+ 'B-SBAR': 16,
+ 'I-SBAR': 17,
+ 'B-UCP': 18,
+ 'I-UCP': 19,
+ 'B-VP': 20,
+ 'I-VP': 21,
+ 'O': 22
}
+
def make_features(sequence):
length = len(sequence)
num_features = len(sequence[0])
+
def get_features(pos):
if pos < 0:
return ['#B%s' % -pos] * num_features
@@ -94,9 +92,10 @@ def make_features(sequence):
for i in xrange(length):
for pattern in patterns:
- fname = '/'.join([get_features(i+pos)[f] for pos, f in pattern])
+ fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
sequence[i].append(fname)
+
'''
Source file format:
Each line is for one timestep. The features are separated by space.
@@ -109,6 +108,8 @@ i-th column.
return a list of dict for each column
'''
+
+
def create_dictionaries(filename, cutoff, oov_policy):
def add_to_dict(sequence, dicts):
num_features = len(dicts)
@@ -140,7 +141,6 @@ def create_dictionaries(filename, cutoff, oov_policy):
features = line.split(' ')
sequence.append(features)
-
for i in xrange(num_features):
dct = dicts[i]
n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
@@ -151,7 +151,7 @@ def create_dictionaries(filename, cutoff, oov_policy):
else:
dct[k] = n
n += 1
-
+
if oov_policy[i] == OOV_POLICY_USE:
# placeholder so that len(dct) will be the number of features
# including OOV
@@ -187,12 +187,15 @@ def initializer(settings, **xargs):
logger.info("feature size=%s" % dim)
settings.input_types = input_types
+
'''
if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not
existed in dicts[i] will be assigned to id 0.
if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist
in dicts[i].
'''
+
+
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
input_file = filename
@@ -231,7 +234,7 @@ def process(settings, filename):
logger.fatal("Unknown token: %s" % features[i])
else:
vec.ids.append(dim + 0)
-
+
dim += len(dicts[i])
sample[-1].append(vec)
return sample
@@ -255,4 +258,3 @@ def process(settings, filename):
f.close()
logger.info("num_sequences=%s" % num_sequences)
-
diff --git a/demo/sequence_tagging/linear_crf.py b/demo/sequence_tagging/linear_crf.py
index 2bd1a20bc52fc546dcd0a0874bc09433e7212152..64895742e1b8c0a11cbedee0b88e61b5b63b007f 100644
--- a/demo/sequence_tagging/linear_crf.py
+++ b/demo/sequence_tagging/linear_crf.py
@@ -16,11 +16,11 @@ from paddle.trainer_config_helpers import *
import math
-define_py_data_sources2(train_list="data/train.list",
- test_list="data/test.list",
- module="dataprovider",
- obj="process")
-
+define_py_data_sources2(
+ train_list="data/train.list",
+ test_list="data/test.list",
+ module="dataprovider",
+ obj="process")
batch_size = 1
settings(
@@ -30,14 +30,15 @@ settings(
average_window=0.5,
learning_rate=1e-1,
learning_rate_decay_a=1e-5,
- learning_rate_decay_b=0.25,
-)
+ learning_rate_decay_b=0.25, )
+
+num_label_types = 23
-num_label_types=23
def get_simd_size(size):
return int(math.ceil(float(size) / 8)) * 8
+
# Currently, in order to use sparse_update=True,
# the size has to be aligned.
num_label_types = get_simd_size(num_label_types)
@@ -45,40 +46,37 @@ num_label_types = get_simd_size(num_label_types)
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
-chunk = data_layer(name="chunk",
- size=num_label_types)
+chunk = data_layer(name="chunk", size=num_label_types)
crf_input = fc_layer(
input=features,
size=num_label_types,
act=LinearActivation(),
bias_attr=False,
- param_attr=ParamAttr(initial_std=0, sparse_update=True))
+ param_attr=ParamAttr(
+ initial_std=0, sparse_update=True))
-crf=crf_layer(
+crf = crf_layer(
input=crf_input,
label=chunk,
- param_attr=ParamAttr(name="crfw", initial_std=0),
-)
+ param_attr=ParamAttr(
+ name="crfw", initial_std=0), )
-crf_decoding=crf_decoding_layer(
+crf_decoding = crf_decoding_layer(
size=num_label_types,
input=crf_input,
label=chunk,
- param_attr=ParamAttr(name="crfw"),
-)
+ param_attr=ParamAttr(name="crfw"), )
sum_evaluator(
name="error",
- input=crf_decoding,
-)
+ input=crf_decoding, )
chunk_evaluator(
name="chunk_f1",
- input =[crf_decoding, chunk],
+ input=[crf_decoding, chunk],
chunk_scheme="IOB",
- num_chunk_types=11,
-)
+ num_chunk_types=11, )
inputs(word, pos, chunk, features)
outputs(crf)
diff --git a/demo/sequence_tagging/rnn_crf.py b/demo/sequence_tagging/rnn_crf.py
index fb157bf3ea7193bca2c8a281e1afaf4b5f1d7309..90d4bbdddfdb4e38b930d54a2bc865df9fac589c 100644
--- a/demo/sequence_tagging/rnn_crf.py
+++ b/demo/sequence_tagging/rnn_crf.py
@@ -16,10 +16,11 @@ from paddle.trainer_config_helpers import *
import math
-define_py_data_sources2(train_list="data/train.list",
- test_list="data/test.list",
- module="dataprovider",
- obj="process")
+define_py_data_sources2(
+ train_list="data/train.list",
+ test_list="data/test.list",
+ module="dataprovider",
+ obj="process")
batch_size = 16
settings(
@@ -27,29 +28,27 @@ settings(
batch_size=batch_size,
regularization=L2Regularization(batch_size * 1e-5),
average_window=0.5,
- learning_rate = 2e-3,
- learning_rate_decay_a = 5e-7,
- learning_rate_decay_b = 0.5,
-)
+ learning_rate=2e-3,
+ learning_rate_decay_a=5e-7,
+ learning_rate_decay_b=0.5, )
-word_dim=128
+word_dim = 128
hidden_dim = 128
with_rnn = True
-initial_std=1/math.sqrt(hidden_dim)
-param_attr=ParamAttr(initial_std=initial_std)
-cpu_layer_attr=ExtraLayerAttribute(device=-1)
+initial_std = 1 / math.sqrt(hidden_dim)
+param_attr = ParamAttr(initial_std=initial_std)
+cpu_layer_attr = ExtraLayerAttribute(device=-1)
default_device(0)
-num_label_types=23
+num_label_types = 23
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
-chunk = data_layer(name="chunk",
- size=num_label_types,
- layer_attr=cpu_layer_attr)
+chunk = data_layer(
+ name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)
emb = embedding_layer(
input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
@@ -58,73 +57,64 @@ hidden1 = mixed_layer(
size=hidden_dim,
act=STanhActivation(),
bias_attr=True,
- input=[full_matrix_projection(emb),
- table_projection(pos, param_attr=param_attr)]
-)
+ input=[
+ full_matrix_projection(emb), table_projection(
+ pos, param_attr=param_attr)
+ ])
if with_rnn:
rnn1 = recurrent_layer(
act=ReluActivation(),
bias_attr=True,
input=hidden1,
- param_attr=ParamAttr(initial_std=0),
- )
+ param_attr=ParamAttr(initial_std=0), )
hidden2 = mixed_layer(
size=hidden_dim,
act=STanhActivation(),
bias_attr=True,
- input=[full_matrix_projection(hidden1)
- ] + ([
- full_matrix_projection(rnn1, param_attr=ParamAttr(initial_std=0))
- ] if with_rnn else []),
-)
+ input=[full_matrix_projection(hidden1)] +
+ ([full_matrix_projection(
+ rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
if with_rnn:
- rnn2=recurrent_layer(
+ rnn2 = recurrent_layer(
reverse=True,
act=ReluActivation(),
bias_attr=True,
input=hidden2,
- param_attr=ParamAttr(initial_std=0),
- )
+ param_attr=ParamAttr(initial_std=0), )
crf_input = mixed_layer(
size=num_label_types,
bias_attr=False,
- input=[
- full_matrix_projection(hidden2),
- ] + ([
- full_matrix_projection(rnn2, param_attr=ParamAttr(initial_std=0))
- ] if with_rnn else []),
-)
+ input=[full_matrix_projection(hidden2), ] +
+ ([full_matrix_projection(
+ rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
crf = crf_layer(
input=crf_input,
label=chunk,
- param_attr=ParamAttr(name="crfw", initial_std=0),
- layer_attr=cpu_layer_attr,
-)
+ param_attr=ParamAttr(
+ name="crfw", initial_std=0),
+ layer_attr=cpu_layer_attr, )
crf_decoding = crf_decoding_layer(
size=num_label_types,
input=crf_input,
label=chunk,
param_attr=ParamAttr(name="crfw"),
- layer_attr=cpu_layer_attr,
-)
+ layer_attr=cpu_layer_attr, )
sum_evaluator(
name="error",
- input=crf_decoding,
-)
+ input=crf_decoding, )
chunk_evaluator(
name="chunk_f1",
- input =[crf_decoding, chunk],
+ input=[crf_decoding, chunk],
chunk_scheme="IOB",
- num_chunk_types=11,
-)
+ num_chunk_types=11, )
inputs(word, pos, chunk, features)
outputs(crf)
diff --git a/doc/algorithm/index.rst b/doc/algorithm/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6073add3c0cbb12529eabb0f8d8a051bcb84e628
--- /dev/null
+++ b/doc/algorithm/index.rst
@@ -0,0 +1,7 @@
+Algorithm Tutorial
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+ rnn/rnn.rst
diff --git a/doc/algorithm/rnn/rnn.rst b/doc/algorithm/rnn/rnn.rst
index 343f55a20e464f63f054ebe724b5ef90f848d5e9..01d2caefb5cdf4e949511fd0f5bbafe0e604e881 100644
--- a/doc/algorithm/rnn/rnn.rst
+++ b/doc/algorithm/rnn/rnn.rst
@@ -1,5 +1,5 @@
-Recurrent Neural Network Configuration
-======================================
+RNN Configuration
+=================
This tutorial will guide you through configuring recurrent neural networks in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configurations. In this tutorial, you will learn how to:
@@ -17,7 +17,7 @@ PaddlePaddle does not need any preprocessing to sequence data, such as padding.
.. code-block:: python
- settings.slots = [
+ settings.input_types = [
integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict))]
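For context, here is a hedged sketch of a complete data provider hook built around this setting; the dictionary arguments and helper names are assumptions carried over from the snippet above rather than a fixed API contract:

.. code-block:: python

    def hook(settings, src_dict, trg_dict, file_list, **kwargs):
        # Keep the dictionaries around for process(), then declare one
        # integer id sequence per input slot.
        settings.src_dict = src_dict
        settings.trg_dict = trg_dict
        settings.input_types = [
            integer_value_sequence(len(settings.src_dict)),
            integer_value_sequence(len(settings.trg_dict)),
            integer_value_sequence(len(settings.trg_dict))]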
diff --git a/doc/build/build_from_source.md b/doc/build/build_from_source.md
index c37234d3ef14dfcfeaa1f34b0565e40e0672edc0..b8f26f431eb7a04147fe791a8c805427c827fe09 100644
--- a/doc/build/build_from_source.md
+++ b/doc/build/build_from_source.md
@@ -4,7 +4,6 @@ Installing from Sources
* [1. Download and Setup](#download)
* [2. Requirements](#requirements)
* [3. Build on Ubuntu](#ubuntu)
-* [4. Build on Mac OS X](#mac)
## Download and Setup
You can download PaddlePaddle from the [github source](https://github.com/gangliao/Paddle).
@@ -191,121 +190,3 @@ sudo pip install /opt/paddle/share/wheels/*.whl
# or just run
sudo paddle version
```
-
-## Building on Mac OS X
-
-### Prerequisites
-This guide is based on Mac OS X 10.11 (El Capitan). Note that if you are running an up to date version of OS X,
-you will already have Python 2.7.10 and Numpy 1.8 installed.
-
-The best option is to use the package manager homebrew to handle installations and upgrades for you.
-To install [homebrew](http://brew.sh/), first open a terminal window (you can find Terminal in the Utilities folder in Applications), and issue the command:
-
-```bash
-# install brew
-/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
-# install pip
-easy_install pip
-```
-
-### Install Dependencies
-
-- **CPU Dependencies**
-
- ```bash
- # Install fundamental dependents
- brew install glog gflags cmake protobuf openblas
-
- # Install google test on Mac OS X
- # Download gtest 1.7.0
- wget https://github.com/google/googletest/archive/release-1.7.0.tar.gz
- tar -xzf googletest-release-1.7.0.tar.gz && cd googletest-release-1.7.0
- # Build gtest
- mkdir build && cd build && cmake .. && make
- # Install gtest library
- sudo cp -r ../include/gtest /usr/local/include/
- sudo cp lib*.a /usr/local/lib
- ```
-
-- **GPU Dependencies(optional)**
-
- To build GPU version, you will need the following installed:
-
- 1. a CUDA-capable GPU
- 2. Mac OS X 10.11 or later
- 2. the Clang compiler and toolchain installed using Xcode
- 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
- 4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn)
-
- The CUDA development environment relies on tight integration with the host development environment,
- including the host compiler and C runtime libraries, and is therefore only supported on
- distribution versions that have been qualified for this CUDA Toolkit release.
-
- 1. After downloading cuDNN library, issue the following commands:
-
- ```bash
- sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local
- sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib/libcudnn*
- ```
- 2. Then you need to set DYLD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
-
- ```bash
- export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
- export PATH=/usr/local/cuda/bin:$PATH
- ```
-
-### Build and Install
-
-As usual, the best option is to create build folder under paddle project directory.
-
-```bash
-mkdir build && cd build
-cmake ..
-```
-
-CMake first check PaddlePaddle's dependencies in system default path. After installing some optional
-libraries, corresponding build option will be set automatically (for instance, glog, gtest and gflags).
-If still not found, you can manually set it based on CMake error information from your screen.
-
-As a simple example, consider the following:
-
-- **Only CPU**
-
- ```bash
- cmake .. -DWITH_GPU=OFF
- ```
-- **GPU**
-
- ```bash
- cmake .. -DWITH_GPU=ON
- ```
-
-- **GPU with doc and swig**
-
- ```bash
- cmake .. -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON
- ```
-
-Finally, you can build PaddlePaddle:
-
-```bash
-# you can add build option here, such as:
-cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=
-# please use sudo make install, if you want to install PaddlePaddle into the system
-make -j `sysctl -n hw.ncpu` && make install
-# set PaddlePaddle installation path in ~/.bashrc
-export PATH=/bin:$PATH
-```
-**Note:**
-
-If you set `WITH_SWIG_PY=ON`, related python dependencies also need to be installed.
-Otherwise, PaddlePaddle will automatically install python dependencies
-at first time when user run paddle commands, such as `paddle version`, `paddle train`.
-It may require sudo privileges:
-
-```bash
-# you can run
-sudo pip install /opt/paddle/share/wheels/*.whl
-# or just run
-sudo paddle version
-```
diff --git a/doc/build/contribute_to_paddle.md b/doc/build/contribute_to_paddle.md
index a9ab69c5f42b8d341dca87479a642e28ca58fbf4..1d03eb7362b1b6f2fcdac7b53f8b7f93fb75e49c 100644
--- a/doc/build/contribute_to_paddle.md
+++ b/doc/build/contribute_to_paddle.md
@@ -1,4 +1,4 @@
-# Contribute to PaddlePaddle
+# Contribute Code
We sincerely appreciate your contributions. You can use fork and pull request
workflow to merge your code.
diff --git a/doc/build/index.rst b/doc/build/index.rst
index 511cdea145c7fd0e41566d0a85115dbb06f84058..b4fe4596047c7d201fdf36bc76c26d5134611560 100644
--- a/doc/build/index.rst
+++ b/doc/build/index.rst
@@ -1,5 +1,5 @@
-Build And Install PaddlePaddle
-================================
+Install and Build
+=================
Install PaddlePaddle
----------------------
@@ -18,11 +18,7 @@ Build from Source
.. warning::
- Please use :code:`deb` package or :code:`docker` image to install paddle. The building guide is used for hacking or contributing to PaddlePaddle.
-
-
-If you want to hack and contribute PaddlePaddle source code, following guides can help you\:
-
+   Please use the :code:`deb` package or the :code:`docker` image to install PaddlePaddle. This building guide is intended for hacking on or contributing to the PaddlePaddle source code.
.. toctree::
:maxdepth: 1
@@ -30,4 +26,3 @@ If you want to hack and contribute PaddlePaddle source code, following guides ca
build_from_source.md
contribute_to_paddle.md
-
diff --git a/doc/cluster/opensource/cluster_train.md b/doc/cluster/opensource/cluster_train.md
index 4763ede39b049b6c49225dc9ae7add77325d704e..cb493a88f031850cb6a5eeed0ebe9e41bb7e01c3 100644
--- a/doc/cluster/opensource/cluster_train.md
+++ b/doc/cluster/opensource/cluster_train.md
@@ -1,26 +1,24 @@
-# Cluster Training
+# Distributed Training
-We provide some simple scripts ```paddle/scripts/cluster_train``` to help you to launch cluster training Job to harness PaddlePaddle's distributed trainning. For MPI and other cluster scheduler refer this naive script to implement more robust cluster training platform by yourself.
+In this article, we explain how to run distributed Paddle training jobs on clusters. We will create the distributed version of the single-process training example, [recommendation](https://github.com/baidu/Paddle/tree/develop/demo/recommendation).
-The following cluster demo is based on RECOMMENDATION local training demo in PaddlePaddle ```demo/recommendation``` directory. Assuming you enter the ```paddle/scripts/cluster_train/``` directory.
+[Scripts](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train) used in this article launch distributed jobs via SSH. They also work as a reference for users running more sophisticated cluster management systems like MPI and Kubernetes.
-## Pre-requirements
+## Prerequisite
-Firstly,
+1. The aforementioned scripts use the Python library [fabric](http://www.fabfile.org/) to run SSH commands. We can use `pip` to install fabric:
-```bash
+ ```bash
pip install fabric
-```
-
-Secondly, go through installing scripts to install PaddlePaddle at all nodes to make sure demo can run as local mode. For CUDA enabled training, we assume that CUDA is installed in ```/usr/local/cuda```, otherwise missed cuda runtime libraries error could be reported at cluster runtime. In one word, the local training environment should be well prepared for the simple scripts.
+ ```
-Then you should prepare same ROOT_DIR directory in all nodes. ROOT_DIR is from in cluster_train/conf.py. Assuming that the ROOT_DIR = /home/paddle, you can create ```paddle``` user account as well, at last ```paddle.py``` can ssh connections to all nodes with ```paddle``` user automatically.
+1. We need to install PaddlePaddle on all nodes in the cluster. To enable GPUs, we need to install CUDA in `/usr/local/cuda`; otherwise Paddle would report errors at runtime.
-At last you can create ssh mutual trust relationship between all nodes for easy ssh login, otherwise ```password``` should be provided at runtime from ```paddle.py```.
+1. Set the `ROOT_DIR` variable in `cluster_train/conf.py` on all nodes. For convenience, we often create a Unix user `paddle` on all nodes and set `ROOT_DIR=/home/paddle`. In this way, we can write public SSH keys into `/home/paddle/.ssh/authorized_keys` so that user `paddle` can SSH to all nodes without a password.
## Prepare Job Workspace
-```Job workspace``` is defined as one package directory which contains dependency libraries, train data, test data, model config file and all other related file dependencies.
+We refer to the directory where we put dependent libraries, config files, etc., as *workspace*.
The train/test data should be prepared before launching a cluster job. Because the train/test data may be placed in a different directory from the workspace, PaddlePaddle locates them through the index files `train.list` and `test.list` referenced in the model config file, so the prepared data also includes these two list files. Every local training demo already provides scripts to create them, and all nodes in a cluster job handle the files with the same logic.
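As a rough illustration (the directory layout and file names here are assumptions, not the demo's exact convention), the two list files can be generated with a few lines of Python:

```python
import os

def write_list(data_dir, list_path):
    # One file path per line, which is what train.list/test.list are expected to contain.
    with open(list_path, 'w') as f:
        for name in sorted(os.listdir(data_dir)):
            f.write(os.path.abspath(os.path.join(data_dir, name)) + '\n')

# Assumed layout: preprocessed batches live under ./data/train and ./data/test.
write_list('data/train', 'train.list')
write_list('data/test', 'test.list')
```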
diff --git a/doc/demo/quick_start/index_en.md b/doc/demo/quick_start/index_en.md
index e7d74512292c89233373c48d05895794d56702d8..80d816a768a71156ce72cda6ea92b749fbcdbe1f 100644
--- a/doc/demo/quick_start/index_en.md
+++ b/doc/demo/quick_start/index_en.md
@@ -1,4 +1,4 @@
-# Quick Start Tutorial
+# Quick Start
This tutorial will teach the basics of deep learning (DL), including how to implement many different models in PaddlePaddle. You will learn how to:
- Prepare data into the standardized format that PaddlePaddle accepts.
diff --git a/doc/demo/semantic_role_labeling/curve.jpg b/doc/demo/semantic_role_labeling/curve.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..baa35ae7f0a0b6c246f3a0d331735477ab8bcd70
Binary files /dev/null and b/doc/demo/semantic_role_labeling/curve.jpg differ
diff --git a/doc/demo/semantic_role_labeling/semantic_role_labeling.md b/doc/demo/semantic_role_labeling/semantic_role_labeling.md
index 05fbc8278daf204df60ad19b742c920e47128c27..e2793b2b3494160a7a80f07ec2127bd1f1a4f2e4 100644
--- a/doc/demo/semantic_role_labeling/semantic_role_labeling.md
+++ b/doc/demo/semantic_role_labeling/semantic_role_labeling.md
@@ -30,8 +30,6 @@ Several new files appear in the `data `directory as follows.
conll05st-release:the test data set of CoNll-2005 shared task
test.wsj.words:the Wall Street Journal data sentences
test.wsj.props: the propositional arguments
-src.dict:the dictionary of words in sentences
-tgt.dict:the labels dictionary
feature: the extracted features from data set
```
@@ -67,6 +65,8 @@ def hook(settings, word_dict, label_dict, **kwargs):
settings.label_dict = label_dict
#all inputs are integral and sequential type
settings.slots = [
+ integer_value_sequence(len(word_dict)),
+ integer_value_sequence(len(predicate_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
@@ -77,34 +77,39 @@ def hook(settings, word_dict, label_dict, **kwargs):
```
The corresponding data iterator is as follows:
```
-@provider(use_seq=True, init_hook=hook)
-def process(obj, file_name):
+@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
+ can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
- sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = line.strip().split('\t')
+ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
+ line.strip().split('\t')
+
words = sentence.split()
sen_len = len(words)
- word_slot = [obj.word_dict.get(w, UNK_IDX) for w in words]
+ word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [obj.word_dict.get(predicate, UNK_IDX)] * sen_len
- ctx_n1_slot = [obj.word_dict.get(ctx_n1, UNK_IDX) ] * sen_len
- ctx_0_slot = [obj.word_dict.get(ctx_0, UNK_IDX) ] * sen_len
- ctx_p1_slot = [obj.word_dict.get(ctx_p1, UNK_IDX) ] * sen_len
+ predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
+ ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
+ ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
+ ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
+ ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+ ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
- label_slot = [obj.label_dict.get(w) for w in label_list]
-
- yield word_slot, predicate_slot, ctx_n1_slot, ctx_0_slot, ctx_p1_slot, mark_slot, label_slot
+ label_slot = [settings.label_dict.get(w) for w in label_list]
+ yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
+ ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
```
-The `process`function yield 7 lists which are six features and labels.
+The `process` function yields nine lists: eight features and one label.
### Neural Network Config
`db_lstm.py` is the neural network config file to load the dictionaries and define the data provider module and network architecture during the training procedure.
-Seven `data_layer` load instances from data provider. Six features are transformed into embedddings respectively, and mixed by `mixed_layer` . Deep bidirectional LSTM layers extract features for the softmax layer. The objective function is cross entropy of labels.
+Nine `data_layer`s load instances from the data provider. Eight features are transformed into embeddings respectively, and mixed by `mixed_layer`. Deep bidirectional LSTM layers extract features for the softmax layer. The objective function is the cross entropy of labels.
### Run Training
The script for training is `train.sh`, user just need to execute:
@@ -115,27 +120,36 @@ The content in `train.sh`:
```
paddle train \
--config=./db_lstm.py \
+ --use_gpu=0 \
+ --log_period=5000 \
+ --trainer_count=1 \
+ --show_parameter_stats_period=5000 \
--save_dir=./output \
- --trainer_count=4 \
- --log_period=10 \
- --num_passes=500 \
- --use_gpu=false \
- --show_parameter_stats_period=10 \
+ --num_passes=10000 \
+ --average_test_period=10000000 \
+ --init_model_path=./data \
+ --load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
```
- \--config=./db_lstm.py : network config file.
-- \--save_di=./output: output path to save models.
-- \--trainer_count=4 : set thread number (or GPU count).
-- \--log_period=10 : print log every 20 batches.
-- \--num_passes=500: set pass number, one pass in PaddlePaddle means training all samples in dataset one time.
-- \--use_gpu=false: use CPU to train, set true, if you install GPU version of PaddlePaddle and want to use GPU to train.
-- \--show_parameter_stats_period=10: show parameter statistic every 100 batches.
-- \--test_all_data_in_one_period=1: test all data in every testing.
-
-
-After training, the models will be saved in directory `output`.
+- \--use_gpu=0: use CPU to train; set it to 1 if you installed the GPU version of PaddlePaddle and want to train on GPU. Note that crf_layer does not support GPU yet.
+- \--log_period=5000: print a log every 5000 batches.
+- \--trainer_count=1: set the thread number (or GPU count).
+- \--show_parameter_stats_period=5000: show parameter statistics every 5000 batches.
+- \--save_dir=./output: output path to save models.
+- \--num_passes=10000: set the number of passes; one pass in PaddlePaddle means training all samples in the dataset once.
+- \--average_test_period=10000000: test on the averaged parameters every average_test_period batches.
+- \--init_model_path=./data: parameter initialization path.
+- \--load_missing_parameter_strategy=rand: randomly initialize parameters missing from the initialization model.
+- \--test_all_data_in_one_period=1: test all data in one period.
+
+
+After training, the models will be saved in the directory `output`. Our training curve is as follows:
+
+
+
### Run testing
The script for testing is `test.sh`, user just need to execute:
@@ -155,6 +169,7 @@ paddle train \
- \--model_list=$model_list.list: model list file
- \--job=test: indicate the test job
- \--config_args=is_test=1: flag to indicate test
+- \--test_all_data_in_one_period=1: test all data in one period.
### Run prediction
@@ -166,11 +181,13 @@ The script for prediction is `predict.sh`, user just need to execute:
In `predict.sh`, user should offer the network config file, model path, label file, word dictionary file, feature file
```
python predict.py
- -c $config_file
- -w $model_path
- -l $label_file
- -d $dict_file
- -i $input_file
+ -c $config_file \
+ -w $best_model_path \
+ -l $label_file \
+ -p $predicate_dict_file \
+ -d $dict_file \
+ -i $input_file \
+ -o $output_file
```
`predict.py` is the main executable python script, which includes functions: load model, load data, data prediction. The network model will output the probability distribution of labels. In the demo, we take the label with maximum probability as result. User can also implement the beam search or viterbi decoding upon the probability distribution matrix.
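
For reference, a minimal sketch of the corresponding `hook` with all nine slots is shown below. It assumes `predicate_dict` is passed to the provider alongside `word_dict` and `label_dict`, and that the mark feature uses a 0/1 vocabulary; the exact argument names are illustrative.

```
from paddle.trainer.PyDataProvider2 import integer_value_sequence

def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
    # Keep the dictionaries on the settings object so process() can use them.
    settings.word_dict = word_dict
    settings.label_dict = label_dict
    settings.predicate_dict = predicate_dict
    # One integer sequence slot per input: words, predicate, five context
    # windows (ctx_n2 .. ctx_p2), the 0/1 mark flag, and the label.
    settings.slots = [
        integer_value_sequence(len(word_dict)),       # sentence words
        integer_value_sequence(len(predicate_dict)),  # predicate
        integer_value_sequence(len(word_dict)),       # ctx_n2
        integer_value_sequence(len(word_dict)),       # ctx_n1
        integer_value_sequence(len(word_dict)),       # ctx_0
        integer_value_sequence(len(word_dict)),       # ctx_p1
        integer_value_sequence(len(word_dict)),       # ctx_p2
        integer_value_sequence(2),                    # mark (0 or 1)
        integer_value_sequence(len(label_dict)),      # labels
    ]
```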
diff --git a/doc/demo/sentiment_analysis/sentiment_analysis.md b/doc/demo/sentiment_analysis/sentiment_analysis.md
index 385f49891dcd840c525f7d1c3aaf7f08a7e4903f..c53952c544de9fa88a6318432e34b0d05b149445 100644
--- a/doc/demo/sentiment_analysis/sentiment_analysis.md
+++ b/doc/demo/sentiment_analysis/sentiment_analysis.md
@@ -6,7 +6,7 @@ Sentiment analysis is also used to monitor social media based on large amount of
On the other hand, grabbing the user comments of products and analyzing their sentiment are useful to understand user preferences for companies, products, even competing products.
-This tutorial will guide you through the process of training a Long Short Term Memory (LSTM) Network to classify the sentiment of sentences from [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/), sometimes known as the [Internet Movie Database (IMDB)](http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf). This dataset contains movie reviews along with their associated binary sentiment polarity labels, namely positive and negative. So randomly guessing yields 50% accuracy.
+This tutorial will guide you through the process of training a Long Short Term Memory (LSTM) Network to classify the sentiment of sentences from [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/), sometimes known as the Internet Movie Database (IMDB). This dataset contains movie reviews along with their associated binary sentiment polarity labels, namely positive and negative. So randomly guessing yields 50% accuracy.
## Data Preparation
@@ -39,7 +39,7 @@ imdbEr.txt imdb.vocab README test train
* imdbEr.txt: expected rating for each token in imdb.vocab.
* README: data documentation.
-Both train and test set directory contains:
+The train set directory contains the following files. The test set contains the same files except `unsup` and `urls_unsup.txt`.
```
labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt
@@ -151,6 +151,7 @@ settings(
batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
+ average_window=0.5,
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
@@ -163,17 +164,18 @@ stacked_lstm_net(dict_dim, class_dim=class_dim,
* **Data Definition**:
* get\_config\_arg(): get arguments setted by `--config_args=xx` in commandline argument.
- * Define TrainData and TestData provider, here using Python interface (PyDataProviderWrapper) of PaddlePaddle to load data. For details, you can refer to the document of PyDataProvider.
+ * Define the data provider, here using the Python interface to load data. For details, refer to the PyDataProvider2 documentation.
* **Algorithm Configuration**:
- * use sgd algorithm.
- * use adam optimization.
* set batch size of 128.
- * set average sgd window.
* set global learning rate.
+ * use adam optimization.
+ * set average sgd window.
+ * set L2 regularization.
+ * set gradient clipping threshold.
* **Network Configuration**:
- * dict_dim: get dictionary dimension.
- * class_dim: set category number, IMDB has two label, namely positive and negative label.
+ * dict_dim: dictionary dimension.
+ * class_dim: number of categories; IMDB has two labels, namely positive and negative.
* `stacked_lstm_net`: predefined network as shown in Figure 3, use this network by default.
* `bidirectional_lstm_net`: predefined network as shown in Figure 2.
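
Putting the algorithm and network bullets together, the configuration amounts to something like the sketch below. It assumes `stacked_lstm_net` and `bidirectional_lstm_net` are importable from the demo's `sentiment_net` module, and `dict_dim` is illustrative (normally derived from the loaded dictionary).

```
from paddle.trainer_config_helpers import *
# The predefined networks are assumed to live in the demo's sentiment_net module.
from sentiment_net import stacked_lstm_net, bidirectional_lstm_net

dict_dim = 10000   # illustrative; normally len(word_dict)
class_dim = 2      # IMDB has two labels: positive and negative

# Algorithm configuration, mirroring the bullets above.
settings(
    batch_size=128,
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    average_window=0.5,
    regularization=L2Regularization(8e-4),
    gradient_clipping_threshold=25)

# Network configuration: pick one of the predefined networks.
stacked_lstm_net(dict_dim, class_dim=class_dim)          # Figure 3, the default
# bidirectional_lstm_net(dict_dim, class_dim=class_dim)  # Figure 2
```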
diff --git a/doc/dev/index.rst b/doc/dev/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..0468dd492b6246cfe0771a05c3597ddee95b3ddd
--- /dev/null
+++ b/doc/dev/index.rst
@@ -0,0 +1,9 @@
+Development Guide
+=================
+
+.. toctree::
+ :maxdepth: 1
+
+ layer.md
+ new_layer/new_layer.rst
+ ../source/index.md
diff --git a/doc/dev/layer.md b/doc/dev/layer.md
new file mode 100644
index 0000000000000000000000000000000000000000..930fb0de1ac074b15d06197ed0e732f92288b411
--- /dev/null
+++ b/doc/dev/layer.md
@@ -0,0 +1,4 @@
+# Layer Documents
+
+* [Layer Source Code Document](../source/gserver/layers/index.rst)
+* [Layer Python API Document](../ui/api/trainer_config_helpers/index.rst)
diff --git a/doc/dev/new_layer/index.rst b/doc/dev/new_layer/index.rst
deleted file mode 100644
index 37dac3a14dedf2aaa99335e1b0ebe110dc746174..0000000000000000000000000000000000000000
--- a/doc/dev/new_layer/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Writing New Layers
-==================
-
-.. toctree::
- :maxdepth: 3
-
- new_layer.rst
diff --git a/doc/dev/new_layer/new_layer.rst b/doc/dev/new_layer/new_layer.rst
index bd4a4c46c87f6429338b4d220a80b6265a1f253f..af8b76a3075194ead9be40d2c943238b2cfadecc 100644
--- a/doc/dev/new_layer/new_layer.rst
+++ b/doc/dev/new_layer/new_layer.rst
@@ -1,3 +1,4 @@
+==================
Writing New Layers
==================
@@ -59,7 +60,7 @@ Implement C++ Class
The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/gserver/layers/FullyConnectedLayer.cpp`. We list simplified version of the code below.
-It needs to derive the base class :code:`paddle::BaseLayer`, and it needs to override the following functions:
+It needs to derive the base class :code:`paddle::Layer`, and it needs to override the following functions:
- constructor and destructor.
- :code:`init` function. It is used to initialize the parameters and settings.
diff --git a/doc/index.md b/doc/index.md
deleted file mode 100644
index a4dffb0405a6b23c88473307a1d199e3caaadf55..0000000000000000000000000000000000000000
--- a/doc/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-PaddlePaddle Documentation
-==========================
-
-User Guide
-----------
-* [Introduction](introduction/index.md)
-* [Quick Start](demo/quick_start/index_en.md)
-* [Build and Installation](build/index.rst)
-* [Contribute Code](build/contribute_to_paddle.md)
-* [User Interface](ui/index.md)
-* [Model Config Interface](ui/api/trainer_config_helpers/index.md)
-* [Example and Demo](demo/index.md)
-* [Cluster Train](cluster/index.md)
-
-Development Guide
------------------
-* [Layer Documents](layer.md)
-* [Writing New Layers](dev/new_layer/index.rst)
-* [Source Code Documents](source/index.md)
-
-Algorithm Tutorial
-------------------
-* [RNN Configuration](algorithm/rnn/rnn.rst)
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..668ad75a902bdd14c6198c41380ae93e29cec0d3
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,10 @@
+PaddlePaddle Documentation
+==========================
+
+.. toctree::
+ :maxdepth: 1
+
+ introduction/index.md
+ user_guide.rst
+ dev/index.rst
+ algorithm/index.rst
diff --git a/doc/introduction/index.md b/doc/introduction/index.md
index 004ca07844da0fdbea359508c9fae1012aaad421..01f52031a1d0247cd0b885218c17001f23685239 100644
--- a/doc/introduction/index.md
+++ b/doc/introduction/index.md
@@ -98,4 +98,3 @@ There, you have recovered the underlying pattern between `X` and `Y` only from o
- Build and Installation
- Quick Start
- Example and Demo
-
diff --git a/doc/layer.md b/doc/layer.md
deleted file mode 100644
index 45f2e2bad542ff5c29c89201b356728cf7ca8c1c..0000000000000000000000000000000000000000
--- a/doc/layer.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Layer Documents
-
-* [Layer Source Code Document](source/gserver/layers/index.rst)
-* [Layer Python API Document](ui/api/trainer_config_helpers/layers_index.rst)
diff --git a/doc/source/api/api.rst b/doc/source/api.rst
similarity index 90%
rename from doc/source/api/api.rst
rename to doc/source/api.rst
index 6fc450202df73f5ca99c2c52f257243aa37c90d4..30396c26b61827847cc5acc29cee1c3c8e7b226e 100644
--- a/doc/source/api/api.rst
+++ b/doc/source/api.rst
@@ -1,5 +1,5 @@
API
-========
+===
.. doxygenfile:: paddle/api/PaddleAPI.h
.. doxygenfile:: paddle/api/Internal.h
diff --git a/doc/source/cuda/cuda/cuda.rst b/doc/source/cuda/cuda/cuda.rst
deleted file mode 100644
index 52f17c2b2e48aec8e6fc8d5a7e4f443ad72d96a6..0000000000000000000000000000000000000000
--- a/doc/source/cuda/cuda/cuda.rst
+++ /dev/null
@@ -1,39 +0,0 @@
-Cuda
-=============
-
-Dynamic Link Libs
---------------------------
-
-hl_dso_loader.h
-``````````````````
-.. doxygenfile:: paddle/cuda/include/hl_dso_loader.h
-
-GPU Resources
-----------------
-
-hl_cuda.ph
-``````````````
-.. doxygenfile:: paddle/cuda/include/hl_cuda.ph
-
-hl_cuda.h
-``````````````
-.. doxygenfile:: paddle/cuda/include/hl_cuda.h
-
-CUDA Wrapper
---------------
-
-hl_cuda_cublas.h
-``````````````````````
-.. doxygenfile:: paddle/cuda/include/hl_cuda_cublas.h
-
-hl_cuda_cudnn.h
-``````````````````````
-.. doxygenfile:: paddle/cuda/include/hl_cuda_cudnn.h
-
-hl_cuda_cudnn.h
-``````````````````````
-.. doxygenfile:: paddle/cuda/include/hl_cuda_cudnn.ph
-
-
-
-
diff --git a/doc/source/cuda/cuda/index.rst b/doc/source/cuda/cuda/index.rst
deleted file mode 100644
index 5fa38ff0fc8cea2b97262ea5493dea27b322dc1c..0000000000000000000000000000000000000000
--- a/doc/source/cuda/cuda/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-CUDA
-====================
-
-.. toctree::
- :maxdepth: 3
-
- cuda.rst
diff --git a/doc/source/cuda/index.rst b/doc/source/cuda/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b0fed2e7f72c9a9671e56e114edfc88d72504dbe
--- /dev/null
+++ b/doc/source/cuda/index.rst
@@ -0,0 +1,9 @@
+CUDA
+====
+
+.. toctree::
+ :maxdepth: 2
+
+ matrix.rst
+ nn.rst
+ utils.rst
diff --git a/doc/source/cuda/matrix/matrix.rst b/doc/source/cuda/matrix.rst
similarity index 76%
rename from doc/source/cuda/matrix/matrix.rst
rename to doc/source/cuda/matrix.rst
index dd4f06599c5af29a0278617ffd1bd9f6ae6b222e..b7699c83eda15d9003506f5fc57b51d52e7af823 100644
--- a/doc/source/cuda/matrix/matrix.rst
+++ b/doc/source/cuda/matrix.rst
@@ -1,61 +1,59 @@
Matrix
-=======
+======
-Base Matrix
--------------
+Base
+----
hl_matrix.h
-``````````````````
+```````````
.. doxygenfile:: paddle/cuda/include/hl_matrix.h
hl_matrix_base.h
-``````````````````
+````````````````
.. doxygenfile:: paddle/cuda/include/hl_matrix_base.cuh
hl_matrix_apply.cuh
-``````````````````````
+```````````````````
.. doxygenfile:: paddle/cuda/include/hl_matrix_apply.cuh
hl_matrix_ops.cuh
-``````````````````````
+`````````````````
.. doxygenfile:: paddle/cuda/include/hl_matrix_ops.cuh
hl_matrix_type.cuh
-``````````````````````
+``````````````````
.. doxygenfile:: paddle/cuda/include/hl_matrix_type.cuh
hl_sse_matrix_kernel.cuh
-``````````````````````````
+````````````````````````
.. doxygenfile:: paddle/cuda/include/hl_sse_matrix_kernel.cuh
+Matrix Function
+---------------
+
hl_batch_transpose.h
-``````````````````````````
+````````````````````
.. doxygenfile:: paddle/cuda/include/hl_batch_transpose.h
-Sparse Matrix
---------------
-
-hl_sparse.h
-``````````````````
-.. doxygenfile:: paddle/cuda/include/hl_sparse.h
-
-hl_sparse.ph
-``````````````````````
-.. doxygenfile:: paddle/cuda/include/hl_sparse.ph
-
-Others
----------------
-
hl_aggregate.h
-``````````````````
+``````````````
.. doxygenfile:: paddle/cuda/include/hl_aggregate.h
+hl_top_k.h
+``````````
+.. doxygenfile:: paddle/cuda/include/hl_top_k.h
+
hl_table_apply.h
-``````````````````
+````````````````
.. doxygenfile:: paddle/cuda/include/hl_table_apply.h
-hl_top_k.h
-``````````````````
-.. doxygenfile:: paddle/cuda/include/hl_top_k.h
+Sparse Matrix
+-------------
+hl_sparse.h
+```````````
+.. doxygenfile:: paddle/cuda/include/hl_sparse.h
+hl_sparse.ph
+````````````
+.. doxygenfile:: paddle/cuda/include/hl_sparse.ph
diff --git a/doc/source/cuda/matrix/index.rst b/doc/source/cuda/matrix/index.rst
deleted file mode 100644
index 63f95eb46618fd43a1140e4d857ae7e2fc89a6ae..0000000000000000000000000000000000000000
--- a/doc/source/cuda/matrix/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Matrix
-====================
-
-.. toctree::
- :maxdepth: 3
-
- matrix.rst
diff --git a/doc/source/cuda/rnn/rnn.rst b/doc/source/cuda/nn.rst
similarity index 79%
rename from doc/source/cuda/rnn/rnn.rst
rename to doc/source/cuda/nn.rst
index ce8ed96692bcb79eec0e5e6ae52a8bf5f6573418..5577d01e72a5b22847bda40528c46a28cacc1490 100644
--- a/doc/source/cuda/rnn/rnn.rst
+++ b/doc/source/cuda/nn.rst
@@ -1,36 +1,39 @@
-Neural Networks
-==================
+Neural Network
+==============
Base
--------
+----
+
.. doxygenfile:: paddle/cuda/include/hl_gpu.h
-.. doxygenfile:: paddle/cuda/include/hl_cnn.h
.. doxygenfile:: paddle/cuda/include/hl_functions.h
.. doxygenfile:: paddle/cuda/include/hl_avx_functions.h
-.. doxygenfile:: paddle/cuda/include/hl_device_functions.cuh
.. doxygenfile:: paddle/cuda/include/hl_gpu_functions.cuh
-
-Activation Functions
------------------------
.. doxygenfile:: paddle/cuda/include/hl_activation_functions.h
+
+CNN Related APIs
+----------------
+.. doxygenfile:: paddle/cuda/include/hl_cnn.h
+.. doxygenfile:: paddle/cuda/include/hl_cuda_cudnn.h
+.. doxygenfile:: paddle/cuda/include/hl_cuda_cudnn.ph
+
RNN Related APIs
------------------
+----------------
.. doxygenfile:: paddle/cuda/include/hl_recurrent_apply.cuh
.. doxygenfile:: paddle/cuda/include/hl_sequence.h
LSTM Model
-``````````````
+``````````
+
.. doxygenfile:: paddle/cuda/include/hl_lstm.h
.. doxygenfile:: paddle/cuda/include/hl_cpu_lstm.cuh
.. doxygenfile:: paddle/cuda/include/hl_gpu_lstm.cuh
.. doxygenfile:: paddle/cuda/include/hl_lstm_ops.cuh
GRU Model
-````````````````
+`````````
+
.. doxygenfile:: paddle/cuda/include/hl_gru_ops.cuh
.. doxygenfile:: paddle/cuda/include/hl_cpu_gru.cuh
.. doxygenfile:: paddle/cuda/include/hl_gpu_gru.cuh
-
-
diff --git a/doc/source/cuda/rnn/index.rst b/doc/source/cuda/rnn/index.rst
deleted file mode 100644
index 4913e47ba1cbc1c2b93fe3e128626a8e66aedc62..0000000000000000000000000000000000000000
--- a/doc/source/cuda/rnn/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-RNN
-====================
-
-.. toctree::
- :maxdepth: 3
-
- rnn.rst
diff --git a/doc/source/cuda/utils.rst b/doc/source/cuda/utils.rst
new file mode 100644
index 0000000000000000000000000000000000000000..850e8bd1c6670947e2a5f1b6f9b0d5b252117cbf
--- /dev/null
+++ b/doc/source/cuda/utils.rst
@@ -0,0 +1,37 @@
+Utils
+=====
+
+Dynamic Link Libs
+-----------------
+.. doxygenfile:: paddle/cuda/include/hl_dso_loader.h
+
+GPU Resources
+-------------
+
+hl_cuda.ph
+``````````
+.. doxygenfile:: paddle/cuda/include/hl_cuda.ph
+
+hl_cuda.h
+`````````
+.. doxygenfile:: paddle/cuda/include/hl_cuda.h
+
+HPPL Base
+---------
+.. doxygenfile:: paddle/cuda/include/hl_base.h
+
+CUBLAS Wrapper
+--------------
+.. doxygenfile:: paddle/cuda/include/hl_cuda_cublas.h
+
+Timer
+-----
+.. doxygenfile:: paddle/cuda/include/hl_time.h
+
+Thread Resource
+---------------
+.. doxygenfile:: paddle/cuda/include/hl_thread.ph
+
+Device Function
+---------------
+.. doxygenfile:: paddle/cuda/include/hl_device_functions.cuh
diff --git a/doc/source/cuda/utils/index.rst b/doc/source/cuda/utils/index.rst
deleted file mode 100644
index 7a84cbe27dd21e326add1a0a1774cbaa089e195f..0000000000000000000000000000000000000000
--- a/doc/source/cuda/utils/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Utils
-====================
-
-.. toctree::
- :maxdepth: 3
-
- utils.rst
diff --git a/doc/source/cuda/utils/utils.rst b/doc/source/cuda/utils/utils.rst
deleted file mode 100644
index 1ea3e5404aa5fc792075aa09c7fd7a1986332c79..0000000000000000000000000000000000000000
--- a/doc/source/cuda/utils/utils.rst
+++ /dev/null
@@ -1,23 +0,0 @@
-Utilities
-===========
-
-HPPL Base
-------------
-
-hl_base.h
-``````````````
-.. doxygenfile:: paddle/cuda/include/hl_base.h
-
-Timer
------------
-
-hl_time.h
-``````````````
-.. doxygenfile:: paddle/cuda/include/hl_time.h
-
-Thread Resource
------------
-
-hl_thread.ph
-``````````````
-.. doxygenfile:: paddle/cuda/include/hl_thread.ph
diff --git a/doc/source/gserver/activations/index.rst b/doc/source/gserver/activations.rst
similarity index 83%
rename from doc/source/gserver/activations/index.rst
rename to doc/source/gserver/activations.rst
index ccdae41128cd6b4edddda0ac44a825082d7495c9..55b9d3be383c07842d7066280cc0e174788db1fb 100644
--- a/doc/source/gserver/activations/index.rst
+++ b/doc/source/gserver/activations.rst
@@ -1,5 +1,5 @@
Activations
-=============
+===========
.. doxygenclass:: paddle::ActivationFunction
:members:
diff --git a/doc/source/gserver/dataprovider/index.rst b/doc/source/gserver/dataprovider/index.rst
deleted file mode 100644
index 4f6077f1224f90f693515d3414da4d96dc652345..0000000000000000000000000000000000000000
--- a/doc/source/gserver/dataprovider/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Data Providers Documents
-==========================
-
-.. toctree::
- :maxdepth: 3
-
- dataproviders.rst
diff --git a/doc/source/gserver/dataprovider/dataproviders.rst b/doc/source/gserver/dataproviders.rst
similarity index 87%
rename from doc/source/gserver/dataprovider/dataproviders.rst
rename to doc/source/gserver/dataproviders.rst
index e8aa4bc35634a0c6ede192a15b276564f7a2c13e..c30d9d6a36a6fbb664ae001274b6a7b0e721070f 100644
--- a/doc/source/gserver/dataprovider/dataproviders.rst
+++ b/doc/source/gserver/dataproviders.rst
@@ -1,23 +1,27 @@
+==============
Data Providers
-================
+==============
-Base DataProvider
-------------------
+DataProviders
+=============
+
+Base
+----
.. doxygenclass:: paddle::DataProvider
:members:
DataProviderGroup
--------------------
+-----------------
.. doxygenclass:: paddle::DataProviderGroup
:members:
MultiDataProvider
--------------------
+-----------------
.. doxygenclass:: paddle::MultiDataProvider
:members:
PyDataProvider
-===================
+==============
IFieldScanner
-------------
@@ -45,7 +49,7 @@ SparseValueScanner
:members:
SequenceScanner
-------------------
+---------------
.. doxygenclass:: paddle::SparseValueScanner
:members:
@@ -69,8 +73,8 @@ IPyDataProvider
.. doxygenclass:: paddle::PyDataProvider2
:members:
-Proto Data Provider
-===================
+ProtoDataProvider
+=================
ProtoDataProvider
----------------
@@ -78,6 +82,6 @@ ProtoDataProvider
:members:
ProtoSequenceDataProvider
-----------------
+-------------------------
.. doxygenclass:: paddle::ProtoSequenceDataProvider
:members:
diff --git a/doc/source/gserver/evaluators/evaluators.rst b/doc/source/gserver/evaluators.rst
similarity index 96%
rename from doc/source/gserver/evaluators/evaluators.rst
rename to doc/source/gserver/evaluators.rst
index 0c5cc85e7dff31693bdc9d2ee44ef470a0fc5f90..f5361f76cd2b1c9c004221c03ea05b2c1f3a652e 100644
--- a/doc/source/gserver/evaluators/evaluators.rst
+++ b/doc/source/gserver/evaluators.rst
@@ -1,14 +1,15 @@
-Base Evaluator
-==============
+==========
+Evaluators
+==========
+
+Base
+====
-Evaluator
----------
.. doxygenclass:: paddle::Evaluator
:members:
-
-Utils
-=====
+Sum
+===
SumEvaluator
------------
diff --git a/doc/source/gserver/evaluators/index.rst b/doc/source/gserver/evaluators/index.rst
deleted file mode 100644
index 298de3e1a32d36b9102f5ad64cc1b968f418041b..0000000000000000000000000000000000000000
--- a/doc/source/gserver/evaluators/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Evaluators
-==========
-
-.. toctree::
- :maxdepth: 3
-
- evaluators.rst
diff --git a/doc/source/gserver/gradientmachines/gradientmachines.rst b/doc/source/gserver/gradientmachines.rst
similarity index 54%
rename from doc/source/gserver/gradientmachines/gradientmachines.rst
rename to doc/source/gserver/gradientmachines.rst
index 3607664c850cdf4df4e10151b05f15e275adceaf..04c8e91d0316a45ad10b0ed0513d3e8916b7c3d9 100644
--- a/doc/source/gserver/gradientmachines/gradientmachines.rst
+++ b/doc/source/gserver/gradientmachines.rst
@@ -1,18 +1,18 @@
Gradient Machines
-================
+=================
GradientMachine
----------------------
+---------------
.. doxygenclass:: paddle::GradientMachine
:members:
-GradientMachineModel
---------------------
+GradientMachineMode
+-------------------
.. doxygenclass:: paddle::IGradientMachineMode
:members:
MultiGradientMachine
----------------------
+--------------------
.. doxygenclass:: paddle::MultiGradientMachine
:members:
@@ -21,20 +21,7 @@ TrainerThread
.. doxygenclass:: paddle::TrainerThread
:members:
-Recurrent Gradient Machines
----------------------------
+RecurrentGradientMachine
+------------------------
.. doxygenclass:: paddle::RecurrentGradientMachine
:members:
-
-Networks
-========
-
-NeuralNetwork
--------------
-.. doxygenclass:: paddle::NeuralNetwork
- :members:
-
-ParallelNeuralNetwork
----------------------
-.. doxygenclass:: paddle::ParallelNeuralNetwork
- :members:
diff --git a/doc/source/gserver/gradientmachines/index.rst b/doc/source/gserver/gradientmachines/index.rst
deleted file mode 100644
index 997c29a102f53c165c70ff11cd9650b83bcecf44..0000000000000000000000000000000000000000
--- a/doc/source/gserver/gradientmachines/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Gradient Machines Documents
-=============================
-
-.. toctree::
- :maxdepth: 3
-
- gradientmachines.rst
diff --git a/doc/source/gserver/index.rst b/doc/source/gserver/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..223b00b9a9dbf1db40ce702cf0e154e5e53a8644
--- /dev/null
+++ b/doc/source/gserver/index.rst
@@ -0,0 +1,12 @@
+GServer
+=======
+
+.. toctree::
+ :maxdepth: 2
+
+ activations.rst
+ dataproviders.rst
+ evaluators.rst
+ gradientmachines.rst
+ layers.rst
+ neworks.rst
diff --git a/doc/source/gserver/layers/layer.rst b/doc/source/gserver/layers.rst
similarity index 94%
rename from doc/source/gserver/layers/layer.rst
rename to doc/source/gserver/layers.rst
index 807b22ca140ee71208a96e2877b9c5636620b165..191b2bdff26ed17437370a12036f9dbb174dae15 100644
--- a/doc/source/gserver/layers/layer.rst
+++ b/doc/source/gserver/layers.rst
@@ -1,6 +1,10 @@
-Base
+======
+Layers
======
+Base
+====
+
Layer
-----
.. doxygenclass:: paddle::Layer
@@ -17,7 +21,7 @@ Operator
:members:
Data Layer
-===========
+==========
.. doxygenclass:: paddle::DataLayer
:members:
@@ -58,6 +62,11 @@ CudnnConvLayer
.. doxygenclass:: paddle::CudnnConvLayer
:members:
+ExpandConvBaseLayer
+-------------------
+.. doxygenclass:: paddle::ExpandConvBaseLayer
+ :members:
+
ExpandConvLayer
---------------
.. doxygenclass:: paddle::ExpandConvLayer
@@ -86,6 +95,16 @@ CudnnPoolLayer
.. doxygenclass:: paddle::CudnnPoolLayer
:members:
+SpatialPyramidPoolLayer
+-----------------------
+.. doxygenclass:: paddle::SpatialPyramidPoolLayer
+ :members:
+
+MaxOutLayer
+-----------
+.. doxygenclass:: paddle::MaxOutLayer
+ :members:
+
Norm Layers
===========
@@ -402,6 +421,11 @@ TransLayer
Sampling Layers
===============
+BilinearInterpLayer
+-------------------
+.. doxygenclass:: paddle::BilinearInterpLayer
+ :members:
+
MultinomialSampler
------------------
.. doxygenclass:: paddle::MultinomialSampler
@@ -465,6 +489,11 @@ SumOfSquaresCostLayer
.. doxygenclass:: paddle::SumOfSquaresCostLayer
:members:
+SumCostLayer
+`````````````````````
+.. doxygenclass:: paddle::SumCostLayer
+ :members:
+
CosSimLayer
-----------
.. doxygenclass:: paddle::CosSimLayer
diff --git a/doc/source/gserver/layers/index.rst b/doc/source/gserver/layers/index.rst
deleted file mode 100644
index 559c5436b10a5977ac347611639b32d43f1ed123..0000000000000000000000000000000000000000
--- a/doc/source/gserver/layers/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Layers Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- layer.rst
diff --git a/doc/source/gserver/neworks.rst b/doc/source/gserver/neworks.rst
new file mode 100644
index 0000000000000000000000000000000000000000..73fb60d549cc88f61d2e2d18c9ec31c37cf4fa9a
--- /dev/null
+++ b/doc/source/gserver/neworks.rst
@@ -0,0 +1,12 @@
+Networks
+========
+
+NeuralNetwork
+-------------
+.. doxygenclass:: paddle::NeuralNetwork
+ :members:
+
+ParallelNeuralNetwork
+---------------------
+.. doxygenclass:: paddle::ParallelNeuralNetwork
+ :members:
diff --git a/doc/source/index.md b/doc/source/index.md
deleted file mode 100644
index 55fcdeb3dfcedd8589bf7986682708a957c05746..0000000000000000000000000000000000000000
--- a/doc/source/index.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Source Code Documents
-
-## cuda
-
-- [CUDA](cuda/cuda/index.rst)
-- [Matrix](cuda/matrix/index.rst)
-- [RNN](cuda/rnn/index.rst)
-- [Utils](cuda/utils/index.rst)
-
-## gserver
-
-- [Activations](gserver/activations/index.rst)
-- [Data Providers](gserver/dataprovider/index.rst)
-- [Evaluators](gserver/evaluators/index.rst)
-- [Gradient Machines](gserver/gradientmachines/index.rst)
-- [Layers](gserver/layers/index.rst)
-
-## math
-
-- [Matrix](math/matrix/index.rst)
-- [Utils](math/utils/index.rst)
-
-## parameter
-
-- [Parameter](parameter/parameter/index.rst)
-- [Update](parameter/update/index.rst)
-- [Optimizer](parameter/optimizer/index.rst)
-
-## pserver
-
-- [Client](pserver/client/index.rst)
-- [Network](pserver/network/index.rst)
-- [Server](pserver/server/index.rst)
-
-## trainer
-
-- [Trainer](trainer/trainer.rst)
-
-## api
-
-- [API](api/api.rst)
-
-## utils
-
-- [CustomStackTrace](utils/customStackTrace.rst)
-- [Enumeration wrapper](utils/enum.rst)
-- [Lock](utils/lock.rst)
-- [Queue](utils/queue.rst)
-- [Thread](utils/thread.rst)
diff --git a/doc/source/index.rst b/doc/source/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..36323c888ee65147f59f28160dc26ca29235ba63
--- /dev/null
+++ b/doc/source/index.rst
@@ -0,0 +1,14 @@
+Source Code Documents
+=====================
+
+.. toctree::
+ :maxdepth: 1
+
+ gserver/index.rst
+ trainer.rst
+ parameter/index.rst
+ pserver/index.rst
+ api.rst
+ cuda/index.rst
+ math/index.rst
+ utils/index.rst
diff --git a/doc/source/math/functions.rst b/doc/source/math/functions.rst
new file mode 100644
index 0000000000000000000000000000000000000000..aef12e0f005226c6d40d74d0e858a11585339758
--- /dev/null
+++ b/doc/source/math/functions.rst
@@ -0,0 +1,10 @@
+Functions
+=========
+
+MathFunctions
+-------------
+.. doxygenfile:: paddle/math/MathFunctions.h
+
+SIMDFunctions
+-------------
+.. doxygenfile:: paddle/math/SIMDFunctions.h
diff --git a/doc/source/math/index.rst b/doc/source/math/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2ec16f2b4450c870f9590aea4ad4ca7dc415b75d
--- /dev/null
+++ b/doc/source/math/index.rst
@@ -0,0 +1,10 @@
+Math
+====
+
+.. toctree::
+ :maxdepth: 2
+
+ vector.rst
+ matrix.rst
+ functions.rst
+ utils.rst
diff --git a/doc/source/math/matrix.rst b/doc/source/math/matrix.rst
new file mode 100644
index 0000000000000000000000000000000000000000..9bb20f618d229e1baea15e26378bf40d7c6e1783
--- /dev/null
+++ b/doc/source/math/matrix.rst
@@ -0,0 +1,76 @@
+Matrix
+======
+
+Base
+----
+
+BaseMatrix Template
+```````````````````
+.. doxygenclass:: paddle::BaseMatrixT
+ :members:
+
+Matrix
+``````
+.. doxygenclass:: paddle::Matrix
+ :members:
+
+MatrixOffset
+````````````
+.. doxygenclass:: paddle::MatrixOffset
+ :members:
+
+CpuMatrix
+---------
+
+CpuMatrix
+`````````
+.. doxygenclass:: paddle::CpuMatrix
+ :members:
+
+SharedCpuMatrix
+```````````````
+.. doxygenclass:: paddle::SharedCpuMatrix
+ :members:
+
+GpuMatrix
+---------
+.. doxygenclass:: paddle::GpuMatrix
+ :members:
+
+CpuSparseMatrix
+---------------
+
+CpuSparseMatrix
+```````````````
+.. doxygenclass:: paddle::CpuSparseMatrix
+ :members:
+
+SparseRowCpuMatrix
+``````````````````
+.. doxygenclass:: paddle::SparseRowCpuMatrix
+ :members:
+
+SparseAutoGrowRowCpuMatrix
+``````````````````````````
+.. doxygenclass:: paddle::SparseAutoGrowRowCpuMatrix
+ :members:
+
+SparsePrefetchRowCpuMatrix
+``````````````````````````
+.. doxygenclass:: paddle::SparsePrefetchRowCpuMatrix
+ :members:
+
+SparseRowIdsCpuMatrix
+`````````````````````
+.. doxygenclass:: paddle::SparseRowIdsCpuMatrix
+ :members:
+
+CacheRowCpuMatrix
+`````````````````
+.. doxygenclass:: paddle::CacheRowCpuMatrix
+ :members:
+
+GpuSparseMatrix
+---------------
+.. doxygenclass:: paddle::GpuSparseMatrix
+ :members:
diff --git a/doc/source/math/matrix/index.rst b/doc/source/math/matrix/index.rst
deleted file mode 100644
index 68410f2a27b68c87087f2c17de351495ac6a6cd0..0000000000000000000000000000000000000000
--- a/doc/source/math/matrix/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Matrix Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- matrix.rst
diff --git a/doc/source/math/matrix/matrix.rst b/doc/source/math/matrix/matrix.rst
deleted file mode 100644
index b12e3934f4705d4a2b7d3d790873701ddfe27d9f..0000000000000000000000000000000000000000
--- a/doc/source/math/matrix/matrix.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-Matrix
-=======
-
-Base
---------
-.. doxygenfile:: paddle/math/BaseMatrix.h
-
-Sparse Matrix
-----------------
-.. doxygenfile:: paddle/math/Matrix.h
-.. doxygenfile:: paddle/math/Vector.h
-.. doxygenfile:: paddle/math/MathUtils.h
-.. doxygenfile:: paddle/math/SparseMatrix.h
-.. doxygenfile:: paddle/math/SparseRowMatrix.h
-.. doxygenfile:: paddle/math/CpuSparseMatrix.h
-
-Others
-----------
-.. doxygenfile:: paddle/math/MathFunctions.h
-.. doxygenfile:: paddle/math/SIMDFunctions.h
diff --git a/doc/source/math/utils/utils.rst b/doc/source/math/utils.rst
similarity index 62%
rename from doc/source/math/utils/utils.rst
rename to doc/source/math/utils.rst
index 3df721a47b93bce950185f2d6ffe22d4a801af30..55d9961a390c205563a9ae4fbd87ac4ae90fc314 100644
--- a/doc/source/math/utils/utils.rst
+++ b/doc/source/math/utils.rst
@@ -1,9 +1,18 @@
-Utils
-=======
+Memory Manager
+==============
Memory Handle
---------------
+-------------
.. doxygenfile:: paddle/math/MemoryHandle.h
+
+Allocator
+---------
.. doxygenfile:: paddle/math/Allocator.h
+
+PoolAllocator
+`````````````
.. doxygenfile:: paddle/math/PoolAllocator.h
+
+Storage
+-------
.. doxygenfile:: paddle/math/Storage.h
diff --git a/doc/source/math/utils/index.rst b/doc/source/math/utils/index.rst
deleted file mode 100644
index e5fe335da29b957706ed52662682d11c425e5908..0000000000000000000000000000000000000000
--- a/doc/source/math/utils/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Utils Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- utils.rst
diff --git a/doc/source/math/vector.rst b/doc/source/math/vector.rst
new file mode 100644
index 0000000000000000000000000000000000000000..07f7062abaf4f30b8967b594f4e16ab881f5414f
--- /dev/null
+++ b/doc/source/math/vector.rst
@@ -0,0 +1,37 @@
+Vector
+======
+
+BaseVector
+``````````
+.. doxygenclass:: paddle::BaseVector
+ :members:
+
+Vector Template
+```````````````
+.. doxygenclass:: paddle::VectorT
+ :members:
+
+CpuVector Template
+``````````````````
+.. doxygenclass:: paddle::CpuVectorT
+ :members:
+
+GpuVector Template
+``````````````````
+.. doxygenclass:: paddle::GpuVectorT
+ :members:
+
+ParallelCpuVector Template
+``````````````````````````
+.. doxygenclass:: paddle::ParallelCpuVectorT
+ :members:
+
+ParallelGpuVector Template
+``````````````````````````
+.. doxygenclass:: paddle::ParallelGpuVectorT
+ :members:
+
+CpuGpuVector Template
+`````````````````````
+.. doxygenclass:: paddle::CpuGpuVectorT
+ :members:
diff --git a/doc/source/parameter/index.rst b/doc/source/parameter/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3bf6948dc3478574d8d125d8461235f8827e4e42
--- /dev/null
+++ b/doc/source/parameter/index.rst
@@ -0,0 +1,9 @@
+Parameter
+=========
+
+.. toctree::
+ :maxdepth: 2
+
+ parameter.rst
+ optimizer.rst
+ updater.rst
diff --git a/doc/source/parameter/optimizer.rst b/doc/source/parameter/optimizer.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b5b8b850b349d547c9e5508d3ebec3d7e00ea310
--- /dev/null
+++ b/doc/source/parameter/optimizer.rst
@@ -0,0 +1,22 @@
+Optimizer
+=========
+
+ParameterOptimizer
+------------------
+.. doxygenfile:: paddle/parameter/ParameterOptimizer.h
+
+Regularizer
+-----------
+.. doxygenfile:: paddle/parameter/Regularizer.h
+
+FirstOrderOptimizer
+-------------------
+.. doxygenfile:: paddle/parameter/FirstOrderOptimizer.h
+
+AverageOptimizer
+----------------
+.. doxygenfile:: paddle/parameter/AverageOptimizer.h
+
+OptimizerWithRegularizer
+------------------------
+.. doxygenfile:: paddle/parameter/OptimizerWithRegularizer.h
diff --git a/doc/source/parameter/optimizer/index.rst b/doc/source/parameter/optimizer/index.rst
deleted file mode 100644
index 3338af5608a03ee853e3a5f16d2483b810215514..0000000000000000000000000000000000000000
--- a/doc/source/parameter/optimizer/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Parameter Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- optimizer.rst
diff --git a/doc/source/parameter/optimizer/optimizer.rst b/doc/source/parameter/optimizer/optimizer.rst
deleted file mode 100644
index 3d9e49217eb17541c14d8d64715278e62c99d2b4..0000000000000000000000000000000000000000
--- a/doc/source/parameter/optimizer/optimizer.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Optimizer
-============
-
-.. doxygenfile:: paddle/parameter/FirstOrderOptimizer.h
-.. doxygenfile:: paddle/parameter/AverageOptimizer.h
-.. doxygenfile:: paddle/parameter/ParameterOptimizer.h
-.. doxygenfile:: paddle/parameter/OptimizerWithRegularizer.h
diff --git a/doc/source/parameter/parameter/parameter.rst b/doc/source/parameter/parameter.rst
similarity index 66%
rename from doc/source/parameter/parameter/parameter.rst
rename to doc/source/parameter/parameter.rst
index 2b7afdb4093753598d73c686b1dc81b970d199d5..2daa62d4e63b952cd93bba35ee32ce35ce768a0d 100644
--- a/doc/source/parameter/parameter/parameter.rst
+++ b/doc/source/parameter/parameter.rst
@@ -1,16 +1,12 @@
Parameter
-=============
-
-Weight
---------
-.. doxygenfile:: paddle/parameter/Weight.h
-
-Regularizer
-------------
-.. doxygenfile:: paddle/parameter/Regularizer.h
+=========
Parameter
--------------
+---------
.. doxygenfile:: paddle/parameter/Argument.h
.. doxygenfile:: paddle/parameter/Parameter.h
.. doxygenfile:: paddle/parameter/ParallelParameter.h
+
+Weight
+------
+.. doxygenfile:: paddle/parameter/Weight.h
diff --git a/doc/source/parameter/parameter/index.rst b/doc/source/parameter/parameter/index.rst
deleted file mode 100644
index e7ed70ec4c87b3613cd8450f1e7fca1fb974afca..0000000000000000000000000000000000000000
--- a/doc/source/parameter/parameter/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Parameter Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- parameter.rst
diff --git a/doc/source/parameter/update/index.rst b/doc/source/parameter/update/index.rst
deleted file mode 100644
index 1bbd73319396e7b8ea32c78e0fe3569919bacf2d..0000000000000000000000000000000000000000
--- a/doc/source/parameter/update/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Parameter Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- update.rst
diff --git a/doc/source/parameter/update/update.rst b/doc/source/parameter/updater.rst
similarity index 75%
rename from doc/source/parameter/update/update.rst
rename to doc/source/parameter/updater.rst
index c417602f0338dbd84ae2bd2ca4eb09330202a0e8..dfa22e8e7d1d6f0713974835de93194d2cc58e6f 100644
--- a/doc/source/parameter/update/update.rst
+++ b/doc/source/parameter/updater.rst
@@ -1,7 +1,14 @@
-Update
-==========
+Updater
+=======
+Base
+----
.. doxygenfile:: paddle/parameter/ParameterUpdaterBase.h
+
+Hook
+----
.. doxygenfile:: paddle/parameter/ParameterUpdaterHook.h
-.. doxygenfile:: paddle/parameter/ParameterUpdateFunctions.h
+Functions
+---------
+.. doxygenfile:: paddle/parameter/ParameterUpdateFunctions.h
diff --git a/doc/source/pserver/client.rst b/doc/source/pserver/client.rst
new file mode 100644
index 0000000000000000000000000000000000000000..e5bba0706a1d919104b85e23861ba490a2c828db
--- /dev/null
+++ b/doc/source/pserver/client.rst
@@ -0,0 +1,12 @@
+Client
+======
+
+BaseClient
+----------
+.. doxygenclass:: paddle::BaseClient
+ :members:
+
+ParameterClient2
+----------------
+.. doxygenclass:: paddle::ParameterClient2
+ :members:
diff --git a/doc/source/pserver/client/client.rst b/doc/source/pserver/client/client.rst
deleted file mode 100644
index fc7ed90d3dc8beb0baa30d63ccc956fbba2a4e4c..0000000000000000000000000000000000000000
--- a/doc/source/pserver/client/client.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-Client
-=========
-
-.. doxygenclass:: paddle::BaseClient
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
-
-.. doxygenclass:: paddle::ParameterClient2
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
diff --git a/doc/source/pserver/client/index.rst b/doc/source/pserver/client/index.rst
deleted file mode 100644
index dc924c9ca8e7b9965638fd299dc2f5e78591c91b..0000000000000000000000000000000000000000
--- a/doc/source/pserver/client/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Client Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- client.rst
diff --git a/doc/source/pserver/index.rst b/doc/source/pserver/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..0031e9476bd063511cc2f0a8c209f35627cf44ba
--- /dev/null
+++ b/doc/source/pserver/index.rst
@@ -0,0 +1,10 @@
+PServer
+=======
+
+.. toctree::
+ :maxdepth: 2
+
+ client.rst
+ network.rst
+ server.rst
+ utils.rst
diff --git a/doc/source/pserver/network.rst b/doc/source/pserver/network.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7004c9d91fa9f2af11e15791ef682c108761027e
--- /dev/null
+++ b/doc/source/pserver/network.rst
@@ -0,0 +1,27 @@
+Network
+=======
+
+SocketServer
+------------
+.. doxygenclass:: paddle::SocketServer
+ :members:
+
+SocketWorker
+------------
+.. doxygenclass:: paddle::SocketWorker
+ :members:
+
+SocketClient
+------------
+.. doxygenclass:: paddle::SocketClient
+ :members:
+
+SocketChannel
+-------------
+.. doxygenclass:: paddle::SocketChannel
+ :members:
+
+MessageReader
+-------------
+.. doxygenclass:: paddle::MsgReader
+ :members:
diff --git a/doc/source/pserver/network/index.rst b/doc/source/pserver/network/index.rst
deleted file mode 100644
index 2fdf95e17d339d69de8e027d92cbb385e2bd51ec..0000000000000000000000000000000000000000
--- a/doc/source/pserver/network/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Network Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- network.rst
diff --git a/doc/source/pserver/network/network.rst b/doc/source/pserver/network/network.rst
deleted file mode 100644
index e000ff8dbbdc37e9d638d18d20a8ba53e21dd245..0000000000000000000000000000000000000000
--- a/doc/source/pserver/network/network.rst
+++ /dev/null
@@ -1,42 +0,0 @@
-Network
-==========
-
-Socket Server
-----------------
-.. doxygenclass:: paddle::SocketServer
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
-
-Socket Worker
-----------------
-.. doxygenclass:: paddle::SocketWorker
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
-
-Socket Client
-----------------
-.. doxygenclass:: paddle::SocketClient
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
-
-Socket Channel
----------------
-.. doxygenclass:: paddle::SocketChannel
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
-
-Message Reader
----------------
-.. doxygenclass:: paddle::MsgReader
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
diff --git a/doc/source/pserver/server.rst b/doc/source/pserver/server.rst
new file mode 100644
index 0000000000000000000000000000000000000000..35301acf8ffe3d97e6124c37cf8fe1b43071e14e
--- /dev/null
+++ b/doc/source/pserver/server.rst
@@ -0,0 +1,12 @@
+Server
+======
+
+ProtoServer
+-----------
+.. doxygenclass:: paddle::ProtoServer
+ :members:
+
+ParameterServer2
+----------------
+.. doxygenclass:: paddle::ParameterServer2
+ :members:
diff --git a/doc/source/pserver/server/index.rst b/doc/source/pserver/server/index.rst
deleted file mode 100644
index 09e3530bfeaf56ebbadb1694a69a036813e8970f..0000000000000000000000000000000000000000
--- a/doc/source/pserver/server/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Server Documents
-====================
-
-.. toctree::
- :maxdepth: 3
-
- server.rst
diff --git a/doc/source/pserver/server/server.rst b/doc/source/pserver/server/server.rst
deleted file mode 100644
index f3110fdd731d246ce4211d05e32ddd98584bdbb7..0000000000000000000000000000000000000000
--- a/doc/source/pserver/server/server.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-Server
-==========
-
-.. doxygenclass:: paddle::ProtoServer
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
-
-.. doxygenclass:: paddle::ParameterServer2
- :members:
- :protected-members:
- :private-members:
- :undoc-members:
diff --git a/doc/source/trainer/trainer.rst b/doc/source/trainer.rst
similarity index 94%
rename from doc/source/trainer/trainer.rst
rename to doc/source/trainer.rst
index 12c24597e7f99cd489204602ae25a89d7b960630..85f1feb4fc941f94e65a6b1d037445d2367f65ec 100644
--- a/doc/source/trainer/trainer.rst
+++ b/doc/source/trainer.rst
@@ -14,7 +14,7 @@ RemoteParameterUpdater
:members:
ConcurrentRemoteParameterUpdater
----------------------------------
+--------------------------------
.. doxygenclass:: paddle::ConcurrentRemoteParameterUpdater
:members:
diff --git a/doc/source/utils/customStackTrace.rst b/doc/source/utils/customStackTrace.rst
index a4e6f05a406f33256548fc0ef32bbbf3daff1536..cdc8930739eb4b4d6308ff1fbce170d2977d42e8 100644
--- a/doc/source/utils/customStackTrace.rst
+++ b/doc/source/utils/customStackTrace.rst
@@ -1,9 +1,4 @@
CustomStackTrace
================
-
-
-class CustomStackTrace
-----------------------
-
.. doxygenclass:: paddle::CustomStackTrace
:members:
diff --git a/doc/source/utils/enum.rst b/doc/source/utils/enum.rst
index 17166d35f7cfa63e51058cc5f86165b1e22bbe1e..e0da75afe164f9dab59b862faa7230fc57423e50 100644
--- a/doc/source/utils/enum.rst
+++ b/doc/source/utils/enum.rst
@@ -1,9 +1,3 @@
-enumeration_wrapper
+Enumeration wrapper
===================
-
-
-namespace paddle::enumeration_wrapper
--------------------------------------
-
.. doxygennamespace:: paddle::enumeration_wrapper
-
diff --git a/doc/source/utils/index.rst b/doc/source/utils/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7ddc47d1726f7627852be922d2b769d0752aa799
--- /dev/null
+++ b/doc/source/utils/index.rst
@@ -0,0 +1,11 @@
+Utils
+=====
+
+.. toctree::
+ :maxdepth: 2
+
+ lock.rst
+ queue.rst
+ thread.rst
+ customStackTrace.rst
+ enum.rst
diff --git a/doc/source/utils/lock.rst b/doc/source/utils/lock.rst
index 0b027e403f49fc1720904cf4b502d81e4148e1e3..f011acb9431f0f3dc3b2ba27fcfe71fe6eb07ae9 100644
--- a/doc/source/utils/lock.rst
+++ b/doc/source/utils/lock.rst
@@ -1,37 +1,32 @@
-Thread
-======
+Lock
+====
-
-class Thread
-------------
-
-.. doxygenclass:: paddle::Thread
+RWLock
+------
+.. doxygenclass:: paddle::RWLock
:members:
-
-class ThreadWorker
-------------------
-
-.. doxygenclass:: paddle::ThreadWorker
+ReadLockGuard
+-------------
+.. doxygenclass:: paddle::ReadLockGuard
:members:
-
-class SyncThreadPool
---------------------
-
-.. doxygenclass:: paddle::SyncThreadPool
+SpinLock
+--------
+.. doxygenclass:: paddle::SpinLock
:members:
-
-
-class MultiThreadWorker
------------------------
-.. doxygenclass:: paddle::MultiThreadWorker
+Semaphore
+---------
+.. doxygenclass:: paddle::Semaphore
:members:
-
-class AsyncThreadPool
----------------------
+ThreadBarrier
+-------------
+.. doxygenclass:: paddle::ThreadBarrier
+ :members:
-.. doxygenclass:: paddle::AsyncThreadPool
+LockedCondition
+---------------
+.. doxygenclass:: paddle::LockedCondition
:members:
diff --git a/doc/source/utils/queue.rst b/doc/source/utils/queue.rst
index 72a464ca67288d0d0e24980d59c3bbc85f111081..98192648e2d61e622c2337d10ba024dd676ee685 100644
--- a/doc/source/utils/queue.rst
+++ b/doc/source/utils/queue.rst
@@ -1,16 +1,12 @@
Queue
=====
-
-class Queue
-------------
-
+Queue
+-----
.. doxygenclass:: paddle::Queue
:members:
-
-class BlockingQueue
--------------------
-
+BlockingQueue
+-------------
.. doxygenclass:: paddle::BlockingQueue
:members:
diff --git a/doc/source/utils/thread.rst b/doc/source/utils/thread.rst
index 2eb67dde6a945cc8e250989f7fc8cefed942950e..23d379a9894e5fc22bc6795a480a53d768e608e6 100644
--- a/doc/source/utils/thread.rst
+++ b/doc/source/utils/thread.rst
@@ -1,40 +1,27 @@
-Lock
-====
+Thread
+======
-
-class RWLock
-------------
-
-.. doxygenclass:: paddle::RWLock
+Thread
+------
+.. doxygenclass:: paddle::Thread
:members:
-class ReadLockGuard
--------------------
-
-.. doxygenclass:: paddle::ReadLockGuard
+ThreadWorker
+------------
+.. doxygenclass:: paddle::ThreadWorker
:members:
-class SpinLock
+SyncThreadPool
--------------
-
-.. doxygenclass:: paddle::SpinLock
+.. doxygenclass:: paddle::SyncThreadPool
:members:
-
-class Semaphore
----------------
-
-.. doxygenclass:: paddle::Semaphore
- :members:
-
-class ThreadBarrier
--------------------
-
-.. doxygenclass:: paddle::ThreadBarrier
+
+MultiThreadWorker
+-----------------
+.. doxygenclass:: paddle::MultiThreadWorker
:members:
-class LockedCondition
----------------------
-
-.. doxygenclass:: paddle::LockedCondition
+AsyncThreadPool
+---------------
+.. doxygenclass:: paddle::AsyncThreadPool
:members:
-
diff --git a/doc/ui/api/trainer_config_helpers/activations.rst b/doc/ui/api/trainer_config_helpers/activations.rst
index 070ed03ab6cc938f735667701bd46eec33ea77b4..269e6491e7ebe3899c3fb24fca756a393043473b 100644
--- a/doc/ui/api/trainer_config_helpers/activations.rst
+++ b/doc/ui/api/trainer_config_helpers/activations.rst
@@ -1,3 +1,7 @@
+===========
+Activations
+===========
+
BaseActivation
==============
@@ -102,4 +106,3 @@ STanhActivation
.. automodule:: paddle.trainer_config_helpers.activations
:members: STanhActivation
:noindex:
-
diff --git a/doc/ui/api/trainer_config_helpers/activations_index.rst b/doc/ui/api/trainer_config_helpers/activations_index.rst
deleted file mode 100644
index 1c0b71ab77eec62859c1d7615f6ebe637f3108ac..0000000000000000000000000000000000000000
--- a/doc/ui/api/trainer_config_helpers/activations_index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Activations
-===========
-
-.. toctree::
- :maxdepth: 3
-
- activations.rst
diff --git a/doc/ui/api/trainer_config_helpers/evaluators.rst b/doc/ui/api/trainer_config_helpers/evaluators.rst
index 0586c9907e472dd98c5f7e9098251f3bc6b88bab..d6a79c13e2316b0fd3d53eb47960a767bcf8abdb 100644
--- a/doc/ui/api/trainer_config_helpers/evaluators.rst
+++ b/doc/ui/api/trainer_config_helpers/evaluators.rst
@@ -1,3 +1,7 @@
+==========
+Evaluators
+==========
+
Base
====
.. automodule:: paddle.trainer_config_helpers.evaluators
diff --git a/doc/ui/api/trainer_config_helpers/evaluators_index.rst b/doc/ui/api/trainer_config_helpers/evaluators_index.rst
deleted file mode 100644
index 298de3e1a32d36b9102f5ad64cc1b968f418041b..0000000000000000000000000000000000000000
--- a/doc/ui/api/trainer_config_helpers/evaluators_index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Evaluators
-==========
-
-.. toctree::
- :maxdepth: 3
-
- evaluators.rst
diff --git a/doc/ui/api/trainer_config_helpers/index.md b/doc/ui/api/trainer_config_helpers/index.md
deleted file mode 100644
index 00fa99bb3fa4c407dc867f91f4c7c495dc4061a1..0000000000000000000000000000000000000000
--- a/doc/ui/api/trainer_config_helpers/index.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Model Config Interface
-
-* [Optimizer](optimizers_index.rst)
-* [Data Source](data_sources.rst)
-* [Layers](layers_index.rst)
-* [Activations](activations_index.rst)
-* [Poolings](poolings_index.rst)
-* [Networks](networks_index.rst)
-* [Evaluators](evaluators_index.rst)
-* [Parameter and Extra Layer Attribute](attrs.rst)
diff --git a/doc/ui/api/trainer_config_helpers/index.rst b/doc/ui/api/trainer_config_helpers/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..8395eb75710b3e67ec0c5442f79c999bdacdff42
--- /dev/null
+++ b/doc/ui/api/trainer_config_helpers/index.rst
@@ -0,0 +1,14 @@
+Model Config Interface
+======================
+
+.. toctree::
+ :maxdepth: 1
+
+ optimizers.rst
+ data_sources.rst
+ layers.rst
+ activations.rst
+ poolings.rst
+ networks.rst
+ evaluators.rst
+ attrs.rst
diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst
index 5bb88b0615c12a44e1506e0bdbb974c16f5584ea..b487b739a719e9f7118efcc143301da36f7a978e 100644
--- a/doc/ui/api/trainer_config_helpers/layers.rst
+++ b/doc/ui/api/trainer_config_helpers/layers.rst
@@ -1,3 +1,7 @@
+======
+Layers
+======
+
Base
======
@@ -46,6 +50,12 @@ conv_operator
:members: conv_operator
:noindex:
+conv_projection
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+ :members: conv_projection
+ :noindex:
+
conv_shift_layer
------------------
.. automodule:: paddle.trainer_config_helpers.layers
@@ -71,6 +81,12 @@ img_pool_layer
--------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: img_pool_layer
+ :noindex:
+
+spp_layer
+--------------
+.. automodule:: paddle.trainer_config_helpers.layers
+ :members: spp_layer
:noindex:
maxout_layer
@@ -175,6 +191,12 @@ embedding_layer
:members: embedding_layer
:noindex:
+scaling_projection
+------------------
+.. automodule:: paddle.trainer_config_helpers.layers
+ :members: scaling_projection
+ :noindex:
+
dotmul_projection
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
@@ -254,6 +276,12 @@ expand_layer
:members: expand_layer
:noindex:
+repeat_layer
+------------
+.. automodule:: paddle.trainer_config_helpers.layers
+ :members: repeat_layer
+ :noindex:
+
Math Layers
===========
@@ -275,6 +303,12 @@ interpolation_layer
:members: interpolation_layer
:noindex:
+bilinear_interp_layer
+----------------------
+.. automodule:: paddle.trainer_config_helpers.layers
+ :members: bilinear_interp_layer
+ :noindex:
+
power_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
@@ -395,6 +429,12 @@ hsigmoid
:members: hsigmoid
:noindex:
+sum_cost
+---------
+.. automodule:: paddle.trainer_config_helpers.layers
+ :members: sum_cost
+ :noindex:
+
Check Layer
============
diff --git a/doc/ui/api/trainer_config_helpers/layers_index.rst b/doc/ui/api/trainer_config_helpers/layers_index.rst
deleted file mode 100644
index c0daab152148ce769948f600c3101bd79f5a1013..0000000000000000000000000000000000000000
--- a/doc/ui/api/trainer_config_helpers/layers_index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Layers
-======
-
-.. toctree::
- :maxdepth: 3
-
- layers.rst
diff --git a/doc/ui/api/trainer_config_helpers/networks.rst b/doc/ui/api/trainer_config_helpers/networks.rst
index 2a15b34eaea0b763f992a7225550e6af747f303c..29c52c5ce3078f1755162dbbdd65a059d8ba9fa4 100644
--- a/doc/ui/api/trainer_config_helpers/networks.rst
+++ b/doc/ui/api/trainer_config_helpers/networks.rst
@@ -1,3 +1,9 @@
+========
+Networks
+========
+
+The networks module contains predefined pieces of neural networks that combine multiple layers.
+
NLP
===
@@ -111,4 +117,3 @@ outputs
.. automodule:: paddle.trainer_config_helpers.networks
:members: outputs
:noindex:
-
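
A short usage sketch, assuming the `simple_lstm` and `outputs` helpers from this module and illustrative layer names and sizes:

```
from paddle.trainer_config_helpers import *

# Combine a predefined network piece with ordinary layers (illustrative sizes).
words = data_layer(name='word', size=10000)
emb = embedding_layer(input=words, size=128)
lstm = simple_lstm(input=emb, size=256)  # predefined piece from the networks module
outputs(lstm)                            # declare the network output
```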
diff --git a/doc/ui/api/trainer_config_helpers/networks_index.rst b/doc/ui/api/trainer_config_helpers/networks_index.rst
deleted file mode 100644
index 17bc4dfaa6c4ed3cd5daf0476d0d4c15a2067a22..0000000000000000000000000000000000000000
--- a/doc/ui/api/trainer_config_helpers/networks_index.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Networks
-========
-
-The networks module contains pieces of neural network that combine multiple layers.
-
-.. toctree::
- :maxdepth: 3
-
- networks.rst
diff --git a/doc/ui/api/trainer_config_helpers/optimizers.rst b/doc/ui/api/trainer_config_helpers/optimizers.rst
index b487fec64c4ebb5cfbdff1aa101d9b3675776a2c..7ca4e34156e273caf66cc71e6927bfb23bb5235e 100644
--- a/doc/ui/api/trainer_config_helpers/optimizers.rst
+++ b/doc/ui/api/trainer_config_helpers/optimizers.rst
@@ -1,3 +1,7 @@
+==========
+Optimizers
+==========
+
BaseSGDOptimizer
================
.. automodule:: paddle.trainer_config_helpers.optimizers
@@ -51,4 +55,3 @@ settings
.. automodule:: paddle.trainer_config_helpers.optimizers
:members: settings
:noindex:
-
diff --git a/doc/ui/api/trainer_config_helpers/optimizers_index.rst b/doc/ui/api/trainer_config_helpers/optimizers_index.rst
deleted file mode 100644
index f39f94f0cd6e1a6c3c25eeceb7820a7fbc070570..0000000000000000000000000000000000000000
--- a/doc/ui/api/trainer_config_helpers/optimizers_index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Optimizers
-==========
-
-.. toctree::
- :maxdepth: 3
-
- optimizers.rst
diff --git a/doc/ui/api/trainer_config_helpers/poolings.rst b/doc/ui/api/trainer_config_helpers/poolings.rst
index caadec639383aad24ed477d8bdaeaa31c0026bb5..66566809d26f59263597b5286c5b27e0bbc9415a 100644
--- a/doc/ui/api/trainer_config_helpers/poolings.rst
+++ b/doc/ui/api/trainer_config_helpers/poolings.rst
@@ -1,3 +1,7 @@
+========
+Poolings
+========
+
BasePoolingType
===============
.. automodule:: paddle.trainer_config_helpers.poolings
@@ -27,4 +31,3 @@ SquareRootNPooling
.. automodule:: paddle.trainer_config_helpers.poolings
:members: SquareRootNPooling
:noindex:
-
diff --git a/doc/ui/api/trainer_config_helpers/poolings_index.rst b/doc/ui/api/trainer_config_helpers/poolings_index.rst
deleted file mode 100644
index 250d3fa69c0dcedfd689b685fe7b47ec71d02fee..0000000000000000000000000000000000000000
--- a/doc/ui/api/trainer_config_helpers/poolings_index.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Poolings
-========
-
-These pooling types are used for sequence input, not for images.
-
-.. toctree::
- :maxdepth: 3
-
- poolings.rst
diff --git a/doc/ui/predict/predict_sample.py b/doc/ui/predict/predict_sample.py
index d55d2c730dece07f068b728d0a75f34c70b817bd..63e8b36d26057d4a87dabb8745de8e13efe2524f 100644
--- a/doc/ui/predict/predict_sample.py
+++ b/doc/ui/predict/predict_sample.py
@@ -16,82 +16,113 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
-TEST_DATA = [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.215686,
- 0.533333, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.67451,
- 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.886275,
- 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192157, 0.070588, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0.670588, 0.992157, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.117647, 0.933333, 0.858824, 0.313725, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0.090196, 0.858824, 0.992157, 0.831373, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.141176,
- 0.992157, 0.992157, 0.611765, 0.054902, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.258824, 0.992157, 0.992157,
- 0.529412, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.368627, 0.992157, 0.992157, 0.419608, 0.003922, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0.094118, 0.835294, 0.992157, 0.992157, 0.517647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.603922, 0.992157,
- 0.992157, 0.992157, 0.603922, 0.545098, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0.447059, 0.992157, 0.992157,
- 0.956863, 0.062745, 0, 0, 0, 0, 0, 0, 0, 0, 0.011765, 0.666667, 0.992157, 0.992157, 0.992157, 0.992157,
- 0.992157, 0.745098, 0.137255, 0, 0, 0, 0, 0, 0.152941, 0.866667, 0.992157, 0.992157, 0.521569, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0.070588, 0.992157, 0.992157, 0.992157, 0.803922, 0.352941, 0.745098, 0.992157,
- 0.945098, 0.317647, 0, 0, 0, 0, 0.580392, 0.992157, 0.992157, 0.764706, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0.070588, 0.992157, 0.992157, 0.776471, 0.043137, 0, 0.007843, 0.27451, 0.882353, 0.941176, 0.176471,
- 0, 0, 0.180392, 0.898039, 0.992157, 0.992157, 0.313725, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.992157,
- 0.992157, 0.713725, 0, 0, 0, 0, 0.627451, 0.992157, 0.729412, 0.062745, 0, 0.509804, 0.992157, 0.992157,
- 0.776471, 0.035294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.494118, 0.992157, 0.992157, 0.968627, 0.168627, 0, 0,
- 0, 0.423529, 0.992157, 0.992157, 0.364706, 0, 0.717647, 0.992157, 0.992157, 0.317647, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0.533333, 0.992157, 0.984314, 0.945098, 0.603922, 0, 0, 0, 0.003922, 0.466667, 0.992157,
- 0.988235, 0.976471, 0.992157, 0.992157, 0.788235, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.686275,
- 0.882353, 0.364706, 0, 0, 0, 0, 0, 0, 0.098039, 0.588235, 0.992157, 0.992157, 0.992157, 0.980392,
- 0.305882, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.101961, 0.67451, 0.321569, 0, 0, 0, 0, 0, 0, 0, 0.105882,
- 0.733333, 0.976471, 0.811765, 0.713725, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.65098, 0.992157,
- 0.321569, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25098, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
- 0.94902, 0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.968627,
- 0.764706, 0.152941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.498039,
- 0.25098, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [
- [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.298039, 0.333333, 0.333333, 0.333333, 0.337255, 0.333333,
- 0.333333, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.027451, 0.223529, 0.776471,
- 0.964706, 0.988235, 0.988235, 0.988235, 0.992157, 0.988235, 0.988235, 0.780392, 0.098039, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.14902, 0.698039, 0.988235, 0.992157, 0.988235, 0.901961, 0.87451,
- 0.568627, 0.882353, 0.976471, 0.988235, 0.988235, 0.501961, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0.188235, 0.647059, 0.988235, 0.988235, 0.745098, 0.439216, 0.098039, 0, 0, 0, 0.572549, 0.988235,
- 0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.933333, 0.992157, 0.941176,
- 0.247059, 0, 0, 0, 0, 0, 0, 0.188235, 0.898039, 0.992157, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0.039216, 0.639216, 0.933333, 0.988235, 0.913725, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0.113725, 0.843137,
- 0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.235294, 0.988235, 0.992157, 0.988235, 0.815686,
- 0.07451, 0, 0, 0, 0, 0, 0, 0, 0.333333, 0.988235, 0.988235, 0.552941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0.211765, 0.878431, 0.988235, 0.992157, 0.701961, 0.329412, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0.698039,
- 0.988235, 0.913725, 0.145098, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.890196, 0.988235, 0.988235,
- 0.745098, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.882353, 0.988235, 0.568627, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0.2, 0.933333, 0.992157, 0.992157, 0.992157, 0.447059, 0.294118, 0, 0, 0, 0, 0, 0, 0, 0, 0.447059,
- 0.992157, 0.768627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.988235, 0.988235, 0.988235,
- 0.992157, 0.47451, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.933333, 0.87451, 0.509804, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0.992157, 0.988235, 0.937255, 0.792157, 0.988235, 0.894118, 0.082353, 0, 0, 0, 0, 0, 0,
- 0.027451, 0.647059, 0.992157, 0.654902, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.913725,
- 0.329412, 0.376471, 0.184314, 0, 0, 0, 0, 0, 0, 0.027451, 0.513725, 0.988235, 0.635294, 0.219608, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.929412, 0.988235, 0.988235, 0.741176, 0.309804, 0, 0, 0, 0,
- 0, 0, 0.529412, 0.988235, 0.678431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.223529, 0.992157,
- 0.992157, 1, 0.992157, 0.992157, 0.992157, 0.992157, 1, 0.992157, 0.992157, 0.882353, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.023529, 0.478431, 0.654902, 0.658824, 0.952941, 0.988235, 0.988235,
- 0.988235, 0.992157, 0.988235, 0.729412, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0.196078, 0.647059, 0.764706, 0.764706, 0.768627, 0.580392, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0]]]
+TEST_DATA = [[[
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.215686, 0.533333, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.67451, 0.992157, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0.070588, 0.886275, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192157,
+ 0.070588, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.670588, 0.992157,
+ 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.117647, 0.933333, 0.858824, 0.313725,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.090196, 0.858824, 0.992157, 0.831373, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0.141176, 0.992157, 0.992157, 0.611765, 0.054902, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.258824, 0.992157, 0.992157, 0.529412, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0.368627, 0.992157, 0.992157, 0.419608, 0.003922, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0.094118, 0.835294, 0.992157, 0.992157, 0.517647, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0.603922, 0.992157, 0.992157, 0.992157, 0.603922,
+ 0.545098, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0.447059, 0.992157, 0.992157,
+ 0.956863, 0.062745, 0, 0, 0, 0, 0, 0, 0, 0, 0.011765, 0.666667, 0.992157,
+ 0.992157, 0.992157, 0.992157, 0.992157, 0.745098, 0.137255, 0, 0, 0, 0, 0,
+ 0.152941, 0.866667, 0.992157, 0.992157, 0.521569, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0.070588, 0.992157, 0.992157, 0.992157, 0.803922, 0.352941, 0.745098,
+ 0.992157, 0.945098, 0.317647, 0, 0, 0, 0, 0.580392, 0.992157, 0.992157,
+ 0.764706, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.992157, 0.992157,
+ 0.776471, 0.043137, 0, 0.007843, 0.27451, 0.882353, 0.941176, 0.176471, 0,
+ 0, 0.180392, 0.898039, 0.992157, 0.992157, 0.313725, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0.070588, 0.992157, 0.992157, 0.713725, 0, 0, 0, 0, 0.627451,
+ 0.992157, 0.729412, 0.062745, 0, 0.509804, 0.992157, 0.992157, 0.776471,
+ 0.035294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.494118, 0.992157, 0.992157,
+ 0.968627, 0.168627, 0, 0, 0, 0.423529, 0.992157, 0.992157, 0.364706, 0,
+ 0.717647, 0.992157, 0.992157, 0.317647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0.533333, 0.992157, 0.984314, 0.945098, 0.603922, 0, 0, 0, 0.003922,
+ 0.466667, 0.992157, 0.988235, 0.976471, 0.992157, 0.992157, 0.788235,
+ 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.686275, 0.882353, 0.364706, 0,
+ 0, 0, 0, 0, 0, 0.098039, 0.588235, 0.992157, 0.992157, 0.992157, 0.980392,
+ 0.305882, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.101961, 0.67451, 0.321569,
+ 0, 0, 0, 0, 0, 0, 0, 0.105882, 0.733333, 0.976471, 0.811765, 0.713725, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.65098, 0.992157, 0.321569, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0.25098, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0.94902, 0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0.968627, 0.764706, 0.152941, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.498039, 0.25098, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0
+]], [[
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0.298039, 0.333333, 0.333333, 0.333333, 0.337255,
+ 0.333333, 0.333333, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0.027451, 0.223529, 0.776471, 0.964706, 0.988235, 0.988235, 0.988235,
+ 0.992157, 0.988235, 0.988235, 0.780392, 0.098039, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0.14902, 0.698039, 0.988235, 0.992157, 0.988235, 0.901961,
+ 0.87451, 0.568627, 0.882353, 0.976471, 0.988235, 0.988235, 0.501961, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.647059, 0.988235, 0.988235,
+ 0.745098, 0.439216, 0.098039, 0, 0, 0, 0.572549, 0.988235, 0.988235,
+ 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.933333, 0.992157,
+ 0.941176, 0.247059, 0, 0, 0, 0, 0, 0, 0.188235, 0.898039, 0.992157,
+ 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.039216, 0.639216, 0.933333,
+ 0.988235, 0.913725, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0.113725, 0.843137,
+ 0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.235294, 0.988235,
+ 0.992157, 0.988235, 0.815686, 0.07451, 0, 0, 0, 0, 0, 0, 0, 0.333333,
+ 0.988235, 0.988235, 0.552941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.211765,
+ 0.878431, 0.988235, 0.992157, 0.701961, 0.329412, 0.109804, 0, 0, 0, 0, 0,
+ 0, 0, 0.698039, 0.988235, 0.913725, 0.145098, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0.188235, 0.890196, 0.988235, 0.988235, 0.745098, 0.047059, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0.882353, 0.988235, 0.568627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2,
+ 0.933333, 0.992157, 0.992157, 0.992157, 0.447059, 0.294118, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0.447059, 0.992157, 0.768627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0.623529, 0.988235, 0.988235, 0.988235, 0.988235, 0.992157, 0.47451, 0, 0,
+ 0, 0, 0, 0, 0, 0.188235, 0.933333, 0.87451, 0.509804, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0.992157, 0.988235, 0.937255, 0.792157, 0.988235, 0.894118,
+ 0.082353, 0, 0, 0, 0, 0, 0, 0.027451, 0.647059, 0.992157, 0.654902, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.913725, 0.329412, 0.376471,
+ 0.184314, 0, 0, 0, 0, 0, 0, 0.027451, 0.513725, 0.988235, 0.635294,
+ 0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.929412, 0.988235,
+ 0.988235, 0.741176, 0.309804, 0, 0, 0, 0, 0, 0, 0.529412, 0.988235,
+ 0.678431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.223529, 0.992157,
+ 0.992157, 1, 0.992157, 0.992157, 0.992157, 0.992157, 1, 0.992157, 0.992157,
+ 0.882353, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.023529,
+ 0.478431, 0.654902, 0.658824, 0.952941, 0.988235, 0.988235, 0.988235,
+ 0.992157, 0.988235, 0.729412, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.647059, 0.764706, 0.764706, 0.768627,
+ 0.580392, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0
+]]]
def main():
conf = parse_config("./mnist_model/trainer_config.py", "")
print conf.data_config.load_data_args
- network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine) # For code hint.
network.loadParameters("./mnist_model/")
converter = DataProviderConverter([dense_vector(784)])
diff --git a/doc/user_guide.rst b/doc/user_guide.rst
new file mode 100644
index 0000000000000000000000000000000000000000..d4deb3ca5a4523b509ea5082f32be8a315570dea
--- /dev/null
+++ b/doc/user_guide.rst
@@ -0,0 +1,13 @@
+User Guide
+==========
+
+.. toctree::
+ :maxdepth: 1
+
+ demo/quick_start/index_en.md
+ build/index.rst
+ build/contribute_to_paddle.md
+ ui/index.md
+ ui/api/trainer_config_helpers/index.rst
+ demo/index.md
+ cluster/index.md
diff --git a/doc_cn/algorithm/rnn/hierarchical-layer.md b/doc_cn/algorithm/rnn/hierarchical-layer.md
index 5282bbbcb82d00f5aed7b784d2bd44f9ec33fa42..519653df081d6e7919ada3cbff6aaf4d2a2f6115 100644
--- a/doc_cn/algorithm/rnn/hierarchical-layer.md
+++ b/doc_cn/algorithm/rnn/hierarchical-layer.md
@@ -1,66 +1,66 @@
-# 支持双层序列作为输入的Layer
-
-## 概述
-
-在自然语言处理任务中,序列是一种常见的数据类型。一个独立的词语,可以看作是一个非序列输入,或者,我们称之为一个0层的序列;由词语构成的句子,是一个单层序列;若干个句子构成一个段落,是一个双层的序列。
-
-双层序列是一个嵌套的序列,它的每一个元素,又是一个单层的序列。这是一种非常灵活的数据组织方式,帮助我们构造一些复杂的输入信息。
-
-我们可以按照如下层次定义非序列,单层序列,以及双层序列。
-
-+ 0层序列:一个独立的元素,类型可以是PaddlePaddle支持的任意输入数据类型
-+ 单层序列:排成一列的多个元素,每个元素是一个0层序列,元素之间的顺序是重要的输入信息
-+ 双层序列:排成一列的多个元素,每个元素是一个单层序列,称之为双层序列的一个子序列(subseq),subseq的每个元素是一个0层序列
-
-
-在 PaddlePaddle中,下面这些Layer能够接受双层序列作为输入,完成相应的计算。
-## pooling_layer
-
-pooling_layer的使用示例如下,详细见配置API。
-```python
-seq_pool = pooling_layer(input=layer,
- pooling_type=AvgPooling(),
- agg_level=AggregateLevel.EACH_SEQUENCE)
-```
-- `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。
-- `agg_level=AggregateLevel.TIMESTEP`时(默认值):
- - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列
- - 输入:一个双层序列,或一个单层序列
- - 输出:一个0层序列,即整个输入序列(单层或双层)的平均值(或最大值)
-- `agg_level=AggregateLevel.EACH_SEQUENCE`时:
- - 作用:一个双层序列经过运算变成一个单层序列
- - 输入:必须是一个双层序列
- - 输出:一个单层序列,序列的每个元素是原来双层序列每个subseq元素的平均值(或最大值)
-
-## last_seq 和 first_seq
-
-last_seq的使用示例如下(first_seq类似),详细见配置API。
-```python
-last = last_seq(input=layer,
- agg_level=AggregateLevel.EACH_SEQUENCE)
-```
-- `agg_level=AggregateLevel.TIMESTEP`时(默认值):
- - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列
- - 输入:一个双层序列或一个单层序列
- - 输出:一个0层序列,即整个输入序列(双层或者单层)最后一个,或第一个元素。
-- `agg_level=AggregateLevel.EACH_SEQUENCE`时:
- - 作用:一个双层序列经过运算变成一个单层序列
- - 输入:必须是一个双层序列
- - 输出:一个单层序列,其中每个元素是双层序列中每个subseq最后一个(或第一个)元素。
-
-## expand_layer
-
-expand_layer的使用示例如下,详细见配置API。
-```python
-expand = expand_layer(input=layer1,
- expand_as=layer2,
- expand_level=ExpandLevel.FROM_TIMESTEP)
-```
-- `expand_level=ExpandLevel.FROM_TIMESTEP`时(默认值):
- - 作用:一个0层序列经过运算扩展成一个单层序列,或者一个双层序列
- - 输入:layer1必须是一个0层序列,是待扩展的数据;layer2可以是一个单层序列,或者是一个双层序列,提供扩展的长度信息
- - 输出:一个单层序列,或一个双层序列,输出序列的类型(双层序列,或单层序列)和序列中含有元素的数目同 layer2一致。若输出是单层序列,单层序列的每个元素(0层序列),都是对layer1元素的拷贝;若输出是双层序列,双层序列每个subseq中每个元素(0层序列),都是对layer1元素的拷贝
-- `expand_level=ExpandLevel.FROM_SEQUENCE`时:
- - 作用:一个单层序列经过运算扩展成一个双层序列
- - 输入:layer1必须是一个单层序列,是待扩展的数据;layer2必须是一个双层序列,提供扩展的长度信息
- - 输出:一个双层序列,序列中含有元素的数目同layer2一致。要求单层序列含有元素的数目(0层序列),和双层序列含有subseq 的数目一致。单层序列第i个元素(0层序列),被扩展为一个单层序列,构成了输出双层序列的第i个subseq。
\ No newline at end of file
+# 支持双层序列作为输入的Layer
+
+## 概述
+
+在自然语言处理任务中,序列是一种常见的数据类型。一个独立的词语,可以看作是一个非序列输入,或者,我们称之为一个0层的序列;由词语构成的句子,是一个单层序列;若干个句子构成一个段落,是一个双层的序列。
+
+双层序列是一个嵌套的序列,它的每一个元素,又是一个单层的序列。这是一种非常灵活的数据组织方式,帮助我们构造一些复杂的输入信息。
+
+我们可以按照如下层次定义非序列,单层序列,以及双层序列。
+
++ 0层序列:一个独立的元素,类型可以是PaddlePaddle支持的任意输入数据类型
++ 单层序列:排成一列的多个元素,每个元素是一个0层序列,元素之间的顺序是重要的输入信息
++ 双层序列:排成一列的多个元素,每个元素是一个单层序列,称之为双层序列的一个子序列(subseq),subseq的每个元素是一个0层序列
+
+
+在 PaddlePaddle中,下面这些Layer能够接受双层序列作为输入,完成相应的计算。
+## pooling_layer
+
+pooling_layer的使用示例如下,详细见配置API。
+```python
+seq_pool = pooling_layer(input=layer,
+ pooling_type=AvgPooling(),
+ agg_level=AggregateLevel.EACH_SEQUENCE)
+```
+- `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。
+- `agg_level=AggregateLevel.TIMESTEP`时(默认值):
+ - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列
+ - 输入:一个双层序列,或一个单层序列
+ - 输出:一个0层序列,即整个输入序列(单层或双层)的平均值(或最大值)
+- `agg_level=AggregateLevel.EACH_SEQUENCE`时:
+ - 作用:一个双层序列经过运算变成一个单层序列
+ - 输入:必须是一个双层序列
+ - 输出:一个单层序列,序列的每个元素是原来双层序列每个subseq元素的平均值(或最大值)
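+
+下面是一个极简的示意(非本文原有内容,输入层的名字是虚构的,实际配置中应替换为你自己的层),把上述两种 agg_level 并列写出,便于对比:
+
+```python
+from paddle.trainer_config_helpers import *
+
+# Hypothetical input layer; in a real config this comes from your data provider / network.
+nested_seq = data_layer(name="nested_input", size=128)
+
+# AggregateLevel.TIMESTEP: the whole (nested or plain) sequence collapses to one vector.
+whole_avg = pooling_layer(input=nested_seq,
+                          pooling_type=AvgPooling(),
+                          agg_level=AggregateLevel.TIMESTEP)
+
+# AggregateLevel.EACH_SEQUENCE: each subseq collapses to one vector,
+# so a nested sequence becomes a single-level sequence.
+per_subseq_avg = pooling_layer(input=nested_seq,
+                               pooling_type=AvgPooling(),
+                               agg_level=AggregateLevel.EACH_SEQUENCE)
+```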
+
+## last_seq 和 first_seq
+
+last_seq的使用示例如下(first_seq类似),详细见配置API。
+```python
+last = last_seq(input=layer,
+ agg_level=AggregateLevel.EACH_SEQUENCE)
+```
+- `agg_level=AggregateLevel.TIMESTEP`时(默认值):
+ - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列
+ - 输入:一个双层序列或一个单层序列
+ - 输出:一个0层序列,即整个输入序列(双层或者单层)最后一个,或第一个元素。
+- `agg_level=AggregateLevel.EACH_SEQUENCE`时:
+ - 作用:一个双层序列经过运算变成一个单层序列
+ - 输入:必须是一个双层序列
+ - 输出:一个单层序列,其中每个元素是双层序列中每个subseq最后一个(或第一个)元素。
+
+## expand_layer
+
+expand_layer的使用示例如下,详细见配置API。
+```python
+expand = expand_layer(input=layer1,
+ expand_as=layer2,
+ expand_level=ExpandLevel.FROM_TIMESTEP)
+```
+- `expand_level=ExpandLevel.FROM_TIMESTEP`时(默认值):
+ - 作用:一个0层序列经过运算扩展成一个单层序列,或者一个双层序列
+ - 输入:layer1必须是一个0层序列,是待扩展的数据;layer2可以是一个单层序列,或者是一个双层序列,提供扩展的长度信息
+ - 输出:一个单层序列,或一个双层序列,输出序列的类型(双层序列,或单层序列)和序列中含有元素的数目同 layer2一致。若输出是单层序列,单层序列的每个元素(0层序列),都是对layer1元素的拷贝;若输出是双层序列,双层序列每个subseq中每个元素(0层序列),都是对layer1元素的拷贝
+- `expand_level=ExpandLevel.FROM_SEQUENCE`时:
+ - 作用:一个单层序列经过运算扩展成一个双层序列
+ - 输入:layer1必须是一个单层序列,是待扩展的数据;layer2必须是一个双层序列,提供扩展的长度信息
+ - 输出:一个双层序列,序列中含有元素的数目同layer2一致。要求单层序列含有元素的数目(0层序列),和双层序列含有subseq 的数目一致。单层序列第i个元素(0层序列),被扩展为一个单层序列,构成了输出双层序列的第i个subseq。
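+
+作为补充示意(非本文原有内容,layer1、layer2 均为虚构的占位层),`FROM_SEQUENCE` 的用法大致如下:
+
+```python
+from paddle.trainer_config_helpers import *
+
+# layer1: a single-level sequence to be expanded (hypothetical placeholder).
+# layer2: a two-level sequence that only supplies the target subseq lengths.
+layer1 = data_layer(name="sentence_feature", size=128)
+layer2 = data_layer(name="paragraph_words", size=128)
+
+expand = expand_layer(input=layer1,
+                      expand_as=layer2,
+                      expand_level=ExpandLevel.FROM_SEQUENCE)
+# The i-th element of layer1 is copied to fill the i-th subseq of the output.
+```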
diff --git a/doc_cn/algorithm/rnn/hierarchical-rnn.md b/doc_cn/algorithm/rnn/hierarchical-rnn.md
index 4a85cf336146ef368b04c13fdc74f39ee7a361d3..c184a34e85a571e98e88c14ef653356fdd555a19 100644
--- a/doc_cn/algorithm/rnn/hierarchical-rnn.md
+++ b/doc_cn/algorithm/rnn/hierarchical-rnn.md
@@ -1,403 +1,403 @@
-# 双层RNN配置与示例
-
-我们在`paddle/gserver/tests/test_RecurrentGradientMachine`单测中,通过多组语义相同的单双层RNN配置,讲解如何使用双层RNN。
-
-## 示例1:双进双出,subseq间无memory
-
-配置:单层RNN(`sequence_layer_group`)和双层RNN(`sequence_nest_layer_group`),语义完全相同。
-
-### 读取双层序列的方法
-
-首先,我们看一下单双层序列的不同数据组织形式(您也可以采用别的组织形式):
-
-- 单层序列的数据(`Sequence/tour_train_wdseg`)如下,一共有10个样本。每个样本由两部分组成,一个label(此处都为2)和一个已经分词后的句子。
-
-```text
-2 酒店 有 很 舒适 的 床垫 子 , 床上用品 也 应该 是 一人 一 换 , 感觉 很 利落 对 卫生 很 放心 呀 。
-2 很 温馨 , 也 挺 干净 的 * 地段 不错 , 出来 就 有 全家 , 离 地铁站 也 近 , 交通 很方便 * 就是 都 不 给 刷牙 的 杯子 啊 , 就 第一天 给 了 一次性杯子 *
-2 位置 方便 , 强烈推荐 , 十一 出去玩 的 时候 选 的 , 对面 就是 华润万家 , 周围 吃饭 的 也 不少 。
-2 交通便利 , 吃 很 便利 , 乾 浄 、 安静 , 商务 房 有 电脑 、 上网 快 , 价格 可以 , 就 早餐 不 好吃 。 整体 是 不错 的 。 適 合 出差 來 住 。
-2 本来 准备 住 两 晚 , 第 2 天 一早 居然 停电 , 且 无 通知 , 只有 口头 道歉 。 总体来说 性价比 尚可 , 房间 较 新 , 还是 推荐 .
-2 这个 酒店 去过 很多 次 了 , 选择 的 主要原因 是 离 客户 最 便宜 相对 又 近 的 酒店
-2 挺好 的 汉庭 , 前台 服务 很 热情 , 卫生 很 整洁 , 房间 安静 , 水温 适中 , 挺好 !
-2 HowardJohnson 的 品质 , 服务 相当 好 的 一 家 五星级 。 房间 不错 、 泳池 不错 、 楼层 安排 很 合理 。 还有 就是 地理位置 , 简直 一 流 。 就 在 天一阁 、 月湖 旁边 , 离 天一广场 也 不远 。 下次 来 宁波 还会 住 。
-2 酒店 很干净 , 很安静 , 很 温馨 , 服务员 服务 好 , 各方面 都 不错 *
-2 挺好 的 , 就是 没 窗户 , 不过 对 得 起 这 价格
-```
-
-- 双层序列的数据(`Sequence/tour_train_wdseg.nest`)如下,一共有4个样本。样本间用空行分开,代表不同的双层序列,序列数据和上面的完全一样。每个样本的子句数分别为2,3,2,3。
-
-```text
-2 酒店 有 很 舒适 的 床垫 子 , 床上用品 也 应该 是 一人 一 换 , 感觉 很 利落 对 卫生 很 放心 呀 。
-2 很 温馨 , 也 挺 干净 的 * 地段 不错 , 出来 就 有 全家 , 离 地铁站 也 近 , 交通 很方便 * 就是 都 不 给 刷牙 的 杯子 啊 , 就 第一天 给 了 一次性杯子 *
-
-2 位置 方便 , 强烈推荐 , 十一 出去玩 的 时候 选 的 , 对面 就是 华润万家 , 周围 吃饭 的 也 不少 。
-2 交通便利 , 吃 很 便利 , 乾 浄 、 安静 , 商务 房 有 电脑 、 上网 快 , 价格 可以 , 就 早餐 不 好吃 。 整体 是 不错 的 。 適 合 出差 來 住 。
-2 本来 准备 住 两 晚 , 第 2 天 一早 居然 停电 , 且 无 通知 , 只有 口头 道歉 。 总体来说 性价比 尚可 , 房间 较 新 , 还是 推荐 .
-
-2 这个 酒店 去过 很多 次 了 , 选择 的 主要原因 是 离 客户 最 便宜 相对 又 近 的 酒店
-2 挺好 的 汉庭 , 前台 服务 很 热情 , 卫生 很 整洁 , 房间 安静 , 水温 适中 , 挺好 !
-
-2 HowardJohnson 的 品质 , 服务 相当 好 的 一 家 五星级 。 房间 不错 、 泳池 不错 、 楼层 安排 很 合理 。 还有 就是 地理位置 , 简直 一 流 。 就 在 天一阁 、 月湖 旁边 , 离 天一广场 也 不远 。 下次 来 宁波 还会 住 。
-2 酒店 很干净 , 很安静 , 很 温馨 , 服务员 服务 好 , 各方面 都 不错 *
-2 挺好 的 , 就是 没 窗户 , 不过 对 得 起 这 价格
-```
-
-其次,我们看一下单双层序列的不同dataprovider(见`sequenceGen.py`):
-
-- 单层序列的dataprovider如下:
- - word_slot是integer_value_sequence类型,代表单层序列。
- - label是integer_value类型,代表一个向量。
-
-```python
-def hook(settings, dict_file, **kwargs):
- settings.word_dict = dict_file
- settings.input_types = [integer_value_sequence(len(settings.word_dict)),
- integer_value(3)]
-
-@provider(init_hook=hook)
-def process(settings, file_name):
- with open(file_name, 'r') as fdata:
- for line in fdata:
- label, comment = line.strip().split('\t')
- label = int(''.join(label.split()))
- words = comment.split()
- word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict]
- yield word_slot, label
-```
-
-- 双层序列的dataprovider如下:
- - word_slot是integer_value_sub_sequence类型,代表双层序列。
- - label是integer_value_sequence类型,代表单层序列,即一个子句一个label。注意:也可以为integer_value类型,代表一个向量,即一个句子一个label。通常根据任务需求进行不同设置。
- - 关于dataprovider中input_types的详细用法,参见PyDataProvider2。
-
-```python
-def hook2(settings, dict_file, **kwargs):
- settings.word_dict = dict_file
- settings.input_types = [integer_value_sub_sequence(len(settings.word_dict)),
- integer_value_sequence(3)]
-
-@provider(init_hook=hook2)
-def process2(settings, file_name):
- with open(file_name) as fdata:
- label_list = []
- word_slot_list = []
- for line in fdata:
- if (len(line)) > 1:
- label,comment = line.strip().split('\t')
- label = int(''.join(label.split()))
- words = comment.split()
- word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict]
- label_list.append(label)
- word_slot_list.append(word_slot)
- else:
- yield word_slot_list, label_list
- label_list = []
- word_slot_list = []
-```
-
-### 模型中的配置
-
-首先,我们看一下单层序列的配置(见`sequence_layer_group.conf`)。注意:batchsize=5表示一次过5句单层序列,因此2个batch就可以完成1个pass。
-
-```python
-settings(batch_size=5)
-
-data = data_layer(name="word", size=dict_dim)
-
-emb = embedding_layer(input=data, size=word_dim)
-
-# (lstm_input + lstm) is equal to lstmemory
-with mixed_layer(size=hidden_dim*4) as lstm_input:
- lstm_input += full_matrix_projection(input=emb)
-
-lstm = lstmemory_group(input=lstm_input,
- size=hidden_dim,
- act=TanhActivation(),
- gate_act=SigmoidActivation(),
- state_act=TanhActivation(),
- lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50))
-
-lstm_last = last_seq(input=lstm)
-
-with mixed_layer(size=label_dim,
- act=SoftmaxActivation(),
- bias_attr=True) as output:
- output += full_matrix_projection(input=lstm_last)
-
-outputs(classification_cost(input=output, label=data_layer(name="label", size=1)))
-
-```
-其次,我们看一下语义相同的双层序列配置(见`sequence_nest_layer_group.conf`),并对其详细分析:
-
-- batchsize=2表示一次过2句双层序列。但从上面的数据格式可知,2句双层序列和5句单层序列的数据完全一样。
-- data_layer和embedding_layer不关心数据是否是序列格式,因此两个配置在这两层上的输出是一样的。
-- lstmemory:
- - 单层序列过了一个mixed_layer和lstmemory_group。
- - 双层序列在同样的mixed_layer和lstmemory_group外,直接加了一层group。由于这个外层group里面没有memory,表示subseq间不存在联系,即起到的作用仅仅是把双层seq拆成单层,因此双层序列过完lstmemory的输出和单层的一样。
-- last_seq:
- - 单层序列直接取了最后一个元素
- - 双层序列首先(last_seq层)取了每个subseq的最后一个元素,将其拼接成一个新的单层序列;接着(expand_layer层)将其扩展成一个新的双层序列,其中第i个subseq中的所有向量均为输入的单层序列中的第i个向量;最后(average_layer层)取了每个subseq的平均值。
- - 分析得出:第一个last_seq后,每个subseq的最后一个元素就等于单层序列的最后一个元素,而expand_layer和average_layer后,依然保持每个subseq最后一个元素的值不变(这两层仅是为了展示它们的用法,实际中并不需要)。因此单双层序列的输出是一样旳。
-
-```python
-settings(batch_size=2)
-
-data = data_layer(name="word", size=dict_dim)
-
-emb_group = embedding_layer(input=data, size=word_dim)
-
-# (lstm_input + lstm) is equal to lstmemory
-def lstm_group(lstm_group_input):
- with mixed_layer(size=hidden_dim*4) as group_input:
- group_input += full_matrix_projection(input=lstm_group_input)
-
- lstm_output = lstmemory_group(input=group_input,
- name="lstm_group",
- size=hidden_dim,
- act=TanhActivation(),
- gate_act=SigmoidActivation(),
- state_act=TanhActivation(),
- lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50))
- return lstm_output
-
-lstm_nest_group = recurrent_group(input=SubsequenceInput(emb_group),
- step=lstm_group,
- name="lstm_nest_group")
-# hasSubseq ->(seqlastins) seq
-lstm_last = last_seq(input=lstm_nest_group, agg_level=AggregateLevel.EACH_SEQUENCE)
-
-# seq ->(expand) hasSubseq
-lstm_expand = expand_layer(input=lstm_last, expand_as=emb_group, expand_level=ExpandLevel.FROM_SEQUENCE)
-
-# hasSubseq ->(average) seq
-lstm_average = pooling_layer(input=lstm_expand,
- pooling_type=AvgPooling(),
- agg_level=AggregateLevel.EACH_SEQUENCE)
-
-with mixed_layer(size=label_dim,
- act=SoftmaxActivation(),
- bias_attr=True) as output:
- output += full_matrix_projection(input=lstm_average)
-
-outputs(classification_cost(input=output, label=data_layer(name="label", size=1)))
-```
-## 示例2:双进双出,subseq间有memory
-
-配置:单层RNN(`sequence_rnn.conf`),双层RNN(`sequence_nest_rnn.conf`和`sequence_nest_rnn_readonly_memory.conf`),语义完全相同。
-
-### 读取双层序列的方法
-
-我们看一下单双层序列的不同数据组织形式和dataprovider(见`rnn_data_provider.py`)
-```python
-data = [
- [[[1, 3, 2], [4, 5, 2]], 0],
- [[[0, 2], [2, 5], [0, 1, 2]], 1],
-]
-
-@provider(input_types=[integer_value_sub_sequence(10),
- integer_value(3)])
-def process_subseq(settings, file_name):
- for d in data:
- yield d
-
-@provider(input_types=[integer_value_sequence(10),
- integer_value(3)])
-def process_seq(settings, file_name):
- for d in data:
- seq = []
-```
-- 单层序列:有两句,分别为[1,3,2,4,5,2]和[0,2,2,5,0,1,2]。
-- 双层序列:有两句,分别为[[1,3,2],[4,5,2]](2个子句)和[[0,2],[2,5],[0,1,2]](3个子句)。
-- 单双层序列的label都分别是0和1
-
-### 模型中的配置
-
-我们选取单双层序列配置中的不同部分,来对比分析两者语义相同的原因。
-
-- 单层序列:过了一个很简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全链接。
-
-```python
-def step(y):
- mem = memory(name="rnn_state", size=hidden_dim)
- return fc_layer(input=[y, mem],
- size=hidden_dim,
- act=TanhActivation(),
- bias_attr=True,
- name="rnn_state")
-
-out = recurrent_group(step=step, input=emb)
-```
-- 双层序列,外层memory是一个元素:
- - 内层inner_step的recurrent_group和单层序列的几乎一样。除了boot_layer=outer_mem,表示将外层的outer_mem作为内层memory的初始状态。外层outer_step中,outer_mem是一个子句的最后一个向量,即整个双层group是将前一个子句的最后一个向量,作为下一个子句memory的初始状态。
- - 从输入数据上看,单双层序列的句子是一样的,只是双层序列将其又做了子序列划分。因此双层序列的配置中,必须将前一个子句的最后一个元素,作为boot_layer传给下一个子句的memory,才能保证和单层序列的配置中“每一个时间步都用了上一个时间步的输出结果”一致。
-
-```python
-def outer_step(x):
- outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
- def inner_step(y):
- inner_mem = memory(name="inner_rnn_state",
- size=hidden_dim,
- boot_layer=outer_mem)
- return fc_layer(input=[y, inner_mem],
- size=hidden_dim,
- act=TanhActivation(),
- bias_attr=True,
- name="inner_rnn_state")
-
- inner_rnn_output = recurrent_group(
- step=inner_step,
- input=x)
- last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
-
- return inner_rnn_output
-
-out = recurrent_group(step=outer_step, input=SubsequenceInput(emb))
-```
-- 双层序列,外层memory是单层序列:
- - 由于外层每个时间步返回的是一个子句,这些子句的长度往往不等长。因此当外层有is_seq=True的memory时,内层是**无法直接使用**它的,即内层memory的boot_layer不能链接外层的这个memory。
- - 如果内层memory想**间接使用**这个外层memory,只能通过`pooling_layer`、`last_seq`或`first_seq`这三个layer将它先变成一个元素。但这种情况下,外层memory必须有boot_layer,否则在第0个时间步时,由于外层memory没有任何seq信息,因此上述三个layer的前向会报出“**Check failed: input.sequenceStartPositions**”的错误。
-
-## 示例3:双进双出,输入不等长
-
-**输入不等长**是指recurrent_group的多个输入在各时刻的长度可以不相等, 但需要指定一个和输出长度一致的input,用targetInlink表示。参考配置:单层RNN(`sequence_rnn_multi_unequalength_inputs.conf`),双层RNN(`sequence_nest_rnn_multi_unequalength_inputs.conf`)
-
-### 读取双层序列的方法
-
-我们看一下单双层序列的数据组织形式和dataprovider(见`rnn_data_provider.py`)
-```python
-data2 = [
- [[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]] ,0],
- [[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]], 1],
-]
-
-@provider(input_types=[integer_value_sub_sequence(10),
- integer_value_sub_sequence(10),
- integer_value(2)],
- should_shuffle=False)
-def process_unequalength_subseq(settings, file_name): #双层RNN的dataprovider
- for d in data2:
- yield d
-
-
-@provider(input_types=[integer_value_sequence(10),
- integer_value_sequence(10),
- integer_value(2)],
- should_shuffle=False)
-def process_unequalength_seq(settings, file_name): #单层RNN的dataprovider
- for d in data2:
- words1=reduce(lambda x,y: x+y, d[0])
- words2=reduce(lambda x,y: x+y, d[1])
- yield words1, words2, d[2]
-```
-
-data2 中有两个样本,每个样本有两个特征, 记fea1, fea2。
-
-- 单层序列:两个样本分别为[[1, 2, 4, 5, 2], [5, 4, 1, 3, 1]] 和 [[0, 2, 2, 5, 0, 1, 2], [1, 5, 4, 2, 3, 6, 1]]
-- 双层序列:两个样本分别为
- - **样本1**:[[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]]]。fea1和fea2都分别有2个子句,fea1=[[1, 2], [4, 5, 2]], fea2=[[5, 4, 1], [3, 1]]
- - **样本2**:[[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]]]。fea1和fea2都分别有3个子句, fea1=[[0, 2], [2, 5], [0, 1, 2]], fea2=[[1, 5], [4], [2, 3, 6, 1]]。
- - **注意**:每个样本中,各特征的子句数目需要相等。这里说的“双进双出,输入不等长”是指fea1在i时刻的输入的长度可以不等于fea2在i时刻的输入的长度。如对于第1个样本,时刻i=2, fea1[2]=[4, 5, 2],fea2[2]=[3, 1],3≠2。
-- 单双层序列中,两个样本的label都分别是0和1
-
-### 模型中的配置
-
-单层RNN(`sequence_rnn_multi_unequalength_inputs.conf`)和双层RNN(`sequence_nest_rnn_multi_unequalength_inputs.conf`)两个模型配置达到的效果完全一样,区别只在于输入为单层还是双层序列,现在我们来看它们内部分别是如何实现的。
-
-- 单层序列:
- - 过了一个简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全连接,功能与示例2中`sequence_rnn.conf`的`step`函数完全相同。这里,两个输入x1,x2分别通过calrnn返回最后时刻的状态。结果得到的encoder1_rep和encoder2_rep分别是单层序列,最后取encoder1_rep的最后一个时刻和encoder2_rep的所有时刻分别相加得到context。
- - 注意到这里recurrent_group输入的每个样本中,fea1和fea2的长度都分别相等,这并非偶然,而是因为recurrent_group要求输入为单层序列时,所有输入的长度都必须相等。
-
-```python
-def step(x1, x2):
- def calrnn(y):
- mem = memory(name = 'rnn_state_' + y.name, size = hidden_dim)
- out = fc_layer(input = [y, mem],
- size = hidden_dim,
- act = TanhActivation(),
- bias_attr = True,
- name = 'rnn_state_' + y.name)
- return out
-
- encoder1 = calrnn(x1)
- encoder2 = calrnn(x2)
- return [encoder1, encoder2]
-
-encoder1_rep, encoder2_rep = recurrent_group(
- name="stepout",
- step=step,
- input=[emb1, emb2])
-
-encoder1_last = last_seq(input = encoder1_rep)
-encoder1_expandlast = expand_layer(input = encoder1_last,
- expand_as = encoder2_rep)
-context = mixed_layer(input = [identity_projection(encoder1_expandlast),
- identity_projection(encoder2_rep)],
- size = hidden_dim)
-```
-- 双层序列:
- - 双层RNN中,对输入的两个特征分别求时序上的连续全连接(`inner_step1`和`inner_step2`分别处理fea1和fea2),其功能与示例2中`sequence_nest_rnn.conf`的`outer_step`函数完全相同。不同之处是,此时输入`[SubsequenceInput(emb1), SubsequenceInput(emb2)]`在各时刻并不等长。
- - 函数`outer_step`中可以分别处理这两个特征,但我们需要用targetInlink指定recurrent_group的输出的格式(各子句长度)只能和其中一个保持一致,如这里选择了和emb2的长度一致。
- - 最后,依然是取encoder1_rep的最后一个时刻和encoder2_rep的所有时刻分别相加得到context。
-
-```python
-def outer_step(x1, x2):
- outer_mem1 = memory(name = "outer_rnn_state1", size = hidden_dim)
- outer_mem2 = memory(name = "outer_rnn_state2", size = hidden_dim)
- def inner_step1(y):
- inner_mem = memory(name = 'inner_rnn_state_' + y.name,
- size = hidden_dim,
- boot_layer = outer_mem1)
- out = fc_layer(input = [y, inner_mem],
- size = hidden_dim,
- act = TanhActivation(),
- bias_attr = True,
- name = 'inner_rnn_state_' + y.name)
- return out
-
- def inner_step2(y):
- inner_mem = memory(name = 'inner_rnn_state_' + y.name,
- size = hidden_dim,
- boot_layer = outer_mem2)
- out = fc_layer(input = [y, inner_mem],
- size = hidden_dim,
- act = TanhActivation(),
- bias_attr = True,
- name = 'inner_rnn_state_' + y.name)
- return out
-
- encoder1 = recurrent_group(
- step = inner_step1,
- name = 'inner1',
- input = x1)
-
- encoder2 = recurrent_group(
- step = inner_step2,
- name = 'inner2',
- input = x2)
-
- sentence_last_state1 = last_seq(input = encoder1, name = 'outer_rnn_state1')
- sentence_last_state2_ = last_seq(input = encoder2, name = 'outer_rnn_state2')
-
- encoder1_expand = expand_layer(input = sentence_last_state1,
- expand_as = encoder2)
-
- return [encoder1_expand, encoder2]
-
-encoder1_rep, encoder2_rep = recurrent_group(
- name="outer",
- step=outer_step,
- input=[SubsequenceInput(emb1), SubsequenceInput(emb2)],
- targetInlink=emb2)
-
-encoder1_last = last_seq(input = encoder1_rep)
-encoder1_expandlast = expand_layer(input = encoder1_last,
- expand_as = encoder2_rep)
-context = mixed_layer(input = [identity_projection(encoder1_expandlast),
- identity_projection(encoder2_rep)],
- size = hidden_dim)
-```
-
-## 示例4:beam_search的生成
-
-TBD
\ No newline at end of file
+# 双层RNN配置与示例
+
+我们在`paddle/gserver/tests/test_RecurrentGradientMachine`单测中,通过多组语义相同的单双层RNN配置,讲解如何使用双层RNN。
+
+## 示例1:双进双出,subseq间无memory
+
+配置:单层RNN(`sequence_layer_group`)和双层RNN(`sequence_nest_layer_group`),语义完全相同。
+
+### 读取双层序列的方法
+
+首先,我们看一下单双层序列的不同数据组织形式(您也可以采用别的组织形式):
+
+- 单层序列的数据(`Sequence/tour_train_wdseg`)如下,一共有10个样本。每个样本由两部分组成,一个label(此处都为2)和一个已经分词后的句子。
+
+```text
+2 酒店 有 很 舒适 的 床垫 子 , 床上用品 也 应该 是 一人 一 换 , 感觉 很 利落 对 卫生 很 放心 呀 。
+2 很 温馨 , 也 挺 干净 的 * 地段 不错 , 出来 就 有 全家 , 离 地铁站 也 近 , 交通 很方便 * 就是 都 不 给 刷牙 的 杯子 啊 , 就 第一天 给 了 一次性杯子 *
+2 位置 方便 , 强烈推荐 , 十一 出去玩 的 时候 选 的 , 对面 就是 华润万家 , 周围 吃饭 的 也 不少 。
+2 交通便利 , 吃 很 便利 , 乾 浄 、 安静 , 商务 房 有 电脑 、 上网 快 , 价格 可以 , 就 早餐 不 好吃 。 整体 是 不错 的 。 適 合 出差 來 住 。
+2 本来 准备 住 两 晚 , 第 2 天 一早 居然 停电 , 且 无 通知 , 只有 口头 道歉 。 总体来说 性价比 尚可 , 房间 较 新 , 还是 推荐 .
+2 这个 酒店 去过 很多 次 了 , 选择 的 主要原因 是 离 客户 最 便宜 相对 又 近 的 酒店
+2 挺好 的 汉庭 , 前台 服务 很 热情 , 卫生 很 整洁 , 房间 安静 , 水温 适中 , 挺好 !
+2 HowardJohnson 的 品质 , 服务 相当 好 的 一 家 五星级 。 房间 不错 、 泳池 不错 、 楼层 安排 很 合理 。 还有 就是 地理位置 , 简直 一 流 。 就 在 天一阁 、 月湖 旁边 , 离 天一广场 也 不远 。 下次 来 宁波 还会 住 。
+2 酒店 很干净 , 很安静 , 很 温馨 , 服务员 服务 好 , 各方面 都 不错 *
+2 挺好 的 , 就是 没 窗户 , 不过 对 得 起 这 价格
+```
+
+- 双层序列的数据(`Sequence/tour_train_wdseg.nest`)如下,一共有4个样本。样本间用空行分开,代表不同的双层序列,序列数据和上面的完全一样。每个样本的子句数分别为2,3,2,3。
+
+```text
+2 酒店 有 很 舒适 的 床垫 子 , 床上用品 也 应该 是 一人 一 换 , 感觉 很 利落 对 卫生 很 放心 呀 。
+2 很 温馨 , 也 挺 干净 的 * 地段 不错 , 出来 就 有 全家 , 离 地铁站 也 近 , 交通 很方便 * 就是 都 不 给 刷牙 的 杯子 啊 , 就 第一天 给 了 一次性杯子 *
+
+2 位置 方便 , 强烈推荐 , 十一 出去玩 的 时候 选 的 , 对面 就是 华润万家 , 周围 吃饭 的 也 不少 。
+2 交通便利 , 吃 很 便利 , 乾 浄 、 安静 , 商务 房 有 电脑 、 上网 快 , 价格 可以 , 就 早餐 不 好吃 。 整体 是 不错 的 。 適 合 出差 來 住 。
+2 本来 准备 住 两 晚 , 第 2 天 一早 居然 停电 , 且 无 通知 , 只有 口头 道歉 。 总体来说 性价比 尚可 , 房间 较 新 , 还是 推荐 .
+
+2 这个 酒店 去过 很多 次 了 , 选择 的 主要原因 是 离 客户 最 便宜 相对 又 近 的 酒店
+2 挺好 的 汉庭 , 前台 服务 很 热情 , 卫生 很 整洁 , 房间 安静 , 水温 适中 , 挺好 !
+
+2 HowardJohnson 的 品质 , 服务 相当 好 的 一 家 五星级 。 房间 不错 、 泳池 不错 、 楼层 安排 很 合理 。 还有 就是 地理位置 , 简直 一 流 。 就 在 天一阁 、 月湖 旁边 , 离 天一广场 也 不远 。 下次 来 宁波 还会 住 。
+2 酒店 很干净 , 很安静 , 很 温馨 , 服务员 服务 好 , 各方面 都 不错 *
+2 挺好 的 , 就是 没 窗户 , 不过 对 得 起 这 价格
+```
+
+其次,我们看一下单双层序列的不同dataprovider(见`sequenceGen.py`):
+
+- 单层序列的dataprovider如下:
+ - word_slot是integer_value_sequence类型,代表单层序列。
+ - label是integer_value类型,代表一个向量。
+
+```python
+def hook(settings, dict_file, **kwargs):
+ settings.word_dict = dict_file
+ settings.input_types = [integer_value_sequence(len(settings.word_dict)),
+ integer_value(3)]
+
+@provider(init_hook=hook)
+def process(settings, file_name):
+ with open(file_name, 'r') as fdata:
+ for line in fdata:
+ label, comment = line.strip().split('\t')
+ label = int(''.join(label.split()))
+ words = comment.split()
+ word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict]
+ yield word_slot, label
+```
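+
+为便于理解(以下词典和输入行均为虚构,仅作示意),`process` 对一行数据的解析过程大致等价于:
+
+```python
+# Toy stand-in for settings.word_dict; a real config builds it from the training data.
+word_dict = {"位置": 0, "方便": 1, "不错": 2}
+
+line = "2\t位置 方便 , 不错 。\n"
+label, comment = line.strip().split('\t')
+label = int(''.join(label.split()))
+word_slot = [word_dict[w] for w in comment.split() if w in word_dict]
+assert word_slot == [0, 1, 2] and label == 2
+```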
+
+- 双层序列的dataprovider如下:
+ - word_slot是integer_value_sub_sequence类型,代表双层序列。
+ - label是integer_value_sequence类型,代表单层序列,即一个子句一个label。注意:也可以为integer_value类型,代表一个向量,即一个句子一个label。通常根据任务需求进行不同设置。
+ - 关于dataprovider中input_types的详细用法,参见PyDataProvider2。
+
+```python
+def hook2(settings, dict_file, **kwargs):
+ settings.word_dict = dict_file
+ settings.input_types = [integer_value_sub_sequence(len(settings.word_dict)),
+ integer_value_sequence(3)]
+
+@provider(init_hook=hook2)
+def process2(settings, file_name):
+ with open(file_name) as fdata:
+ label_list = []
+ word_slot_list = []
+ for line in fdata:
+ if (len(line)) > 1:
+ label,comment = line.strip().split('\t')
+ label = int(''.join(label.split()))
+ words = comment.split()
+ word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict]
+ label_list.append(label)
+ word_slot_list.append(word_slot)
+ else:
+ yield word_slot_list, label_list
+ label_list = []
+ word_slot_list = []
+```
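+
+一个纯 Python 的小示例(数据为虚构,仅作示意):把 `process2` 产出的一个双层样本按句子拆开,得到的正是 `process` 会逐条产出的单层样本:
+
+```python
+# One hypothetical sample as yielded by process2: a paragraph of two sentences,
+# each sentence with its own label.
+word_slot_list = [[3, 7, 2], [5, 1]]
+label_list = [2, 2]
+
+# Flattened, sentence-by-sentence view -- the form that process() yields.
+flat = list(zip(word_slot_list, label_list))
+assert flat == [([3, 7, 2], 2), ([5, 1], 2)]
+```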
+
+### 模型中的配置
+
+首先,我们看一下单层序列的配置(见`sequence_layer_group.conf`)。注意:batchsize=5表示一次过5句单层序列,因此2个batch就可以完成1个pass。
+
+```python
+settings(batch_size=5)
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(input=data, size=word_dim)
+
+# (lstm_input + lstm) is equal to lstmemory
+with mixed_layer(size=hidden_dim*4) as lstm_input:
+ lstm_input += full_matrix_projection(input=emb)
+
+lstm = lstmemory_group(input=lstm_input,
+ size=hidden_dim,
+ act=TanhActivation(),
+ gate_act=SigmoidActivation(),
+ state_act=TanhActivation(),
+ lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50))
+
+lstm_last = last_seq(input=lstm)
+
+with mixed_layer(size=label_dim,
+ act=SoftmaxActivation(),
+ bias_attr=True) as output:
+ output += full_matrix_projection(input=lstm_last)
+
+outputs(classification_cost(input=output, label=data_layer(name="label", size=1)))
+
+```
+其次,我们看一下语义相同的双层序列配置(见`sequence_nest_layer_group.conf`),并对其详细分析:
+
+- batchsize=2表示一次过2句双层序列。但从上面的数据格式可知,2句双层序列和5句单层序列的数据完全一样。
+- data_layer和embedding_layer不关心数据是否是序列格式,因此两个配置在这两层上的输出是一样的。
+- lstmemory:
+ - 单层序列过了一个mixed_layer和lstmemory_group。
+ - 双层序列在同样的mixed_layer和lstmemory_group外,直接加了一层group。由于这个外层group里面没有memory,表示subseq间不存在联系,即起到的作用仅仅是把双层seq拆成单层,因此双层序列过完lstmemory的输出和单层的一样。
+- last_seq:
+ - 单层序列直接取了最后一个元素
+ - 双层序列首先(last_seq层)取了每个subseq的最后一个元素,将其拼接成一个新的单层序列;接着(expand_layer层)将其扩展成一个新的双层序列,其中第i个subseq中的所有向量均为输入的单层序列中的第i个向量;最后(average_layer层)取了每个subseq的平均值。
+  - 分析得出:第一个last_seq后,每个subseq的最后一个元素就等于单层序列的最后一个元素,而expand_layer和average_layer后,依然保持每个subseq最后一个元素的值不变(这两层仅是为了展示它们的用法,实际中并不需要)。因此单双层序列的输出是一样的。
+
+```python
+settings(batch_size=2)
+
+data = data_layer(name="word", size=dict_dim)
+
+emb_group = embedding_layer(input=data, size=word_dim)
+
+# (lstm_input + lstm) is equal to lstmemory
+def lstm_group(lstm_group_input):
+ with mixed_layer(size=hidden_dim*4) as group_input:
+ group_input += full_matrix_projection(input=lstm_group_input)
+
+ lstm_output = lstmemory_group(input=group_input,
+ name="lstm_group",
+ size=hidden_dim,
+ act=TanhActivation(),
+ gate_act=SigmoidActivation(),
+ state_act=TanhActivation(),
+ lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50))
+ return lstm_output
+
+lstm_nest_group = recurrent_group(input=SubsequenceInput(emb_group),
+ step=lstm_group,
+ name="lstm_nest_group")
+# hasSubseq ->(seqlastins) seq
+lstm_last = last_seq(input=lstm_nest_group, agg_level=AggregateLevel.EACH_SEQUENCE)
+
+# seq ->(expand) hasSubseq
+lstm_expand = expand_layer(input=lstm_last, expand_as=emb_group, expand_level=ExpandLevel.FROM_SEQUENCE)
+
+# hasSubseq ->(average) seq
+lstm_average = pooling_layer(input=lstm_expand,
+ pooling_type=AvgPooling(),
+ agg_level=AggregateLevel.EACH_SEQUENCE)
+
+with mixed_layer(size=label_dim,
+ act=SoftmaxActivation(),
+ bias_attr=True) as output:
+ output += full_matrix_projection(input=lstm_average)
+
+outputs(classification_cost(input=output, label=data_layer(name="label", size=1)))
+```
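+
+下面用纯 Python(用标量代替 LSTM 的输出向量,仅作示意)验证上面分析中“last_seq → expand_layer → pooling_layer 不改变取值”这一结论:
+
+```python
+# Plain-Python illustration of the identity discussed above.
+nested = [[1.0, 2.0, 3.0], [4.0, 5.0]]              # one sample with two subseqs
+
+last_per_subseq = [sub[-1] for sub in nested]        # last_seq, EACH_SEQUENCE -> [3.0, 5.0]
+expanded = [[v] * len(sub) for v, sub in zip(last_per_subseq, nested)]   # expand, FROM_SEQUENCE
+averaged = [sum(sub) / len(sub) for sub in expanded]                     # avg pooling, EACH_SEQUENCE
+
+assert averaged == last_per_subseq                   # the two extra layers change nothing
+```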
+## 示例2:双进双出,subseq间有memory
+
+配置:单层RNN(`sequence_rnn.conf`),双层RNN(`sequence_nest_rnn.conf`和`sequence_nest_rnn_readonly_memory.conf`),语义完全相同。
+
+### 读取双层序列的方法
+
+我们看一下单双层序列的不同数据组织形式和dataprovider(见`rnn_data_provider.py`)
+```python
+data = [
+ [[[1, 3, 2], [4, 5, 2]], 0],
+ [[[0, 2], [2, 5], [0, 1, 2]], 1],
+]
+
+@provider(input_types=[integer_value_sub_sequence(10),
+ integer_value(3)])
+def process_subseq(settings, file_name):
+ for d in data:
+ yield d
+
+@provider(input_types=[integer_value_sequence(10),
+ integer_value(3)])
+def process_seq(settings, file_name):
+ for d in data:
+ seq = []
+```
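+
+上面的 `process_seq` 在原文中被截断了。一个合理的补全(这里假设它把每个样本的各子句拼接成一条单层序列,与下文描述的单层数据一致)大致如下:
+
+```python
+@provider(input_types=[integer_value_sequence(10),
+                       integer_value(3)])
+def process_seq(settings, file_name):
+    for d in data:
+        seq = []
+        for subseq in d[0]:
+            seq += subseq          # concatenate the sub-sentences into one flat sequence
+        yield seq, d[1]            # e.g. [1, 3, 2, 4, 5, 2], 0
+```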
+- 单层序列:有两句,分别为[1,3,2,4,5,2]和[0,2,2,5,0,1,2]。
+- 双层序列:有两句,分别为[[1,3,2],[4,5,2]](2个子句)和[[0,2],[2,5],[0,1,2]](3个子句)。
+- 单双层序列的label都分别是0和1
+
+### 模型中的配置
+
+我们选取单双层序列配置中的不同部分,来对比分析两者语义相同的原因。
+
+- 单层序列:过了一个很简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全连接。
+
+```python
+def step(y):
+ mem = memory(name="rnn_state", size=hidden_dim)
+ return fc_layer(input=[y, mem],
+ size=hidden_dim,
+ act=TanhActivation(),
+ bias_attr=True,
+ name="rnn_state")
+
+out = recurrent_group(step=step, input=emb)
+```
+- 双层序列,外层memory是一个元素:
+ - 内层inner_step的recurrent_group和单层序列的几乎一样。除了boot_layer=outer_mem,表示将外层的outer_mem作为内层memory的初始状态。外层outer_step中,outer_mem是一个子句的最后一个向量,即整个双层group是将前一个子句的最后一个向量,作为下一个子句memory的初始状态。
+ - 从输入数据上看,单双层序列的句子是一样的,只是双层序列将其又做了子序列划分。因此双层序列的配置中,必须将前一个子句的最后一个元素,作为boot_layer传给下一个子句的memory,才能保证和单层序列的配置中“每一个时间步都用了上一个时间步的输出结果”一致。
+
+```python
+def outer_step(x):
+ outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
+ def inner_step(y):
+ inner_mem = memory(name="inner_rnn_state",
+ size=hidden_dim,
+ boot_layer=outer_mem)
+ return fc_layer(input=[y, inner_mem],
+ size=hidden_dim,
+ act=TanhActivation(),
+ bias_attr=True,
+ name="inner_rnn_state")
+
+ inner_rnn_output = recurrent_group(
+ step=inner_step,
+ input=x)
+ last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
+
+ return inner_rnn_output
+
+out = recurrent_group(step=outer_step, input=SubsequenceInput(emb))
+```
+- 双层序列,外层memory是单层序列:
+ - 由于外层每个时间步返回的是一个子句,这些子句的长度往往不等长。因此当外层有is_seq=True的memory时,内层是**无法直接使用**它的,即内层memory的boot_layer不能链接外层的这个memory。
+ - 如果内层memory想**间接使用**这个外层memory,只能通过`pooling_layer`、`last_seq`或`first_seq`这三个layer将它先变成一个元素。但这种情况下,外层memory必须有boot_layer,否则在第0个时间步时,由于外层memory没有任何seq信息,因此上述三个layer的前向会报出“**Check failed: input.sequenceStartPositions**”的错误。
+
+## 示例3:双进双出,输入不等长
+
+**输入不等长**是指recurrent_group的多个输入在各时刻的长度可以不相等, 但需要指定一个和输出长度一致的input,用targetInlink表示。参考配置:单层RNN(`sequence_rnn_multi_unequalength_inputs.conf`),双层RNN(`sequence_nest_rnn_multi_unequalength_inputs.conf`)
+
+### 读取双层序列的方法
+
+我们看一下单双层序列的数据组织形式和dataprovider(见`rnn_data_provider.py`)
+```python
+data2 = [
+ [[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]] ,0],
+ [[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]], 1],
+]
+
+@provider(input_types=[integer_value_sub_sequence(10),
+ integer_value_sub_sequence(10),
+ integer_value(2)],
+ should_shuffle=False)
+def process_unequalength_subseq(settings, file_name): #双层RNN的dataprovider
+ for d in data2:
+ yield d
+
+
+@provider(input_types=[integer_value_sequence(10),
+ integer_value_sequence(10),
+ integer_value(2)],
+ should_shuffle=False)
+def process_unequalength_seq(settings, file_name): #单层RNN的dataprovider
+ for d in data2:
+ words1=reduce(lambda x,y: x+y, d[0])
+ words2=reduce(lambda x,y: x+y, d[1])
+ yield words1, words2, d[2]
+```
+
+data2 中有两个样本,每个样本有两个特征, 记fea1, fea2。
+
+- 单层序列:两个样本分别为[[1, 2, 4, 5, 2], [5, 4, 1, 3, 1]] 和 [[0, 2, 2, 5, 0, 1, 2], [1, 5, 4, 2, 3, 6, 1]]
+- 双层序列:两个样本分别为
+ - **样本1**:[[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]]]。fea1和fea2都分别有2个子句,fea1=[[1, 2], [4, 5, 2]], fea2=[[5, 4, 1], [3, 1]]
+ - **样本2**:[[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]]]。fea1和fea2都分别有3个子句, fea1=[[0, 2], [2, 5], [0, 1, 2]], fea2=[[1, 5], [4], [2, 3, 6, 1]]。
+ - **注意**:每个样本中,各特征的子句数目需要相等。这里说的“双进双出,输入不等长”是指fea1在i时刻的输入的长度可以不等于fea2在i时刻的输入的长度。如对于第1个样本,时刻i=2, fea1[2]=[4, 5, 2],fea2[2]=[3, 1],3≠2。
+- 单双层序列中,两个样本的label都分别是0和1
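+
+可以用几行纯 Python 检查一下上面这条约束(数据即前文的 data2,仅作示意):
+
+```python
+data2 = [
+    [[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]], 0],
+    [[[0, 2], [2, 5], [0, 1, 2]], [[1, 5], [4], [2, 3, 6, 1]], 1],
+]
+
+for fea1, fea2, label in data2:
+    # Each sample's two features must contain the same number of sub-sequences ...
+    assert len(fea1) == len(fea2)
+
+# ... while matching sub-sequences may differ in length,
+# e.g. fea1's [4, 5, 2] (len 3) vs fea2's [3, 1] (len 2) in the first sample.
+assert len(data2[0][0][1]) != len(data2[0][1][1])
+```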
+
+### 模型中的配置
+
+单层RNN(`sequence_rnn_multi_unequalength_inputs.conf`)和双层RNN(`sequence_nest_rnn_multi_unequalength_inputs.conf`)两个模型配置达到的效果完全一样,区别只在于输入为单层还是双层序列,现在我们来看它们内部分别是如何实现的。
+
+- 单层序列:
+ - 过了一个简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全连接,功能与示例2中`sequence_rnn.conf`的`step`函数完全相同。这里,两个输入x1,x2分别通过calrnn返回最后时刻的状态。结果得到的encoder1_rep和encoder2_rep分别是单层序列,最后取encoder1_rep的最后一个时刻和encoder2_rep的所有时刻分别相加得到context。
+ - 注意到这里recurrent_group输入的每个样本中,fea1和fea2的长度都分别相等,这并非偶然,而是因为recurrent_group要求输入为单层序列时,所有输入的长度都必须相等。
+
+```python
+def step(x1, x2):
+ def calrnn(y):
+ mem = memory(name = 'rnn_state_' + y.name, size = hidden_dim)
+ out = fc_layer(input = [y, mem],
+ size = hidden_dim,
+ act = TanhActivation(),
+ bias_attr = True,
+ name = 'rnn_state_' + y.name)
+ return out
+
+ encoder1 = calrnn(x1)
+ encoder2 = calrnn(x2)
+ return [encoder1, encoder2]
+
+encoder1_rep, encoder2_rep = recurrent_group(
+ name="stepout",
+ step=step,
+ input=[emb1, emb2])
+
+encoder1_last = last_seq(input = encoder1_rep)
+encoder1_expandlast = expand_layer(input = encoder1_last,
+ expand_as = encoder2_rep)
+context = mixed_layer(input = [identity_projection(encoder1_expandlast),
+ identity_projection(encoder2_rep)],
+ size = hidden_dim)
+```
+- 双层序列:
+ - 双层RNN中,对输入的两个特征分别求时序上的连续全连接(`inner_step1`和`inner_step2`分别处理fea1和fea2),其功能与示例2中`sequence_nest_rnn.conf`的`outer_step`函数完全相同。不同之处是,此时输入`[SubsequenceInput(emb1), SubsequenceInput(emb2)]`在各时刻并不等长。
+ - 函数`outer_step`中可以分别处理这两个特征,但我们需要用targetInlink指定recurrent_group的输出的格式(各子句长度)只能和其中一个保持一致,如这里选择了和emb2的长度一致。
+ - 最后,依然是取encoder1_rep的最后一个时刻和encoder2_rep的所有时刻分别相加得到context。
+
+```python
+def outer_step(x1, x2):
+ outer_mem1 = memory(name = "outer_rnn_state1", size = hidden_dim)
+ outer_mem2 = memory(name = "outer_rnn_state2", size = hidden_dim)
+ def inner_step1(y):
+ inner_mem = memory(name = 'inner_rnn_state_' + y.name,
+ size = hidden_dim,
+ boot_layer = outer_mem1)
+ out = fc_layer(input = [y, inner_mem],
+ size = hidden_dim,
+ act = TanhActivation(),
+ bias_attr = True,
+ name = 'inner_rnn_state_' + y.name)
+ return out
+
+ def inner_step2(y):
+ inner_mem = memory(name = 'inner_rnn_state_' + y.name,
+ size = hidden_dim,
+ boot_layer = outer_mem2)
+ out = fc_layer(input = [y, inner_mem],
+ size = hidden_dim,
+ act = TanhActivation(),
+ bias_attr = True,
+ name = 'inner_rnn_state_' + y.name)
+ return out
+
+ encoder1 = recurrent_group(
+ step = inner_step1,
+ name = 'inner1',
+ input = x1)
+
+ encoder2 = recurrent_group(
+ step = inner_step2,
+ name = 'inner2',
+ input = x2)
+
+ sentence_last_state1 = last_seq(input = encoder1, name = 'outer_rnn_state1')
+ sentence_last_state2_ = last_seq(input = encoder2, name = 'outer_rnn_state2')
+
+ encoder1_expand = expand_layer(input = sentence_last_state1,
+ expand_as = encoder2)
+
+ return [encoder1_expand, encoder2]
+
+encoder1_rep, encoder2_rep = recurrent_group(
+ name="outer",
+ step=outer_step,
+ input=[SubsequenceInput(emb1), SubsequenceInput(emb2)],
+ targetInlink=emb2)
+
+encoder1_last = last_seq(input = encoder1_rep)
+encoder1_expandlast = expand_layer(input = encoder1_last,
+ expand_as = encoder2_rep)
+context = mixed_layer(input = [identity_projection(encoder1_expandlast),
+ identity_projection(encoder2_rep)],
+ size = hidden_dim)
+```
+
+## 示例4:beam_search的生成
+
+TBD
diff --git a/doc_cn/algorithm/rnn/rnn-tutorial.md b/doc_cn/algorithm/rnn/rnn-tutorial.md
index 7a553054c80392946ba5b16cc31bcaea18cfc977..9e488b0d51956e86f9fb76f450fdb438f596e239 100644
--- a/doc_cn/algorithm/rnn/rnn-tutorial.md
+++ b/doc_cn/algorithm/rnn/rnn-tutorial.md
@@ -93,4 +93,4 @@ memory只能在`recurrent_group`中定义和使用。memory不能独立存在,
使用`beam_search`需要遵循以下约定:
- 单层RNN:从一个word生成下一个word。
-- 双层RNN:即把单层RNN生成后的subseq给拼接成一个新的双层seq。从语义上看,也不存在一个subseq直接生成下一个subseq的情况。
\ No newline at end of file
+- 双层RNN:即把单层RNN生成后的subseq给拼接成一个新的双层seq。从语义上看,也不存在一个subseq直接生成下一个subseq的情况。
diff --git a/doc_cn/build_and_install/cmake/cblas_settings.csv b/doc_cn/build_and_install/cmake/cblas_settings.csv
index d804c0a662cb652dbefb0d09fb18538308c20aec..a6356baf16a0d3d2499e39d2055d8ee878dcaef2 100644
--- a/doc_cn/build_and_install/cmake/cblas_settings.csv
+++ b/doc_cn/build_and_install/cmake/cblas_settings.csv
@@ -1,4 +1,5 @@
-MKL_ROOT,mkl的路径,在${MKL_ROOT}/include下需要包含mkl.h,在${MKL_ROOT}/lib目录下需要包含 mkl_core,mkl_sequential和mkl_intel_lp64三个库
-ATLAS_ROOT,ATLAS库的路径,在${ATLAS_ROOT}/include下需要包含cblas.h,而在${ATLAS_ROOT}/lib下需要包含cblas和atlas两个库
-OPENBLAS_ROOT,在${OPENBLAS_ROOT}/include下需要包含cblas.h,而在${OPENBLAS_ROOT}/lib下需要包含openblas库
-REFERENCE_CBLAS_ROOT,在${REFERENCE_CBLAS_ROOT}/include下需要包含cblas.h,在${REFERENCE_CBLAS_ROOT}/lib下需要包含cblas库
\ No newline at end of file
+编译选项,描述,注意
+MKL_ROOT,MKL的路径,${MKL_ROOT}/include下需要包含mkl.h,${MKL_ROOT}/lib目录下需要包含mkl_core,mkl_sequential和mkl_intel_lp64三个库。
+ATLAS_ROOT,ATLAS的路径,${ATLAS_ROOT}/include下需要包含cblas.h,${ATLAS_ROOT}/lib下需要包含cblas和atlas两个库。
+OPENBLAS_ROOT,OpenBLAS的路径,${OPENBLAS_ROOT}/include下需要包含cblas.h,${OPENBLAS_ROOT}/lib下需要包含openblas库。
+REFERENCE_CBLAS_ROOT,REFERENCE BLAS的路径,${REFERENCE_CBLAS_ROOT}/include下需要包含cblas.h,${REFERENCE_CBLAS_ROOT}/lib下需要包含cblas库。
\ No newline at end of file
diff --git a/doc_cn/build_and_install/cmake/compile_options.csv b/doc_cn/build_and_install/cmake/compile_options.csv
index 0b8015aaee4d7b9068cb4a8de5d9967569e37f0c..12b45eebb2822d77447fa1bc754360605971dcab 100644
--- a/doc_cn/build_and_install/cmake/compile_options.csv
+++ b/doc_cn/build_and_install/cmake/compile_options.csv
@@ -1,15 +1,14 @@
-选项,说明,默认值
-WITH_GPU,是否编译GPU支持。,是否寻找到cuda工具链
-WITH_DOUBLE,是否使用双精度浮点数。,否
-WITH_DSO,是否使用运行时动态加载cuda动态库,而非静态加载cuda动态库。,是
-WITH_AVX,是否编译含有AVX指令集的PaddlePaddle二进制,是
-WITH_PYTHON,是否内嵌python解释器。可以方便嵌入式工作。,是
-WITH_STYLE_CHECK,是否编译时进行代码风格检查,是
-WITH_RDMA,是否开启RDMA支持,否
-WITH_GLOG,是否使用GLOG,如果不使用则会使用一个简化版的日志实现。可以方便嵌入式工作。,取决于是否寻找到GLOG
-WITH_GFLAGS,是否使用GFLAGS,如果不使用则会使用一个简化版的命令行参数解析。可以方便嵌入式工作。,取决于是否寻找到GFLAGS
-WITH_TIMER,是否开启计时功能开启计时功能会导致运行略慢,打印的日志变多。但是方便调试和benchmark,否
-WITH_TESTING,是否开启单元测试,取决于是否寻找到gtest
-WITH_DOC,是否编译英文文档,否
-WITH_DOC_CN,是否编译中文文档,否
-WITH_SWIG_PY,是否编译python的swig接口,python的swig接口可以方便进行预测和定制化训练,取决于是否找到swig
+选项,说明,默认值
+WITH_GPU,是否支持GPU。,取决于是否寻找到CUDA工具链
+WITH_DOUBLE,是否使用双精度浮点数。,否
+WITH_DSO,是否运行时动态加载CUDA动态库,而非静态加载CUDA动态库。,是
+WITH_AVX,是否编译含有AVX指令集的PaddlePaddle二进制文件,是
+WITH_PYTHON,是否内嵌PYTHON解释器。方便今后的嵌入式移植工作。,是
+WITH_STYLE_CHECK,是否编译时进行代码风格检查,是
+WITH_RDMA,是否开启RDMA,否
+WITH_GLOG,是否开启GLOG。如果不开启,则会使用一个简化版的日志,同时方便今后的嵌入式移植工作。,取决于是否寻找到GLOG
+WITH_GFLAGS,是否使用GFLAGS。如果不开启,则会使用一个简化版的命令行参数解析器,同时方便今后的嵌入式移植工作。,取决于是否寻找到GFLAGS
+WITH_TIMER,是否开启计时功能。如果开启会导致运行略慢,打印的日志变多,但是方便调试和测Benchmark,否
+WITH_TESTING,是否开启单元测试,取决于是否寻找到GTEST
+WITH_DOC,是否编译中英文文档,否
+WITH_SWIG_PY,是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练,取决于是否寻找到SWIG
\ No newline at end of file
diff --git a/doc_cn/build_and_install/cmake/compile_options.rst b/doc_cn/build_and_install/cmake/compile_options.rst
index bb5b18a073803662774cb6b7bcbdbafe3ad51112..f345ead2bf851bdad7be2fb8185d16fd2a318a66 100644
--- a/doc_cn/build_and_install/cmake/compile_options.rst
+++ b/doc_cn/build_and_install/cmake/compile_options.rst
@@ -1,62 +1,43 @@
-设置PaddlePaddle的编译选项
-==========================
-
-PaddlePaddle的编译选项可以在调用cmake的时候设置。cmake是一个跨平台的编译脚本,调用
-cmake可以将cmake项目文件,生成各个平台的makefile。详细的cmake使用方法可以参考
-`cmake的官方文档 `_ 。
-
-PaddlePaddle的编译选项是可以控制PaddlePaddle生成CPU/GPU版本二进制,链接何种blas等等。所有的
-编译选项列表如下
-
-PaddlePaddle的编译选项
-----------------------
-
-bool型的编译选项
-++++++++++++++++
-设置下列编译选项时,可以在cmake的命令行设置。使用 -D命令即可。例如
-:code:`cmake -D WITH_GPU=OFF`
-
-.. csv-table:: PaddlePaddle的bool型编译选项
- :widths: 1, 7, 2
- :file: compile_options.csv
-
-blas相关的编译选项
-++++++++++++++++++
-
-PaddlePaddle可以使用 `MKL `_ ,
-`Atlas `_ ,
-`OpenBlas `_ 和
-`refference Blas `_ ,任意一种cblas实现。
-通过编译时指定路径来实现引用各种blas。
-
-cmake编译时会首先在系统路径(/usr/lib\:/usr/local/lib)中寻找这些blas的实现。同时
-也会读取相关路径变量来进行搜索。路径变量为\:
-
-
-.. csv-table:: PaddlePaddle的cblas编译选项
- :widths: 1, 9
- :header: "编译选项", "描述"
- :file: cblas_settings.csv
-
-这些变量均可以使用 -D命令指定。例如 :code:`cmake -D MKL_ROOT=/opt/mkl/`。这些变
-量也可以通过调用cmake命令前通过环境变量指定。例如
-
-.. code-block:: bash
-
- export MKL_ROOT=/opt/mkl
- cmake
-
-需要注意的是,这些变量只在第一次cmake的时候有效。如果在第一次cmake之后想要重新设
-置这些变量,推荐清理( :code:`rm -rf` )掉编译目录后,再指定。
-
-cuda/cudnn相关的编译选项
-++++++++++++++++++++++++
-
-PaddlePaddle可以使用 cudnn v2之后的任何一个cudnn版本来编译运行。但需要注意的是编译和
-运行使用的cudnn尽量是同一个版本。推荐使用最新版本的cudnn v5.1。
-
-在cmake配置时可以使用 :code:`CUDNN_ROOT` 来配置CUDNN的安装路径。使用的命令也是
--D,例如 :code:`cmake -D CUDNN_ROOT=/opt/cudnnv5` 。
-
-需要注意的是,这些变量只在第一次cmake的时候有效。如果在第一次cmake之后想要重新设
-置这些变量,推荐清理( :code:`rm -rf` )掉编译目录后,再指定。
+PaddlePaddle的编译选项
+======================
+
+PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种BLAS库等。用户可在调用cmake的时候设置它们,详细的cmake使用方法可以参考 `官方文档 `_ 。
+
+Bool型的编译选项
+----------------
+用户可在cmake的命令行中,通过使用 ``-D`` 命令设置该类编译选项,例如
+
+.. code-block:: bash
+
+ cmake .. -DWITH_GPU=OFF
+
+.. csv-table:: Bool型的编译选项
+ :widths: 1, 7, 2
+ :file: compile_options.csv
+
+BLAS/CUDA/Cudnn的编译选项
+--------------------------
+BLAS
++++++
+
+PaddlePaddle支持以下任意一种BLAS库:`MKL `_ ,`ATLAS `_ ,`OpenBLAS `_ 和 `REFERENCE BLAS `_ 。
+
+.. csv-table:: BLAS路径相关的编译选项
+ :widths: 1, 2, 7
+ :file: cblas_settings.csv
+
+CUDA/Cudnn
++++++++++++
+
+PaddlePaddle可以使用cudnn v2之后的任何一个版本来编译运行,但尽量请保持编译和运行使用的cudnn是同一个版本。 我们推荐使用最新版本的cudnn v5.1。
+
+编译选项的设置
+++++++++++++++
+
+PaddePaddle通过编译时指定路径来实现引用各种BLAS/CUDA/Cudnn库。cmake编译时,首先在系统路径(/usr/lib\:/usr/local/lib)中搜索这几个库,同时也会读取相关路径变量来进行搜索。 通过使用 ``-D`` 命令可以设置,例如
+
+.. code-block:: bash
+
+ cmake .. -DMKL_ROOT=/opt/mkl/ -DCUDNN_ROOT=/opt/cudnnv5
+
+注意:这几个编译选项的设置,只在第一次cmake的时候有效。如果之后想要重新设置,推荐清理整个编译目录(``rm -rf``)后,再指定。
\ No newline at end of file
diff --git a/doc_cn/build_and_install/install/paddle_version.txt b/doc_cn/build_and_install/install/paddle_version.txt
index 7b2bfd2b1b3a9850665d118e424fd0cf6c24a062..a80873303fd0d05d963482629000d76260185ef6 100644
--- a/doc_cn/build_and_install/install/paddle_version.txt
+++ b/doc_cn/build_and_install/install/paddle_version.txt
@@ -8,4 +8,4 @@ PaddlePaddle 0.8.0b1, compiled with
with_gflags: ON
with_metric_learning:
with_timer: OFF
- with_predict_sdk:
\ No newline at end of file
+ with_predict_sdk:
diff --git a/doc_cn/concepts/trainer_config.py b/doc_cn/concepts/trainer_config.py
index 8d8c79fb39e0c0ddf13aee5d41297506d3404362..3eccbd7bc11f4865130286de718d1be74e4d1722 100644
--- a/doc_cn/concepts/trainer_config.py
+++ b/doc_cn/concepts/trainer_config.py
@@ -1,23 +1,29 @@
from paddle.trainer_config_helpers import *
-define_py_data_sources2(train_list='train.list',
- test_list='test.list',
- module='provider',
- obj='process')
+define_py_data_sources2(
+ train_list='train.list',
+ test_list='test.list',
+ module='provider',
+ obj='process')
settings(
batch_size=128,
learning_rate=1e-3,
learning_method=AdamOptimizer(),
- regularization=L2Regularization(0.5)
-)
+ regularization=L2Regularization(0.5))
img = data_layer(name='pixel', size=28 * 28)
-hidden1 = simple_img_conv_pool(input=img, filter_size=3, num_filters=32, pool_size=3,
- num_channel=1)
+hidden1 = simple_img_conv_pool(
+ input=img, filter_size=3, num_filters=32, pool_size=3, num_channel=1)
-hidden2 = fc_layer(input=hidden1, size=200, act=TanhActivation(),
- layer_attr=ExtraAttr(drop_rate=0.5))
+hidden2 = fc_layer(
+ input=hidden1,
+ size=200,
+ act=TanhActivation(),
+ layer_attr=ExtraAttr(drop_rate=0.5))
predict = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
-outputs(classification_cost(input=predict, label=data_layer(name='label', size=10)))
+outputs(
+ classification_cost(
+ input=predict, label=data_layer(
+ name='label', size=10)))
diff --git a/doc_cn/demo/index.rst b/doc_cn/demo/index.rst
index 71f54bc18fbb5b1b8cdd0e6cbee2ee028c0af218..e15e839f93d4ac0d455e49fd8b1cde8bf60a29ac 100644
--- a/doc_cn/demo/index.rst
+++ b/doc_cn/demo/index.rst
@@ -9,7 +9,7 @@
自然语言处理
''''''''''''
-* `情感分析 <../../doc/demo/sentiment_analysis/index.html>`_
+* `情感分析 `_
* `文本生成 <../../doc/demo/text_generation/index.html>`_
* `词性标注 <../../doc/demo/semantic_role_labeling/index.html>`_
diff --git a/doc_cn/demo/quick_start/index.md b/doc_cn/demo/quick_start/index.md
index aa6b66ca8c02411016420bf9d99c5e1b4e3cefdd..4d9b24ba851a7aaaeb0d79bfbeb0703b8878b77f 100644
--- a/doc_cn/demo/quick_start/index.md
+++ b/doc_cn/demo/quick_start/index.md
@@ -134,9 +134,8 @@ define_py_data_sources2(train_list='data/train.list',
* obj="process": 指定生成数据的函数
* args={"dictionary": word_dict}: 额外的参数,这里指定词典
-更详细用例请参考文档Python Use Case,
-数据格式和详细文档请参考
-PyDataProviderWrapper。
+更详细的数据格式和用例请参考
+PyDataProvider2。
## 网络结构(Network Architecture)
本节我们将专注于网络结构的介绍。
diff --git a/doc_cn/demo/sentiment_analysis/index.rst b/doc_cn/demo/sentiment_analysis/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..82400b2459ebcaf89ff5e884edfe721b9ec01d7f
--- /dev/null
+++ b/doc_cn/demo/sentiment_analysis/index.rst
@@ -0,0 +1,8 @@
+情感分析教程
+===========================
+
+.. toctree::
+ :maxdepth: 3
+ :glob:
+
+ Training Locally
\ No newline at end of file
diff --git a/doc_cn/demo/sentiment_analysis/sentiment_analysis.md b/doc_cn/demo/sentiment_analysis/sentiment_analysis.md
new file mode 100644
index 0000000000000000000000000000000000000000..b70f2d59675615c26b29932cdf99d728bb206148
--- /dev/null
+++ b/doc_cn/demo/sentiment_analysis/sentiment_analysis.md
@@ -0,0 +1,324 @@
+# 情感分析教程
+
+情感分析有许多应用场景。 一个基本的应用场景是区分给定文本的褒贬两极性,给定的文本可以是一个文档、句子、或者是一个小的文本片段。 一个简单的例子如:把用户在购物网站、旅游网站、团购网站(亚马逊、天猫、淘宝等)上发表的评论分成正面评论和负面评论两类。
+
+情感分析也常用于基于大量评论和个人博客来监控社会媒体。 例如,研究人员分析了几个关于消费者信心和政治观点的调查,结果发现它们与同时期的Twitter消息中的情绪词频率相关 [1]。 另一个例子是通过分析每日Twitter博客的文本内容来预测股票变动 [2]。
+
+另一方面,抓取产品的用户评论并分析他们的情感,有助于理解用户对不同公司,不同产品,甚至不同竞争对手产品的偏好。
+
+本教程将指导您完成长短期记忆(LSTM)网络的训练过程,以分类来自[大型电影评论数据集](http://ai.stanford.edu/~amaas/data/sentiment/)(有时称为[互联网电影数据库 (IMDB)](http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf))的句子的情感。 此数据集包含电影评论及其相关联的类别标签,即正面和负面。
+
+## 数据准备
+
+### IMDB 数据介绍
+
+训练模型之前, 我们需要预处理数据并构建一个字典。 首先, 你可以使用下面的脚本下载 IMDB 数据集和[Moses](http://www.statmt.org/moses/)工具, 这是一个基于统计的机器翻译系统。 我们提供了一个数据预处理脚本,它不仅能够处理IMDB数据,还能处理其他用户自定义的数据。 为了使用预先编写好的脚本,需要将已标记的训练和测试样本移动到另一个路径,这一步已经在`get_imdb.sh`中完成。
+
+```
+cd demo/sentiment/data
+./get_imdb.sh
+```
+如果数据获取成功,你将在目录```./demo/sentiment/data```中看到下面的文件:
+
+```
+aclImdb get_imdb.sh imdb mosesdecoder-master
+```
+
+* aclImdb: 从外部网站上下载的原始数据集。
+* imdb: 仅包含训练和测试数据集。
+* mosesdecoder-master: Moses 工具。
+
+IMDB数据集包含25,000个已标注过的高极性电影评论用于训练,25,000个用于测试。负面评论的得分小于等于4,正面评论的得分大于等于7,满分为10分。 运行完脚本 `./get_imdb.sh`后, 我们可以看到在目录 `aclImdb`中的数据集结构如下:
+
+```
+imdbEr.txt imdb.vocab README test train
+```
+* train: 训练数据集。
+* test: 测试数据集。
+* imdb.vocab: 字典文件。
+* imdbEr.txt: 字典imdb.vocab中每个切分单词的预期评级。
+* README: 数据说明文档。
+
+测试集和训练集目录包含下面的文件:
+
+```
+labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt
+```
+
+* pos: 正面评价样本,包含12,500个txt文件,每个文件是一个电影评论。
+* neg: 负面评价样本,包含12,500个txt文件,每个文件是一个电影评论。
+* unsup: 未标记的评价样本,包含50,000个txt文件。
+* urls_xx.txt: 每个评论的网址。
+* xxBow.feat: 用于统计词频的Bow模型特征。
+
+### IMDB 数据准备
+
+在这个例子中,我们只使用已经标注过的训练集和测试集,且默认在训练集上构建字典,而不使用IMDB数据集中的imdb.vocab作为字典。训练集已经做了随机打乱排序而测试集没有。 Moses 工具中的脚本`tokenizer.perl` 用于切分单词和标点符号。执行下面的命令就可以预处理数据。
+
+```
+cd demo/sentiment/
+./preprocess.sh
+```
+preprocess.sh:
+
+```
+data_dir="./data/imdb"
+python preprocess.py -i $data_dir
+```
+
+* data_dir: 输入数据所在目录。
+* preprocess.py: 预处理脚本。
+
+运行成功后目录`demo/sentiment/data/pre-imdb` 结构如下:
+
+```
+dict.txt labels.list test.list test_part_000 train.list train_part_000
+```
+* test\_part\_000 和 train\_part\_000: 所有标记的测试集和训练集, 训练集已经随机打乱。
+* train.list 和 test.list: 训练集和测试集文件列表。
+* dict.txt: 利用训练集生成的字典。
+* labels.list: neg 0, pos 1, 含义:标签0表示负面的评论,标签1表示正面的评论。
+
+### 用户自定义数据预处理
+
+如果你要执行其他用情感分析来分类文本的任务,可以按如下的结构来准备数据。 我们提供了脚本来构建字典和预处理数据,所以你只需按下面的结构来组织数据就行了。
+
+```
+dataset
+|----train
+| |----class1
+| | |----text_files
+| |----class2
+| | |----text_files
+| | ...
+|----test
+| |----class1
+| | |----text_files
+| |----class2
+| | |----text_files
+| | ...
+```
+* dataset: 一级目录。
+* train, test: 二级目录。
+* class1,class2,...: 三级目录。
+* text_files: 文本格式的实例文件。
+
+同一目录下的所有文本实例文件都属于同一个类别。 每个文本文件包含一个或者多个实例,每一行表示一个实例。 为了充分地随机打乱训练集, 在预处理含有多行数据的文本文件时参数设置稍有不同, 执行`preprocess.sh`脚本时需要加上`-m True`参数。 tokenizer.perl 默认用来切分单词和标点符号,如果你不需要这个操作,在运行`preprocess.sh`时加上`-t False`参数即可。
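+
+下面给出一个示意性的调用方式(假设已将 `preprocess.sh` 中的 `data_dir` 修改为自定义数据集所在的路径,例如 `./data/mydataset`):
+
+```
+cd demo/sentiment/
+./preprocess.sh -m True -t False
+```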
+
+## 训练模型
+
+在这项任务中,我们使用了循环神经网络(RNN)的 LSTM 架构来训练情感分析模型。 引入LSTM模型主要是为了克服梯度消失的问题。 LSTM网络类似于具有隐藏层的标准循环神经网络, 但是隐藏层中的每个普通节点被一个记忆单元替换。 每个记忆单元包含四个主要的元素: 输入门, 具有自循环连接的神经元, 忘记门和输出门。 更多的细节可以参考文献[4]。 LSTM架构的最大优点是它可以在长时间间隔内记忆信息,而没有短时记忆的损失。在每一个有新单词到来的时间步内,记忆单元中存储的历史信息会被更新,从而以合理的顺序迭代地学习单词的表示。
+
+
+图 1. LSTM [3]
+
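+为便于理解上文提到的门控结构,这里补充标准LSTM单元的更新公式(记号与文献[4]大体一致,不同实现的细节可能略有差异,仅作示意):
+
+```
+i_t = \sigma(W_i x_t + U_i h_{t-1} + b_i)             % 输入门
+f_t = \sigma(W_f x_t + U_f h_{t-1} + b_f)             % 忘记门
+o_t = \sigma(W_o x_t + U_o h_{t-1} + b_o)             % 输出门
+\tilde{c}_t = \tanh(W_c x_t + U_c h_{t-1} + b_c)      % 候选记忆
+c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t       % 更新记忆单元
+h_t = o_t \odot \tanh(c_t)                            % 输出隐状态
+```
+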
+情感分析是自然语言理解中最典型的问题之一。 它的目的是预测在一个序列中表达的情感态度。 通常,仅仅是一些关键词,如形容词和副词,在预测序列或段落的情感中起主要作用。然而有些评论上下文非常长,例如 IMDB的数据集。 我们之所以使用LSTM来执行这个任务,是因为其改进的设计并且具有门机制。 首先,它能够从词级到具有可变上下文长度的上下文级别来总结表示。 第二,它可以在句子级别利用可扩展的上下文, 而大多数方法只是利用n-gram级别的知识。第三,它直接学习段落表示,而不是组合上下文级别信息。
+
+在本演示中,我们提供两个网络,即双向LSTM和三层堆叠LSTM。
+
+#### 双向LSTM
+
+图2是双向LSTM网络,后面接全连接层和softmax层。
+
+
+图 2. Bidirectional-LSTM
+
+#### Stacked-LSTM
+图3是三层LSTM结构。图的底部是word embedding(对文档处理后形成的单词向量)。 接下来,连接三个LSTM隐藏层,并且第二个是反向LSTM。然后在所有时间步上对LSTM隐藏层的输出取最大值,作为整个序列的表示。 最后,使用具有softmax激活的全连接前馈层来执行分类任务。 更多内容可查看参考文献 [5]。
+
+
+图 3. Stacked-LSTM for sentiment analysis
+
+**配置**
+
+进入`demo/sentiment` 目录, `trainer_config.py` 是一个配置文件的例子, 其中包含算法和网络配置。第一行从`sentiment_net.py`中导入预定义的网络。
+
+trainer_config.py:
+
+```python
+from sentiment_net import *
+
+data_dir = "./data/pre-imdb"
+# whether this config is used for test
+is_test = get_config_arg('is_test', bool, False)
+# whether this config is used for prediction
+is_predict = get_config_arg('is_predict', bool, False)
+dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
+
+################## Algorithm Config #####################
+
+settings(
+ batch_size=128,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25
+)
+
+#################### Network Config ######################
+stacked_lstm_net(dict_dim, class_dim=class_dim,
+ stacked_num=3, is_predict=is_predict)
+#bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
+```
+
+* **数据定义**:
+ * get\_config\_arg(): 获取通过 `--config_args=xx` 设置的命令行参数。
+ * 定义训练数据和测试数据提供者, 这里使用了PaddlePaddle的Python接口来加载数据。想了解更多细节可以参考PyDataProvider部分的文档。
+
+* **算法配置**:
+ * 使用随机梯度下降(sgd)算法。
+ * 使用 adam 优化。
+ * 设置batch size大小为128。
+ * 设置平均sgd窗口。
+ * 设置全局学习率。
+* **网络配置**:
+ * dict_dim: 获取字典维度。
+ * class_dim: 设置类别数,IMDB有两个标签,即正面评价标签和负面评价标签。
+ * `stacked_lstm_net`: 预定义网络如图3所示,默认情况下使用此网络。
+ * `bidirectional_lstm_net`: 预定义网络,如图2所示。
+
+**训练**
+
+首先安装PaddlePaddle。 然后使用下面的脚本 `train.sh` 来开启本地的训练。
+
+```
+cd demo/sentiment/
+./train.sh
+```
+
+train.sh:
+
+```
+config=trainer_config.py
+output=./model_output
+paddle train --config=$config \
+ --save_dir=$output \
+ --job=train \
+ --use_gpu=false \
+ --trainer_count=4 \
+ --num_passes=10 \
+ --log_period=20 \
+ --dot_period=20 \
+ --show_parameter_stats_period=100 \
+ --test_all_data_in_one_period=1 \
+ 2>&1 | tee 'train.log'
+```
+
+* \--config=$config: 设置网络配置。
+* \--save\_dir=$output: 设置输出路径以保存训练完成的模型。
+* \--job=train: 设置工作模式为训练。
+* \--use\_gpu=false: 使用CPU训练,如果你安装了GPU版本的PaddlePaddle,并想使用GPU来训练,可将其设置为true。
+* \--trainer\_count=4:设置线程数(或GPU个数)。
+* \--num\_passes=10: 设置pass的数量,PaddlePaddle中的一个pass意味着对数据集中的所有样本进行一次训练。
+* \--log\_period=20: 每20个batch打印一次日志。
+* \--show\_parameter\_stats\_period=100: 每100个batch打印一次统计信息。
+* \--test\_all\_data\_in\_one\_period=1: 每次测试都测试所有数据。
+
+如果运行成功,输出日志保存在路径 `demo/sentiment/train.log`中,模型保存在目录`demo/sentiment/model_output/`中。 输出日志说明如下:
+
+```
+Batch=20 samples=2560 AvgCost=0.681644 CurrentCost=0.681644 Eval: classification_error_evaluator=0.36875 CurrentEval: classification_error_evaluator=0.36875
+...
+Pass=0 Batch=196 samples=25000 AvgCost=0.418964 Eval: classification_error_evaluator=0.1922
+Test samples=24999 cost=0.39297 Eval: classification_error_evaluator=0.149406
+```
+- Batch=xx: 表示训练了xx个Batch。
+- samples=xx: 表示训练了xx个样本。
+- AvgCost=xx: 从第0个batch到当前batch的平均损失。
+- CurrentCost=xx: 最近log_period个batch的平均损失。
+- Eval: classification\_error\_evaluator=xx: 表示第0个batch到当前batch的分类错误率。
+- CurrentEval: classification\_error\_evaluator: 最近log_period个batch的分类错误率。
+- Pass=0: 遍历所有训练数据一次称为一个pass,0表示第一个pass。
+
+默认情况下,我们使用`stacked_lstm_net`网络,当传递相同的样本数时,它的收敛速度比`bidirectional_lstm_net`快。如果要使用双向LSTM,只需删除最后一行中的注释并把“stacked_lstm_net”注释掉。
+
+## 测试模型
+
+测试模型是指使用训练出的模型评估已标记的验证集。
+
+```
+cd demo/sentiment
+./test.sh
+```
+
+test.sh:
+
+```bash
+function get_best_pass() {
+ cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
+ sed -r 'N;s/Test.* error=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
+ sort | head -n 1
+}
+
+log=train.log
+LOG=`get_best_pass $log`
+LOG=(${LOG})
+evaluate_pass="model_output/pass-${LOG[1]}"
+
+echo 'evaluating from pass '$evaluate_pass
+
+model_list=./model.list
+touch $model_list | echo $evaluate_pass > $model_list
+net_conf=trainer_config.py
+paddle train --config=$net_conf \
+ --model_list=$model_list \
+ --job=test \
+ --use_gpu=false \
+ --trainer_count=4 \
+ --config_args=is_test=1 \
+ 2>&1 | tee 'test.log'
+```
+
+函数`get_best_pass`依据分类错误率获得最佳模型进行测试。 在本示例中,我们默认使用IMDB的测试数据集作为验证。 与训练不同,它需要在这里指定`--job=test`和模型路径,即`--model_list=$model_list`。如果运行成功,日志将保存在`demo/sentiment/test.log`中。例如,在我们的测试中,最好的模型是`model_output/pass-00002`,分类误差是0.115645,如下:
+
+```
+Pass=0 samples=24999 AvgCost=0.280471 Eval: classification_error_evaluator=0.115645
+```
+
+## 预测
+
+`predict.py`脚本提供了一个预测接口。在使用它之前请先安装PaddlePaddle的Python API。 一个预测IMDB未标记评论的示例如下:
+
+```
+cd demo/sentiment
+./predict.sh
+```
+predict.sh:
+
+```
+#Note the default model is pass-00002, you should make sure the model path
+#exists or change the model path.
+model=model_output/pass-00002/
+config=trainer_config.py
+label=data/pre-imdb/labels.list
+python predict.py \
+ -n $config\
+ -w $model \
+ -b $label \
+ -d data/pre-imdb/dict.txt \
+ -i data/aclImdb/test/pos/10014_7.txt
+```
+
+* `predict.py`: 预测接口脚本。
+* -n $config : 设置网络配置。
+* -w $model: 设置模型路径。
+* -b $label: 设置标签类别字典,这个字典是整数标签和字符串标签的一个对应。
+* -d data/pre-imdb/dict.txt: 设置字典文件。
+* -i data/aclImdb/test/pos/10014_7.txt: 设置一个要预测的示例文件。
+
+注意应该确保默认模型路径`model_output/pass-00002`存在,或将其更改为其它模型路径。
+
+本示例的预测结果:
+
+```
+Loading parameters from model_output/pass-00002/
+./data/aclImdb/test/pos/10014_7.txt: predicting label is pos
+```
+我们真诚地感谢您的关注,并欢迎您来参与贡献。
+
+## 参考文档
+[1] Brendan O'Connor, Ramnath Balasubramanyan, Bryan R. Routledge, and Noah A. Smith. 2010. [From Tweets to Polls: Linking Text Sentiment to Public Opinion Time Series](http://homes.cs.washington.edu/~nasmith/papers/oconnor+balasubramanyan+routledge+smith.icwsm10.pdf). In ICWSM-2010.
+[2] Johan Bollen, Huina Mao, Xiaojun Zeng. 2011. [Twitter mood predicts the stock market](http://arxiv.org/abs/1010.3003), Journal of Computational Science.
+[3] Alex Graves, Marcus Liwicki, Santiago Fernández, Roman Bertolami, Horst Bunke, and Jürgen Schmidhuber. 2009. [A novel connectionist system for unconstrained handwriting recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence](http://www.cs.toronto.edu/~graves/tpami_2009.pdf), 31(5):855–868.
+[4] Zachary C. Lipton, [A Critical Review of Recurrent Neural Networks for Sequence Learning](http://arxiv.org/abs/1506.00019v1), arXiv:1506.00019.
+[5] Jie Zhou and Wei Xu; [End-to-end Learning of Semantic Role Labeling Using Recurrent Neural Networks](http://www.aclweb.org/anthology/P/P15/P15-1109.pdf); ACL-IJCNLP 2015.
diff --git a/doc_cn/faq/index.rst b/doc_cn/faq/index.rst
index db28b4436fe5e76882861a4cf06f358a63d8ebd4..3eb0e10ae2228740cd384270db5070e367f7007b 100644
--- a/doc_cn/faq/index.rst
+++ b/doc_cn/faq/index.rst
@@ -177,3 +177,40 @@ PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字
pip install --upgrade pip
+8. python相关的单元测试都过不了
+--------------------------------
+
+如果出现以下python相关的单元测试都过不了的情况:
+
+.. code-block:: bash
+
+ 24 - test_PyDataProvider (Failed)
+ 26 - test_RecurrentGradientMachine (Failed)
+ 27 - test_NetworkCompare (Failed)
+ 28 - test_PyDataProvider2 (Failed)
+ 32 - test_Prediction (Failed)
+ 33 - test_Compare (Failed)
+ 34 - test_Trainer (Failed)
+ 35 - test_TrainerOnePass (Failed)
+ 36 - test_CompareTwoNets (Failed)
+ 37 - test_CompareTwoOpts (Failed)
+ 38 - test_CompareSparse (Failed)
+ 39 - test_recurrent_machine_generation (Failed)
+ 40 - test_PyDataProviderWrapper (Failed)
+ 41 - test_config_parser (Failed)
+ 42 - test_swig_api (Failed)
+ 43 - layers_test (Failed)
+
+并且查询PaddlePaddle单元测试的日志,提示:
+
+.. code-block:: bash
+
+ paddle package is already in your PYTHONPATH. But unittest need a clean environment.
+ Please uninstall paddle package before start unittest. Try to 'pip uninstall paddle'.
+
+解决办法是:卸载paddle包 :code:`pip uninstall paddle`。
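+
+一个简单的自查方式(仅为示意):
+
+.. code-block:: bash
+
+    pip uninstall paddle
+    # 在没有设置PYTHONPATH的干净shell中执行;如果下面的导入仍然成功,
+    # 说明site-packages中还有残留的paddle包
+    python -c "import paddle"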
+
+原因是:单元测试使用了一个旧版本的python包,而没有测试到代码中实际修改的python包。即单元测试需要一个干净的环境:
+
+* 如果paddle包已经在python的site-packages里面了,那么单元测试时使用的paddle包,就是site-packages里面的python包,而不是源码目录里 :code:`/python` 目录下的python包。
+* 即便设置了 :code:`PYTHONPATH` 到 :code:`/python` 也没用,因为python的搜索路径会优先使用已经安装的python包。
\ No newline at end of file
diff --git a/doc_cn/faq/reduce_min_pool_size.py b/doc_cn/faq/reduce_min_pool_size.py
index 2811b134b66b1ec55903d89e3f38a0cef8c9ef8d..5715397cc11e18246b8522fcc5b4f05780c9a0a7 100644
--- a/doc_cn/faq/reduce_min_pool_size.py
+++ b/doc_cn/faq/reduce_min_pool_size.py
@@ -3,4 +3,4 @@ def process(settings, filename):
os.system('shuf %s > %s.shuf' % (filename, filename)) # shuffle before.
with open('%s.shuf' % filename, 'r') as f:
for line in f:
- yield get_sample_from_line(line)
\ No newline at end of file
+ yield get_sample_from_line(line)
diff --git a/doc_cn/faq/word2vec_config.py b/doc_cn/faq/word2vec_config.py
index e347252476eab670abfa2cf2dc126d96b6e04857..866b40c3d4c96c1213b3f716f29b14dd38763edb 100644
--- a/doc_cn/faq/word2vec_config.py
+++ b/doc_cn/faq/word2vec_config.py
@@ -1,8 +1,12 @@
-... # the settings and define data provider is omitted.
-DICT_DIM=3000 # dictionary dimension.
-word_ids=data_layer('word_ids', size=DICT_DIM)
+... # the settings and define data provider is omitted.
+DICT_DIM = 3000 # dictionary dimension.
+word_ids = data_layer('word_ids', size=DICT_DIM)
-emb = embedding_layer(input=word_ids, size=256, param_attr=ParamAttr(sparse_update=True))
+emb = embedding_layer(
+ input=word_ids, size=256, param_attr=ParamAttr(sparse_update=True))
emb_sum = pooling_layer(input=emb, pooling_type=SumPooling())
predict = fc_layer(input=emb_sum, size=DICT_DIM, act=Softmax())
-outputs(classification_cost(input=predict, label=data_layer('label', size=DICT_DIM)))
\ No newline at end of file
+outputs(
+ classification_cost(
+ input=predict, label=data_layer(
+ 'label', size=DICT_DIM)))
diff --git a/doc_cn/faq/word2vec_dataprovider.py b/doc_cn/faq/word2vec_dataprovider.py
index a0a39080cece90c6c4096bba4396bfa91b3ef759..ec2753a7d01d7dd4d804c3bed0bac1be9c3fb3d3 100644
--- a/doc_cn/faq/word2vec_dataprovider.py
+++ b/doc_cn/faq/word2vec_dataprovider.py
@@ -1,8 +1,10 @@
-DICT_DIM=3000
+DICT_DIM = 3000
+
+
@provider(input_types=[integer_sequence(DICT_DIM), integer_value(DICT_DIM)])
def process(settings, filename):
- with open(filename) as f:
- # yield word ids to predict inner word id
- # such as [28, 29, 10, 4], 4
- # It means the sentance is 28, 29, 4, 10, 4.
- yield read_next_from_file(f)
\ No newline at end of file
+ with open(filename) as f:
+ # yield word ids to predict inner word id
+ # such as [28, 29, 10, 4], 4
+ # It means the sentence is 28, 29, 4, 10, 4.
+ yield read_next_from_file(f)
diff --git a/doc_cn/howto/how_to_write_docs/index.rst b/doc_cn/howto/how_to_write_docs/index.rst
index 869ef747f9f88c7dbb5efdf6e03111a3f76c4014..a1f983b3405fa40f436885e40fca2ebbb4695491 100644
--- a/doc_cn/howto/how_to_write_docs/index.rst
+++ b/doc_cn/howto/how_to_write_docs/index.rst
@@ -2,32 +2,19 @@
如何贡献/修改PaddlePaddle的文档
###############################
-PaddlePaddle的文档使用 `cmake`_ 驱动 `sphinx`_ 生成。公有两个文档,:code:`doc` 和 :code:`doc_cn` 。这两者会在 `cmake`_ 中进行编译,生成后的文档会存储在服务器的 :code:`doc` 和 :code:`doc_cn` 两个目录下。
+PaddlePaddle的文档包括英文文档 ``doc`` 和中文文档 ``doc_cn`` 两个部分。文档都是通过 `cmake`_ 驱动 `sphinx`_ 编译生成,生成后的文档分别存储在编译目录的 ``doc`` 和 ``doc_cn`` 两个子目录下。
-下面分几个部分介绍一下PaddlePaddle文档的贡献方法。
-
-如何书写PaddlePaddle的文档
-==========================
-
-TBD
如何构建PaddlePaddle的文档
==========================
-构建PaddlePaddle文档,需要使用构建Paddle的全部环境。准备这个环境相对来说比较复杂,所以本文档提供两种方式构建PaddlePaddle的文档,即
-
-* 使用Docker构建PaddlePaddle的文档
-* 直接构建PaddlePaddle的文档。
-
-并且,我们推荐使用Docker来构建PaddlePaddle的文档。
+PaddlePaddle的文档构建有直接构建和基于Docker构建两种方式。构建PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使用Docker来构建PaddlePaddle的文档。
使用Docker构建PaddlePaddle的文档
--------------------------------
-使用Docker构建PaddlePaddle的文档,首先要求在系统里安装好Docker工具包。安装Docker请参考 `Docker的官网 `_ 。
-
-安装好Docker之后可以使用源码目录下的脚本构建文档,即
+使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即
.. code-block:: bash
@@ -35,10 +22,10 @@ TBD
cd paddle/scripts/tools/build_docs
bash build_docs.sh
-执行完这个脚本后,该目录下会生成两个目录,分别是\:
+编译完成后,该目录下会生成如下两个子目录\:
-* doc 目录,英文文档地址
-* doc_cn 目录,中文文档地址
+* doc 英文文档目录
+* doc_cn 中文文档目录
打开浏览器访问对应目录下的index.html即可访问本地文档。
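+
+如果希望通过HTTP方式在本地预览,也可以使用Python自带的简易HTTP服务(仅为示意,端口可自行调整):
+
+.. code-block:: bash
+
+    cd doc_cn
+    python -m SimpleHTTPServer 8088
+    # 然后在浏览器中访问 http://localhost:8088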
@@ -52,6 +39,10 @@ TBD
TBD
+如何书写PaddlePaddle的文档
+==========================
+
+TBD
如何更新www.paddlepaddle.org文档
================================
diff --git a/doc_cn/ui/data_provider/mnist_config.py b/doc_cn/ui/data_provider/mnist_config.py
index 7ba344338c374a7f9e7e4faa804e2e124577c0be..39becff03b08f5e75b8503aaf01e782d2b0fb3be 100644
--- a/doc_cn/ui/data_provider/mnist_config.py
+++ b/doc_cn/ui/data_provider/mnist_config.py
@@ -1,8 +1,9 @@
from paddle.trainer_config_helpers import *
-define_py_data_sources2(train_list='train.list',
- test_list=None,
- module='mnist_provider',
- obj='process')
+define_py_data_sources2(
+ train_list='train.list',
+ test_list=None,
+ module='mnist_provider',
+ obj='process')
img = data_layer(name='pixel', size=784)
label = data_layer(name='label', size=10)
diff --git a/doc_cn/ui/data_provider/mnist_provider.dict.py b/doc_cn/ui/data_provider/mnist_provider.dict.py
index bf13b56372b56a1e810fad159cd51371ef46c468..2ba0b126a0d6239f84950e130410aaaa6e1f24cd 100644
--- a/doc_cn/ui/data_provider/mnist_provider.dict.py
+++ b/doc_cn/ui/data_provider/mnist_provider.dict.py
@@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import *
# Define a py data provider
-@provider(input_types={
- 'pixel': dense_vector(28 * 28),
- 'label': integer_value(10)
-})
+@provider(
+ input_types={'pixel': dense_vector(28 * 28),
+ 'label': integer_value(10)})
def process(settings, filename): # settings is not used currently.
f = open(filename, 'r') # open one of training file
diff --git a/doc_cn/ui/data_provider/mnist_provider.py b/doc_cn/ui/data_provider/mnist_provider.py
index 92f1915c1072562a174a62b436de8f5b39dab2d4..8b828641d55735e67ca634107d5b239150649651 100644
--- a/doc_cn/ui/data_provider/mnist_provider.py
+++ b/doc_cn/ui/data_provider/mnist_provider.py
@@ -2,10 +2,7 @@ from paddle.trainer.PyDataProvider2 import *
# Define a py data provider
-@provider(input_types=[
- dense_vector(28 * 28),
- integer_value(10)
-])
+@provider(input_types=[dense_vector(28 * 28), integer_value(10)])
def process(settings, filename): # settings is not used currently.
f = open(filename, 'r') # open one of training file
diff --git a/doc_cn/ui/data_provider/sentimental_config.py b/doc_cn/ui/data_provider/sentimental_config.py
index 051f75e32b5c0b1f36d27a54c42db94a4682ce7b..7ce71608a2372b2484ae40ccf01f0621728ddef2 100644
--- a/doc_cn/ui/data_provider/sentimental_config.py
+++ b/doc_cn/ui/data_provider/sentimental_config.py
@@ -3,9 +3,12 @@ from paddle.trainer_config_helpers import *
dictionary = dict()
... # read dictionary from outside
-define_py_data_sources2(train_list='train.list', test_list=None,
- module='sentimental_provider', obj='process',
- # above codes same as mnist sample.
- args={ # pass to provider.
- 'dictionary': dictionary
- })
+define_py_data_sources2(
+ train_list='train.list',
+ test_list=None,
+ module='sentimental_provider',
+ obj='process',
+ # above codes same as mnist sample.
+ args={ # pass to provider.
+ 'dictionary': dictionary
+ })
diff --git a/doc_cn/ui/data_provider/sentimental_provider.py b/doc_cn/ui/data_provider/sentimental_provider.py
index bda37d7722a0bb98c2c681c790bb308c0e146515..0fb0bb88e95a230f01f18b78ebb37b659c3768f1 100644
--- a/doc_cn/ui/data_provider/sentimental_provider.py
+++ b/doc_cn/ui/data_provider/sentimental_provider.py
@@ -12,7 +12,8 @@ def on_init(settings, dictionary, **kwargs):
# The text is a sequence of integer values, and each value is a word id.
# The whole sequence is the sentences that we want to predict its
# sentimental.
- integer_value(len(dictionary), seq_type=SequenceType), # text input
+ integer_value(
+ len(dictionary), seq_type=SequenceType), # text input
# label positive/negative
integer_value(2)
diff --git a/paddle/.common_test_util.sh b/paddle/.common_test_util.sh
index dec22e45619fb5d393be96e929a7e301bf266224..dc1525061590808e3cc9c7b606aca5d5d9195a3a 100644
--- a/paddle/.common_test_util.sh
+++ b/paddle/.common_test_util.sh
@@ -117,4 +117,4 @@ set_port()
fi
done
-}
\ No newline at end of file
+}
diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index cae0f64400a7e618bffb4f7fc6a044011baf04d4..fb3af8ea92feed96a9669bfb29ef7353a256c308 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -17,5 +17,3 @@ endif()
if(WITH_SWIG_PY)
add_subdirectory(api)
endif()
-
-
diff --git a/paddle/api/Arguments.cpp b/paddle/api/Arguments.cpp
index 6f51d551200696ebafade2a46243b78086975265..b539374cd4aa5a9510cdb728c1b22edf65a9f880 100644
--- a/paddle/api/Arguments.cpp
+++ b/paddle/api/Arguments.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
@@ -112,7 +111,7 @@ void Arguments::setSlotSequenceStartPositions(size_t idx,
}
void Arguments::setSlotSubSequenceStartPositions(
- size_t idx, IVector *vec) throw(RangeError) {
+ size_t idx, IVector* vec) throw(RangeError) {
auto& a = m->getArg(idx);
auto& v = m->cast(vec->getSharedPtr());
a.subSequenceStartPositions = std::make_shared(v);
diff --git a/paddle/api/ConfigParser.cpp b/paddle/api/ConfigParser.cpp
index 25d94f5a6a1255f3e2faff9816cfd003b20c0418..bc40d871d180a6bfe21200c866181dc161f5f078 100644
--- a/paddle/api/ConfigParser.cpp
+++ b/paddle/api/ConfigParser.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/trainer/Trainer.h"
@@ -44,8 +43,7 @@ TrainerConfig* TrainerConfig::createFromTrainerConfigFile(
return retv;
}
-TrainerConfig* TrainerConfig::createFromProtoString(
- const std::string& str) {
+TrainerConfig* TrainerConfig::createFromProtoString(const std::string& str) {
auto retv = new TrainerConfig();
paddle::TrainerConfig trainerConfigProto;
auto conf = std::make_shared(trainerConfigProto);
diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index bef499c67858b8e2d5432155a8defca56af6019c..9a4846d80980e23e97f89b6134e15af71207ae6b 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
@@ -27,7 +26,8 @@ GradientMachine::GradientMachine() : m(new GradientMachinePrivate()) {}
GradientMachine::~GradientMachine() { delete m; }
GradientMachine* GradientMachine::createFromPaddleModelPtr(
- const void* confPtr, GradientMatchineCreateMode mode,
+ const void* confPtr,
+ GradientMatchineCreateMode mode,
const std::vector& types) {
auto& conf = *(const paddle::ModelConfig*)(confPtr);
std::vector realTypes;
@@ -44,7 +44,8 @@ GradientMachine* GradientMachine::createFromPaddleModelPtr(
}
GradientMachine* GradientMachine::createByConfigProtoStr(
- const std::string& protoStr, GradientMatchineCreateMode mode,
+ const std::string& protoStr,
+ GradientMatchineCreateMode mode,
const std::vector& types) {
paddle::ModelConfig conf;
conf.ParseFromString(protoStr);
@@ -56,13 +57,15 @@ GradientMachine* GradientMachine::createByConfigProtoStr(
}
GradientMachine* GradientMachine::createByModelConfig(
- ModelConfig* conf, GradientMatchineCreateMode mode,
+ ModelConfig* conf,
+ GradientMatchineCreateMode mode,
const std::vector& types) {
auto confPtr = &conf->m->conf->getModelConfig();
return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types);
}
-void GradientMachine::forward(const Arguments& inArgs, Arguments* outArgs,
+void GradientMachine::forward(const Arguments& inArgs,
+ Arguments* outArgs,
PassType passType) {
auto& in =
m->cast>(inArgs.getInternalArgumentsPtr());
@@ -99,7 +102,8 @@ void GradientMachine::backward(const UpdateCallback& callback) {
}
void GradientMachine::forwardBackward(const Arguments& inArgs,
- Arguments* outArgs, PassType passType,
+ Arguments* outArgs,
+ PassType passType,
const UpdateCallback& callback) {
auto& in =
m->cast>(inArgs.getInternalArgumentsPtr());
@@ -129,7 +133,7 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
void GradientMachine::randParameters() { m->machine->randParameters(); }
Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
- throw(UnsupportError) {
+ throw(UnsupportError) {
auto nn = std::dynamic_pointer_cast(m->machine);
if (nn) {
auto mat = nn->getLayerOutput(layerName);
@@ -140,8 +144,11 @@ Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
}
SequenceGenerator* GradientMachine::asSequenceGenerator(
- const std::vector& dict, size_t begin_id, size_t end_id,
- size_t max_length, size_t beam_size) {
+ const std::vector& dict,
+ size_t begin_id,
+ size_t end_id,
+ size_t max_length,
+ size_t beam_size) {
SequenceGenerator* r =
SequenceGenerator::createByGradientMachineSharedPtr(&m->machine);
r->setDict(dict);
diff --git a/paddle/api/Internal.h b/paddle/api/Internal.h
index b990f650be9fa401898a8c6d10c21d9c90eb728a..66a13bc603ed5098997f168d3f527160ac3822ef 100644
--- a/paddle/api/Internal.h
+++ b/paddle/api/Internal.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include "PaddleAPI.h"
@@ -23,7 +22,8 @@ limitations under the License. */
template
void staticCastVector(std::vector* dest, const std::vector& src) {
dest->resize(src.size());
- std::transform(src.begin(), src.end(), dest->begin(), [](T1 t){
- return static_cast(t);
- });
+ std::transform(src.begin(),
+ src.end(),
+ dest->begin(),
+ [](T1 t) { return static_cast(t); });
}
diff --git a/paddle/api/Matrix.cpp b/paddle/api/Matrix.cpp
index 6a79f83495a56907fec9d3f77b581eddd3a8baeb..f257ee65aa4a12dfcd1914ddbf0e16461a9b128c 100644
--- a/paddle/api/Matrix.cpp
+++ b/paddle/api/Matrix.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
@@ -44,15 +43,35 @@ Matrix* Matrix::createZero(size_t height, size_t width, bool useGpu) {
return m;
}
-Matrix* Matrix::createDense(const std::vector& data, size_t height,
- size_t width, bool useGpu) {
+Matrix* Matrix::createDense(const std::vector& data,
+ size_t height,
+ size_t width,
+ bool useGpu) {
auto m = new Matrix();
m->m->mat = paddle::Matrix::create(height, width, useGpu);
m->m->mat->copyFrom(data.data(), data.size());
return m;
}
-Matrix* Matrix::createCpuDenseFromNumpy(float* data, int dim1, int dim2,
+Matrix* Matrix::createDenseFromNumpy(float* data,
+ int dim1,
+ int dim2,
+ bool copy,
+ bool useGpu) throw(UnsupportError) {
+ if (useGpu) {
+ /// Gpu mode only supports copy=True
+ if (!copy) {
+ throw UnsupportError("Gpu mode only supports copy=True");
+ }
+ return Matrix::createGpuDenseFromNumpy(data, dim1, dim2);
+ } else {
+ return Matrix::createCpuDenseFromNumpy(data, dim1, dim2, copy);
+ }
+}
+
+Matrix* Matrix::createCpuDenseFromNumpy(float* data,
+ int dim1,
+ int dim2,
bool copy) {
auto m = new Matrix();
if (copy) {
@@ -71,12 +90,20 @@ Matrix* Matrix::createGpuDenseFromNumpy(float* data, int dim1, int dim2) {
return m;
}
-Matrix* Matrix::createSparse(size_t height, size_t width, size_t nnz,
- bool isNonVal, bool isTrans, bool useGpu) {
+Matrix* Matrix::createSparse(size_t height,
+ size_t width,
+ size_t nnz,
+ bool isNonVal,
+ bool isTrans,
+ bool useGpu) {
auto m = new Matrix();
m->m->mat = paddle::Matrix::createSparseMatrix(
- height, width, nnz, isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE,
- isTrans, useGpu);
+ height,
+ width,
+ nnz,
+ isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE,
+ isTrans,
+ useGpu);
return m;
}
@@ -207,7 +234,8 @@ FloatArray Matrix::getData() const {
}
void Matrix::sparseCopyFrom(
- const std::vector& rows, const std::vector& cols,
+ const std::vector& rows,
+ const std::vector& cols,
const std::vector& vals) throw(UnsupportError) {
auto cpuSparseMat =
std::dynamic_pointer_cast(m->mat);
@@ -226,7 +254,8 @@ void Matrix::sparseCopyFrom(
void* Matrix::getSharedPtr() const { return &m->mat; }
-void Matrix::toNumpyMatInplace(float** view_data, int* dim1,
+void Matrix::toNumpyMatInplace(float** view_data,
+ int* dim1,
int* dim2) throw(UnsupportError) {
auto cpuMat = std::dynamic_pointer_cast(m->mat);
if (cpuMat) {
@@ -237,7 +266,8 @@ void Matrix::toNumpyMatInplace(float** view_data, int* dim1,
throw UnsupportError();
}
}
-void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
+void Matrix::copyToNumpyMat(float** view_m_data,
+ int* dim1,
int* dim2) throw(UnsupportError) {
static_assert(sizeof(paddle::real) == sizeof(float),
"Currently PaddleAPI only support for single "
@@ -255,8 +285,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
} else if (auto gpuMat = dynamic_cast(m->mat.get())) {
auto src = gpuMat->getData();
auto dest = *view_m_data;
- hl_memcpy_device2host(dest, src,
- sizeof(paddle::real) * (*dim1) * (*dim2));
+ hl_memcpy_device2host(
+ dest, src, sizeof(paddle::real) * (*dim1) * (*dim2));
} else {
LOG(WARNING) << "Unexpected Situation";
throw UnsupportError();
@@ -264,7 +294,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
}
}
-void Matrix::copyFromNumpyMat(float* data, int dim1,
+void Matrix::copyFromNumpyMat(float* data,
+ int dim1,
int dim2) throw(UnsupportError, RangeError) {
if (isSparse()) {
throw UnsupportError();
diff --git a/paddle/api/Paddle.swig b/paddle/api/Paddle.swig
index a09f24ce1ccf5d026bf9431255c258483854b74b..6a0fbc537d9345f2221ab65d90733f4696be6880 100644
--- a/paddle/api/Paddle.swig
+++ b/paddle/api/Paddle.swig
@@ -4,6 +4,13 @@
#define SWIG_FILE_WITH_INIT
#include "api/PaddleAPI.h"
%}
+
+%include "exception.i"
+%typemap(throws) UnsupportError %{
+ SWIG_exception(SWIG_RuntimeError, $1.what());
+ SWIG_fail;
+%}
+
%include "std_vector.i"
%include "std_pair.i"
#ifdef SWIGPYTHON
@@ -133,14 +140,21 @@ namespace std {
%newobject Matrix::createZero;
%newobject Matrix::createSparse;
%newobject Matrix::createDense;
+%newobject Matrix::createDenseFromNumpy;
+%newobject Matrix::createCpuDenseFromNumpy;
+%newobject Matrix::createGpuDenseFromNumpy;
%newobject Vector::createZero;
%newobject Vector::create;
+%newobject Vector::createVectorFromNumpy;
%newobject Vector::createCpuVectorFromNumpy;
%newobject Vector::createGpuVectorFromNumpy;
%newobject IVector::createZero;
%newobject IVector::create;
+%newobject IVector::createVectorFromNumpy;
+%newobject IVector::createCpuVectorFromNumpy;
+%newobject IVector::createGpuVectorFromNumpy;
%newobject Trainer::createByCommandLine;
-%newobject Trainer::getNetworkOutput;
+%newobject Trainer::getForwardOutput;
%newobject Trainer::getLayerOutput;
%newobject Arguments::getSlotValue;
%newobject Arguments::getSlotIds;
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
index cf790f2f8ef1dbdce37b279227e95328490c518d..c07facdb1292b34ac31247160a4347ea359e718b 100644
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include
#include
#include
+#include
#include
#include "paddle/utils/GlobalConstants.h"
#include "paddle/utils/TypeDefs.h"
@@ -42,6 +42,12 @@ using namespace paddle::enumeration_wrapper; // NOLINT
*/
void initPaddle(int argc, char** argv);
+/// Return FLAGS_use_gpu
+bool isUsingGpu();
+
+/// Set the Flags_use_gpu to the given parameter
+void setUseGpu(bool useGpu);
+
/// Return true if this py_paddle is compiled in GPU Version
bool isGpuVersion();
@@ -52,7 +58,11 @@ class IOError {};
class RangeError {};
/// Not support Error, such as access GPU memory directly, etc.
-class UnsupportError {};
+class UnsupportError : public std::runtime_error {
+public:
+ UnsupportError() : std::runtime_error(" "){};
+ UnsupportError(const std::string& message) : std::runtime_error(message){};
+};
/// This type will map to python's list of float.
struct FloatArray {
@@ -101,7 +111,9 @@ public:
/**
* Create A Matrix with height,width, which is filled by zero.
*/
- static Matrix* createZero(size_t height, size_t width, bool useGpu = false);
+ static Matrix* createZero(size_t height,
+ size_t width,
+ bool useGpu = isUsingGpu());
/**
* Create Sparse Matrix.
@@ -112,9 +124,12 @@ public:
*
* @note the default sparse type is SPARSE_CSR.
*/
- static Matrix* createSparse(size_t height, size_t width, size_t nnz,
- bool isNonVal = true, bool trans = false,
- bool useGpu = false);
+ static Matrix* createSparse(size_t height,
+ size_t width,
+ size_t nnz,
+ bool isNonVal = true,
+ bool trans = false,
+ bool useGpu = isUsingGpu());
/**
* Create Dense Matrix.
@@ -122,8 +137,17 @@ public:
* @param data list of float should be passed in python.
* @note the value will be copy into a new matrix.
*/
- static Matrix* createDense(const std::vector& data, size_t height,
- size_t width, bool useGpu = false);
+ static Matrix* createDense(const std::vector& data,
+ size_t height,
+ size_t width,
+ bool useGpu = isUsingGpu());
+
+ static Matrix* createDenseFromNumpy(
+ float* data,
+ int dim1,
+ int dim2,
+ bool copy = true,
+ bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu Dense Matrix from numpy matrix, dtype=float32
@@ -134,7 +158,9 @@ public:
* @param copy true if copy into a new matrix, false will create
* matrix inplace.
*/
- static Matrix* createCpuDenseFromNumpy(float* data, int dim1, int dim2,
+ static Matrix* createCpuDenseFromNumpy(float* data,
+ int dim1,
+ int dim2,
bool copy = false);
/// Create Gpu Dense Matrix from numpy matrix, dtype=float32
@@ -154,11 +180,13 @@ public:
* numpy_mat = m.toNumpyMat()
* @endcode
*/
- void toNumpyMatInplace(float** view_data, int* dim1,
+ void toNumpyMatInplace(float** view_data,
+ int* dim1,
int* dim2) throw(UnsupportError);
/// Copy To numpy mat.
- void copyToNumpyMat(float** view_m_data, int* dim1,
+ void copyToNumpyMat(float** view_m_data,
+ int* dim1,
int* dim2) throw(UnsupportError);
/// Copy From Numpy Mat
@@ -221,21 +249,28 @@ public:
~Vector();
/// Create Vector filled with zero.
- static Vector* createZero(size_t sz, bool useGpu = false);
+ static Vector* createZero(size_t sz, bool useGpu = isUsingGpu());
/**
* Create Vector from list of float.
*
* It will create a new vector, and copy data into it.
*/
- static Vector* create(const std::vector& data, bool useGpu = false);
-
+ static Vector* create(const std::vector& data,
+ bool useGpu = isUsingGpu());
+
+ static Vector* createVectorFromNumpy(
+ float* data,
+ int dim,
+ bool copy = true,
+ bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu Vector from numpy array, which dtype=float32
*
* If copy is false, it will create vector inplace.
*/
- static Vector* createCpuVectorFromNumpy(float* data, int dim,
+ static Vector* createCpuVectorFromNumpy(float* data,
+ int dim,
bool copy = false);
/// Create Gpu Vector from numpy array, which dtype=float32
@@ -259,6 +294,9 @@ public:
/// Return is GPU vector or not.
bool isGpu() const;
+ /// Return a list of float, the memory is alloced and copied.
+ FloatArray getData() const;
+
/// __len__ in python
size_t getSize() const;
@@ -279,25 +317,33 @@ class IVector {
public:
/// Create IVector filled with zero
- static IVector* createZero(size_t sz, bool useGpu = false);
+ static IVector* createZero(size_t sz, bool useGpu = isUsingGpu());
/**
* Create IVector from list of int.
* It will create a new vector, and copy data into it.
*/
- static IVector* create(const std::vector& data, bool useGpu = false);
+ static IVector* create(const std::vector& data,
+ bool useGpu = isUsingGpu());
+
+ static IVector* createVectorFromNumpy(
+ int* data,
+ int dim,
+ bool copy = true,
+ bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu IVector from numpy array, which dtype=int32
*
* If copy is false, it will create vector inplace
*/
- static IVector* createCpuVectorFromNumpy(int* data, int dim,
+ static IVector* createCpuVectorFromNumpy(int* data,
+ int dim,
bool copy = false);
/**
* Create Gpu IVector from numpy array, which dtype=int32
*/
- static IVector* createGpuVectorFromNumy(int* data, int dim);
+ static IVector* createGpuVectorFromNumpy(int* data, int dim);
/// Cast to numpy array inplace.
void toNumpyArrayInplace(int** view_data, int* dim1) throw(UnsupportError);
@@ -576,7 +622,8 @@ class ParameterTraverseCallback {
public:
~ParameterTraverseCallback();
- void apply(const std::vector& vecs, const ParameterConfig& config,
+ void apply(const std::vector& vecs,
+ const ParameterConfig& config,
size_t sparseId);
private:
@@ -609,7 +656,8 @@ public:
void finishBatch();
- void update(const std::vector& vecs, const ParameterConfig& conf,
+ void update(const std::vector& vecs,
+ const ParameterConfig& conf,
size_t sparseId = NO_SPARSE_ID);
std::vector getParameterTypes() const;
@@ -649,7 +697,8 @@ public:
* model config by TrainerConfig
*/
static GradientMachine* createByModelConfig(
- ModelConfig* conf, GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
+ ModelConfig* conf,
+ GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
const std::vector& parameterTypes = defaultParamTypes);
/**
@@ -672,7 +721,8 @@ public:
/**
* Combine forward/backward
*/
- void forwardBackward(const Arguments& inArgs, Arguments* outArgs,
+ void forwardBackward(const Arguments& inArgs,
+ Arguments* outArgs,
PassType passType,
const UpdateCallback& callback = UpdateCallback());
@@ -693,14 +743,17 @@ public:
*/
SequenceGenerator* asSequenceGenerator(
const std::vector& dict = std::vector(),
- size_t begin_id = 0UL, size_t end_id = 0UL, size_t max_length = 100UL,
+ size_t begin_id = 0UL,
+ size_t end_id = 0UL,
+ size_t max_length = 100UL,
size_t beam_size = -1UL);
private:
GradientMachinePrivate* m;
static GradientMachine* createFromPaddleModelPtr(
- const void* confPtr, GradientMatchineCreateMode mode,
+ const void* confPtr,
+ GradientMatchineCreateMode mode,
const std::vector& types);
// Not to use c++ 11 init-list, so we use static var as function default arg.
@@ -722,8 +775,8 @@ public:
/// Create A Trainer By TrainerConfig. using paddle command line.
static Trainer* createByCommandLine() throw(IOError);
- static Trainer* create(TrainerConfig* optConfig, GradientMachine* gm)
- throw(IOError);
+ static Trainer* create(TrainerConfig* optConfig,
+ GradientMachine* gm) throw(IOError);
/// Start training
void startTrain();
diff --git a/paddle/api/PaddleAPIPrivate.h b/paddle/api/PaddleAPIPrivate.h
index 93cdca8c4beaaad70a40e5899ccd908594425f4f..5ffeff6a9726c7445db36c7c1bec7c74825884a0 100644
--- a/paddle/api/PaddleAPIPrivate.h
+++ b/paddle/api/PaddleAPIPrivate.h
@@ -65,4 +65,3 @@ struct ArgumentsPrivate {
return *(std::shared_ptr*)(rawPtr);
}
};
-
diff --git a/paddle/api/Parameter.cpp b/paddle/api/Parameter.cpp
index 8b56adc97c2d6178a9e0b272a9af89732a3573f6..c5876bb1c71438578831ffffd85840c706b6224c 100644
--- a/paddle/api/Parameter.cpp
+++ b/paddle/api/Parameter.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "paddle/parameter/Parameter.h"
diff --git a/paddle/api/ParameterOptimizer.cpp b/paddle/api/ParameterOptimizer.cpp
index b13761ab0900d57008c17094c5199ef31a040f54..21d031e4bcb897eb693e5cff56bc77a637dc6bd2 100644
--- a/paddle/api/ParameterOptimizer.cpp
+++ b/paddle/api/ParameterOptimizer.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/parameter/ParameterOptimizer.h"
@@ -32,17 +31,21 @@ struct ParameterTraverseCallbackPrivate {
const paddle::ParameterOptimizer::TraverseCallback& callback)
: callback(callback) {}
- void apply(const std::vector& vecs, const ParameterConfig& conf,
+ void apply(const std::vector& vecs,
+ const ParameterConfig& conf,
size_t sparseId) {
std::vector real_vecs;
real_vecs.resize(vecs.size());
- std::transform(vecs.begin(), vecs.end(), real_vecs.begin(), [](Vector* v) {
- if (v) {
- return *(paddle::VectorPtr*)(v->getSharedPtr());
- } else {
- return paddle::VectorPtr();
- }
- });
+ std::transform(vecs.begin(),
+ vecs.end(),
+ real_vecs.begin(),
+ [](Vector* v) {
+ if (v) {
+ return *(paddle::VectorPtr*)(v->getSharedPtr());
+ } else {
+ return paddle::VectorPtr();
+ }
+ });
paddle::ParameterConfig& real_conf =
*(paddle::ParameterConfig*)(const_cast(conf)
@@ -86,10 +89,12 @@ void ParameterOptimizer::startBatch(size_t numSamplesProcessed) {
void ParameterOptimizer::finishBatch() { m->optimizer->finishBatch(); }
void ParameterOptimizer::update(const std::vector& vecs,
- const ParameterConfig& conf, size_t sparseId) {
- ParameterTraverseCallbackPrivate invoker([&](
- const paddle::VectorPtr _vecs[], const paddle::ParameterConfig& config,
- size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); });
+ const ParameterConfig& conf,
+ size_t sparseId) {
+ ParameterTraverseCallbackPrivate invoker(
+ [&](const paddle::VectorPtr _vecs[],
+ const paddle::ParameterConfig& config,
+ size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); });
invoker.apply(vecs, conf, sparseId);
}
@@ -116,8 +121,9 @@ void ParameterTraverseCallback::apply(const std::vector& vecs,
ParameterTraverseCallback* ParameterOptimizer::needSpecialTraversal(
const ParameterConfig& config) const {
- auto& param_config = *(paddle::ParameterConfig*)const_cast(
- config).getRawPtr();
+ auto& param_config =
+ *(paddle::ParameterConfig*)const_cast(config)
+ .getRawPtr();
auto callback = m->optimizer->needSpecialTraversal(param_config);
if (callback) {
auto retCallback = new ParameterTraverseCallback();
diff --git a/paddle/api/SequenceGenerator.cpp b/paddle/api/SequenceGenerator.cpp
index 9d353ccc8e281e72a207ba19f45517fd256d6df2..d51be78d45902967107f4bf0af995958faed931a 100644
--- a/paddle/api/SequenceGenerator.cpp
+++ b/paddle/api/SequenceGenerator.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "paddle/gserver/gradientmachines/GradientMachine.h"
#include "paddle/parameter/Argument.h"
@@ -42,8 +41,10 @@ struct Path {
// position
static void findNBest(paddle::GradientMachine* gradMachine,
std::vector& inArgs,
- std::vector& finalPaths, size_t bos_id,
- size_t eos_id, size_t max_length) {
+ std::vector& finalPaths,
+ size_t bos_id,
+ size_t eos_id,
+ size_t max_length) {
std::vector paths;
Path emptyPath;
paths.push_back(emptyPath);
@@ -166,7 +167,8 @@ public:
if (id < getSize()) {
Path& p = (*path_)[id];
std::ostringstream sout;
- std::transform(p.ids.begin(), p.ids.end(),
+ std::transform(p.ids.begin(),
+ p.ids.end(),
std::ostream_iterator(sout, split ? " " : ""),
[&](int id) { return (*dict_)[id]; });
return sout.str();
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
index b61f36f740d47fe785b30361f26059bf0b64829d..7a6aa69fb652313748b1fa787847ffd74fda7a22 100644
--- a/paddle/api/Trainer.cpp
+++ b/paddle/api/Trainer.cpp
@@ -64,12 +64,11 @@ Trainer* Trainer::createByCommandLine() throw(IOError) {
Trainer::Trainer(TrainerConfig* config, GradientMachine* gm)
: m(new TrainerPrivate()) {
- m->init(config->m->conf, /* testing= */false, gm ? gm->m->machine : nullptr);
+ m->init(config->m->conf, /* testing= */ false, gm ? gm->m->machine : nullptr);
}
-Trainer* Trainer::create(TrainerConfig* config, GradientMachine* gm)
- throw(IOError)
-{
+Trainer* Trainer::create(TrainerConfig* config,
+ GradientMachine* gm) throw(IOError) {
auto retv = new Trainer(config, gm);
if (retv->m->getConfig().IsInitialized()) {
return retv;
@@ -134,15 +133,17 @@ void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
Matrix* Trainer::getLayerOutput(const std::string& layerName) {
auto nn = std::dynamic_pointer_cast(
- this->m->getGradientMachine());
+ this->m->getGradientMachine());
CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
auto m = nn->getLayerOutput(layerName);
return Matrix::createByPaddleMatrixPtr(&m);
}
-void Trainer::forwardOneBatch(size_t batchSize) { m->forwardOneBatch(batchSize); }
+void Trainer::forwardOneBatch(size_t batchSize) {
+ m->forwardOneBatch(batchSize);
+}
-bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
+bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
CHECK(dataProvider_) << "data_provider is not specified";
paddle::DataBatch dataBatch;
int num = dataProvider_->getNextBatch(batchSize, &dataBatch);
@@ -156,7 +157,6 @@ bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
void TrainerPrivate::forwardOneDataBatch(
const std::vector& inArgs) {
-
std::vector& outArgs = forwardOutput_;
if (config_->getOptConfig().use_sparse_remote_updater()) {
diff --git a/paddle/api/Util.cpp b/paddle/api/Util.cpp
index 8a6741078f2f19d8c3cb081f129447d6fc5801c9..1bba1df2e1c0a2d3cd2d8307ed3a0d784bb949b4 100644
--- a/paddle/api/Util.cpp
+++ b/paddle/api/Util.cpp
@@ -37,10 +37,16 @@ FloatArray::FloatArray(const float* b, const size_t l)
IntArray::IntArray(const int* b, const size_t l, bool f)
: buf(b), length(l), needFree(f) {}
-IntWithFloatArray::IntWithFloatArray(const float* v, const int* i, size_t l,
+IntWithFloatArray::IntWithFloatArray(const float* v,
+ const int* i,
+ size_t l,
bool f)
: valBuf(v), idxBuf(i), length(l), needFree(f) {}
+bool isUsingGpu() { return FLAGS_use_gpu; }
+
+void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }
+
bool isGpuVersion() {
#ifdef PADDLE_ONLY_CPU
return false;
diff --git a/paddle/api/Vector.cpp b/paddle/api/Vector.cpp
index 1affc1a5fefb8a1109d2a442db10b7d7641cd9ee..cc1c098223826a06fea291a95730d7fc1fd1beb3 100644
--- a/paddle/api/Vector.cpp
+++ b/paddle/api/Vector.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "paddle/math/Vector.h"
@@ -39,6 +38,21 @@ IVector* IVector::create(const std::vector& data, bool useGpu) {
return v;
}
+IVector* IVector::createVectorFromNumpy(int* data,
+ int dim,
+ bool copy,
+ bool useGpu) throw(UnsupportError) {
+ if (useGpu) {
+ /// if use gpu only copy=true is supported
+ if (!copy) {
+ throw UnsupportError("Gpu mode only supports copy=True");
+ }
+ return IVector::createGpuVectorFromNumpy(data, dim);
+ } else {
+ return IVector::createCpuVectorFromNumpy(data, dim, copy);
+ }
+}
+
IVector* IVector::createCpuVectorFromNumpy(int* data, int dim, bool copy) {
auto v = new IVector();
if (copy) {
@@ -50,7 +64,7 @@ IVector* IVector::createCpuVectorFromNumpy(int* data, int dim, bool copy) {
return v;
}
-IVector* IVector::createGpuVectorFromNumy(int* data, int dim) {
+IVector* IVector::createGpuVectorFromNumpy(int* data, int dim) {
auto v = new IVector();
v->m->vec = paddle::IVector::create(dim, true);
v->m->vec->copyFrom(data, dim);
@@ -124,8 +138,8 @@ void IVector::copyToNumpyArray(int** view_m_data, int* dim1) {
if (auto cpuVec = dynamic_cast(m->vec.get())) {
std::memcpy(*view_m_data, cpuVec->getData(), sizeof(int) * (*dim1));
} else if (auto gpuVec = dynamic_cast(m->vec.get())) {
- hl_memcpy_device2host(*view_m_data, gpuVec->getData(),
- sizeof(int) * (*dim1));
+ hl_memcpy_device2host(
+ *view_m_data, gpuVec->getData(), sizeof(int) * (*dim1));
} else {
LOG(INFO) << "Unexpected situation";
}
@@ -188,12 +202,27 @@ Vector* Vector::createByPaddleVectorPtr(void* ptr) {
}
}
+Vector* Vector::createVectorFromNumpy(float* data,
+ int dim,
+ bool copy,
+ bool useGpu) throw(UnsupportError) {
+ if (useGpu) {
+ /// if use gpu only copy=True is supported
+ if (!copy) {
+ throw UnsupportError("Gpu mode only supports copy=True");
+ }
+ return Vector::createGpuVectorFromNumpy(data, dim);
+ } else {
+ return Vector::createCpuVectorFromNumpy(data, dim, copy);
+ }
+}
+
Vector* Vector::createCpuVectorFromNumpy(float* data, int dim, bool copy) {
CHECK_GT(dim, 0);
auto retVec = new Vector();
if (copy) {
retVec->m->vec = paddle::Vector::create((size_t)dim, false);
- return retVec;
+ retVec->m->vec->copyFrom(data, dim);
} else {
retVec->m->vec = paddle::Vector::create(data, (size_t)dim, false);
}
@@ -225,8 +254,8 @@ void Vector::copyToNumpyArray(float** view_m_data, int* dim1) {
if (auto cpuVec = dynamic_cast(m->vec.get())) {
std::memcpy(*view_m_data, cpuVec->getData(), sizeof(float) * (*dim1));
} else if (auto gpuVec = dynamic_cast(m->vec.get())) {
- hl_memcpy_device2host(*view_m_data, gpuVec->getData(),
- sizeof(float) * (*dim1));
+ hl_memcpy_device2host(
+ *view_m_data, gpuVec->getData(), sizeof(float) * (*dim1));
} else {
LOG(INFO) << "Unexpected situation";
}
@@ -237,6 +266,21 @@ void Vector::copyFromNumpyArray(float* data, int dim) {
m->vec->copyFrom(data, dim);
}
+FloatArray Vector::getData() const {
+ if (this->isGpu()) {
+ float* src = m->vec->getData();
+ size_t len = m->vec->getSize();
+ float* dest = new float[len];
+ hl_memcpy_device2host(dest, src, len * sizeof(float));
+ FloatArray ret_val(dest, len);
+ ret_val.needFree = true;
+ return ret_val;
+ } else {
+ FloatArray ret_val(m->vec->getData(), m->vec->getSize());
+ return ret_val;
+ }
+}
+
bool Vector::isGpu() const {
return std::dynamic_pointer_cast(m->vec) != nullptr;
}
diff --git a/paddle/api/__init__.py b/paddle/api/__init__.py
index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644
--- a/paddle/api/__init__.py
+++ b/paddle/api/__init__.py
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
diff --git a/paddle/api/paddle_api_config.py.in b/paddle/api/paddle_api_config.py.in
index 6531e5ccb3dba39315c7e35191ea1bdf0504d220..a2352250c31efa7ee3c4c8338d95dce5a5b9a511 100644
--- a/paddle/api/paddle_api_config.py.in
+++ b/paddle/api/paddle_api_config.py.in
@@ -1,6 +1,7 @@
PADDLE_BUILD_DIR="@CMAKE_CURRENT_BINARY_DIR@/../"
WITH_GPU="@WITH_GPU@"
PROTOBUF_LIB="@PROTOBUF_LIBRARY@"
+ZLIB_LIB="@ZLIB_LIBRARIES@"
CMAKE_THREAD_LIB="@CMAKE_THREAD_LIBS_INIT@"
CMAKE_DL_LIBS="@CMAKE_DL_LIBS@"
@@ -15,3 +16,4 @@ GFLAGS_LOCATION="@GFLAGS_LOCATION@"
CBLAS_LIBRARIES="@CBLAS_LIBS@"
CUDA_LIBRARIES="@CUDA_LIBRARIES@"
+WITH_COVERALLS="@ON_COVERALLS@"
diff --git a/paddle/api/paddle_ld_flags.py b/paddle/api/paddle_ld_flags.py
index bc1afc5898e829bc271b62b702b3743bf7eb782b..ebe00798e8b7169ecbbef53e287ab4b78334bcf9 100644
--- a/paddle/api/paddle_ld_flags.py
+++ b/paddle/api/paddle_ld_flags.py
@@ -29,7 +29,10 @@ try:
whole_start = ""
whole_end = ""
- LIB_DIRS = ["math", 'utils', 'parameter', "gserver", "api", "cuda", "pserver", "trainer"]
+ LIB_DIRS = [
+ "math", 'utils', 'parameter', "gserver", "api", "cuda", "pserver",
+ "trainer"
+ ]
PARENT_LIB_DIRS = ['proto']
class PaddleLDFlag(object):
@@ -38,6 +41,7 @@ try:
self.paddle_build_dir = os.path.abspath(self.paddle_build_dir)
self.with_gpu = PaddleLDFlag.cmake_bool(WITH_GPU)
self.protolib = PROTOBUF_LIB
+ self.zlib = ZLIB_LIB
self.thread = CMAKE_THREAD_LIB
self.dl_libs = CMAKE_DL_LIBS
self.with_python = PaddleLDFlag.cmake_bool(WITH_PYTHON)
@@ -47,25 +51,27 @@ try:
self.glog_libs = LIBGLOG_LIBRARY
self.with_gflags = PaddleLDFlag.cmake_bool(WITH_GFLAGS)
+ self.with_coverage = PaddleLDFlag.cmake_bool(WITH_COVERALLS)
self.gflags_libs = GFLAGS_LIBRARIES
self.gflags_location = GFLAGS_LOCATION
self.cblas_libs = CBLAS_LIBRARIES
self.curt = CUDA_LIBRARIES
def ldflag_str(self):
- return " ".join([self.libs_dir_str(),
- self.parent_dir_str(),
- self.libs_str()])
+ return " ".join(
+ [self.libs_dir_str(), self.parent_dir_str(), self.libs_str()])
def libs_dir_str(self):
libdirs = LIB_DIRS
- return " ".join(map(lambda x: "-L" + os.path.join(self.paddle_build_dir, x),
- libdirs))
+ return " ".join(
+ map(lambda x: "-L" + os.path.join(self.paddle_build_dir, x),
+ libdirs))
def parent_dir_str(self):
libdirs = PARENT_LIB_DIRS
- return " ".join(map(lambda x: "-L" + os.path.join(self.paddle_build_dir, '..', x),
- libdirs))
+ return " ".join(
+ map(lambda x: "-L" + os.path.join(self.paddle_build_dir, '..', x),
+ libdirs))
def libs_str(self):
libs = [
@@ -82,6 +88,7 @@ try:
"-lpaddle_cuda",
"-lpaddle_api",
self.normalize_flag(self.protolib),
+ self.normalize_flag(self.zlib),
self.normalize_flag(self.thread),
self.normalize_flag(self.dl_libs),
self.normalize_flag(self.cblas_libs),
@@ -95,6 +102,8 @@ try:
libs.append(self.normalize_flag(self.gflags_libs))
if self.with_gpu:
libs.append(self.normalize_flag(self.curt))
+ if self.with_coverage:
+ libs.append("-fprofile-arcs")
return " ".join(filter(lambda l: len(l) != 0, libs))
def normalize_flag(self, cmake_flag):
@@ -108,10 +117,10 @@ try:
return cmake_flag
elif cmake_flag.startswith("-l"): # normal link command
return cmake_flag
- elif cmake_flag in ["gflags-shared",
- "gflags-static",
- "gflags_nothreads-shared",
- "gflags_nothreads-static"]: # special for gflags
+ elif cmake_flag in [
+ "gflags-shared", "gflags-static", "gflags_nothreads-shared",
+ "gflags_nothreads-static"
+ ]: # special for gflags
assert PaddleLDFlag.cmake_bool(self.gflags_location)
return self.gflags_location
elif len(cmake_flag) != 0:
@@ -127,12 +136,22 @@ try:
:type cmake_str: str
:rtype: bool
"""
- if cmake_str in ["FALSE", "OFF", "NO"] or cmake_str.endswith("-NOTFOUND"):
+ if cmake_str in ["FALSE", "OFF", "NO"] or cmake_str.endswith(
+ "-NOTFOUND"):
return False
else:
return True
+ def c_flag(self):
+ if self.with_coverage:
+ return ["-fprofile-arcs", "-ftest-coverage", "-O0", "-g"]
+ else:
+ return None
except ImportError:
+
class PaddleLDFlag(object):
def ldflag_str(self):
pass
+
+ def c_flag(self):
+ pass
diff --git a/paddle/api/test/CMakeLists.txt b/paddle/api/test/CMakeLists.txt
index c4c26e6c03fdff51696f75f4d6a522cff60e7cca..08a0fe96a004d38b81d0bac881da1faeb52685f4 100644
--- a/paddle/api/test/CMakeLists.txt
+++ b/paddle/api/test/CMakeLists.txt
@@ -1,2 +1,2 @@
add_test(NAME test_swig_api
- COMMAND bash ${PROJ_ROOT}/paddle/api/test/run_tests.sh)
\ No newline at end of file
+ COMMAND bash ${PROJ_ROOT}/paddle/api/test/run_tests.sh)
diff --git a/paddle/api/test/testArguments.py b/paddle/api/test/testArguments.py
index daedd2409effccba27ff6818fc2603d3e1665bde..70fb169fd5c43d5768e67ad8e4c62a9f4d302eaf 100644
--- a/paddle/api/test/testArguments.py
+++ b/paddle/api/test/testArguments.py
@@ -32,7 +32,7 @@ class TestArguments(unittest.TestCase):
iv = args.getSlotIds(0)
assert isinstance(iv, swig_paddle.IVector)
np_arr = iv.toNumpyArrayInplace()
- self.assertEqual(np_arr.shape, (6,))
+ self.assertEqual(np_arr.shape, (6, ))
if __name__ == '__main__':
diff --git a/paddle/api/test/testGradientMachine.py b/paddle/api/test/testGradientMachine.py
index 59b36a012a239730a1d0a5b239a3ba69f0cee1fb..e12613fbb8a66545dd3ad20d59b0b951e86e8683 100644
--- a/paddle/api/test/testGradientMachine.py
+++ b/paddle/api/test/testGradientMachine.py
@@ -30,8 +30,8 @@ class TestGradientMachine(unittest.TestCase):
self.assertIsNotNone(model_config)
machine = swig_paddle.GradientMachine.createByModelConfig(
model_config, swig_paddle.CREATE_MODE_NORMAL,
- swig_paddle.ParameterOptimizer.create(
- opt_config).getParameterTypes())
+ swig_paddle.ParameterOptimizer.create(opt_config).getParameterTypes(
+ ))
self.assertIsNotNone(machine)
ipt, _ = util.loadMNISTTrainData()
output = swig_paddle.Arguments.createArguments(0)
@@ -43,7 +43,7 @@ class TestGradientMachine(unittest.TestCase):
assert isinstance(param, swig_paddle.Parameter)
val = param.getBuf(swig_paddle.PARAMETER_VALUE)
assert isinstance(val, swig_paddle.Vector)
- arr = numpy.full((len(val),), 0.1, dtype="float32")
+ arr = numpy.full((len(val), ), 0.1, dtype="float32")
val.copyFromNumpyArray(arr)
param_config = param.getConfig().toProto()
assert isinstance(param_config,
diff --git a/paddle/api/test/testMatrix.py b/paddle/api/test/testMatrix.py
index 2216ef30a58b0d97bba210bf0edee02a18264076..0432345edd659f13bddb1b99f62622c5ea64a4cb 100644
--- a/paddle/api/test/testMatrix.py
+++ b/paddle/api/test/testMatrix.py
@@ -42,7 +42,7 @@ class TestMatrix(unittest.TestCase):
self.assertEqual(m.getSparseRowCols(2), [])
def test_sparse_value(self):
- m = swig_paddle.Matrix.createSparse(3, 3, 6, False)
+ m = swig_paddle.Matrix.createSparse(3, 3, 6, False, False, False)
self.assertIsNotNone(m)
m.sparseCopyFrom([0, 2, 3, 3], [0, 1, 2], [7.3, 4.2, 3.2])
@@ -66,10 +66,11 @@ class TestMatrix(unittest.TestCase):
self.assertIsNotNone(m)
self.assertTrue(abs(m.get(1, 1) - 0.5) < 1e-5)
- def test_numpy(self):
+ def test_numpyCpu(self):
numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32")
m = swig_paddle.Matrix.createCpuDenseFromNumpy(numpy_mat)
- self.assertEqual((int(m.getHeight()), int(m.getWidth())), numpy_mat.shape)
+ self.assertEqual((int(m.getHeight()), int(m.getWidth())),
+ numpy_mat.shape)
# the numpy matrix and paddle matrix shared the same memory.
numpy_mat[0, 1] = 342.23
@@ -99,8 +100,20 @@ class TestMatrix(unittest.TestCase):
for a, e in zip(gpu_m.getData(), [1.0, 3.23, 3.0, 4.0, 5.0, 6.0]):
self.assertAlmostEqual(a, e)
+
+ def test_numpy(self):
+ numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32")
+ m = swig_paddle.Matrix.createDenseFromNumpy(numpy_mat)
+ self.assertEqual((int(m.getHeight()), int(m.getWidth())), numpy_mat.shape)
+ self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
+ for a, e in zip(m.getData(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]):
+ self.assertAlmostEqual(a, e)
if __name__ == "__main__":
swig_paddle.initPaddle("--use_gpu=0")
- unittest.main()
+ suite = unittest.TestLoader().loadTestsFromTestCase(TestMatrix)
+ unittest.TextTestRunner().run(suite)
+ if swig_paddle.isGpuVersion():
+ swig_paddle.setUseGpu(True)
+ unittest.main()
diff --git a/paddle/api/test/testTrain.py b/paddle/api/test/testTrain.py
index 7759118a3d9d108f0c05d985ac74a5122799ccb4..a3ba4eaaa69b39b75e7ece3095b6f236c1248d41 100644
--- a/paddle/api/test/testTrain.py
+++ b/paddle/api/test/testTrain.py
@@ -98,7 +98,8 @@ def main():
cost_vec = outArgs.getSlotValue(0)
assert isinstance(cost_vec, swig_paddle.Matrix)
cost_vec = cost_vec.copyToNumpyMat()
- print 'Finish Batch', batch_id, 'with cost ', cost_vec.sum() / batch_size
+ print 'Finish Batch', batch_id, 'with cost ', cost_vec.sum(
+ ) / batch_size
batch_id += 1
for optimizer in optimizers:
diff --git a/paddle/api/test/testTrainConfig.py b/paddle/api/test/testTrainConfig.py
index 22148e31915da0c21609fe0694274cfaee4b3950..77e0cd37d566d2571fada76b9948a9b0616ad044 100644
--- a/paddle/api/test/testTrainConfig.py
+++ b/paddle/api/test/testTrainConfig.py
@@ -1,9 +1,6 @@
from paddle.trainer_config_helpers import *
-settings(
- batch_size=100,
- learning_method=AdamOptimizer()
-)
+settings(batch_size=100, learning_method=AdamOptimizer())
din = data_layer(name='input', size=784)
diff --git a/paddle/api/test/testTrainer.py b/paddle/api/test/testTrainer.py
index da69a60f84f4d7c6fad54fc116a31b54ef162f60..edd5a2da5785c405b46c2559ee93837ac68d7c3a 100644
--- a/paddle/api/test/testTrainer.py
+++ b/paddle/api/test/testTrainer.py
@@ -17,9 +17,9 @@ from paddle.trainer.config_parser import logger
from py_paddle import swig_paddle
import util
+
def main():
- trainer_config = parse_config(
- "./testTrainConfig.py", "")
+ trainer_config = parse_config("./testTrainConfig.py", "")
model = swig_paddle.GradientMachine.createFromConfigProto(
trainer_config.model_config)
trainer = swig_paddle.Trainer.create(trainer_config, model)
@@ -56,7 +56,7 @@ def main():
logger.info('test cost=%f' % (cost / num))
trainer.finishTrain()
-
+
if __name__ == '__main__':
swig_paddle.initPaddle("--use_gpu=0", "--trainer_count=1")
diff --git a/paddle/api/test/testVector.py b/paddle/api/test/testVector.py
index f5b5d0e32e4208e7becb9755d1aed131f52ff146..48aaa1d73da9e6c207ad5fa2be14a531267bd901 100644
--- a/paddle/api/test/testVector.py
+++ b/paddle/api/test/testVector.py
@@ -20,20 +20,28 @@ import unittest
class TestIVector(unittest.TestCase):
def test_createZero(self):
- m = swig_paddle.IVector.createZero(10)
+ m = swig_paddle.IVector.createZero(10, False)
self.assertIsNotNone(m)
for i in xrange(10):
self.assertEqual(m[i], 0)
m[i] = i
self.assertEqual(m[i], i)
+
+ m = swig_paddle.IVector.createZero(10)
+ self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
+ self.assertEqual(m.getData(), [0]*10)
def test_create(self):
- m = swig_paddle.IVector.create(range(10))
+ m = swig_paddle.IVector.create(range(10), False)
self.assertIsNotNone(m)
for i in xrange(10):
self.assertEqual(m[i], i)
+
+ m = swig_paddle.IVector.create(range(10))
+ self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
+ self.assertEqual(m.getData(), range(10))
- def test_numpy(self):
+ def test_cpu_numpy(self):
vec = np.array([1, 3, 4, 65, 78, 1, 4], dtype="int32")
iv = swig_paddle.IVector.createCpuVectorFromNumpy(vec)
self.assertEqual(vec.shape[0], int(iv.__len__()))
@@ -61,25 +69,43 @@ class TestIVector(unittest.TestCase):
expect_vec = range(0, 10)
expect_vec[4] = 7
self.assertEqual(vec.getData(), expect_vec)
+
+ def test_numpy(self):
+ vec = np.array([1, 3, 4, 65, 78, 1, 4], dtype="int32")
+ iv = swig_paddle.IVector.createVectorFromNumpy(vec)
+ self.assertEqual(iv.isGpu(), swig_paddle.isUsingGpu())
+ self.assertEqual(iv.getData(), list(vec))
class TestVector(unittest.TestCase):
def testCreateZero(self):
- v = swig_paddle.Vector.createZero(10)
+ v = swig_paddle.Vector.createZero(10, False)
self.assertIsNotNone(v)
for i in xrange(len(v)):
self.assertTrue(util.doubleEqual(v[i], 0))
v[i] = i
self.assertTrue(util.doubleEqual(v[i], i))
+
+ v = swig_paddle.Vector.createZero(10)
+ self.assertEqual(v.isGpu(), swig_paddle.isUsingGpu())
+ self.assertEqual(v.getData(), [0]*10)
def testCreate(self):
- v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)])
+ v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)], False)
self.assertIsNotNone(v)
for i in xrange(len(v)):
self.assertTrue(util.doubleEqual(v[i], i / 100.0))
self.assertEqual(100, len(v))
+
+ v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)])
+ self.assertEqual(v.isGpu(), swig_paddle.isUsingGpu())
+ self.assertEqual(100, len(v))
+ vdata = v.getData()
+ for i in xrange(len(v)):
+ self.assertTrue(util.doubleEqual(vdata[i], i / 100.0))
+
- def testNumpy(self):
+ def testCpuNumpy(self):
numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32")
vec = swig_paddle.Vector.createCpuVectorFromNumpy(numpy_arr)
assert isinstance(vec, swig_paddle.Vector)
@@ -102,9 +128,18 @@ class TestVector(unittest.TestCase):
for i in xrange(1, len(numpy_3)):
util.doubleEqual(numpy_3[i], vec[i])
+
+ def testNumpy(self):
+ numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32")
+ vec = swig_paddle.Vector.createVectorFromNumpy(numpy_arr)
+ self.assertEqual(vec.isGpu(), swig_paddle.isUsingGpu())
+ vecData = vec.getData()
+ for n, v in zip(numpy_arr, vecData):
+ self.assertTrue(util.doubleEqual(n, v))
+
def testCopyFromNumpy(self):
- vec = swig_paddle.Vector.createZero(1)
+ vec = swig_paddle.Vector.createZero(1, False)
arr = np.array([1.3, 3.2, 2.4], dtype="float32")
vec.copyFromNumpyArray(arr)
for i in xrange(len(vec)):
@@ -112,5 +147,9 @@ class TestVector(unittest.TestCase):
if __name__ == '__main__':
- swig_paddle.initPaddle("--use_gpu=1" if swig_paddle.isGpuVersion() else "--use_gpu=0")
- unittest.main()
+ swig_paddle.initPaddle("--use_gpu=0")
+ suite = unittest.TestLoader().loadTestsFromTestCase(TestVector)
+ unittest.TextTestRunner().run(suite)
+ if swig_paddle.isGpuVersion():
+ swig_paddle.setUseGpu(True)
+ unittest.main()
\ No newline at end of file
diff --git a/paddle/cuda/CMakeLists.txt b/paddle/cuda/CMakeLists.txt
index cdb730bb3cec7a32fa42cf4c6738d575b76c6032..11dbfb54b268774405ade1e532bef9a0e8c7ada9 100755
--- a/paddle/cuda/CMakeLists.txt
+++ b/paddle/cuda/CMakeLists.txt
@@ -81,5 +81,8 @@ else()
add_library(paddle_cuda ${CUDA_SOURCES})
endif()
-add_style_check_target(paddle_cuda ${CUDA_SOURCES})
-add_style_check_target(paddle_cuda ${CUDA_HEADERS})
+add_style_check_target(paddle_cuda
+ ${CUDA_SOURCES}
+ ${CUDA_HEADERS}
+ ${CUDA_DSO_SOURCES}
+ ${CUDA_CXX_WITH_GPU_SOURCES})
diff --git a/paddle/cuda/include/hl_activation_functions.h b/paddle/cuda/include/hl_activation_functions.h
index c8aabc7844cd48d7ebdd0077684f9efa50f941a2..03e15b2223a50625c6999f6b081ae984e76b182b 100644
--- a/paddle/cuda/include/hl_activation_functions.h
+++ b/paddle/cuda/include/hl_activation_functions.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_ACTIVATION_FUNCTIONS_H_
#define HL_ACTIVATION_FUNCTIONS_H_
@@ -21,11 +20,8 @@ limitations under the License. */
/**
* Active functions: sigmoid, relu, tanh and linear.
*/
-#define HPPL_ACTIVE_FUNCTION {hppl::sigmoid, \
- hppl::relu, \
- hppl::tanh, \
- hppl::linear \
- }
+#define HPPL_ACTIVE_FUNCTION \
+ { hppl::sigmoid, hppl::relu, hppl::tanh, hppl::linear }
namespace hppl {
@@ -42,18 +38,18 @@ public:
#ifdef __NVCC__
namespace gpu {
-static __device__ Active::forward forward[] = HPPL_ACTIVE_FUNCTION;
+static __device__ Active::forward forward[] = HPPL_ACTIVE_FUNCTION;
static __device__ Active::backward backward[] = HPPL_ACTIVE_FUNCTION;
}
#else
namespace cpu {
-static Active::forward forward[] = HPPL_ACTIVE_FUNCTION;
+static Active::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active::backward backward[] = HPPL_ACTIVE_FUNCTION;
}
#ifdef __AVX__
namespace avx {
-static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
+static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
}
#endif
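
The HPPL_ACTIVE_FUNCTION initializer above fills the forward[]/backward[] arrays in enum order, so an activation can be dispatched by indexing with hl_activation_mode_t (declared in hl_base.h). A minimal sketch of that use, assuming the cpu namespace nests inside hppl as in this header and that the backward variant takes (output, gradient); the variable names are hypothetical:

  // Dispatch an activation by its enum value; HL_ACTIVATION_RELU indexes
  // the second entry of { sigmoid, relu, tanh, linear }.
  hl_activation_mode_t act = HL_ACTIVATION_RELU;
  real x = -0.5;
  real y = hppl::cpu::forward[act](x);         // relu(-0.5) == 0
  real dx = hppl::cpu::backward[act](y, 1.0);  // gradient w.r.t. the input
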
diff --git a/paddle/cuda/include/hl_aggregate.h b/paddle/cuda/include/hl_aggregate.h
index db75809f5de195d41577ed6569e8508f48241b69..a6d9ff8483eee28b2c8a380f0aca097c7662a02e 100644
--- a/paddle/cuda/include/hl_aggregate.h
+++ b/paddle/cuda/include/hl_aggregate.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_AGGREGATE_H_
#define HL_AGGREGATE_H_
diff --git a/paddle/cuda/include/hl_avx_functions.h b/paddle/cuda/include/hl_avx_functions.h
index cf062dd969bf79554e00369367e3b85c2ae7fc0d..ed339e312a7639cf9b78f130a43d67a7446576bb 100644
--- a/paddle/cuda/include/hl_avx_functions.h
+++ b/paddle/cuda/include/hl_avx_functions.h
@@ -12,22 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_AVX_FUNCTIONS_H_
#define HL_AVX_FUNCTIONS_H_
#include <immintrin.h>
namespace hppl {
- __m256 relu(const __m256 a);
- __m256 sigmoid(const __m256 a);
- __m256 tanh(const __m256 a);
- __m256 linear(const __m256 a);
-
- __m256 relu(const __m256 a, const __m256 b);
- __m256 sigmoid(const __m256 a, const __m256 b);
- __m256 tanh(const __m256 a, const __m256 b);
- __m256 linear(const __m256 a, const __m256 b);
+__m256 relu(const __m256 a);
+__m256 sigmoid(const __m256 a);
+__m256 tanh(const __m256 a);
+__m256 linear(const __m256 a);
+
+__m256 relu(const __m256 a, const __m256 b);
+__m256 sigmoid(const __m256 a, const __m256 b);
+__m256 tanh(const __m256 a, const __m256 b);
+__m256 linear(const __m256 a, const __m256 b);
} // namespace hppl
#endif // HL_AVX_FUNCTIONS_H_
diff --git a/paddle/cuda/include/hl_base.h b/paddle/cuda/include/hl_base.h
index 1fe2774cc5a291dbafb61b50d63553b086512e4d..a076952467a5ce10dc1f58007dda2170aa694fbb 100644
--- a/paddle/cuda/include/hl_base.h
+++ b/paddle/cuda/include/hl_base.h
@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
-
#ifndef HL_BASE_H_
#define HL_BASE_H_
@@ -33,36 +31,36 @@ limitations under the License. */
* HPPL_STREAM_DEFAULT is HPPL default stream.
*/
typedef enum {
- HPPL_STREAM_DEFAULT = 0, /* Thread Default Stream*/
- HPPL_STREAM_1 = 1,
- HPPL_STREAM_2 = 2,
- HPPL_STREAM_3 = 3,
- HPPL_STREAM_4 = 4,
- HPPL_THREAD_STREAM_1 = 5,
- HPPL_THREAD_STREAM_2 = 6,
- HPPL_THREAD_STREAM_3 = 7,
- HPPL_THREAD_STREAM_4 = 8,
- HPPL_STREAM_END
+ HPPL_STREAM_DEFAULT = 0, /* Thread Default Stream*/
+ HPPL_STREAM_1 = 1,
+ HPPL_STREAM_2 = 2,
+ HPPL_STREAM_3 = 3,
+ HPPL_STREAM_4 = 4,
+ HPPL_THREAD_STREAM_1 = 5,
+ HPPL_THREAD_STREAM_2 = 6,
+ HPPL_THREAD_STREAM_3 = 7,
+ HPPL_THREAD_STREAM_4 = 8,
+ HPPL_STREAM_END
} hl_stream_t;
/**
* @brief HPPL activation mode.
*/
typedef enum {
- HL_ACTIVATION_SIGMOID = 0,
- HL_ACTIVATION_RELU = 1,
- HL_ACTIVATION_TANH = 2,
- HL_ACTIVATION_LINEAR = 3,
- HL_ACTIVATION_END
+ HL_ACTIVATION_SIGMOID = 0,
+ HL_ACTIVATION_RELU = 1,
+ HL_ACTIVATION_TANH = 2,
+ HL_ACTIVATION_LINEAR = 3,
+ HL_ACTIVATION_END
} hl_activation_mode_t;
/**
* @brief Transpose type.
*/
typedef enum {
- HPPL_OP_N = 0, /* transpose */
- HPPL_OP_T = 1, /* non transpose */
- HPPL_OP_END
+ HPPL_OP_N = 0, /* transpose */
+ HPPL_OP_T = 1, /* non transpose */
+ HPPL_OP_END
} hl_trans_op_t;
/**
@@ -148,23 +146,21 @@ typedef struct {
* @brief Sparse matrix value type.
*/
typedef enum {
- HL_NO_VALUE = 0, /* matrix values only 0 or 1 */
- HL_FLOAT_VALUE = 1,
- HL_VALUE_END
+ HL_NO_VALUE = 0, /* matrix values only 0 or 1 */
+ HL_FLOAT_VALUE = 1,
+ HL_VALUE_END
} hl_matrix_value_t;
-
/**
* @brief HPPL matrix format.
*/
typedef enum {
- HL_SPARSE_CSR = 0,
- HL_SPARSE_CSC = 1,
- HL_SPARSE_END
+ HL_SPARSE_CSR = 0,
+ HL_SPARSE_CSC = 1,
+ HL_SPARSE_END
} hl_matrix_format_t;
-
-typedef struct _hl_matrix_s * hl_matrix_s;
+typedef struct _hl_matrix_s *hl_matrix_s;
/**
* @brief HPPL sparse matrix.
@@ -177,12 +173,12 @@ typedef struct _hl_matrix_s * hl_matrix_s;
* @param nnz nonzero values of sparse matrix.
*/
typedef struct {
- hl_matrix_s matrix;
- hl_matrix_format_t format;
- hl_matrix_value_t type;
- int rows;
- int cols;
- size_t nnz;
+ hl_matrix_s matrix;
+ hl_matrix_format_t format;
+ hl_matrix_value_t type;
+ int rows;
+ int cols;
+ size_t nnz;
} _hl_sparse_matrix_s, *hl_sparse_matrix_s;
#ifndef PADDLE_TYPE_DOUBLE
@@ -195,7 +191,7 @@ typedef struct {
*
* HL_FLOAT_MIN: 1.17549435e-38F
*/
-#define HL_FLOAT_MAX 3.40282347e+38F
+#define HL_FLOAT_MAX 3.40282347e+38F
/**
* if real == double
*
@@ -203,19 +199,26 @@ typedef struct {
*
* HL_FLOAT_MIN: 2.2250738585072014e-308
*/
-#define HL_FLOAT_MIN 1.17549435e-38F
+#define HL_FLOAT_MIN 1.17549435e-38F
#else
-#define HL_FLOAT_MAX 1.7976931348623157e+308
-#define HL_FLOAT_MIN 2.2250738585072014e-308
+#define HL_FLOAT_MAX 1.7976931348623157e+308
+#define HL_FLOAT_MIN 2.2250738585072014e-308
#endif
+/**
+ * The maximum input value for exp, used to avoid overflow problems.
+ *
+ * Currently only used for the tanh function.
+ */
+#define EXP_MAX_INPUT 40.0
+
/**
* @brief DIVUP(x, y) is similar to ceil(x / y).
* @note For CUDA, DIVUP will be used to specify
* the size of blockDim.
*/
#ifndef DIVUP
-#define DIVUP(x, y) (((x) + (y) - 1) / (y))
+#define DIVUP(x, y) (((x) + (y)-1) / (y))
#endif
#ifdef __NVCC__
@@ -224,7 +227,7 @@ typedef struct {
#include "hl_cuda.h"
#include "cuda_runtime.h"
-extern __thread bool g_sync_flag;
+extern __thread bool g_sync_flag;
extern __thread cudaStream_t default_stream;
#define STREAM_DEFAULT default_stream
@@ -232,17 +235,15 @@ extern __thread cudaStream_t default_stream;
* @brief Check cuda kernel execution.
* @param msg error string
*/
-#define CHECK_SYNC(msg) \
- if (true == g_sync_flag) { \
- hl_stream_synchronize(HPPL_STREAM_DEFAULT); \
- cudaError_t err \
- = (cudaError_t)hl_get_device_last_error(); \
- CHECK_EQ(cudaSuccess, err) << "[" << msg << "] " \
- << "CUDA error: " \
- << hl_get_device_error_string((size_t)err); \
+#define CHECK_SYNC(msg) \
+ if (true == g_sync_flag) { \
+ hl_stream_synchronize(HPPL_STREAM_DEFAULT); \
+ cudaError_t err = (cudaError_t)hl_get_device_last_error(); \
+ CHECK_EQ(cudaSuccess, err) \
+ << "[" << msg << "] " \
+ << "CUDA error: " << hl_get_device_error_string((size_t)err); \
}
-#endif /* __NVCC__ */
-
-#endif /* HL_BASE_H_ */
+#endif /* __NVCC__ */
+#endif /* HL_BASE_H_ */
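
DIVUP(x, y) is integer ceil-division and is what kernel launch code typically uses to size a grid so that every element is covered; EXP_MAX_INPUT plays a similar guarding role for exp() in tanh. A small worked sketch of DIVUP (the element count and block size are made up for illustration):

  int numElements = 1000;                         // hypothetical problem size
  int blockSize = 256;                            // hypothetical threads per block
  int numBlocks = DIVUP(numElements, blockSize);  // (1000 + 255) / 256 == 4
  // 4 blocks * 256 threads == 1024 >= 1000, so every element gets a thread.
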
diff --git a/paddle/cuda/include/hl_batch_transpose.h b/paddle/cuda/include/hl_batch_transpose.h
index 414c7996acee4ccbe2d7dbd093e25a23119fea3c..f3630e9762508fd39935e62e0007de04f9140fff 100644
--- a/paddle/cuda/include/hl_batch_transpose.h
+++ b/paddle/cuda/include/hl_batch_transpose.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_BATCH_TRANSPOSE_H_
#define HL_BATCH_TRANSPOSE_H_
@@ -31,10 +30,7 @@ limitations under the License. */
* order. Each batch has height * width data, which are
* arranged in height-first (or row-first) manner.
*/
-extern void batchTranspose(const real* input,
- real* output,
- int width,
- int height,
- int batchSize);
+extern void batchTranspose(
+ const real* input, real* output, int width, int height, int batchSize);
#endif // HL_BATCH_TRANSPOSE_H_
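
batchTranspose works on batchSize independent height x width matrices stored row-first. A plain-CPU reference of that documented layout (a sketch of the semantics, not the CUDA kernel; it assumes each transposed output matrix is likewise stored row-first):

void batchTransposeRef(
    const real* input, real* output, int width, int height, int batchSize) {
  for (int b = 0; b < batchSize; ++b) {
    const real* in = input + b * height * width;  // b-th input matrix
    real* out = output + b * height * width;      // b-th output matrix
    for (int r = 0; r < height; ++r)
      for (int c = 0; c < width; ++c)
        out[c * height + r] = in[r * width + c];  // (r, c) -> (c, r)
  }
}
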
diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h
index d19f4a4bb310a73d896bc8f4179f41b1a5752e54..cffaac634f0f64be5ddab961d549ae43775bb7b0 100644
--- a/paddle/cuda/include/hl_cnn.h
+++ b/paddle/cuda/include/hl_cnn.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CNN_H_
#define HL_CNN_H_
@@ -37,15 +36,21 @@ limitations under the License. */
* @param[in] alpha
* @param[in] beta
*/
-extern void hl_shrink_col2feature(
- const real * dataCol, size_t channels,
- size_t height, size_t width,
- size_t blockH, size_t blockW,
- size_t strideH, size_t strideW,
- size_t paddingH, size_t paddingW,
- size_t outputH, size_t outputW,
- real* dataIm,
- real alpha = 1.0f, real beta = 0.0f);
+extern void hl_shrink_col2feature(const real* dataCol,
+ size_t channels,
+ size_t height,
+ size_t width,
+ size_t blockH,
+ size_t blockW,
+ size_t strideH,
+ size_t strideW,
+ size_t paddingH,
+ size_t paddingW,
+ size_t outputH,
+ size_t outputW,
+ real* dataIm,
+ real alpha = 1.0f,
+ real beta = 0.0f);
/**
* @brief Expand feature to column.
@@ -65,14 +70,19 @@ extern void hl_shrink_col2feature(
* @param[out] dataCol expand data.
*
*/
-extern void hl_expand_feature2col(
- const real* dataIm, size_t channels,
- size_t height, size_t width,
- size_t blockH, size_t blockW,
- size_t strideH, size_t strideW,
- size_t paddingH, size_t paddingW,
- size_t outputH, size_t outputW,
- real* dataCol);
+extern void hl_expand_feature2col(const real* dataIm,
+ size_t channels,
+ size_t height,
+ size_t width,
+ size_t blockH,
+ size_t blockW,
+ size_t strideH,
+ size_t strideW,
+ size_t paddingH,
+ size_t paddingW,
+ size_t outputH,
+ size_t outputW,
+ real* dataCol);
/**
* @brief Maximum pool forward.
@@ -91,16 +101,24 @@ extern void hl_expand_feature2col(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
+ * @param[in] tgtStride stride between output data samples.
*
*/
-extern void hl_maxpool_forward(
- const int frameCnt, const real* inputData,
- const int channels,
- const int height, const int width,
- const int pooledH, const int pooledW,
- const int sizeX, const int sizeY,
- const int strideH, const int strideW,
- const int paddingH, const int paddingW, real* tgtData);
+extern void hl_maxpool_forward(const int frameCnt,
+ const real* inputData,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooledH,
+ const int pooledW,
+ const int sizeX,
+ const int sizeY,
+ const int strideH,
+ const int strideW,
+ const int paddingH,
+ const int paddingW,
+ real* tgtData,
+ const int tgtStride);
/**
* @brief Maximum pool backward.
@@ -123,19 +141,28 @@ extern void hl_maxpool_forward(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] targetGrad output grad.
+ * @param[in] outStride stride between output data samples.
*
*/
-extern void hl_maxpool_backward(
- const int frameCnt, const real* inputData,
- const real* outData, const real* outGrad,
- const int channels, const int height,
- const int width,
- const int pooledH, const int pooledW,
- const int sizeX, const int sizeY,
- const int strideH, const int strideW,
- const int paddingH, const int paddingW,
- real scaleA, real scaleB,
- real* targetGrad);
+extern void hl_maxpool_backward(const int frameCnt,
+ const real* inputData,
+ const real* outData,
+ const real* outGrad,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooledH,
+ const int pooledW,
+ const int sizeX,
+ const int sizeY,
+ const int strideH,
+ const int strideW,
+ const int paddingH,
+ const int paddingW,
+ real scaleA,
+ real scaleB,
+ real* targetGrad,
+ const int outStride);
/**
 * @brief Average pool forward.
@@ -154,16 +181,24 @@ extern void hl_maxpool_backward(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
+ * @param[in] tgtStride stride between output data samples.
*
*/
-extern void hl_avgpool_forward(
- const int frameCnt, const real* inputData,
- const int channels,
- const int height, const int width,
- const int pooledH, const int pooledW,
- const int sizeX, const int sizeY,
- const int strideH, const int strideW,
- const int paddingH, const int paddingW, real* tgtData);
+extern void hl_avgpool_forward(const int frameCnt,
+ const real* inputData,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooledH,
+ const int pooledW,
+ const int sizeX,
+ const int sizeY,
+ const int strideH,
+ const int strideW,
+ const int paddingH,
+ const int paddingW,
+ real* tgtData,
+ const int tgtStride);
/**
 * @brief Average pool backward.
@@ -184,18 +219,26 @@ extern void hl_avgpool_forward(
* @param[in] scaleA scale.
* @param[in] scaleB scale.
* @param[out] backGrad output grad.
+ * @param[in] outStride stride between output data samples.
*
*/
-extern void hl_avgpool_backward(
- const int frameCnt, const real* outGrad,
- const int channels, const int height,
- const int width,
- const int pooledH, const int pooledW,
- const int sizeX, const int sizeY,
- const int strideH, const int strideW,
- int paddingH, int paddingW,
- real scaleA, real scaleB,
- real* backGrad);
+extern void hl_avgpool_backward(const int frameCnt,
+ const real* outGrad,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooledH,
+ const int pooledW,
+ const int sizeX,
+ const int sizeY,
+ const int strideH,
+ const int strideW,
+ int paddingH,
+ int paddingW,
+ real scaleA,
+ real scaleB,
+ real* backGrad,
+ const int outStride);
/**
 * @brief Cross-map-response normalize forward.
@@ -212,10 +255,16 @@ extern void hl_avgpool_backward(
* @param[in] beta scale.
*
*/
-extern void hl_CMRNorm_forward(
- size_t frameCnt, const real* in, real* scale, real* out,
- size_t channels, size_t height, size_t width, size_t sizeX,
- real alpha, real beta);
+extern void hl_CMRNorm_forward(size_t frameCnt,
+ const real* in,
+ real* scale,
+ real* out,
+ size_t channels,
+ size_t height,
+ size_t width,
+ size_t sizeX,
+ real alpha,
+ real beta);
/**
 * @brief Cross-map-response normalize backward.
@@ -234,11 +283,82 @@ extern void hl_CMRNorm_forward(
* @param[in] beta scale.
*
*/
-extern void hl_CMRNorm_backward(
- size_t frameCnt, const real* inV, const real* scale,
- const real* outV, const real* outDiff, real *inDiff,
- size_t channels, size_t height, size_t width, size_t sizeX,
- real alpha, real beta);
+extern void hl_CMRNorm_backward(size_t frameCnt,
+ const real* inV,
+ const real* scale,
+ const real* outV,
+ const real* outDiff,
+ real* inDiff,
+ size_t channels,
+ size_t height,
+ size_t width,
+ size_t sizeX,
+ real alpha,
+ real beta);
+
+/**
+ * @brief Bilinear interpolation forward.
+ *
+ * @param[in] inData input value.
+ * @param[in] inImgH input image height.
+ * @param[in] inImgW input image width.
+ * @param[in] inputH input batchSize.
+ * @param[in] inputW input image data dim.
+ * @param[out] outData output value.
+ * @param[in] outImgH output image height.
+ * @param[in] outImgW output image width.
+ * @param[in] outputH output batchSize.
+ * @param[in] outputW output image data dim.
+ * @param[in] numChannels number of channels.
+ * @param[in] ratioH inImgH / outImgH.
+ * @param[in] ratioW inImgW / outImgW.
+ *
+ */
+extern void hl_bilinear_forward(const real* inData,
+ const size_t inImgH,
+ const size_t inImgW,
+ const size_t inputH,
+ const size_t inputW,
+ real* outData,
+ const size_t outImgH,
+ const size_t outImgW,
+ const size_t outputH,
+ const size_t outputW,
+ const size_t numChannels,
+ const real ratioH,
+ const real ratioW);
+
+/**
+* @brief Bilinear interpolation backward.
+*
+* @param[out] inGrad input gradient.
+* @param[in] inImgH input image height.
+* @param[in] inImgW input image width.
+* @param[in] inputH input batchSize.
+* @param[in] inputW input image data dim.
+* @param[in] outGrad output gradient.
+* @param[in] outImgH output image height.
+* @param[in] outImgW output image width.
+* @param[in] outputH output batchSize.
+* @param[in] outputW output image data dim.
+* @param[in] numChannels number of channels.
+* @param[in] ratioH inImgH / outImgH.
+* @param[in] ratioW inImgW / outImgW.
+*
+*/
+extern void hl_bilinear_backward(real* inGrad,
+ const size_t inImgH,
+ const size_t inImgW,
+ const size_t inputH,
+ const size_t inputW,
+ const real* outGrad,
+ const size_t outImgH,
+ const size_t outImgW,
+ const size_t outputH,
+ const size_t outputW,
+ const size_t numChannels,
+ const real ratioH,
+ const real ratioW);
/**
* @brief MaxOut forward.
@@ -251,9 +371,13 @@ extern void hl_CMRNorm_backward(
* @param[in] featLen feature length = image height * image width.
* @param[in] groups number of groups.
*/
-extern void hl_maxout_forward(
- const real* inData, real* outData, int* idData,
- size_t batchSize, size_t size, size_t featLen, size_t groups);
+extern void hl_maxout_forward(const real* inData,
+ real* outData,
+ int* idData,
+ size_t batchSize,
+ size_t size,
+ size_t featLen,
+ size_t groups);
/**
* @brief MaxOut backward.
@@ -266,8 +390,12 @@ extern void hl_maxout_forward(
* @param[in] featLen feature length = image height * image width.
* @param[in] groups number of groups.
*/
-extern void hl_maxout_backward(
- real* inGrad, const real* outGrad, const int* idData,
- size_t batchSize, size_t size, size_t featLen, size_t groups);
+extern void hl_maxout_backward(real* inGrad,
+ const real* outGrad,
+ const int* idData,
+ size_t batchSize,
+ size_t size,
+ size_t featLen,
+ size_t groups);
#endif /* HL_CNN_H_ */
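
In the bilinear API each image occupies one row of the input/output matrices, so inputH/outputH are batch sizes and inputW/outputW equal numChannels * imgH * imgW. A hedged setup sketch with made-up sizes; the simple quotient from the @param notes is used for the ratios, since this header does not say whether an "align corners" style (-1) correction is applied:

  size_t numChannels = 3, inImgH = 16, inImgW = 16;   // hypothetical input
  size_t outImgH = 32, outImgW = 32, batchSize = 8;   // hypothetical output
  size_t inputW = numChannels * inImgH * inImgW;      // one image per row
  size_t outputW = numChannels * outImgH * outImgW;
  real ratioH = static_cast<real>(inImgH) / outImgH;  // 0.5, per the comments
  real ratioW = static_cast<real>(inImgW) / outImgW;
  // hl_bilinear_forward(inData, inImgH, inImgW, batchSize, inputW,
  //                     outData, outImgH, outImgW, batchSize, outputW,
  //                     numChannels, ratioH, ratioW);
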
diff --git a/paddle/cuda/include/hl_cuda.h b/paddle/cuda/include/hl_cuda.h
index 3196db67f61fd2e6b75df4abb3652df4456a0366..357286e3188a6f3184bc56e75232bf2e1ec54e44 100644
--- a/paddle/cuda/include/hl_cuda.h
+++ b/paddle/cuda/include/hl_cuda.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_H_
#define HL_CUDA_H_
@@ -22,8 +21,7 @@ limitations under the License. */
/**
* @brief HPPL event.
*/
-typedef struct _hl_event_st * hl_event_t;
-
+typedef struct _hl_event_st *hl_event_t;
/**
* @brief return cuda runtime api version.
@@ -42,7 +40,7 @@ extern void hl_start();
* if device is NULL, will start all GPU.
* @param[in] number number of devices.
*/
-extern void hl_specify_devices_start(int* device, int number);
+extern void hl_specify_devices_start(int *device, int number);
/**
* @brief Queries if a device may directly access a peer device's memory.
@@ -126,7 +124,7 @@ extern int hl_get_device();
*
* @return dest_d pointer to device memory.
*/
-extern void* hl_malloc_device(size_t size);
+extern void *hl_malloc_device(size_t size);
/**
* @brief Free device memory.
@@ -143,7 +141,7 @@ extern void hl_free_mem_device(void *dest_d);
*
* @return dest_h pointer to host memory.
*/
-extern void* hl_malloc_host(size_t size);
+extern void *hl_malloc_host(size_t size);
/**
* @brief Free host page-lock memory.
@@ -228,9 +226,9 @@ extern void hl_srand(unsigned int seed);
* @param[in] stream stream id.
*/
extern void hl_memcpy_async(void *dst,
- void *src,
- size_t size,
- hl_stream_t stream);
+ void *src,
+ size_t size,
+ hl_stream_t stream);
/**
* @brief Waits for stream tasks to complete.
@@ -261,8 +259,7 @@ extern void hl_destroy_event(hl_event_t event);
*
* @return time Time between start and end in ms.
*/
-extern float hl_event_elapsed_time(hl_event_t start,
- hl_event_t end);
+extern float hl_event_elapsed_time(hl_event_t start, hl_event_t end);
/**
* @brief Records an event.
@@ -300,7 +297,7 @@ extern void hl_set_device_flags_block();
/**
* @brief Returns the last error string from a cuda runtime call.
*/
-extern const char* hl_get_device_error_string();
+extern const char *hl_get_device_error_string();
/**
* @brief Returns the last error string from a cuda runtime call.
@@ -309,7 +306,7 @@ extern const char* hl_get_device_error_string();
*
* @see hl_get_device_last_error()
*/
-extern const char* hl_get_device_error_string(size_t err);
+extern const char *hl_get_device_error_string(size_t err);
/**
* @brief Returns the last error number.
diff --git a/paddle/cuda/include/hl_cuda_cublas.h b/paddle/cuda/include/hl_cuda_cublas.h
index d757317eb4a97559feef22d4fd8edf7c10ca6745..db8c03c2c01c67788622d37b5330e22c31e03f34 100644
--- a/paddle/cuda/include/hl_cuda_cublas.h
+++ b/paddle/cuda/include/hl_cuda_cublas.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_CUBLAS_H_
#define HL_CUDA_CUBLAS_H_
@@ -29,12 +28,8 @@ limitations under the License. */
* @param[in] ldc the first dimension of C_d.
*
*/
-extern void hl_matrix_transpose(real *A_d,
- real *C_d,
- int dimM,
- int dimN,
- int lda,
- int ldc);
+extern void hl_matrix_transpose(
+ real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc);
/*
* @brief Matrix transpose, while lda = dimN, ldc = dimM.
@@ -45,10 +40,7 @@ extern void hl_matrix_transpose(real *A_d,
* @param[in] dimN matrix width.
*
*/
-extern void hl_matrix_transpose(real *A_d,
- real *C_d,
- int dimM,
- int dimN);
+extern void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN);
/*
* @brief Matrix inverse
@@ -60,11 +52,7 @@ extern void hl_matrix_transpose(real *A_d,
* @param[in] ldc the first dimension of C_d
*
*/
-extern void hl_matrix_inverse(real *A_d,
- real *C_d,
- int dimN,
- int lda,
- int ldc);
+extern void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc);
/**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
@@ -84,12 +72,19 @@ extern void hl_matrix_inverse(real *A_d,
* @param[in] ldc the first dimension of C_d.
*
*/
-extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+extern void hl_matrix_mul(real *A_d,
+ hl_trans_op_t transa,
+ real *B_d,
+ hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta,
- int lda, int ldb, int ldc);
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta,
+ int lda,
+ int ldb,
+ int ldc);
/**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
@@ -106,11 +101,16 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
* @param[in] beta scalar used for multiplication.
*
*/
-extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+extern void hl_matrix_mul(real *A_d,
+ hl_trans_op_t transa,
+ real *B_d,
+ hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta);
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta);
/**
* @brief This function performs the matrix-vector multiplication.
@@ -132,11 +132,17 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
*
*/
-extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
- real *B_d, real *C_d,
- int dimM, int dimN,
- real alpha, real beta,
- int lda, int incb, int incc);
+extern void hl_matrix_mul_vector(real *A_d,
+ hl_trans_op_t trans,
+ real *B_d,
+ real *C_d,
+ int dimM,
+ int dimN,
+ real alpha,
+ real beta,
+ int lda,
+ int incb,
+ int incc);
/**
* @brief This function performs the matrix-vector multiplication.
@@ -154,9 +160,13 @@ extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
* @param[in] beta scalar used for multiplication.
*
*/
-extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
- real *B_d, real *C_d,
- int dimM, int dimN,
- real alpha, real beta);
+extern void hl_matrix_mul_vector(real *A_d,
+ hl_trans_op_t trans,
+ real *B_d,
+ real *C_d,
+ int dimM,
+ int dimN,
+ real alpha,
+ real beta);
#endif /* HL_CUDA_CUBLAS_H_ */
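
The shorter hl_matrix_mul overload derives the leading dimensions from dimM/dimN/dimK itself. A hedged call sketch for the plain C = A * B case, assuming HPPL_OP_N selects the non-transposed operand (BLAS convention) and that A_d, B_d, C_d are device buffers of shape dimM x dimK, dimK x dimN and dimM x dimN:

  int dimM = 64, dimN = 128, dimK = 256;  // hypothetical GEMM shape
  // C_d = 1.0 * (A_d * B_d) + 0.0 * C_d
  hl_matrix_mul(A_d, HPPL_OP_N, B_d, HPPL_OP_N, C_d,
                dimM, dimN, dimK,
                /*alpha=*/1.0, /*beta=*/0.0);
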
diff --git a/paddle/cuda/include/hl_cuda_cudnn.h b/paddle/cuda/include/hl_cuda_cudnn.h
index f256cb54dfe69e8df7cc7fcc0ed0a58f3574acd3..3a2f916210277145efa8f6d7663a2698ea546b0b 100644
--- a/paddle/cuda/include/hl_cuda_cudnn.h
+++ b/paddle/cuda/include/hl_cuda_cudnn.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_CUDNN_H_
#define HL_CUDA_CUDNN_H_
@@ -22,7 +21,7 @@ limitations under the License. */
* hppl pooling mode
*/
typedef enum {
- HL_POOLING_MAX = 0,
+ HL_POOLING_MAX = 0,
// average includes padded values
HL_POOLING_AVERAGE = 1,
// average does not include padded values
@@ -324,17 +323,16 @@ extern void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
* @param[in] sizeInBytes gpu workspace size (bytes).
* @param[in] convBwdFilterAlgo backward filter algorithm.
*/
-extern void hl_convolution_backward_filter(
- hl_tensor_descriptor input,
- real* input_data,
- hl_tensor_descriptor output,
- real* output_grad_data,
- hl_filter_descriptor filter,
- real* filter_grad_data,
- hl_convolution_descriptor conv,
- void* gpuWorkSpace,
- size_t sizeInBytes,
- int convBwdFilterAlgo);
+extern void hl_convolution_backward_filter(hl_tensor_descriptor input,
+ real* input_data,
+ hl_tensor_descriptor output,
+ real* output_grad_data,
+ hl_filter_descriptor filter,
+ real* filter_grad_data,
+ hl_convolution_descriptor conv,
+ void* gpuWorkSpace,
+ size_t sizeInBytes,
+ int convBwdFilterAlgo);
/**
* @brief convolution backward data(calculate input image grad data).
@@ -350,17 +348,16 @@ extern void hl_convolution_backward_filter(
* @param[in] sizeInBytes gpu workspace size (bytes).
* @param[in] convBwdDataAlgo backward data algorithm.
*/
-extern void hl_convolution_backward_data(
- hl_tensor_descriptor input,
- real* input_data_grad,
- hl_tensor_descriptor output,
- real* output_grad_data,
- hl_filter_descriptor filter,
- real* filter_data,
- hl_convolution_descriptor conv,
- void* gpuWorkSpace,
- size_t sizeInBytes,
- int convBwdDataAlgo);
+extern void hl_convolution_backward_data(hl_tensor_descriptor input,
+ real* input_data_grad,
+ hl_tensor_descriptor output,
+ real* output_grad_data,
+ hl_filter_descriptor filter,
+ real* filter_data,
+ hl_convolution_descriptor conv,
+ void* gpuWorkSpace,
+ size_t sizeInBytes,
+ int convBwdDataAlgo);
/**
* @brief convolution backward bias(calculate bias grad data).
@@ -383,8 +380,8 @@ extern void hl_convolution_backward_bias(hl_tensor_descriptor bias,
* @param[in] height matrix height.
* @param[in] width matrix width.
*/
-extern void hl_softmax_forward(real *input,
- real *output,
+extern void hl_softmax_forward(real* input,
+ real* output,
int height,
int width);
@@ -396,8 +393,8 @@ extern void hl_softmax_forward(real *input,
* @param[in] height matrix height.
* @param[in] width matrix width.
*/
-extern void hl_softmax_backward(real *output_value,
- real *output_grad,
+extern void hl_softmax_backward(real* output_value,
+ real* output_grad,
int height,
int width);
@@ -426,18 +423,18 @@ extern void hl_softmax_backward(real *output_value,
*
*/
extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
- real *input,
+ real* input,
hl_tensor_descriptor outputDesc,
- real *output,
+ real* output,
hl_tensor_descriptor bnParamDesc,
- real *scale,
- real *bias,
+ real* scale,
+ real* bias,
double factor,
- real *runningMean,
- real *runningInvVar,
+ real* runningMean,
+ real* runningInvVar,
double epsilon,
- real *savedMean,
- real *savedVar);
+ real* savedMean,
+ real* savedVar);
/**
* @brief cudnn batch norm forward.
@@ -463,14 +460,14 @@ extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
*
*/
extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
- real *input,
+ real* input,
hl_tensor_descriptor outputDesc,
- real *output,
+ real* output,
hl_tensor_descriptor bnParamDesc,
- real *scale,
- real *bias,
- real *estimatedMean,
- real *estimatedVar,
+ real* scale,
+ real* bias,
+ real* estimatedMean,
+ real* estimatedVar,
double epsilon);
/**
@@ -483,7 +480,8 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
* @param[in] inGradDesc input tensor descriptor desc.
* @param[in] inGrad input data.
* @param[in] dBnParamDesc tensor descriptor desc.
- * bnScale, bnBias, running mean/var, save_mean/var.
+ * bnScale, bnBias, running mean/var,
+ * save_mean/var.
* @param[in] scale batch normalization scale parameter (in original
* paper scale is referred to as gamma).
* @param[in] scaleGrad batch normalization scale parameter (in original
@@ -497,17 +495,17 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
*
*/
extern void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
- real *input,
+ real* input,
hl_tensor_descriptor outGradDesc,
- real *outGrad,
+ real* outGrad,
hl_tensor_descriptor inGradDesc,
- real *inGrad,
+ real* inGrad,
hl_tensor_descriptor dBnParamDesc,
- real *scale,
- real *scaleGrad,
- real *biasGrad,
+ real* scale,
+ real* scaleGrad,
+ real* biasGrad,
double epsilon,
- real *savedMean,
- real *savedInvVar);
+ real* savedMean,
+ real* savedInvVar);
#endif // HL_CUDA_CUDNN_H_
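
hl_batch_norm_forward_inference wraps cuDNN's inference path. As a reminder of what is computed per element, here is a scalar reference of the standard formula; it is only a sketch, since the header does not spell it out, and estimatedVar is assumed to hold a variance (the training API's runningInvVar name hints the stored statistic may differ):

#include <cmath>

// y = scale * (x - mean) / sqrt(var + epsilon) + bias, applied per channel.
real batchNormInferRef(
    real x, real mean, real var, real scale, real bias, double epsilon) {
  return scale * (x - mean) / std::sqrt(var + static_cast<real>(epsilon)) +
         bias;
}
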
diff --git a/paddle/cuda/include/hl_dso_loader.h b/paddle/cuda/include/hl_dso_loader.h
index f36c724e2da3dce11696fcda7daf98f5cda36dd6..1eb9f9ca888d3a93f04621e10346b5f9ff34cdca 100644
--- a/paddle/cuda/include/hl_dso_loader.h
+++ b/paddle/cuda/include/hl_dso_loader.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_DSO_LOADER_H_
#define HL_DSO_LOADER_H_
diff --git a/paddle/cuda/include/hl_functions.h b/paddle/cuda/include/hl_functions.h
index 65f366461ced0f9ee31ff9075f6dfaeb6c9b72a2..91ce9a0678463597df88c548aeac322ee19d95de 100644
--- a/paddle/cuda/include/hl_functions.h
+++ b/paddle/cuda/include/hl_functions.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_FUNCTIONS_H_
#define HL_FUNCTIONS_H_
@@ -21,30 +20,30 @@ limitations under the License. */
/**
* sigmoid threshold maximum
*/
-#define SIGMOID_THRESHOLD_MIN -40.0
+#define SIGMOID_THRESHOLD_MIN -40.0
/**
* sigmoid threshold minimum
*/
-#define SIGMOID_THRESHOLD_MAX 13.0
+#define SIGMOID_THRESHOLD_MAX 13.0
#ifndef __NVCC__
namespace hppl {
- /*
- * forward activation
- */
- real relu(const real a);
- real sigmoid(const real a);
- real tanh(const real a);
- real linear(const real a);
-
- /*
- * backward activation
- */
- real relu(const real a, const real b);
- real sigmoid(const real a, const real b);
- real tanh(const real a, const real b);
- real linear(const real a, const real b);
+/*
+ * forward activation
+ */
+real relu(const real a);
+real sigmoid(const real a);
+real tanh(const real a);
+real linear(const real a);
+
+/*
+ * backward activation
+ */
+real relu(const real a, const real b);
+real sigmoid(const real a, const real b);
+real tanh(const real a, const real b);
+real linear(const real a, const real b);
} // namespace hppl
#ifdef __AVX__
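
SIGMOID_THRESHOLD_MIN/MAX clamp the sigmoid input to a numerically safe range before exp() is evaluated; outside that range the result is saturated anyway. A scalar sketch of the clipped form (an illustration of how the thresholds are meant to be used, not the body of hppl::sigmoid):

#include <algorithm>
#include <cmath>

real sigmoidRef(real a) {
  // Clip into [SIGMOID_THRESHOLD_MIN, SIGMOID_THRESHOLD_MAX] first.
  real x = std::min(static_cast<real>(SIGMOID_THRESHOLD_MAX),
                    std::max(static_cast<real>(SIGMOID_THRESHOLD_MIN), a));
  return 1.0 / (1.0 + std::exp(-x));
}
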
diff --git a/paddle/cuda/include/hl_gpu.h b/paddle/cuda/include/hl_gpu.h
index 05039663b6e9f5e4a72f15ab822d723635f9b282..3be0df3b93b69811fb9c36dae223cbd927b02559 100644
--- a/paddle/cuda/include/hl_gpu.h
+++ b/paddle/cuda/include/hl_gpu.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_GPU_H_
#define HL_GPU_H_
diff --git a/paddle/cuda/include/hl_lstm.h b/paddle/cuda/include/hl_lstm.h
index 1f95e318a1fe06050bbd31c2e276974f4a8bdc1e..7e527a79025969320f1aca75d313fd9d0194efd1 100644
--- a/paddle/cuda/include/hl_lstm.h
+++ b/paddle/cuda/include/hl_lstm.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_LSTM_H_
#define HL_LSTM_H_
diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h
index 71e8f8e3a60c9ff340f36c5057a22cecc112fd48..96648661e345d8fa5d50cb2aae3a56ee53921f90 100644
--- a/paddle/cuda/include/hl_matrix.h
+++ b/paddle/cuda/include/hl_matrix.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_MATRIX_H_
#define HL_MATRIX_H_
@@ -30,13 +29,8 @@ limitations under the License. */
* @param[in] beta scalar used for addition.
*
*/
-extern void hl_matrix_add(real* A_d,
- real* B_d,
- real* C_d,
- int dimM,
- int dimN,
- real alpha,
- real beta);
+extern void hl_matrix_add(
+ real* A_d, real* B_d, real* C_d, int dimM, int dimN, real alpha, real beta);
/**
* @brief Matrix Softmax.
*
@@ -46,7 +40,7 @@ extern void hl_matrix_add(real* A_d,
* @param[in] dimN matrix width.
*
*/
-extern void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN);
+extern void hl_matrix_softmax(real* A_d, real* C_d, int dimM, int dimN);
/**
* @brief Matrix softmax derivative.
@@ -58,11 +52,8 @@ extern void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN);
* @param[in] dimN matrix width.
*
*/
-extern void hl_matrix_softmax_derivative(real* grad_d,
- real* output_d,
- real* sftmaxSum_d,
- int dimM,
- int dimN);
+extern void hl_matrix_softmax_derivative(
+ real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN);
/**
* @brief Sequence softmax.
@@ -73,8 +64,8 @@ extern void hl_matrix_softmax_derivative(real* grad_d,
* @param[in] numSequence sequence number.
*
*/
-extern void hl_sequence_softmax_forward(real *A_d,
- real *C_d,
+extern void hl_sequence_softmax_forward(real* A_d,
+ real* C_d,
const int* index,
int numSequence);
@@ -88,11 +79,8 @@ extern void hl_sequence_softmax_forward(real *A_d,
* @param[in] dimN matrix width.
*
*/
-extern void hl_matrix_classification_error(real* A_d,
- int* B_d,
- real* C_d,
- int dimM,
- int dimN);
+extern void hl_matrix_classification_error(
+ real* A_d, int* B_d, real* C_d, int dimM, int dimN);
/**
* @brief Matrix cross entropy.
@@ -104,11 +92,8 @@ extern void hl_matrix_classification_error(real* A_d,
* @param[in] dimN matrix width.
*
*/
-extern void hl_matrix_cross_entropy(real* A_d,
- real* C_d,
- int* label_d,
- int dimM,
- int dimN);
+extern void hl_matrix_cross_entropy(
+ real* A_d, real* C_d, int* label_d, int dimM, int dimN);
/**
* @brief Matrix cross entropy back propagation.
@@ -120,11 +105,32 @@ extern void hl_matrix_cross_entropy(real* A_d,
* @param[in] dimN matrix width.
*
*/
-extern void hl_matrix_cross_entropy_bp(real* grad_d,
- real* output_d,
- int* label_d,
- int dimM,
- int dimN);
+extern void hl_matrix_cross_entropy_bp(
+ real* grad_d, real* output_d, int* label_d, int dimM, int dimN);
+
+/**
+ * @brief Matrix multi-binary label cross entropy
+ *
+ * @param[in] output input matrix (M x N).
+ * @param[out] entropy output matrix (M x 1).
+ * @param[in] mat input sparse matrix.
+ * @param[in] dimM matrix height.
+ * @param[in] dimN matrix width.
+ */
+extern void hl_matrix_multi_binary_cross_entropy(
+ real* output, real* entropy, hl_sparse_matrix_s mat, int dimM, int dimN);
+
+/**
+ * @brief Matrix multi-binary label cross entropy backprop
+ *
+ * @param[in] output input matrix (M x N).
+ * @param[out] grad output matrix (M x N).
+ * @param[in] mat input sparse matrix.
+ * @param[in] dimM matrix height.
+ * @param[in] dimN matrix width.
+ */
+extern void hl_matrix_multi_binary_cross_entropy_bp(
+ real* output, real* grad, hl_sparse_matrix_s mat, int dimM, int dimN);
/**
* @brief Matrix zero memory.
@@ -146,12 +152,8 @@ extern void hl_matrix_zero_mem(real* data, int num);
* @param[in] partial_sum
*/
-extern void hl_param_relu_forward(real* output,
- real* input,
- real* w,
- int width,
- int height,
- int partial_sum);
+extern void hl_param_relu_forward(
+ real* output, real* input, real* w, int width, int height, int partial_sum);
/**
* @brief parameter relu backward w
*
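
The new hl_matrix_multi_binary_cross_entropy treats each row of output as dimN independent binary probabilities, with the sparse matrix encoding the multi-hot labels. A plain-CPU sketch of one row, assuming the conventional formula entropy_i = -sum_j [y_ij*log(o_ij) + (1-y_ij)*log(1-o_ij)] and that columns present in mat carry label 1 (neither is stated in this header):

#include <cmath>

real multiBinaryCrossEntropyRowRef(
    const real* o, const int* labelCols, int numLabels, int dimN) {
  real sum = 0;
  for (int j = 0; j < dimN; ++j) sum -= std::log(1 - o[j]);  // as if all y == 0
  for (int k = 0; k < numLabels; ++k) {  // correct the columns where y == 1
    int j = labelCols[k];
    sum += std::log(1 - o[j]) - std::log(o[j]);
  }
  return sum;
}
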
diff --git a/paddle/cuda/include/hl_sequence.h b/paddle/cuda/include/hl_sequence.h
index 46d86b2982f065802eec83ca7554f787d1d02f3a..bb5124df44b492bd8fdeb2a0c75ebcf74d2c8157 100644
--- a/paddle/cuda/include/hl_sequence.h
+++ b/paddle/cuda/include/hl_sequence.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_SEQUENCE_H_
#define HL_SEQUENCE_H_
@@ -32,7 +31,7 @@ limitations under the License. */
extern void hl_max_sequence_forward(real* input,
const int* sequence,
real* output,
- int *index,
+ int* index,
int numSequences,
int dim);
@@ -46,11 +45,8 @@ extern void hl_max_sequence_forward(real* input,
* @param[in] dim input dimension.
*
*/
-extern void hl_max_sequence_backward(real* outputGrad,
- int *index,
- real* inputGrad,
- int numSequences,
- int dim);
+extern void hl_max_sequence_backward(
+ real* outputGrad, int* index, real* inputGrad, int numSequences, int dim);
/**
* @brief Context projection forward.
@@ -63,7 +59,8 @@ extern void hl_max_sequence_backward(real* outputGrad,
* @param[in] inputDim input sequence dimension.
* @param[in] contextLength context length.
* @param[in] contextStart context start.
- * @param[in] beginPad number of extra timesteps added at the beginning.
+ * @param[in] beginPad number of extra timesteps added at the
+ * beginning.
* @param[in] isPadding trainable padding.
*
*/
@@ -109,7 +106,8 @@ extern void hl_context_projection_backward_data(real* outputGrad,
* @param[in] totalPad number of extra timesteps.
* @param[in] contextLength context length.
* @param[in] contextStart context start.
- * @param[in] beginPad number of extra timesteps added at the beginning.
+ * @param[in] beginPad number of extra timesteps added at the
+ * beginning.
*
*/
extern void hl_context_projection_backward_weight(real* outputGrad,
@@ -141,9 +139,9 @@ extern void hl_context_projection_backward_weight(real* outputGrad,
* @param[in] seq2batch copy direction.
*
*/
-extern void hl_sequence2batch_copy(real *batch,
- real *sequence,
- const int *batchIndex,
+extern void hl_sequence2batch_copy(real* batch,
+ real* sequence,
+ const int* batchIndex,
int seqWidth,
int batchCount,
bool seq2batch);
@@ -167,9 +165,9 @@ extern void hl_sequence2batch_copy(real *batch,
* @param[in] seq2batch copy direction.
*
*/
-extern void hl_sequence2batch_add(real *batch,
- real *sequence,
- int *batchIndex,
+extern void hl_sequence2batch_add(real* batch,
+ real* sequence,
+ int* batchIndex,
int seqWidth,
int batchCount,
bool seq2batch);
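
hl_sequence2batch_copy shuffles whole rows of width seqWidth between the sequence layout and the batch layout through batchIndex, with the boolean choosing the direction (hl_sequence2batch_add is the accumulating variant). A CPU sketch of the gather/scatter, assuming batchIndex[i] is the sequence row paired with batch row i:

void sequence2batchCopyRef(real* batch, real* sequence, const int* batchIndex,
                           int seqWidth, int batchCount, bool seq2batch) {
  for (int i = 0; i < batchCount; ++i) {
    real* b = batch + i * seqWidth;
    real* s = sequence + batchIndex[i] * seqWidth;
    for (int j = 0; j < seqWidth; ++j) {
      if (seq2batch)
        b[j] = s[j];  // gather sequence rows into batch order
      else
        s[j] = b[j];  // scatter batch rows back into sequence order
    }
  }
}
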
diff --git a/paddle/cuda/include/hl_sparse.h b/paddle/cuda/include/hl_sparse.h
index 9acdebdebf37761e1485e3441963586ead9f3c85..c4e0be23e2031cbcb124b532216a23d8a344668d 100644
--- a/paddle/cuda/include/hl_sparse.h
+++ b/paddle/cuda/include/hl_sparse.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_SPARSE_H_
#define HL_SPARSE_H_
@@ -31,7 +30,7 @@ limitations under the License. */
*/
extern void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d,
hl_matrix_format_t format,
- hl_matrix_value_t value_type,
+ hl_matrix_value_t value_type,
int dimM,
int dimN,
int nnz);
@@ -60,10 +59,10 @@ extern void hl_free_sparse_matrix(hl_sparse_matrix_s A_d);
*
*/
extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
- void * dest_d,
+ void *dest_d,
size_t size,
hl_matrix_format_t format,
- hl_matrix_value_t value_type,
+ hl_matrix_value_t value_type,
int dimM,
int dimN,
int nnz);
@@ -94,11 +93,11 @@ extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
*
*/
extern void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
- real* value_d,
- int* rows_d,
- int* cols_d,
+ real *value_d,
+ int *rows_d,
+ int *cols_d,
hl_matrix_format_t format,
- hl_matrix_value_t value_type,
+ hl_matrix_value_t value_type,
int dimM,
int dimN,
int nnz);
@@ -259,10 +258,14 @@ extern void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d,
*/
extern void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d,
hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+ real *B_d,
+ hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta);
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta);
/**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d.
@@ -311,11 +314,16 @@ extern void hl_matrix_dense_mul_csc(real *A_d,
* @note transb is not support HPPL_OP_T.
*
*/
-extern void hl_sparse_matrix_mul(real* A_d, hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+extern void hl_sparse_matrix_mul(real *A_d,
+ hl_trans_op_t transa,
+ real *B_d,
+ hl_trans_op_t transb,
hl_sparse_matrix_s C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta);
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta);
/**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
@@ -336,12 +344,16 @@ extern void hl_sparse_matrix_mul(real* A_d, hl_trans_op_t transa,
* @note transa is not support HPPL_OP_T.
*
*/
-extern void hl_matrix_dense_mul_csr(real *A_d, hl_trans_op_t transa,
+extern void hl_matrix_dense_mul_csr(real *A_d,
+ hl_trans_op_t transa,
hl_sparse_matrix_s B_d,
hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta);
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta);
/**
* @brief Memcpy csc_matrix to host.
@@ -412,7 +424,6 @@ extern void hl_memcpy_from_csr_matrix(real *csr_val,
hl_sparse_matrix_s csr_matrix,
hl_stream_t stream);
-
/**
* @brief A_d[j] += B_d[i,j] for i in range(height)
*
@@ -423,19 +434,13 @@ extern void hl_memcpy_from_csr_matrix(real *csr_val,
* @param[in] scale scale of B_d
*
*/
-extern void hl_sparse_matrix_column_sum(real* A_d,
- hl_sparse_matrix_s B_d,
- int dimM,
- int dimN,
- real scale);
+extern void hl_sparse_matrix_column_sum(
+ real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale);
/**
 * @brief implementation of csr sparse matrix in hl_sparse_matrix_column_sum
*/
-extern void hl_matrix_csr_column_sum(real* A_d,
- hl_sparse_matrix_s B_d,
- int dimM,
- int dimN,
- real scale);
+extern void hl_matrix_csr_column_sum(
+ real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale);
/**
* @brief A_d[i,j] += B_d[j]
@@ -446,13 +451,13 @@ extern void hl_matrix_csr_column_sum(real* A_d,
*
*/
extern void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d,
- real* B_d,
+ real *B_d,
real scale);
/**
* @brief implementation of csr sparse matrix in hl_sparse_matrix_add_bias
*/
extern void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d,
- real* B_d,
+ real *B_d,
real scale);
/**
@@ -470,7 +475,7 @@ extern void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d,
*
*/
extern void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d,
- real* B_d,
+ real *B_d,
int dimM,
int dimN,
real alpha,
@@ -479,7 +484,7 @@ extern void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d,
* @brief implementation of csr sparse matrix in hl_sparse_matrix_add_dense
*/
extern void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d,
- real* B_d,
+ real *B_d,
int dimM,
int dimN,
real alpha,
@@ -493,7 +498,7 @@ extern void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d,
* @return return rows pointer, which is gpu address
*
*/
-extern int* hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat);
+extern int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat);
/**
 * @brief get cols pointer of GpuSparseMatrix
@@ -503,7 +508,7 @@ extern int* hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat);
* @return return cols pointer, which is gpu address
*
*/
-extern int* hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat);
+extern int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat);
/**
 * @brief get value pointer of GpuSparseMatrix
@@ -513,7 +518,6 @@ extern int* hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat);
* @return return value pointer, which is gpu address
*
*/
-extern real* hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat);
-
+extern real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat);
#endif /* HL_SPARSE_H_ */
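
For the pointer-based hl_construct_sparse_matrix overload, a CSR matrix arrives as the usual value/rows/cols triplet. A small worked example of that encoding for a 3x3 matrix with 4 non-zeros (the arrays are shown as host values for clarity; the actual arguments must be GPU pointers):

  // [ 1 0 2 ]   value = {1, 2, 3, 4}   -- nnz = 4 stored values, row by row
  // [ 0 0 3 ]   cols  = {0, 2, 2, 0}   -- column index of each value
  // [ 4 0 0 ]   rows  = {0, 2, 3, 4}   -- dimM + 1 row offsets into value/cols
  //
  // hl_construct_sparse_matrix(&A_d, value_d, rows_d, cols_d,
  //                            HL_SPARSE_CSR, HL_FLOAT_VALUE,
  //                            /*dimM=*/3, /*dimN=*/3, /*nnz=*/4);
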
diff --git a/paddle/cuda/include/hl_table_apply.h b/paddle/cuda/include/hl_table_apply.h
index 3c9428e9253d5ed563e4e9f62d8842667496b83c..b4ac83a66af13c2a843872faba2ebd972008a738 100644
--- a/paddle/cuda/include/hl_table_apply.h
+++ b/paddle/cuda/include/hl_table_apply.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_TABLE_APPLY_H_
#define HL_TABLE_APPLY_H_
@@ -31,8 +30,10 @@ limitations under the License. */
* @param[in] dim width of table.
*
*/
-extern void hl_matrix_select_rows(real* output, int ldo,
- real* table, int ldt,
+extern void hl_matrix_select_rows(real* output,
+ int ldo,
+ real* table,
+ int ldt,
int* ids,
int numSamples,
int tableSize,
@@ -53,8 +54,10 @@ extern void hl_matrix_select_rows(real* output, int ldo,
* @param[in] dim width of table.
*
*/
-extern void hl_matrix_add_to_rows(real* table, int ldt,
- real* input, int ldi,
+extern void hl_matrix_add_to_rows(real* table,
+ int ldt,
+ real* input,
+ int ldi,
int* ids,
int numSamples,
int tableSize,
@@ -72,8 +75,7 @@ extern void hl_matrix_add_to_rows(real* table, int ldt,
*
*/
template <class T>
-extern void hl_vector_select_from(T* dst, int sized,
- const T* src, int sizes,
- const int* ids, int sizei);
+extern void hl_vector_select_from(
+ T* dst, int sized, const T* src, int sizes, const int* ids, int sizei);
-#endif /* HL_TABLE_APPLY_H_ */
+#endif /* HL_TABLE_APPLY_H_ */
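
hl_matrix_select_rows is a row gather: row i of output is filled from row ids[i] of table, using ldo/ldt as the respective leading dimensions. A plain-CPU reference of that behaviour (a sketch; the tableSize bound checked by the real API is omitted):

void matrixSelectRowsRef(real* output, int ldo, const real* table, int ldt,
                         const int* ids, int numSamples, int dim) {
  for (int i = 0; i < numSamples; ++i)
    for (int j = 0; j < dim; ++j)
      output[i * ldo + j] = table[ids[i] * ldt + j];
}
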
diff --git a/paddle/cuda/include/hl_time.h b/paddle/cuda/include/hl_time.h
index 4414b0b2d2ed4ab6a48294ffaed3a43a639e5950..b0a88c66a12fcfec6ea96b877423f907dac8dfa1 100644
--- a/paddle/cuda/include/hl_time.h
+++ b/paddle/cuda/include/hl_time.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_TIME_H_
#define HL_TIME_H_
diff --git a/paddle/cuda/include/hl_top_k.h b/paddle/cuda/include/hl_top_k.h
index a38d4cf862278a060f72b970d723895dc3735d0a..e8cfebbf6a3bd27c10a71d7817238bc304681fa4 100644
--- a/paddle/cuda/include/hl_top_k.h
+++ b/paddle/cuda/include/hl_top_k.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_TOP_K_H_
#define HL_TOP_K_H_
@@ -31,9 +30,11 @@ limitations under the License. */
* @param[in] numSamples height of input value.
*
*/
-extern void hl_matrix_top_k(real* topVal, int ldv,
- int * topIds,
- real* src, int lds,
+extern void hl_matrix_top_k(real* topVal,
+ int ldv,
+ int* topIds,
+ real* src,
+ int lds,
int dim,
int beamSize,
int numSamples);
@@ -50,8 +51,9 @@ extern void hl_matrix_top_k(real* topVal, int ldv,
*
* @note Only support HL_SPARSE_CSR format.
*/
-extern void hl_sparse_matrix_top_k(real* topVal, int ldv,
- int * topIds,
+extern void hl_sparse_matrix_top_k(real* topVal,
+ int ldv,
+ int* topIds,
hl_sparse_matrix_s src,
int beamSize,
int numSamples);
diff --git a/paddle/cuda/include/stub/hl_aggregate_stub.h b/paddle/cuda/include/stub/hl_aggregate_stub.h
index 4c0c68f3c98fe95f01060b82c3a1b9822d2a3715..bb53fc581e09905aa7a9b2d8dfe44b04c677c40a 100644
--- a/paddle/cuda/include/stub/hl_aggregate_stub.h
+++ b/paddle/cuda/include/stub/hl_aggregate_stub.h
@@ -12,29 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_AGGREGATE_STUB_H_
#define HL_AGGREGATE_STUB_H_
#include "hl_aggregate.h"
-inline void hl_matrix_row_sum(real *A_d, real *C_d,
- int dimM, int dimN) {}
+inline void hl_matrix_row_sum(real *A_d, real *C_d, int dimM, int dimN) {}
-inline void hl_matrix_row_max(real *A_d, real *C_d,
- int dimM, int dimN) {}
+inline void hl_matrix_row_max(real *A_d, real *C_d, int dimM, int dimN) {}
-inline void hl_matrix_row_min(real *A_d, real *C_d,
- int dimM, int dimN) {}
+inline void hl_matrix_row_min(real *A_d, real *C_d, int dimM, int dimN) {}
-inline void hl_matrix_column_sum(real *A_d, real *C_d,
- int dimM, int dimN) {}
+inline void hl_matrix_column_sum(real *A_d, real *C_d, int dimM, int dimN) {}
-inline void hl_matrix_column_max(real *A_d, real *C_d,
- int dimM, int dimN) {}
+inline void hl_matrix_column_max(real *A_d, real *C_d, int dimM, int dimN) {}
-inline void hl_matrix_column_min(real *A_d, real *C_d,
- int dimM, int dimN) {}
+inline void hl_matrix_column_min(real *A_d, real *C_d, int dimM, int dimN) {}
inline void hl_vector_sum(real *A_d, real *C_h, int dimM) {}
diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h
index 5f696986e3c8fa19e1f234b03d5ef758c95e3aaf..2f73b9671edd3609996aebff2913f5262805f869 100644
--- a/paddle/cuda/include/stub/hl_cnn_stub.h
+++ b/paddle/cuda/include/stub/hl_cnn_stub.h
@@ -12,89 +12,177 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CNN_STUB_H_
#define HL_CNN_STUB_H_
#include "hl_cnn.h"
-inline void hl_shrink_col2feature(
- const real * dataCol, size_t channels,
- size_t height, size_t width,
- size_t blockH, size_t blockW,
- size_t strideH, size_t strideW,
- size_t paddingH, size_t paddingW,
- size_t outputH, size_t outputW,
- real* dataIm,
- real alpha, real beta) {}
-
-inline void hl_expand_feature2col(
- const real* dataIm, size_t channels,
- size_t height, size_t width,
- size_t blockH, size_t blockW,
- size_t strideH, size_t strideW,
- size_t paddingH, size_t paddingW,
- size_t outputH, size_t outputW,
- real* dataCol) {}
-
-inline void hl_maxpool_forward(
- const int frameCnt, const real* inputData,
- const int channels,
- const int height, const int width,
- const int pooledH, const int pooledW,
- const int sizeX, const int sizeY,
- const int strideH, const int strideW,
- const int paddingH, const int paddingW, real* tgtData) {}
-
-inline void hl_maxpool_backward(
- const int frameCnt, const real* inputData,
- const real* outData, const real* outGrad,
- const int channels, const int height,
- const int width,
- const int pooledH, const int pooledW,
- const int sizeX, const int sizeY,
- const int strideH, const int strideW,
- const int paddingH, const int paddingW,
- real scaleA, real scaleB,
- real* targetGrad) {}
-
-inline void hl_avgpool_forward(
- const int frameCnt, const real* inputData,
- const int channels,
- const int height, const int width,
- const int pooledH, const int pooledW,
- const int sizeX, const int sizeY,
- const int strideH, const int strideW,
- const int paddingH, const int paddingW, real* tgtData) {}
-
-inline void hl_avgpool_backward(
- const int frameCnt, const real* outGrad,
- const int channels, const int height,
- const int width,
- const int pooledH, const int pooledW,
- const int sizeX, const int sizeY,
- const int strideH, const int strideW,
- int paddingH, int paddingW,
- real scaleA, real scaleB,
- real* backGrad) {}
-
-inline void hl_CMRNorm_forward(
- size_t frameCnt, const real* in, real* scale, real* out,
- size_t channels, size_t height, size_t width, size_t sizeX,
- real alpha, real beta) {}
-
-inline void hl_CMRNorm_backward(
- size_t frameCnt, const real* inV, const real* scale,
- const real* outV, const real* outDiff, real *inDiff,
- size_t channels, size_t height, size_t width, size_t sizeX,
- real alpha, real beta) {}
-
-inline void hl_maxout_forward(
- const real* inData, real* outData, int* idData,
- size_t batchSize, size_t size, size_t featLen, size_t group) {}
-
-inline void hl_maxout_backward(
- real* inGrad, const real* outGrad, const int* idData,
- size_t batchSize, size_t size, size_t featLen, size_t group) {}
+inline void hl_shrink_col2feature(const real* dataCol,
+ size_t channels,
+ size_t height,
+ size_t width,
+ size_t blockH,
+ size_t blockW,
+ size_t strideH,
+ size_t strideW,
+ size_t paddingH,
+ size_t paddingW,
+ size_t outputH,
+ size_t outputW,
+ real* dataIm,
+ real alpha,
+ real beta) {}
+
+inline void hl_expand_feature2col(const real* dataIm,
+ size_t channels,
+ size_t height,
+ size_t width,
+ size_t blockH,
+ size_t blockW,
+ size_t strideH,
+ size_t strideW,
+ size_t paddingH,
+ size_t paddingW,
+ size_t outputH,
+ size_t outputW,
+ real* dataCol) {}
+
+inline void hl_maxpool_forward(const int frameCnt,
+ const real* inputData,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooledH,
+ const int pooledW,
+ const int sizeX,
+ const int sizeY,
+ const int strideH,
+ const int strideW,
+ const int paddingH,
+ const int paddingW,
+ real* tgtData,
+ const int tgtStride) {}
+
+inline void hl_maxpool_backward(const int frameCnt,
+ const real* inputData,
+ const real* outData,
+ const real* outGrad,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooledH,
+ const int pooledW,
+ const int sizeX,
+ const int sizeY,
+ const int strideH,
+ const int strideW,
+ const int paddingH,
+ const int paddingW,
+ real scaleA,
+ real scaleB,
+ real* targetGrad,
+ const int outStride) {}
+
+inline void hl_avgpool_forward(const int frameCnt,
+ const real* inputData,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooledH,
+ const int pooledW,
+ const int sizeX,
+ const int sizeY,
+ const int strideH,
+ const int strideW,
+ const int paddingH,
+ const int paddingW,
+ real* tgtData,
+ const int tgtStride) {}
+
+inline void hl_avgpool_backward(const int frameCnt,
+ const real* outGrad,
+ const int channels,
+ const int height,
+ const int width,
+ const int pooledH,
+ const int pooledW,
+ const int sizeX,
+ const int sizeY,
+ const int strideH,
+ const int strideW,
+ int paddingH,
+ int paddingW,
+ real scaleA,
+ real scaleB,
+ real* backGrad,
+ const int outStride) {}
+
+inline void hl_CMRNorm_forward(size_t frameCnt,
+ const real* in,
+ real* scale,
+ real* out,
+ size_t channels,
+ size_t height,
+ size_t width,
+ size_t sizeX,
+ real alpha,
+ real beta) {}
+
+inline void hl_CMRNorm_backward(size_t frameCnt,
+ const real* inV,
+ const real* scale,
+ const real* outV,
+ const real* outDiff,
+ real* inDiff,
+ size_t channels,
+ size_t height,
+ size_t width,
+ size_t sizeX,
+ real alpha,
+ real beta) {}
+
+inline void hl_bilinear_forward(const real* inData,
+ const size_t inImgH,
+ const size_t inImgW,
+ const size_t inputH,
+ const size_t inputW,
+ real* outData,
+ const size_t outImgH,
+ const size_t outImgW,
+ const size_t outputH,
+ const size_t outputW,
+ const size_t numChannels,
+ const real ratioH,
+ const real ratioW) {}
+
+inline void hl_bilinear_backward(real* inGrad,
+ const size_t inImgH,
+ const size_t inImgW,
+ const size_t inputH,
+ const size_t inputW,
+ const real* outGrad,
+ const size_t outImgH,
+ const size_t outImgW,
+ const size_t outputH,
+ const size_t outputW,
+ const size_t numChannels,
+ const real ratioH,
+ const real ratioW) {}
+
+inline void hl_maxout_forward(const real* inData,
+ real* outData,
+ int* idData,
+ size_t batchSize,
+ size_t size,
+ size_t featLen,
+ size_t group) {}
+
+inline void hl_maxout_backward(real* inGrad,
+ const real* outGrad,
+ const int* idData,
+ size_t batchSize,
+ size_t size,
+ size_t featLen,
+ size_t group) {}
#endif // HL_CNN_STUB_H_
diff --git a/paddle/cuda/include/stub/hl_cuda_cublas_stub.h b/paddle/cuda/include/stub/hl_cuda_cublas_stub.h
index 903dcbe8355d6f593d96bc1f9e686d54035a9366..85f7c390c47397127487b16fdc933f0afe2fb880 100644
--- a/paddle/cuda/include/stub/hl_cuda_cublas_stub.h
+++ b/paddle/cuda/include/stub/hl_cuda_cublas_stub.h
@@ -12,41 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_CUBLAS_STUB_H_
#define HL_CUDA_CUBLAS_STUB_H_
#include "hl_cuda_cublas.h"
-inline void hl_matrix_transpose(real *A_d,
- real *C_d,
- int dimM,
- int dimN,
- int lda,
- int ldc) {}
-
-inline void hl_matrix_transpose(real *A_d,
- real *C_d,
- int dimM,
- int dimN) {}
-
-inline void hl_matrix_inverse(real *A_d,
- real *C_d,
- int dimN,
- int lda,
- int ldc) {}
-
-inline void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
- real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta,
- int lda, int ldb, int ldc) {}
+inline void hl_matrix_transpose(
+ real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc) {}
+
+inline void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN) {}
-inline void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+inline void hl_matrix_inverse(
+ real *A_d, real *C_d, int dimN, int lda, int ldc) {}
+
+inline void hl_matrix_mul(real *A_d,
+ hl_trans_op_t transa,
+ real *B_d,
+ hl_trans_op_t transb,
+ real *C_d,
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta,
+ int lda,
+ int ldb,
+ int ldc) {}
+
+inline void hl_matrix_mul(real *A_d,
+ hl_trans_op_t transa,
+ real *B_d,
+ hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta) {}
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta) {}
#endif // HL_CUDA_CUBLAS_STUB_H_
diff --git a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h
index 34c173908246e4a48c327c8aa58730756bbc72b7..3beb0e5b5170261a6c453936b8b0347f3e97dbff 100644
--- a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h
+++ b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h
@@ -12,15 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_CUDNN_STUB_H_
#define HL_CUDA_CUDNN_STUB_H_
#include "hl_cuda_cudnn.h"
-inline int hl_get_cudnn_lib_version() {
- return 0;
-}
+inline int hl_get_cudnn_lib_version() { return 0; }
inline void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc) {}
@@ -68,41 +65,41 @@ inline void hl_pooling_backward(hl_tensor_descriptor input,
hl_pooling_descriptor pooling) {}
inline void hl_create_filter_descriptor(hl_filter_descriptor* filter,
- int input_feature_maps,
- int output_feature_maps,
- int height,
- int width) {}
+ int input_feature_maps,
+ int output_feature_maps,
+ int height,
+ int width) {}
inline void hl_destroy_filter_descriptor(hl_filter_descriptor filter) {}
inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
- hl_tensor_descriptor image,
- hl_filter_descriptor filter,
- int padding_height,
- int padding_width,
- int stride_height,
- int stride_width) {}
+ hl_tensor_descriptor image,
+ hl_filter_descriptor filter,
+ int padding_height,
+ int padding_width,
+ int stride_height,
+ int stride_width) {}
inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
- hl_tensor_descriptor image,
- hl_filter_descriptor filter,
- int padding_height,
- int padding_width,
- int stride_height,
- int stride_width) {}
+ hl_tensor_descriptor image,
+ hl_filter_descriptor filter,
+ int padding_height,
+ int padding_width,
+ int stride_height,
+ int stride_width) {}
inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {}
inline void hl_conv_workspace(hl_tensor_descriptor input,
- hl_tensor_descriptor output,
- hl_filter_descriptor filter,
- hl_convolution_descriptor conv,
- int* convFwdAlgo,
- size_t* fwdLimitBytes,
- int* convBwdDataAlgo,
- size_t* bwdDataLimitBytes,
- int* convBwdFilterAlgo,
- size_t* bwdFilterLimitBytes) {}
+ hl_tensor_descriptor output,
+ hl_filter_descriptor filter,
+ hl_convolution_descriptor conv,
+ int* convFwdAlgo,
+ size_t* fwdLimitBytes,
+ int* convBwdDataAlgo,
+ size_t* bwdDataLimitBytes,
+ int* convBwdFilterAlgo,
+ size_t* bwdFilterLimitBytes) {}
inline void hl_convolution_forward(hl_tensor_descriptor input,
real* input_data,
@@ -116,87 +113,84 @@ inline void hl_convolution_forward(hl_tensor_descriptor input,
int convFwdAlgo) {}
inline void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
- real* bias_data,
- hl_tensor_descriptor output,
- real* output_data) {}
-
-inline void hl_convolution_backward_filter(
- hl_tensor_descriptor input,
- real* input_data,
- hl_tensor_descriptor output,
- real* output_grad_data,
- hl_filter_descriptor filter,
- real* filter_grad_data,
- hl_convolution_descriptor conv,
- void* gpuWorkSpace,
- size_t sizeInBytes,
- int convBwdFilterAlgo) {}
-
-inline void hl_convolution_backward_data(
- hl_tensor_descriptor input,
- real* input_data_grad,
- hl_tensor_descriptor output,
- real* output_grad_data,
- hl_filter_descriptor filter,
- real* filter_data,
- hl_convolution_descriptor conv,
- void* gpuWorkSpace,
- size_t sizeInBytes,
- int convBwdDataAlgo) {}
+ real* bias_data,
+ hl_tensor_descriptor output,
+ real* output_data) {}
+
+inline void hl_convolution_backward_filter(hl_tensor_descriptor input,
+ real* input_data,
+ hl_tensor_descriptor output,
+ real* output_grad_data,
+ hl_filter_descriptor filter,
+ real* filter_grad_data,
+ hl_convolution_descriptor conv,
+ void* gpuWorkSpace,
+ size_t sizeInBytes,
+ int convBwdFilterAlgo) {}
+
+inline void hl_convolution_backward_data(hl_tensor_descriptor input,
+ real* input_data_grad,
+ hl_tensor_descriptor output,
+ real* output_grad_data,
+ hl_filter_descriptor filter,
+ real* filter_data,
+ hl_convolution_descriptor conv,
+ void* gpuWorkSpace,
+ size_t sizeInBytes,
+ int convBwdDataAlgo) {}
inline void hl_convolution_backward_bias(hl_tensor_descriptor bias,
- real* bias_grad_data,
- hl_tensor_descriptor output,
- real* output_grad_data) {}
-
-inline void hl_softmax_forward(real *input,
- real *output,
- int height,
- int width) {}
+ real* bias_grad_data,
+ hl_tensor_descriptor output,
+ real* output_grad_data) {}
-inline void hl_softmax_backward(real *output_value,
- real *output_grad,
+inline void hl_softmax_forward(real* input,
+ real* output,
int height,
int width) {}
+inline void hl_softmax_backward(real* output_value,
+ real* output_grad,
+ int height,
+ int width) {}
+
inline void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
- real *input,
+ real* input,
hl_tensor_descriptor outputDesc,
- real *output,
+ real* output,
hl_tensor_descriptor bnParamDesc,
- real *scale,
- real *bias,
+ real* scale,
+ real* bias,
double factor,
- real *runningMean,
- real *runningInvVar,
+ real* runningMean,
+ real* runningInvVar,
double epsilon,
- real *savedMean,
- real *savedVar) {}
+ real* savedMean,
+ real* savedVar) {}
inline void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
- real *input,
+ real* input,
hl_tensor_descriptor outputDesc,
- real *output,
+ real* output,
hl_tensor_descriptor bnParamDesc,
- real *scale,
- real *bias,
- real *estimatedMean,
- real *estimatedVar,
+ real* scale,
+ real* bias,
+ real* estimatedMean,
+ real* estimatedVar,
double epsilon) {}
inline void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
- real *input,
+ real* input,
hl_tensor_descriptor outGradDesc,
- real *outGrad,
+ real* outGrad,
hl_tensor_descriptor inGradDesc,
- real *inGrad,
+ real* inGrad,
hl_tensor_descriptor dBnParamDesc,
- real *scale,
- real *scaleGrad,
- real *biasGrad,
+ real* scale,
+ real* scaleGrad,
+ real* biasGrad,
double epsilon,
- real *savedMean,
- real *savedInvVar) {}
+ real* savedMean,
+ real* savedInvVar) {}
#endif // HL_CUDA_CUDNN_STUB_H_
-
diff --git a/paddle/cuda/include/stub/hl_cuda_stub.h b/paddle/cuda/include/stub/hl_cuda_stub.h
index 675ac03b0e188e9b26038dd4e40264099618e17a..1f91068cdf8b3d472c4b403d1ec7d5293c28c07e 100644
--- a/paddle/cuda/include/stub/hl_cuda_stub.h
+++ b/paddle/cuda/include/stub/hl_cuda_stub.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_STUB_H_
#define HL_CUDA_STUB_H_
@@ -24,29 +23,25 @@ inline void hl_specify_devices_start(int *device, int number) {}
inline void hl_init(int device) {}
-inline int hl_get_cuda_lib_version(int device) {
- return 0;
-}
+inline int hl_get_cuda_lib_version(int device) { return 0; }
inline void hl_fini() {}
inline void hl_set_sync_flag(bool flag) {}
-inline bool hl_get_sync_flag() {
- return false;
-}
+inline bool hl_get_sync_flag() { return false; }
-inline int hl_get_device_count() { return 0; }
+inline int hl_get_device_count() { return 0; }
inline void hl_set_device(int device) {}
-inline int hl_get_device() { return 0; }
+inline int hl_get_device() { return 0; }
-inline void* hl_malloc_device(size_t size) { return NULL; }
+inline void *hl_malloc_device(size_t size) { return NULL; }
inline void hl_free_mem_device(void *dest_d) {}
-inline void* hl_malloc_host(size_t size) { return NULL; }
+inline void *hl_malloc_host(size_t size) { return NULL; }
inline void hl_free_mem_host(void *dest_h) {}
@@ -64,7 +59,9 @@ inline void hl_rand(real *dest_d, size_t num) {}
inline void hl_srand(unsigned int seed) {}
-inline void hl_memcpy_async(void *dst, void *src, size_t size,
+inline void hl_memcpy_async(void *dst,
+ void *src,
+ size_t size,
hl_stream_t stream) {}
inline void hl_stream_synchronize(hl_stream_t stream) {}
@@ -83,11 +80,11 @@ inline void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) {}
inline void hl_event_synchronize(hl_event_t event) {}
-inline int hl_get_device_last_error() { return 0; }
+inline int hl_get_device_last_error() { return 0; }
-inline const char* hl_get_device_error_string() { return NULL; }
+inline const char *hl_get_device_error_string() { return NULL; }
-inline const char* hl_get_device_error_string(size_t err) { return NULL; }
+inline const char *hl_get_device_error_string(size_t err) { return NULL; }
inline bool hl_cuda_event_is_ready(hl_event_t event) { return true; }
diff --git a/paddle/cuda/include/stub/hl_lstm_stub.h b/paddle/cuda/include/stub/hl_lstm_stub.h
index 2700bef02a5e1e40ee7603ccab7fec754196f8cd..7ccda032d26f2fbbe99136e8481416daea557a78 100644
--- a/paddle/cuda/include/stub/hl_lstm_stub.h
+++ b/paddle/cuda/include/stub/hl_lstm_stub.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_LSTM_STUB_H_
#define HL_LSTM_STUB_H_
diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h
index e37b1275432caae29b14e95658e3db291632a672..1bd78d23fbaf46e6265ba0db25ea399a204bd96f 100644
--- a/paddle/cuda/include/stub/hl_matrix_stub.h
+++ b/paddle/cuda/include/stub/hl_matrix_stub.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_MATRIX_STUB_H_
#define HL_MATRIX_STUB_H_
@@ -26,36 +25,30 @@ inline void hl_matrix_add(real* A_d,
real alpha,
real beta) {}
-inline void hl_matrix_softmax(real *A_d, real *C_d, int dimM, int dimN) {}
+inline void hl_matrix_softmax(real* A_d, real* C_d, int dimM, int dimN) {}
-inline void hl_sequence_softmax_forward(real *A_d,
- real *C_d,
+inline void hl_sequence_softmax_forward(real* A_d,
+ real* C_d,
const int* index,
int numSequence) {}
-inline void hl_matrix_softmax_derivative(real* grad_d,
- real* output_d,
- real* sftmaxSum_d,
- int dimM,
- int dimN) {}
-
-inline void hl_matrix_classification_error(real* A_d,
- int* B_d,
- real* C_d,
- int dimM,
- int dimN) {}
-
-inline void hl_matrix_cross_entropy(real* A_d,
- real* C_d,
- int* label_d,
- int dimM,
- int dimN) {}
-
-inline void hl_matrix_cross_entropy_bp(real* grad_d,
- real* output_d,
- int* label_d,
- int dimM,
- int dimN) {}
+inline void hl_matrix_softmax_derivative(
+ real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN) {}
+
+inline void hl_matrix_classification_error(
+ real* A_d, int* B_d, real* C_d, int dimM, int dimN) {}
+
+inline void hl_matrix_cross_entropy(
+ real* A_d, real* C_d, int* label_d, int dimM, int dimN) {}
+
+inline void hl_matrix_cross_entropy_bp(
+ real* grad_d, real* output_d, int* label_d, int dimM, int dimN) {}
+
+inline void hl_matrix_multi_binary_cross_entropy(
+ real* output, real* entropy, hl_sparse_matrix_s mat, int dimM, int dimN) {}
+
+inline void hl_matrix_multi_binary_cross_entropy_bp(
+ real* output, real* grad, hl_sparse_matrix_s mat, int dimM, int dimN) {}
inline void hl_matrix_zero_mem(real* data, int num) {}
@@ -89,7 +82,6 @@ inline void hl_cossim(real* output,
int input2_height,
real scale) {}
-
inline void hl_cossim_derivative(real* grad,
real* output,
real* prevOutX,
diff --git a/paddle/cuda/include/stub/hl_sequence_stub.h b/paddle/cuda/include/stub/hl_sequence_stub.h
index aabd956c37f7dce48a379b995ab88a53aa65c760..381f0a6f26c5669465f029e972c6ca8b0e6e1776 100644
--- a/paddle/cuda/include/stub/hl_sequence_stub.h
+++ b/paddle/cuda/include/stub/hl_sequence_stub.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_SEQUENCE_STUB_H_
#define HL_SEQUENCE_STUB_H_
@@ -21,15 +20,12 @@ limitations under the License. */
inline void hl_max_sequence_forward(real* input,
const int* sequence,
real* output,
- int *index,
+ int* index,
int numSequences,
int dim) {}
-inline void hl_max_sequence_backward(real* outputGrad,
- int *index,
- real* inputGrad,
- int numSequences,
- int dim) {}
+inline void hl_max_sequence_backward(
+ real* outputGrad, int* index, real* inputGrad, int numSequences, int dim) {}
inline void hl_context_projection_forward(real* input,
const int* sequence,
@@ -60,16 +56,16 @@ inline void hl_context_projection_backward_weight(real* outputGrad,
int contextStart,
int beginPad) {}
-inline void hl_sequence2batch_copy(real *batch,
- real *sequence,
- const int *batchIndex,
+inline void hl_sequence2batch_copy(real* batch,
+ real* sequence,
+ const int* batchIndex,
int seqWidth,
int batchCount,
bool seq2batch) {}
-inline void hl_sequence2batch_add(real *batch,
- real *sequence,
- int *batchIndex,
+inline void hl_sequence2batch_add(real* batch,
+ real* sequence,
+ int* batchIndex,
int seqWidth,
int batchCount,
bool seq2batch) {}
diff --git a/paddle/cuda/include/stub/hl_sparse_stub.h b/paddle/cuda/include/stub/hl_sparse_stub.h
index 346a1900dda5825e9a4311a2c51e8a50e6e7df0b..d47bdd2c47d097c4c68b7b7e88ef888bc18270c2 100644
--- a/paddle/cuda/include/stub/hl_sparse_stub.h
+++ b/paddle/cuda/include/stub/hl_sparse_stub.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_SPARSE_STUB_H_
#define HL_SPARSE_STUB_H_
@@ -20,7 +19,7 @@ limitations under the License. */
inline void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d,
hl_matrix_format_t format,
- hl_matrix_value_t value_type,
+ hl_matrix_value_t value_type,
int dimM,
int dimN,
int nnz) {}
@@ -28,20 +27,20 @@ inline void hl_malloc_sparse_matrix(hl_sparse_matrix_s *A_d,
inline void hl_free_sparse_matrix(hl_sparse_matrix_s A_d) {}
inline void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
- void * dest_d,
+ void *dest_d,
size_t size,
hl_matrix_format_t format,
- hl_matrix_value_t value_type,
+ hl_matrix_value_t value_type,
int dimM,
int dimN,
int nnz) {}
inline void hl_construct_sparse_matrix(hl_sparse_matrix_s *A_d,
- real* value_d,
- int* rows_d,
- int* cols_d,
+ real *value_d,
+ int *rows_d,
+ int *cols_d,
hl_matrix_format_t format,
- hl_matrix_value_t value_type,
+ hl_matrix_value_t value_type,
int dimM,
int dimN,
int nnz) {}
@@ -87,10 +86,14 @@ inline void hl_matrix_csr_mul_dense(hl_sparse_matrix_s A_d,
inline void hl_matrix_csc_mul_dense(hl_sparse_matrix_s A_d,
hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+ real *B_d,
+ hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta) {}
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta) {}
inline void hl_matrix_dense_mul_csc(real *A_d,
hl_trans_op_t transa,
@@ -103,18 +106,27 @@ inline void hl_matrix_dense_mul_csc(real *A_d,
real alpha,
real beta) {}
-inline void hl_sparse_matrix_mul(real* A_d, hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+inline void hl_sparse_matrix_mul(real *A_d,
+ hl_trans_op_t transa,
+ real *B_d,
+ hl_trans_op_t transb,
hl_sparse_matrix_s C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta) {}
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta) {}
-inline void hl_matrix_dense_mul_csr(real *A_d, hl_trans_op_t transa,
+inline void hl_matrix_dense_mul_csr(real *A_d,
+ hl_trans_op_t transa,
hl_sparse_matrix_s B_d,
hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta) {}
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta) {}
inline void hl_memcpy_from_csc_matrix(real *csc_val,
size_t val_size,
@@ -134,49 +146,39 @@ inline void hl_memcpy_from_csr_matrix(real *csr_val,
hl_sparse_matrix_s csr_matrix,
hl_stream_t stream) {}
-inline void hl_sparse_matrix_column_sum(real* A_d,
- hl_sparse_matrix_s B_d,
- int dimM,
- int dimN,
- real scale) {}
+inline void hl_sparse_matrix_column_sum(
+ real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale) {}
-inline void hl_matrix_csr_column_sum(real* A_d,
- hl_sparse_matrix_s B_d,
- int dimM,
- int dimN,
- real scale) {}
+inline void hl_matrix_csr_column_sum(
+ real *A_d, hl_sparse_matrix_s B_d, int dimM, int dimN, real scale) {}
inline void hl_sparse_matrix_add_bias(hl_sparse_matrix_s A_d,
- real* B_d,
+ real *B_d,
real scale) {}
inline void hl_matrix_csr_add_bias(hl_sparse_matrix_s A_d,
- real* B_d,
+ real *B_d,
real scale) {}
inline void hl_sparse_matrix_add_dense(hl_sparse_matrix_s A_d,
- real* B_d,
+ real *B_d,
int dimM,
int dimN,
real alpha,
real beta) {}
inline void hl_matrix_csr_add_dense(hl_sparse_matrix_s A_d,
- real* B_d,
+ real *B_d,
int dimM,
int dimN,
real alpha,
real beta) {}
-inline int* hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat) {
- return NULL;
-}
+inline int *hl_sparse_matrix_get_rows(hl_sparse_matrix_s sMat) { return NULL; }
-inline int* hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat) {
- return NULL;
-}
+inline int *hl_sparse_matrix_get_cols(hl_sparse_matrix_s sMat) { return NULL; }
-inline real* hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat) {
+inline real *hl_sparse_matrix_get_value(hl_sparse_matrix_s sMat) {
return NULL;
}
diff --git a/paddle/cuda/src/avx_mathfun.h b/paddle/cuda/src/avx_mathfun.h
index 808c2508d1a1a09fb25f052047d6b0539cad8df2..2412ed5abc13b2a83521a75524f581e106788b60 100644
--- a/paddle/cuda/src/avx_mathfun.h
+++ b/paddle/cuda/src/avx_mathfun.h
@@ -32,32 +32,35 @@
#include <immintrin.h>
/* yes I know, the top of this file is quite ugly */
-# define ALIGN32_BEG
-# define ALIGN32_END __attribute__((aligned(32)))
+#define ALIGN32_BEG
+#define ALIGN32_END __attribute__((aligned(32)))
/* __m128 is ugly to write */
-typedef __m256 v8sf; // vector of 8 float (avx)
-typedef __m256i v8si; // vector of 8 int (avx)
-typedef __m128i v4si; // vector of 8 int (avx)
+typedef __m256 v8sf; // vector of 8 float (avx)
+typedef __m256i v8si; // vector of 8 int (avx)
+typedef __m128i v4si; // vector of 8 int (avx)
-#define _PI32AVX_CONST(Name, Val) \
- static const ALIGN32_BEG int _pi32avx_##Name[4] ALIGN32_END = { Val, Val, Val, Val }
+#define _PI32AVX_CONST(Name, Val) \
+ static const ALIGN32_BEG int _pi32avx_##Name[4] ALIGN32_END = { \
+ Val, Val, Val, Val}
_PI32AVX_CONST(1, 1);
_PI32AVX_CONST(inv1, ~1);
_PI32AVX_CONST(2, 2);
_PI32AVX_CONST(4, 4);
-
/* declare some AVX constants -- why can't I figure a better way to do that? */
-#define _PS256_CONST(Name, Val) \
- static const ALIGN32_BEG float _ps256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val }
-#define _PI32_CONST256(Name, Val) \
- static const ALIGN32_BEG int _pi32_256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val }
-#define _PS256_CONST_TYPE(Name, Type, Val) \
- static const ALIGN32_BEG Type _ps256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val }
-
-_PS256_CONST(1 , 1.0f);
+#define _PS256_CONST(Name, Val) \
+ static const ALIGN32_BEG float _ps256_##Name[8] ALIGN32_END = { \
+ Val, Val, Val, Val, Val, Val, Val, Val}
+#define _PI32_CONST256(Name, Val) \
+ static const ALIGN32_BEG int _pi32_256_##Name[8] ALIGN32_END = { \
+ Val, Val, Val, Val, Val, Val, Val, Val}
+#define _PS256_CONST_TYPE(Name, Type, Val) \
+ static const ALIGN32_BEG Type _ps256_##Name[8] ALIGN32_END = { \
+ Val, Val, Val, Val, Val, Val, Val, Val}
+
+_PS256_CONST(1, 1.0f);
_PS256_CONST(0p5, 0.5f);
/* the smallest non denormalized float number */
_PS256_CONST_TYPE(min_norm_pos, int, 0x00800000);
@@ -76,14 +79,14 @@ _PI32_CONST256(0x7f, 0x7f);
_PS256_CONST(cephes_SQRTHF, 0.707106781186547524);
_PS256_CONST(cephes_log_p0, 7.0376836292E-2);
-_PS256_CONST(cephes_log_p1, - 1.1514610310E-1);
+_PS256_CONST(cephes_log_p1, -1.1514610310E-1);
_PS256_CONST(cephes_log_p2, 1.1676998740E-1);
-_PS256_CONST(cephes_log_p3, - 1.2420140846E-1);
-_PS256_CONST(cephes_log_p4, + 1.4249322787E-1);
-_PS256_CONST(cephes_log_p5, - 1.6668057665E-1);
-_PS256_CONST(cephes_log_p6, + 2.0000714765E-1);
-_PS256_CONST(cephes_log_p7, - 2.4999993993E-1);
-_PS256_CONST(cephes_log_p8, + 3.3333331174E-1);
+_PS256_CONST(cephes_log_p3, -1.2420140846E-1);
+_PS256_CONST(cephes_log_p4, +1.4249322787E-1);
+_PS256_CONST(cephes_log_p5, -1.6668057665E-1);
+_PS256_CONST(cephes_log_p6, +2.0000714765E-1);
+_PS256_CONST(cephes_log_p7, -2.4999993993E-1);
+_PS256_CONST(cephes_log_p8, +3.3333331174E-1);
_PS256_CONST(cephes_log_q1, -2.12194440e-4);
_PS256_CONST(cephes_log_q2, 0.693359375);
@@ -94,50 +97,51 @@ typedef union imm_xmm_union {
v4si xmm[2];
} imm_xmm_union;
-#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \
- imm_xmm_union u __attribute__((aligned(32))); \
- u.imm = imm_; \
- xmm0_ = u.xmm[0]; \
- xmm1_ = u.xmm[1]; \
-}
-
-#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \
+#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) \
+ { \
imm_xmm_union u __attribute__((aligned(32))); \
- u.xmm[0]=xmm0_; u.xmm[1]=xmm1_; imm_ = u.imm; \
+ u.imm = imm_; \
+ xmm0_ = u.xmm[0]; \
+ xmm1_ = u.xmm[1]; \
}
+#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) \
+ { \
+ imm_xmm_union u __attribute__((aligned(32))); \
+ u.xmm[0] = xmm0_; \
+ u.xmm[1] = xmm1_; \
+ imm_ = u.imm; \
+ }
-#define AVX2_BITOP_USING_SSE2(fn) \
-static inline v8si avx2_mm256_##fn(v8si x, int a) \
-{ \
- /* use SSE2 instruction to perform the bitop AVX2 */ \
- v4si x1, x2; \
- v8si ret; \
- COPY_IMM_TO_XMM(x, x1, x2); \
- x1 = _mm_##fn(x1,a); \
- x2 = _mm_##fn(x2,a); \
- COPY_XMM_TO_IMM(x1, x2, ret); \
- return(ret); \
-}
+#define AVX2_BITOP_USING_SSE2(fn) \
+ static inline v8si avx2_mm256_##fn(v8si x, int a) { \
+ /* use SSE2 instruction to perform the bitop AVX2 */ \
+ v4si x1, x2; \
+ v8si ret; \
+ COPY_IMM_TO_XMM(x, x1, x2); \
+ x1 = _mm_##fn(x1, a); \
+ x2 = _mm_##fn(x2, a); \
+ COPY_XMM_TO_IMM(x1, x2, ret); \
+ return (ret); \
+ }
//#warning "Using SSE2 to perform AVX2 bitshift ops"
AVX2_BITOP_USING_SSE2(slli_epi32)
AVX2_BITOP_USING_SSE2(srli_epi32)
-#define AVX2_INTOP_USING_SSE2(fn) \
-static inline v8si avx2_mm256_##fn(v8si x, v8si y) \
-{ \
- /* use SSE2 instructions to perform the AVX2 integer operation */ \
- v4si x1, x2; \
- v4si y1, y2; \
- v8si ret; \
- COPY_IMM_TO_XMM(x, x1, x2); \
- COPY_IMM_TO_XMM(y, y1, y2); \
- x1 = _mm_##fn(x1,y1); \
- x2 = _mm_##fn(x2,y2); \
- COPY_XMM_TO_IMM(x1, x2, ret); \
- return(ret); \
-}
+#define AVX2_INTOP_USING_SSE2(fn) \
+ static inline v8si avx2_mm256_##fn(v8si x, v8si y) { \
+ /* use SSE2 instructions to perform the AVX2 integer operation */ \
+ v4si x1, x2; \
+ v4si y1, y2; \
+ v8si ret; \
+ COPY_IMM_TO_XMM(x, x1, x2); \
+ COPY_IMM_TO_XMM(y, y1, y2); \
+ x1 = _mm_##fn(x1, y1); \
+ x2 = _mm_##fn(x2, y2); \
+ COPY_XMM_TO_IMM(x1, x2, ret); \
+ return (ret); \
+ }
//#warning "Using SSE2 to perform AVX2 integer ops"
AVX2_INTOP_USING_SSE2(and_si128)
@@ -157,84 +161,83 @@ AVX2_INTOP_USING_SSE2(add_epi32)
#define avx2_mm256_add_epi32 _mm256_add_epi32
#endif /* __AVX2__ */
-
-/* natural logarithm computed for 8 simultaneous float
+/* natural logarithm computed for 8 simultaneous float
return NaN for x <= 0
*/
v8sf log256_ps(v8sf x) {
v8si imm0;
- v8sf one = *(v8sf*)_ps256_1;
+ v8sf one = *(v8sf *)_ps256_1;
- //v8sf invalid_mask = _mm256_cmple_ps(x, _mm256_setzero_ps());
+ // v8sf invalid_mask = _mm256_cmple_ps(x, _mm256_setzero_ps());
v8sf invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_LE_OS);
- x = _mm256_max_ps(x, *(v8sf*)_ps256_min_norm_pos); /* cut off denormalized stuff */
+ x = _mm256_max_ps(
+ x, *(v8sf *)_ps256_min_norm_pos); /* cut off denormalized stuff */
// can be done with AVX2
imm0 = avx2_mm256_srli_epi32(_mm256_castps_si256(x), 23);
/* keep only the fractional part */
- x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_mant_mask);
- x = _mm256_or_ps(x, *(v8sf*)_ps256_0p5);
+ x = _mm256_and_ps(x, *(v8sf *)_ps256_inv_mant_mask);
+ x = _mm256_or_ps(x, *(v8sf *)_ps256_0p5);
// this is again another AVX2 instruction
- imm0 = avx2_mm256_sub_epi32(imm0, *(v8si*)_pi32_256_0x7f);
+ imm0 = avx2_mm256_sub_epi32(imm0, *(v8si *)_pi32_256_0x7f);
v8sf e = _mm256_cvtepi32_ps(imm0);
e = _mm256_add_ps(e, one);
- /* part2:
+ /* part2:
if( x < SQRTHF ) {
e -= 1;
x = x + x - 1.0;
} else { x = x - 1.0; }
*/
- //v8sf mask = _mm256_cmplt_ps(x, *(v8sf*)_ps256_cephes_SQRTHF);
- v8sf mask = _mm256_cmp_ps(x, *(v8sf*)_ps256_cephes_SQRTHF, _CMP_LT_OS);
+ // v8sf mask = _mm256_cmplt_ps(x, *(v8sf*)_ps256_cephes_SQRTHF);
+ v8sf mask = _mm256_cmp_ps(x, *(v8sf *)_ps256_cephes_SQRTHF, _CMP_LT_OS);
v8sf tmp = _mm256_and_ps(x, mask);
x = _mm256_sub_ps(x, one);
e = _mm256_sub_ps(e, _mm256_and_ps(one, mask));
x = _mm256_add_ps(x, tmp);
- v8sf z = _mm256_mul_ps(x,x);
+ v8sf z = _mm256_mul_ps(x, x);
- v8sf y = *(v8sf*)_ps256_cephes_log_p0;
+ v8sf y = *(v8sf *)_ps256_cephes_log_p0;
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p1);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p1);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p2);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p2);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p3);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p3);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p4);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p4);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p5);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p5);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p6);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p6);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p7);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p7);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p8);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_log_p8);
y = _mm256_mul_ps(y, x);
y = _mm256_mul_ps(y, z);
-
- tmp = _mm256_mul_ps(e, *(v8sf*)_ps256_cephes_log_q1);
- y = _mm256_add_ps(y, tmp);
+ tmp = _mm256_mul_ps(e, *(v8sf *)_ps256_cephes_log_q1);
+ y = _mm256_add_ps(y, tmp);
- tmp = _mm256_mul_ps(z, *(v8sf*)_ps256_0p5);
+ tmp = _mm256_mul_ps(z, *(v8sf *)_ps256_0p5);
y = _mm256_sub_ps(y, tmp);
- tmp = _mm256_mul_ps(e, *(v8sf*)_ps256_cephes_log_q2);
+ tmp = _mm256_mul_ps(e, *(v8sf *)_ps256_cephes_log_q2);
x = _mm256_add_ps(x, y);
x = _mm256_add_ps(x, tmp);
- x = _mm256_or_ps(x, invalid_mask); // negative arg will be NAN
+ x = _mm256_or_ps(x, invalid_mask); // negative arg will be NAN
return x;
}
-_PS256_CONST(exp_hi, 88.3762626647949f);
-_PS256_CONST(exp_lo, -88.3762626647949f);
+_PS256_CONST(exp_hi, 88.3762626647949f);
+_PS256_CONST(exp_lo, -88.3762626647949f);
_PS256_CONST(cephes_LOG2EF, 1.44269504088896341);
_PS256_CONST(cephes_exp_C1, 0.693359375);
@@ -250,45 +253,45 @@ _PS256_CONST(cephes_exp_p5, 5.0000001201E-1);
v8sf exp256_ps(v8sf x) {
v8sf tmp = _mm256_setzero_ps(), fx;
v8si imm0;
- v8sf one = *(v8sf*)_ps256_1;
+ v8sf one = *(v8sf *)_ps256_1;
- x = _mm256_min_ps(x, *(v8sf*)_ps256_exp_hi);
- x = _mm256_max_ps(x, *(v8sf*)_ps256_exp_lo);
+ x = _mm256_min_ps(x, *(v8sf *)_ps256_exp_hi);
+ x = _mm256_max_ps(x, *(v8sf *)_ps256_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
- fx = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_LOG2EF);
- fx = _mm256_add_ps(fx, *(v8sf*)_ps256_0p5);
+ fx = _mm256_mul_ps(x, *(v8sf *)_ps256_cephes_LOG2EF);
+ fx = _mm256_add_ps(fx, *(v8sf *)_ps256_0p5);
/* how to perform a floorf with SSE: just below */
- //imm0 = _mm256_cvttps_epi32(fx);
- //tmp = _mm256_cvtepi32_ps(imm0);
-
+ // imm0 = _mm256_cvttps_epi32(fx);
+ // tmp = _mm256_cvtepi32_ps(imm0);
+
tmp = _mm256_floor_ps(fx);
/* if greater, subtract 1 */
- //v8sf mask = _mm256_cmpgt_ps(tmp, fx);
- v8sf mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS);
+ // v8sf mask = _mm256_cmpgt_ps(tmp, fx);
+ v8sf mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS);
mask = _mm256_and_ps(mask, one);
fx = _mm256_sub_ps(tmp, mask);
- tmp = _mm256_mul_ps(fx, *(v8sf*)_ps256_cephes_exp_C1);
- v8sf z = _mm256_mul_ps(fx, *(v8sf*)_ps256_cephes_exp_C2);
+ tmp = _mm256_mul_ps(fx, *(v8sf *)_ps256_cephes_exp_C1);
+ v8sf z = _mm256_mul_ps(fx, *(v8sf *)_ps256_cephes_exp_C2);
x = _mm256_sub_ps(x, tmp);
x = _mm256_sub_ps(x, z);
- z = _mm256_mul_ps(x,x);
-
- v8sf y = *(v8sf*)_ps256_cephes_exp_p0;
+ z = _mm256_mul_ps(x, x);
+
+ v8sf y = *(v8sf *)_ps256_cephes_exp_p0;
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p1);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p1);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p2);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p2);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p3);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p3);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p4);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p4);
y = _mm256_mul_ps(y, x);
- y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p5);
+ y = _mm256_add_ps(y, *(v8sf *)_ps256_cephes_exp_p5);
y = _mm256_mul_ps(y, z);
y = _mm256_add_ps(y, x);
y = _mm256_add_ps(y, one);
@@ -296,7 +299,7 @@ v8sf exp256_ps(v8sf x) {
/* build 2^n */
imm0 = _mm256_cvttps_epi32(fx);
// another two AVX2 instructions
- imm0 = avx2_mm256_add_epi32(imm0, *(v8si*)_pi32_256_0x7f);
+ imm0 = avx2_mm256_add_epi32(imm0, *(v8si *)_pi32_256_0x7f);
imm0 = avx2_mm256_slli_epi32(imm0, 23);
v8sf pow2n = _mm256_castsi256_ps(imm0);
y = _mm256_mul_ps(y, pow2n);
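
For readers unfamiliar with this routine: exp256_ps follows the classic Cephes scheme, splitting exp(x) into 2^n * exp(g) with n = round(x * log2(e)) and |g| <= ln(2)/2, then evaluating a degree-5 polynomial for exp(g) and restoring the 2^n factor through the float exponent bits. A scalar sketch of the same computation (illustrative only, not part of the patch; the literal constants below are intended to mirror the cephes_exp_* / exp_hi / exp_lo values defined earlier in this file):

#include <cmath>

// Scalar reference for the range reduction performed by exp256_ps above.
static float exp_ref(float x) {
  x = std::fmin(x, 88.3762626647949f);                      // exp_hi
  x = std::fmax(x, -88.3762626647949f);                     // exp_lo
  float n = std::floor(x * 1.44269504088896341f + 0.5f);    // round(x * log2(e))
  float g = x - n * 0.693359375f - n * (-2.12194440e-4f);   // x - n*ln(2), split for accuracy
  float y = 1.9875691500E-4f;                               // cephes_exp_p0 .. p5
  y = y * g + 1.3981999507E-3f;
  y = y * g + 8.3334519073E-3f;
  y = y * g + 4.1665795894E-2f;
  y = y * g + 1.6666665459E-1f;
  y = y * g + 5.0000001201E-1f;
  y = y * g * g + g + 1.0f;                                 // polynomial approximation of exp(g)
  return std::ldexp(y, static_cast<int>(n));                // multiply by 2^n via the exponent bits
}
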
@@ -307,13 +310,12 @@ _PS256_CONST(minus_cephes_DP1, -0.78515625);
_PS256_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
_PS256_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
_PS256_CONST(sincof_p0, -1.9515295891E-4);
-_PS256_CONST(sincof_p1, 8.3321608736E-3);
+_PS256_CONST(sincof_p1, 8.3321608736E-3);
_PS256_CONST(sincof_p2, -1.6666654611E-1);
-_PS256_CONST(coscof_p0, 2.443315711809948E-005);
+_PS256_CONST(coscof_p0, 2.443315711809948E-005);
_PS256_CONST(coscof_p1, -1.388731625493765E-003);
-_PS256_CONST(coscof_p2, 4.166664568298827E-002);
-_PS256_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
-
+_PS256_CONST(coscof_p2, 4.166664568298827E-002);
+_PS256_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
/* evaluation of 8 sines at once using AVX intrinsics
@@ -327,7 +329,7 @@ _PS256_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
surprising but correct result.
*/
-v8sf sin256_ps(v8sf x) { // any x
+v8sf sin256_ps(v8sf x) { // any x
v8sf xmm1, xmm2 = _mm256_setzero_ps(), xmm3, sign_bit, y;
v8si imm0, imm2;
@@ -338,78 +340,78 @@ v8sf sin256_ps(v8sf x) { // any x
sign_bit = x;
/* take the absolute value */
- x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_sign_mask);
+ x = _mm256_and_ps(x, *(v8sf *)_ps256_inv_sign_mask);
/* extract the sign bit (upper one) */
- sign_bit = _mm256_and_ps(sign_bit, *(v8sf*)_ps256_sign_mask);
-
+ sign_bit = _mm256_and_ps(sign_bit, *(v8sf *)_ps256_sign_mask);
+
/* scale by 4/Pi */
- y = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_FOPI);
+ y = _mm256_mul_ps(x, *(v8sf *)_ps256_cephes_FOPI);
- /*
- Here we start a series of integer operations, which are in the
- realm of AVX2.
- If we don't have AVX, let's perform them using SSE2 directives
- */
+/*
+ Here we start a series of integer operations, which are in the
+ realm of AVX2.
+ If we don't have AVX, let's perform them using SSE2 directives
+*/
#ifdef __AVX2__
/* store the integer part of y in mm0 */
imm2 = _mm256_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
// another two AVX2 instruction
- imm2 = avx2_mm256_add_epi32(imm2, *(v8si*)_pi32_256_1);
- imm2 = avx2_mm256_and_si256(imm2, *(v8si*)_pi32_256_inv1);
+ imm2 = avx2_mm256_add_epi32(imm2, *(v8si *)_pi32_256_1);
+ imm2 = avx2_mm256_and_si256(imm2, *(v8si *)_pi32_256_inv1);
y = _mm256_cvtepi32_ps(imm2);
/* get the swap sign flag */
- imm0 = avx2_mm256_and_si256(imm2, *(v8si*)_pi32_256_4);
+ imm0 = avx2_mm256_and_si256(imm2, *(v8si *)_pi32_256_4);
imm0 = avx2_mm256_slli_epi32(imm0, 29);
- /* get the polynom selection mask
+ /* get the polynom selection mask
there is one polynom for 0 <= x <= Pi/4
and another one for Pi/4 < x <= Pi/2
#include "hl_functions.h"
namespace hppl {
- extern __m256 exp(__m256 a);
+extern __m256 exp(__m256 a);
- __m256 relu(const __m256 a) {
- __m256 tmp = _mm256_set1_ps(0.0f);
- return _mm256_max_ps(a, tmp);
- }
+__m256 relu(const __m256 a) {
+ __m256 tmp = _mm256_set1_ps(0.0f);
+ return _mm256_max_ps(a, tmp);
+}
- __m256 sigmoid(const __m256 a) {
- __m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX);
- __m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN);
- __m256 tmp = _mm256_max_ps(a, min);
- tmp = _mm256_min_ps(tmp, max);
- tmp = _mm256_sub_ps(_mm256_set1_ps(0.0f), tmp);
- tmp = exp(tmp);
- tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp);
- tmp = _mm256_div_ps(_mm256_set1_ps(1.0f), tmp);
- return tmp;
- }
+__m256 sigmoid(const __m256 a) {
+ __m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX);
+ __m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN);
+ __m256 tmp = _mm256_max_ps(a, min);
+ tmp = _mm256_min_ps(tmp, max);
+ tmp = _mm256_sub_ps(_mm256_set1_ps(0.0f), tmp);
+ tmp = exp(tmp);
+ tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp);
+ tmp = _mm256_div_ps(_mm256_set1_ps(1.0f), tmp);
+ return tmp;
+}
- __m256 tanh(const __m256 a) {
- __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
- tmp = exp(tmp);
- return _mm256_sub_ps(
- _mm256_div_ps(_mm256_set1_ps(2.0f),
- _mm256_add_ps(_mm256_set1_ps(1.0f), tmp)), _mm256_set1_ps(1.0f));
- }
+__m256 tanh(const __m256 a) {
+ __m256 max = _mm256_set1_ps(EXP_MAX_INPUT);
+ __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
+ tmp = _mm256_min_ps(tmp, max);
+ tmp = exp(tmp);
+ return _mm256_sub_ps(_mm256_div_ps(_mm256_set1_ps(2.0f),
+ _mm256_add_ps(_mm256_set1_ps(1.0f), tmp)),
+ _mm256_set1_ps(1.0f));
+}
- __m256 linear(const __m256 a) {
- return a;
- }
+__m256 linear(const __m256 a) { return a; }
- __m256 relu(const __m256 a, const __m256 b) {
- return _mm256_mul_ps(a,
+__m256 relu(const __m256 a, const __m256 b) {
+ return _mm256_mul_ps(
+ a,
_mm256_and_ps(_mm256_cmp_ps(b, _mm256_set1_ps(0.0f), _CMP_GT_OS),
- _mm256_set1_ps(1.0f)));
- }
+ _mm256_set1_ps(1.0f)));
+}
- __m256 sigmoid(const __m256 a, const __m256 b) {
- return _mm256_mul_ps(_mm256_mul_ps(a, b),
- _mm256_sub_ps(_mm256_set1_ps(1.0f), b));
- }
+__m256 sigmoid(const __m256 a, const __m256 b) {
+ return _mm256_mul_ps(_mm256_mul_ps(a, b),
+ _mm256_sub_ps(_mm256_set1_ps(1.0f), b));
+}
- __m256 tanh(const __m256 a, const __m256 b) {
- return _mm256_mul_ps(a,
- _mm256_sub_ps(_mm256_set1_ps(1.0f), _mm256_mul_ps(b, b)));
- }
+__m256 tanh(const __m256 a, const __m256 b) {
+ return _mm256_mul_ps(
+ a, _mm256_sub_ps(_mm256_set1_ps(1.0f), _mm256_mul_ps(b, b)));
+}
- __m256 linear(const __m256 a, const __m256 b) {
- return a;
- }
+__m256 linear(const __m256 a, const __m256 b) { return a; }
} // namespace hppl
diff --git a/paddle/cuda/src/hl_cpu_functions.cc b/paddle/cuda/src/hl_cpu_functions.cc
index 3fd6b278d053714a6b6f0fe33831a32e2c64e3ae..af00f352e536bf342e15315d1f6804225b87eb0b 100644
--- a/paddle/cuda/src/hl_cpu_functions.cc
+++ b/paddle/cuda/src/hl_cpu_functions.cc
@@ -12,44 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include <math.h>
#include "hl_functions.h"
namespace hppl {
- real relu(const real a) {
- return a > 0.0f ? a : 0.0f;
- }
+real relu(const real a) { return a > 0.0f ? a : 0.0f; }
- real sigmoid(const real a) {
- const real min = SIGMOID_THRESHOLD_MIN;
- const real max = SIGMOID_THRESHOLD_MAX;
- real tmp = (a < min) ? min : ((a > max) ? max : a);
- return 1.0 / (1.0 + exp(-tmp));
- }
+real sigmoid(const real a) {
+ const real min = SIGMOID_THRESHOLD_MIN;
+ const real max = SIGMOID_THRESHOLD_MAX;
+ real tmp = (a < min) ? min : ((a > max) ? max : a);
+ return 1.0 / (1.0 + exp(-tmp));
+}
- real tanh(const real a) {
- return (2.0 / (1.0 + exp(-2.0*a))) - 1.0;
- }
+real tanh(const real a) {
+ real tmp = -2.0 * a;
+ tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+ return (2.0 / (1.0 + exp(tmp))) - 1.0;
+}
- real linear(const real a) {
- return a;
- }
+real linear(const real a) { return a; }
- real relu(const real a, const real b) {
- return a * (b > 0.0f ? 1.0f : 0.0f);
- }
+real relu(const real a, const real b) { return a * (b > 0.0f ? 1.0f : 0.0f); }
- real sigmoid(const real a, const real b) {
- return a * b * (1 - b);
- }
+real sigmoid(const real a, const real b) { return a * b * (1 - b); }
- real tanh(const real a, const real b) {
- return a * (1.0f - b * b);
- }
+real tanh(const real a, const real b) { return a * (1.0f - b * b); }
- real linear(const real a, const real b) {
- return a;
- }
+real linear(const real a, const real b) { return a; }
} // namespace hppl
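
The new EXP_MAX_INPUT clamp in both the AVX and the scalar tanh is a numerical-stability guard: tanh(a) = 2 / (1 + exp(-2a)) - 1, and for strongly negative a the argument -2a would otherwise be handed to exp() far outside its safe range. Capping it lets tanh saturate cleanly at the boundary. A minimal scalar sketch, not part of the patch (EXP_MAX_INPUT is defined elsewhere in the headers; the number below is only a stand-in):

#include <cmath>

static const float kExpMaxInput = 40.0f;  // stand-in for EXP_MAX_INPUT

// Same formula as hppl::tanh above, with the overflow guard spelled out.
float tanh_clamped(float a) {
  float t = -2.0f * a;
  t = (t > kExpMaxInput) ? kExpMaxInput : t;  // keep exp() in a safe range
  return 2.0f / (1.0f + std::exp(t)) - 1.0f;  // saturates near -1 for large negative a
}
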
diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu
index baa2fb0d27d749197c10645ff976851ddc38c84f..ae387a8bc0e0791995810df9e5f2556264d869b1 100644
--- a/paddle/cuda/src/hl_cuda_cnn.cu
+++ b/paddle/cuda/src/hl_cuda_cnn.cu
@@ -152,7 +152,7 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
const int ksizeW, const int ksizeH,
const int strideH, const int strideW,
const int offsetH, const int offsetW,
- real* tgtData) {
+ real* tgtData, const int tgtStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
@@ -173,7 +173,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
maxval = inputData[h * width + w];
}
}
- tgtData[index] = maxval;
+ int tgtIndex = index % (pooledW * pooledH * channels) +
+ frameNum * tgtStride;
+ tgtData[tgtIndex] = maxval;
}
}
@@ -184,7 +186,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
- real* tgtData) {
+ real* tgtData, const int tgtStride) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
@@ -194,7 +196,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels, height, width,
pooledH, pooledW, sizeX, sizeY, strideH, strideW,
- paddingH, paddingW, tgtData);
+ paddingH, paddingW, tgtData, tgtStride);
CHECK_SYNC("hl_maxpool_forward failed");
}
@@ -207,7 +209,7 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
const int strideH, const int strideW,
const int padH, const int padW,
real scaleA, real scaleB,
- real* targetGrad) {
+ real* targetGrad, const int outStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
// find out the local index
@@ -223,8 +225,8 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0;
real input = inputData[index];
- outData += (frameNum * channels + offsetC) * pooledH * pooledW;
- outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
+ outData += (frameNum * outStride + offsetC * pooledH * pooledW);
+ outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
if (input == outData[ph * pooledW + pw]) {
@@ -246,7 +248,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
- real* targetGrad) {
+ real* targetGrad, const int outStride) {
int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
@@ -257,7 +259,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
strideH, strideW,
paddingH, paddingW,
scaleA, scaleB,
- targetGrad);
+ targetGrad, outStride);
CHECK_SYNC("hl_maxpool_backward");
}
@@ -268,7 +270,7 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int padH, const int padW,
- real* tgtData) {
+ real* tgtData, const int tgtStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
@@ -293,7 +295,9 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
aveval += inputData[h * width + w];
}
}
- tgtData[index] = aveval / pool_size;
+ int tgtIndex = index % (pooledW * pooledH * channels) +
+ frameNum * tgtStride;
+ tgtData[tgtIndex] = aveval / pool_size;
}
}
@@ -303,14 +307,15 @@ void hl_avgpool_forward(const int frameCnt, const real* inputData,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
- const int paddingH, const int paddingW, real* tgtData) {
+ const int paddingH, const int paddingW,
+ real* tgtData, const int tgtStride) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels,
height, width, pooledH, pooledW,
sizeX, sizeY, strideH, strideW,
- paddingH, paddingW, tgtData);
+ paddingH, paddingW, tgtData, tgtStride);
CHECK_SYNC("hl_avgpool_forward failed");
}
@@ -322,7 +327,7 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
const int strideH, const int strideW,
const int padH, const int padW,
real scaleA, real scaleB,
- real* tgtGrad) {
+ real* tgtGrad, const int outStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int offsetW = index % width + padW;
@@ -335,7 +340,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0;
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0;
- outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
+ outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
+
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
@@ -360,7 +366,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
- real* backGrad) {
+ real* backGrad, const int outStride) {
int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
@@ -370,7 +376,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
strideH, strideW,
paddingH, paddingW,
scaleA, scaleB,
- backGrad);
+ backGrad, outStride);
CHECK_SYNC("hl_avgpool_backward failed");
}
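
The tgtStride/outStride parameters threaded through the pooling kernels decouple the per-sample offset from the packed channels * pooledH * pooledW size, so pooled results can be written into (or gradients read from) one row of a wider matrix. A small sketch of the index arithmetic, mirroring tgtIndex in KeMaxPoolForward and KeAvgPoolForward (illustrative only, not part of the patch):

// Destination index for one pooled element when each sample occupies
// `tgtStride` entries in the output matrix (tgtStride >= channels*pooledH*pooledW).
inline int pooled_target_index(int flatIndex, int channels, int pooledH,
                               int pooledW, int tgtStride) {
  const int perSample = channels * pooledH * pooledW;
  const int sample = flatIndex / perSample;   // frameNum in the kernels
  const int within = flatIndex % perSample;   // position inside this sample
  return sample * tgtStride + within;
}
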
@@ -522,7 +528,7 @@ void hl_CMRNorm_backward(size_t frameCnt, const real* inV,
size_t height, size_t width, size_t sizeX,
real alpha, real beta) {
size_t threadsNum = frameCnt * height * width;
- size_t blocksX = (threadsNum + 1024 -1) / 1024;
+ size_t blocksX = (threadsNum + 1024 - 1) / 1024;
size_t blocksY = 1;
dim3 threads(1024, 1);
dim3 grid(blocksX, blocksY);
@@ -532,6 +538,138 @@ void hl_CMRNorm_backward(size_t frameCnt, const real* inV,
CHECK_SYNC("hl_CMRNorm_backward");
}
+__global__ void KeBilinearInterpFw(const real* in,
+ const size_t inImgH,
+ const size_t inImgW,
+ const size_t inputH,
+ const size_t inputW,
+ real* out,
+ const size_t outImgH,
+ const size_t outImgW,
+ const size_t outputH,
+ const size_t outputW,
+ const size_t numChannels,
+ const real ratioH,
+ const real ratioW) {
+ int nthreads = outputH * outputW;
+ int tid = blockIdx.x * blockDim.x + threadIdx.x;
+ if (tid < nthreads) {
+ int outIdH = tid / outputW;
+ int outIdW = tid % outputW;
+ int inImgSize = inputW / numChannels;
+ int outImgSize = outputW / numChannels;
+ int channelId = outIdW / outImgSize;
+
+ int outImgIdy = (outIdW % outImgSize) / outImgW;
+ int inImgIdy = ratioH * outImgIdy;
+ int hId = (inImgIdy < inImgH - 1) ? 1 : 0;
+ real h1lambda = ratioH * outImgIdy - inImgIdy;
+ real h2lambda = 1.f - h1lambda;
+
+ int outImgIdx = tid % outImgW;
+ int inImgIdx = ratioW * outImgIdx;
+ int wId = (inImgIdx < inImgW - 1) ? 1 : 0;
+ real w1lambda = ratioW * outImgIdx - inImgIdx;
+ real w2lambda = 1.f - w1lambda;
+
+ const real* inPos =
+ &in[outIdH * inputW + channelId * inImgSize + inImgIdy * inImgW + inImgIdx];
+
+ // bilinear interpolation
+ out[outIdH * outputW + outIdW] =
+ h2lambda * (w2lambda * inPos[0] + w1lambda * inPos[wId]) +
+ h1lambda * (w2lambda * inPos[hId * inImgW] + w1lambda * inPos[hId * inImgW + wId]);
+ }
+}
+
+void hl_bilinear_forward(const real* inData,
+ const size_t inImgH,
+ const size_t inImgW,
+ const size_t inputH,
+ const size_t inputW,
+ real* outData,
+ const size_t outImgH,
+ const size_t outImgW,
+ const size_t outputH,
+ const size_t outputW,
+ const size_t numChannels,
+ const real ratioH,
+ const real ratioW) {
+ int threadNum = outputH * outputW;
+ int blocks = (threadNum + 1024 - 1) / 1024;
+
+ KeBilinearInterpFw<<< blocks, 1024, 0, STREAM_DEFAULT>>>(
+ inData, inImgH, inImgW, inputH, inputW, outData, outImgH,
+ outImgW, outputH, outputW, numChannels, ratioH, ratioW);
+ CHECK_SYNC("hl_bilinear_forward failed");
+}
+
+__global__ void KeBilinearInterpBw(real* in,
+ const size_t inImgH,
+ const size_t inImgW,
+ const size_t inputH,
+ const size_t inputW,
+ const real* out,
+ const size_t outImgH,
+ const size_t outImgW,
+ const size_t outputH,
+ const size_t outputW,
+ const size_t numChannels,
+ const real ratioH,
+ const real ratioW) {
+ int nthreads = outputH * outputW;
+ int tid = blockIdx.x * blockDim.x + threadIdx.x;
+ if (tid < nthreads) {
+ int outIdH = tid / outputW;
+ int outIdW = tid % outputW;
+ int inImgSize = inputW / numChannels;
+ int outImgSize = outputW / numChannels;
+ int channelId = outIdW / outImgSize;
+
+ int outImgIdy = (outIdW % outImgSize) / outImgW;
+ int inImgIdy = ratioH * outImgIdy;
+ int hId = (inImgIdy < inImgH - 1) ? 1 : 0;
+ real h1lambda = ratioH * outImgIdy - inImgIdy;
+ real h2lambda = 1.f - h1lambda;
+
+ int outImgIdx = tid % outImgW;
+ int inImgIdx = ratioW * outImgIdx;
+ int wId = (inImgIdx < inImgW - 1) ? 1 : 0;
+ real w1lambda = ratioW * outImgIdx - inImgIdx;
+ real w2lambda = 1.f - w1lambda;
+
+ real* inPos =
+ &in[outIdH * inputW + channelId * inImgSize + inImgIdy * inImgW + inImgIdx];
+ const real* outPos = &out[outIdH * outputW + outIdW];
+ atomicAdd(&inPos[0], h2lambda * w2lambda * outPos[0]);
+ atomicAdd(&inPos[wId], h2lambda * w1lambda * outPos[0]);
+ atomicAdd(&inPos[hId * inImgW], h1lambda * w2lambda * outPos[0]);
+ atomicAdd(&inPos[hId * inImgW + wId], h1lambda * w1lambda * outPos[0]);
+ }
+}
+
+void hl_bilinear_backward(real* inGrad,
+ const size_t inImgH,
+ const size_t inImgW,
+ const size_t inputH,
+ const size_t inputW,
+ const real* outGrad,
+ const size_t outImgH,
+ const size_t outImgW,
+ const size_t outputH,
+ const size_t outputW,
+ const size_t numChannels,
+ const real ratioH,
+ const real ratioW) {
+ int threadNum = outputH * outputW;
+ int blocks = (threadNum + 1024 - 1) / 1024;
+
+ KeBilinearInterpBw<<< blocks, 1024, 0, STREAM_DEFAULT>>>(
+ inGrad, inImgH, inImgW, inputH, inputW, outGrad, outImgH,
+ outImgW, outputH, outputW, numChannels, ratioH, ratioW);
+ CHECK_SYNC("hl_bilinear_backward failed");
+}
+
__global__ void maxoutFpCompute(size_t nthreads, const real * inData,
real * outData, int* idData,
size_t size, size_t featLen, size_t groups) {
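
The newly added KeBilinearInterpFw/KeBilinearInterpBw kernels and their hl_bilinear_forward/hl_bilinear_backward launchers implement standard bilinear image interpolation: each output pixel is a weighted blend of the four nearest input pixels, with weights given by the fractional parts of ratioH * outY and ratioW * outX, and the backward pass scatters the output gradient back to the same four positions via atomicAdd. A minimal single-channel CPU sketch of the same weighting (names are illustrative, not part of the patch):

    // Bilinearly sample a row-major, single-channel image at the source
    // position that corresponds to output pixel (outY, outX); mirrors the
    // index/weight arithmetic of KeBilinearInterpFw.
    real bilinear_sample(const real* img, int imgH, int imgW,
                         real ratioH, real ratioW, int outY, int outX) {
      int inY = static_cast<int>(ratioH * outY);     // top-left source row
      int inX = static_cast<int>(ratioW * outX);     // top-left source column
      int dy = (inY < imgH - 1) ? 1 : 0;             // stay inside the image
      int dx = (inX < imgW - 1) ? 1 : 0;
      real ly = ratioH * outY - inY, hy = 1.f - ly;  // vertical weights
      real lx = ratioW * outX - inX, hx = 1.f - lx;  // horizontal weights
      const real* p = &img[inY * imgW + inX];
      return hy * (hx * p[0] + lx * p[dx]) +
             ly * (hx * p[dy * imgW] + lx * p[dy * imgW + dx]);
    }
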
diff --git a/paddle/cuda/src/hl_cuda_cublas.cc b/paddle/cuda/src/hl_cuda_cublas.cc
index 724ea490e8ea9a8b2a1be39f3e0037df6e49882f..e8ba232d44b3f66254d4749d4abbcfbe46d1fd0e 100644
--- a/paddle/cuda/src/hl_cuda_cublas.cc
+++ b/paddle/cuda/src/hl_cuda_cublas.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include
#include
#include "hl_cuda.h"
@@ -24,7 +23,7 @@ limitations under the License. */
namespace dynload {
std::once_flag cublas_dso_flag;
-void* cublas_dso_handle = nullptr;
+void *cublas_dso_handle = nullptr;
/**
* The following macro definition can generate structs
@@ -34,31 +33,30 @@ void* cublas_dso_handle = nullptr;
* note: default dynamic linked libs
*/
#ifdef PADDLE_USE_DSO
-#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
- struct DynLoad__##__name { \
- template <typename... Args> \
- cublasStatus_t operator()(Args... args) { \
- typedef cublasStatus_t (*cublasFunc)(Args...); \
- std::call_once(cublas_dso_flag, GetCublasDsoHandle, \
- &cublas_dso_handle); \
- void* p_##__name = dlsym(cublas_dso_handle, #__name); \
- return reinterpret_cast<cublasFunc>(p_##__name)(args...); \
- } \
+#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
+ struct DynLoad__##__name { \
+ template <typename... Args> \
+ cublasStatus_t operator()(Args... args) { \
+ typedef cublasStatus_t (*cublasFunc)(Args...); \
+ std::call_once(cublas_dso_flag, GetCublasDsoHandle, &cublas_dso_handle); \
+ void *p_##__name = dlsym(cublas_dso_handle, #__name); \
+ return reinterpret_cast<cublasFunc>(p_##__name)(args...); \
+ } \
} __name; // struct DynLoad__##__name
#else
-#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
- struct DynLoad__##__name { \
- template <typename... Args> \
- cublasStatus_t operator()(Args... args) { \
- return __name(args...); \
- } \
+#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
+ struct DynLoad__##__name { \
+ template <typename... Args> \
+ cublasStatus_t operator()(Args... args) { \
+ return __name(args...); \
+ } \
} __name; // struct DynLoad__##__name
#endif
-#define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) \
- DYNAMIC_LOAD_CUBLAS_WRAP(__name)
+#define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) DYNAMIC_LOAD_CUBLAS_WRAP(__name)
// include all needed cublas functions in HPPL
+// clang-format off
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
__macro(cublasSgemv) \
__macro(cublasDgemv) \
@@ -78,6 +76,8 @@ DYNAMIC_LOAD_CUBLAS_WRAP(cublasCgemmBatched)
DYNAMIC_LOAD_CUBLAS_WRAP(cublasZgemmBatched)
DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetrfBatched)
DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetriBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetrfBatched)
+DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetriBatched)
CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
#undef DYNAMIC_LOAD_CUBLAS_WRAP
@@ -86,41 +86,41 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
} /* namespace dynload */
-
+// clang-format on
#ifndef PADDLE_TYPE_DOUBLE
-#define CUBLAS_GEAM dynload::cublasSgeam
-#define CUBLAS_GEMV dynload::cublasSgemv
-#define CUBLAS_GEMM dynload::cublasSgemm
-#define CUBLAS_GETRF dynload::cublasSgetrfBatched
-#define CUBLAS_GETRI dynload::cublasSgetriBatched
+#define CUBLAS_GEAM dynload::cublasSgeam
+#define CUBLAS_GEMV dynload::cublasSgemv
+#define CUBLAS_GEMM dynload::cublasSgemm
+#define CUBLAS_GETRF dynload::cublasSgetrfBatched
+#define CUBLAS_GETRI dynload::cublasSgetriBatched
#else
-#define CUBLAS_GEAM dynload::cublasDgeam
-#define CUBLAS_GEMV dynload::cublasDgemv
-#define CUBLAS_GEMM dynload::cublasDgemm
-#define CUBLAS_GETRF dynload::cublasDgetrfBatched
-#define CUBLAS_GETRI dynload::cublasDgetriBatched
+#define CUBLAS_GEAM dynload::cublasDgeam
+#define CUBLAS_GEMV dynload::cublasDgemv
+#define CUBLAS_GEMM dynload::cublasDgemm
+#define CUBLAS_GETRF dynload::cublasDgetrfBatched
+#define CUBLAS_GETRI dynload::cublasDgetriBatched
#endif
-const char* hl_cublas_get_error_string(cublasStatus_t status) {
- switch(status) {
- case CUBLAS_STATUS_NOT_INITIALIZED:
- return "[cublas status]: not initialized";
- case CUBLAS_STATUS_ALLOC_FAILED:
- return "[cublas status]: allocate failed";
- case CUBLAS_STATUS_INVALID_VALUE:
- return "[cublas status]: invalid value";
- case CUBLAS_STATUS_ARCH_MISMATCH:
- return "[cublas status]: arch mismatch";
- case CUBLAS_STATUS_MAPPING_ERROR:
- return "[cublas status]: mapping error";
- case CUBLAS_STATUS_EXECUTION_FAILED:
- return "[cublas status]: execution failed";
- case CUBLAS_STATUS_INTERNAL_ERROR:
- return "[cublas status]: internal error";
- case CUBLAS_STATUS_SUCCESS:
- return "[cublas status]: success";
- default:
- return "[cublas status]: unknown error";
+const char *hl_cublas_get_error_string(cublasStatus_t status) {
+ switch (status) {
+ case CUBLAS_STATUS_NOT_INITIALIZED:
+ return "[cublas status]: not initialized";
+ case CUBLAS_STATUS_ALLOC_FAILED:
+ return "[cublas status]: allocate failed";
+ case CUBLAS_STATUS_INVALID_VALUE:
+ return "[cublas status]: invalid value";
+ case CUBLAS_STATUS_ARCH_MISMATCH:
+ return "[cublas status]: arch mismatch";
+ case CUBLAS_STATUS_MAPPING_ERROR:
+ return "[cublas status]: mapping error";
+ case CUBLAS_STATUS_EXECUTION_FAILED:
+ return "[cublas status]: execution failed";
+ case CUBLAS_STATUS_INTERNAL_ERROR:
+ return "[cublas status]: internal error";
+ case CUBLAS_STATUS_SUCCESS:
+ return "[cublas status]: success";
+ default:
+ return "[cublas status]: unknown error";
}
}
@@ -129,27 +129,21 @@ const char* hl_cublas_get_error_string(cublasStatus_t status) {
* support << operator for more details error info.
*/
cublasStatus_t g_cublasStat;
-#define CHECK_CUBLAS(cublas_func) \
- g_cublasStat = cublas_func; \
- CHECK_EQ(CUBLAS_STATUS_SUCCESS, g_cublasStat) \
- << "Cublas Error: " \
- << hl_cublas_get_error_string(g_cublasStat) \
- << " "
+#define CHECK_CUBLAS(cublas_func) \
+ g_cublasStat = cublas_func; \
+ CHECK_EQ(CUBLAS_STATUS_SUCCESS, g_cublasStat) \
+ << "Cublas Error: " << hl_cublas_get_error_string(g_cublasStat) << " "
void hl_cublas_init(cublasHandle_t *cublas_handle, cudaStream_t stream) {
CHECK_CUBLAS(dynload::cublasCreate(cublas_handle))
- << "[cublas init] Cublas create handle faild!";
+ << "[cublas init] Cublas create handle faild!";
CHECK_CUBLAS(dynload::cublasSetStream(*cublas_handle, stream))
- << "[cublas init] Cublas set stream faild!";
+ << "[cublas init] Cublas set stream faild!";
}
-void hl_matrix_transpose(real *A_d,
- real *C_d,
- int dimM,
- int dimN,
- int lda,
- int ldc) {
+void hl_matrix_transpose(
+ real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc) {
real alpha = 1.0;
real beta = 0.0;
@@ -157,11 +151,18 @@ void hl_matrix_transpose(real *A_d,
CHECK_NOTNULL(C_d);
CHECK_CUBLAS(CUBLAS_GEAM(t_resource.handle,
- CUBLAS_OP_T, CUBLAS_OP_N,
- dimM, dimN,
- &alpha, A_d, lda,
- &beta, nullptr, dimM,
- C_d, ldc));
+ CUBLAS_OP_T,
+ CUBLAS_OP_N,
+ dimM,
+ dimN,
+ &alpha,
+ A_d,
+ lda,
+ &beta,
+ nullptr,
+ dimM,
+ C_d,
+ ldc));
CHECK_SYNC("hl_matrix_transpose failed");
}
@@ -179,21 +180,20 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
real **inout_d = (real **)hl_malloc_device(sizeof(real *));
hl_memcpy(inout_d, inout_h, sizeof(real *));
- int *pivot_d = (int *)hl_malloc_device(dimN*sizeof(int));
+ int *pivot_d = (int *)hl_malloc_device(dimN * sizeof(int));
int *info_d = (int *)t_resource.gpu_mem;
/* Note: cublasSgetrfBatched is used to calculate a number of
small-sized matrices. There may be a better way to reconstruct
the API for better performance.
*/
- CHECK_CUBLAS(CUBLAS_GETRF(t_resource.handle,
- dimN, inout_d, lda, pivot_d,
- info_d, 1));
+ CHECK_CUBLAS(
+ CUBLAS_GETRF(t_resource.handle, dimN, inout_d, lda, pivot_d, info_d, 1));
- int info_h;
+ int info_h;
hl_memcpy(&info_h, info_d, sizeof(int));
if (info_h != 0) {
- LOG(FATAL) << "Factorization of matrix failed: matrix may be singular.\n";
+ LOG(FATAL) << "Factorization of matrix failed: matrix may be singular.\n";
}
/* Step 2: Compute the inverse of the matrix given its LU decomposition */
@@ -202,27 +202,40 @@ void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc) {
hl_memcpy(out_d, out_h, sizeof(real *));
CHECK_CUBLAS(CUBLAS_GETRI(t_resource.handle,
- dimN, (const real **)inout_d, lda, pivot_d,
- out_d, ldc, info_d, 1));
+ dimN,
+ (const real **)inout_d,
+ lda,
+ pivot_d,
+ out_d,
+ ldc,
+ info_d,
+ 1));
hl_memcpy(&info_h, info_d, sizeof(int));
if (info_h != 0) {
- LOG(FATAL) << "Inversion of matrix failed: matrix may be singular.\n";
+ LOG(FATAL) << "Inversion of matrix failed: matrix may be singular.\n";
}
hl_free_mem_device(inout_d);
hl_free_mem_device(pivot_d);
hl_free_mem_device(out_d);
-
+
CHECK_SYNC("hl_matrix_inverse failed");
}
-void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+void hl_matrix_mul(real *A_d,
+ hl_trans_op_t transa,
+ real *B_d,
+ hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta,
- int lda, int ldb, int ldc) {
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta,
+ int lda,
+ int ldb,
+ int ldc) {
CHECK_NOTNULL(A_d);
CHECK_NOTNULL(B_d);
CHECK_NOTNULL(C_d);
@@ -230,8 +243,8 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
if (dimN == 1 && dimM != 1 && dimK != 1 && transb == HPPL_OP_N) {
int m = (transa == HPPL_OP_N) ? dimM : dimK;
int n = (transa == HPPL_OP_N) ? dimK : dimM;
- hl_matrix_mul_vector(A_d, transa, B_d, C_d, m, n,
- alpha, beta, lda, ldb, ldc);
+ hl_matrix_mul_vector(
+ A_d, transa, B_d, C_d, m, n, alpha, beta, lda, ldb, ldc);
return;
}
@@ -239,8 +252,7 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
int m = (transb == HPPL_OP_N) ? dimK : dimN;
int n = (transb == HPPL_OP_N) ? dimN : dimK;
hl_trans_op_t trans = (transb == HPPL_OP_N) ? HPPL_OP_T : HPPL_OP_N;
- hl_matrix_mul_vector(B_d, trans, A_d, C_d, m, n,
- alpha, beta, ldb, 1, 1);
+ hl_matrix_mul_vector(B_d, trans, A_d, C_d, m, n, alpha, beta, ldb, 1, 1);
return;
}
@@ -249,26 +261,47 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
stat = CUBLAS_GEMM(t_resource.handle,
CUBLAS_OP_N,
CUBLAS_OP_N,
- dimN, dimM, dimK,
- &alpha, B_d, ldb,
- A_d, lda,
- &beta, C_d, ldc);
+ dimN,
+ dimM,
+ dimK,
+ &alpha,
+ B_d,
+ ldb,
+ A_d,
+ lda,
+ &beta,
+ C_d,
+ ldc);
} else if ((HPPL_OP_T == transa) && (HPPL_OP_N == transb)) {
stat = CUBLAS_GEMM(t_resource.handle,
CUBLAS_OP_N,
CUBLAS_OP_T,
- dimN, dimM, dimK,
- &alpha, B_d, ldb,
- A_d, lda,
- &beta, C_d, ldc);
+ dimN,
+ dimM,
+ dimK,
+ &alpha,
+ B_d,
+ ldb,
+ A_d,
+ lda,
+ &beta,
+ C_d,
+ ldc);
} else if ((HPPL_OP_N == transa) && (HPPL_OP_T == transb)) {
stat = CUBLAS_GEMM(t_resource.handle,
CUBLAS_OP_T,
CUBLAS_OP_N,
- dimN, dimM, dimK,
- &alpha, B_d, ldb,
- A_d, lda,
- &beta, C_d, ldc);
+ dimN,
+ dimM,
+ dimK,
+ &alpha,
+ B_d,
+ ldb,
+ A_d,
+ lda,
+ &beta,
+ C_d,
+ ldc);
} else {
LOG(FATAL) << "parameter transa error!";
}
@@ -276,24 +309,46 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
CHECK_SYNC("hl_matrix_mul failed");
}
-void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
- real *B_d, hl_trans_op_t transb,
+void hl_matrix_mul(real *A_d,
+ hl_trans_op_t transa,
+ real *B_d,
+ hl_trans_op_t transb,
real *C_d,
- int dimM, int dimN, int dimK,
- real alpha, real beta) {
+ int dimM,
+ int dimN,
+ int dimK,
+ real alpha,
+ real beta) {
int lda = (HPPL_OP_N == transa) ? dimK : dimM;
int ldb = (HPPL_OP_N == transb) ? dimN : dimK;
int ldc = dimN;
- hl_matrix_mul(A_d, transa, B_d, transb, C_d, dimM, dimN,
- dimK, alpha, beta, lda, ldb, ldc);
+ hl_matrix_mul(A_d,
+ transa,
+ B_d,
+ transb,
+ C_d,
+ dimM,
+ dimN,
+ dimK,
+ alpha,
+ beta,
+ lda,
+ ldb,
+ ldc);
}
-void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
- real *B_d, real *C_d,
- int dimM, int dimN,
- real alpha, real beta,
- int lda, int incb, int incc) {
+void hl_matrix_mul_vector(real *A_d,
+ hl_trans_op_t trans,
+ real *B_d,
+ real *C_d,
+ int dimM,
+ int dimN,
+ real alpha,
+ real beta,
+ int lda,
+ int incb,
+ int incc) {
CHECK_NOTNULL(A_d);
CHECK_NOTNULL(B_d);
CHECK_NOTNULL(C_d);
@@ -302,21 +357,29 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
if (HPPL_OP_N == trans) {
stat = CUBLAS_GEMV(t_resource.handle,
CUBLAS_OP_T,
- dimN, dimM,
+ dimN,
+ dimM,
&alpha,
- A_d, lda,
- B_d, incb,
+ A_d,
+ lda,
+ B_d,
+ incb,
&beta,
- C_d, incc);
+ C_d,
+ incc);
} else if (HPPL_OP_T == trans) {
stat = CUBLAS_GEMV(t_resource.handle,
CUBLAS_OP_N,
- dimN, dimM,
+ dimN,
+ dimM,
&alpha,
- A_d, lda,
- B_d, incb,
+ A_d,
+ lda,
+ B_d,
+ incb,
&beta,
- C_d, incc);
+ C_d,
+ incc);
} else {
LOG(FATAL) << "parameter transa error!";
}
@@ -325,10 +388,14 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
CHECK_SYNC("hl_matrix_mul_vector");
}
-void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
- real *B_d, real *C_d,
- int dimM, int dimN,
- real alpha, real beta) {
- hl_matrix_mul_vector(A_d, trans, B_d, C_d, dimM, dimN,
- alpha, beta, dimN, 1, 1);
+void hl_matrix_mul_vector(real *A_d,
+ hl_trans_op_t trans,
+ real *B_d,
+ real *C_d,
+ int dimM,
+ int dimN,
+ real alpha,
+ real beta) {
+ hl_matrix_mul_vector(
+ A_d, trans, B_d, C_d, dimM, dimN, alpha, beta, dimN, 1, 1);
}
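
Most of the churn in hl_cuda_cublas.cc is mechanical clang-format reflow (one argument per line); the substantive addition is the pair of cublasDgetrfBatched/cublasDgetriBatched wrappers, so that the double-precision build can resolve CUBLAS_GETRF/CUBLAS_GETRI in hl_matrix_inverse. Note that the row-major products are run on cuBLAS's column-major GEMM by passing dimN, dimM, dimK and feeding B_d before A_d, the standard row-major-on-column-major trick. A usage sketch of the simplified hl_matrix_mul overload shown above, with illustrative dimensions and pre-allocated device buffers A_d, B_d, C_d assumed:

    // Hypothetical call: C_d = 1.0 * A_d (128 x 64, row-major) * B_d (64 x 256);
    // this overload derives lda/ldb/ldc from the dimensions itself.
    hl_matrix_mul(A_d, HPPL_OP_N, B_d, HPPL_OP_N, C_d,
                  /*dimM=*/128, /*dimN=*/256, /*dimK=*/64,
                  /*alpha=*/1.0, /*beta=*/0.0);
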
diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc
index 92b28e4345c3d4d306e6ee2a7f9f50189454f951..9d4ff08a78d641896e946e9bf04590d4ba93350f 100644
--- a/paddle/cuda/src/hl_cuda_cudnn.cc
+++ b/paddle/cuda/src/hl_cuda_cudnn.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include
#include
#include "hl_cuda_cudnn.h"
@@ -22,9 +21,10 @@ limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/utils/CommandLineParser.h"
-P_DEFINE_int32(cudnn_conv_workspace_limit_in_mb, 4096,
- "Specify cuDNN max workspace limit, in units MB, "
- "4096MB=4GB by default.");
+P_DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
+ 4096,
+ "Specify cuDNN max workspace limit, in units MB, "
+ "4096MB=4GB by default.");
namespace dynload {
@@ -41,16 +41,15 @@ void* cudnn_dso_handle = nullptr;
#ifdef PADDLE_USE_DSO
-#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \
- struct DynLoad__##__name { \
- template <typename... Args> \
- auto operator()(Args... args) -> decltype(__name(args...)) { \
- using cudnn_func = decltype(__name(args...))(*)(Args...); \
- std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, \
- &cudnn_dso_handle); \
- void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
- return reinterpret_cast<cudnn_func>(p_##__name)(args...); \
- } \
+#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \
+ struct DynLoad__##__name { \
+ template <typename... Args> \
+ auto operator()(Args... args) -> decltype(__name(args...)) { \
+ using cudnn_func = decltype(__name(args...)) (*)(Args...); \
+ std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, &cudnn_dso_handle); \
+ void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
+ return reinterpret_cast<cudnn_func>(p_##__name)(args...); \
+ } \
} __name; /* struct DynLoad__##__name */
#else
@@ -69,6 +68,7 @@ void* cudnn_dso_handle = nullptr;
* include all needed cudnn functions in HPPL
* different cudnn version has different interfaces
**/
+// clang-format off
#define CUDNN_DNN_ROUTINE_EACH(__macro) \
__macro(cudnnSetTensor4dDescriptor) \
__macro(cudnnSetTensor4dDescriptorEx) \
@@ -141,58 +141,53 @@ CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP)
#endif
#undef CUDNN_DNN_ROUTINE_EACH
-
+// clang-format on
} /* namespace dynload */
/**
* Check build-in cudnn function using glog and it **does not**
* support << operator for more details error info.
*/
-#define CHECK_CUDNN(cudnnFunc) \
- do { \
- cudnnStatus_t cudnnStat = cudnnFunc; \
- CHECK_EQ(CUDNN_STATUS_SUCCESS, cudnnStat) \
- << "Cudnn Error: " \
- << dynload::cudnnGetErrorString(cudnnStat); \
+#define CHECK_CUDNN(cudnnFunc) \
+ do { \
+ cudnnStatus_t cudnnStat = cudnnFunc; \
+ CHECK_EQ(CUDNN_STATUS_SUCCESS, cudnnStat) \
+ << "Cudnn Error: " << dynload::cudnnGetErrorString(cudnnStat); \
} while (0)
bool g_is_libcudnn_init = false;
int g_cudnn_lib_version = 0;
-void hl_cudnn_desc_init(cudnnTensorDescriptor_t* cudnn_desc)
-{
- CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(cudnn_desc));
+void hl_cudnn_desc_init(cudnnTensorDescriptor_t* cudnn_desc) {
+ CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(cudnn_desc));
}
-void hl_cudnn_init(cudnnHandle_t *cudnn_handle, cudaStream_t stream)
-{
- size_t cudnn_dso_ver = dynload::cudnnGetVersion();
- size_t cudnn_dso_major = cudnn_dso_ver / 1000;
- size_t cudnn_cuh_major = CUDNN_VERSION / 1000;
-
- // Compare cudnn header version with that of cudnn.so.
- CHECK((cudnn_cuh_major < 4 && cudnn_dso_major < 4) ||
- (cudnn_cuh_major == cudnn_dso_major))
- << "[cudnn init] libcudnn v" << cudnn_dso_major <<
- " with header v" << cudnn_cuh_major << " unmatched!\n"
- << "PaddlePaddle Requirement: "
- << "(header v[2-3] with libcudnn v[2-3]) Or "
- << "(header v4 with libcudnn v4) Or "
- << "(header v5 with libcudnn v5).";
-
- CHECK(!(CUDNN_VERSION >= 5000 && CUDA_VERSION < 7050))
- << "cudnn v5 requires cuda version >= 7.5";
-
- CHECK_CUDNN(dynload::cudnnCreate(cudnn_handle));
- CHECK_CUDNN(dynload::cudnnSetStream(*cudnn_handle, stream));
-
- g_is_libcudnn_init = true;
- g_cudnn_lib_version = cudnn_dso_ver;
+void hl_cudnn_init(cudnnHandle_t* cudnn_handle, cudaStream_t stream) {
+ size_t cudnn_dso_ver = dynload::cudnnGetVersion();
+ size_t cudnn_dso_major = cudnn_dso_ver / 1000;
+ size_t cudnn_cuh_major = CUDNN_VERSION / 1000;
+
+ // Compare cudnn header version with that of cudnn.so.
+ CHECK((cudnn_cuh_major < 4 && cudnn_dso_major < 4) ||
+ (cudnn_cuh_major == cudnn_dso_major))
+ << "[cudnn init] libcudnn v" << cudnn_dso_major << " with header v"
+ << cudnn_cuh_major << " unmatched!\n"
+ << "PaddlePaddle Requirement: "
+ << "(header v[2-3] with libcudnn v[2-3]) Or "
+ << "(header v4 with libcudnn v4) Or "
+ << "(header v5 with libcudnn v5).";
+
+ CHECK(!(CUDNN_VERSION >= 5000 && CUDA_VERSION < 7050))
+ << "cudnn v5 requires cuda version >= 7.5";
+
+ CHECK_CUDNN(dynload::cudnnCreate(cudnn_handle));
+ CHECK_CUDNN(dynload::cudnnSetStream(*cudnn_handle, stream));
+
+ g_is_libcudnn_init = true;
+ g_cudnn_lib_version = cudnn_dso_ver;
}
-int hl_get_cudnn_lib_version() {
- return g_cudnn_lib_version;
-}
+int hl_get_cudnn_lib_version() { return g_cudnn_lib_version; }
void hl_conv_workspace(hl_tensor_descriptor input,
hl_tensor_descriptor output,
@@ -206,94 +201,91 @@ void hl_conv_workspace(hl_tensor_descriptor input,
size_t* bwdFilterLimitBytes) {
#if CUDNN_VERSION >= 4000
- CHECK_NOTNULL(input);
- CHECK_NOTNULL(output);
- CHECK_NOTNULL(filter);
- CHECK_NOTNULL(conv);
-
- // Specify workspace limit directly
- size_t memoryLimitBytes = (1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb;
-
- // cudnn convolution forward configuration
- cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input);
- cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output);
- cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter);
- cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
-
- CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm(
- t_resource.cudnn_handle,
- fwd_src_desc,
- fwd_filter_desc,
- fwd_conv_desc,
- fwd_dest_desc,
- CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
- memoryLimitBytes,
- reinterpret_cast<cudnnConvolutionFwdAlgo_t*>(convFwdAlgo)));
-
- CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize(
- t_resource.cudnn_handle,
- fwd_src_desc,
- fwd_filter_desc,
- fwd_conv_desc,
- fwd_dest_desc,
- static_cast<cudnnConvolutionFwdAlgo_t>(*convFwdAlgo),
- fwdLimitBytes));
-
- // cudnn convolution backward data configuration
- cudnnFilterDescriptor_t bwd_data_filter_desc =
- GET_FILTER_DESCRIPTOR(filter);
- cudnnTensorDescriptor_t bwd_data_diff_desc =
- GET_TENSOR_DESCRIPTOR(output);
- cudnnTensorDescriptor_t bwd_data_grad_desc =
- GET_TENSOR_DESCRIPTOR(input);
- cudnnConvolutionDescriptor_t bwd_data_conv_desc =
- GET_CONVOLUTION_DESCRIPTOR(conv);
-
- CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm(
- t_resource.cudnn_handle,
- bwd_data_filter_desc,
- bwd_data_diff_desc,
- bwd_data_conv_desc,
- bwd_data_grad_desc,
- CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
- memoryLimitBytes,
- reinterpret_cast<cudnnConvolutionBwdDataAlgo_t*>(convBwdDataAlgo)));
-
- CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
- t_resource.cudnn_handle,
- bwd_data_filter_desc,
- bwd_data_diff_desc,
- bwd_data_conv_desc,
- bwd_data_grad_desc,
- static_cast<cudnnConvolutionBwdDataAlgo_t>(*convBwdDataAlgo),
- bwdDataLimitBytes));
-
- // cudnn convolution backward filter configuration
- cudnnTensorDescriptor_t bwd_filter_src_desc =
- GET_TENSOR_DESCRIPTOR(input);
- cudnnTensorDescriptor_t bwd_filter_diff_desc =
- GET_TENSOR_DESCRIPTOR(output);
- cudnnConvolutionDescriptor_t bwd_filter_conv_desc =
- GET_CONVOLUTION_DESCRIPTOR(conv);
- cudnnFilterDescriptor_t bwd_filter_grad_desc =
- GET_FILTER_DESCRIPTOR(filter);
-
- CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
- t_resource.cudnn_handle,
- bwd_filter_src_desc,
- bwd_filter_diff_desc,
- bwd_filter_conv_desc,
- bwd_filter_grad_desc,
- CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
- memoryLimitBytes,
- reinterpret_cast<cudnnConvolutionBwdFilterAlgo_t*>(convBwdFilterAlgo)));
-
- CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
- t_resource.cudnn_handle, bwd_filter_src_desc,
- bwd_filter_diff_desc, bwd_filter_conv_desc,
- bwd_filter_grad_desc,
- static_cast<cudnnConvolutionBwdFilterAlgo_t>(*convBwdFilterAlgo),
- bwdFilterLimitBytes));
+ CHECK_NOTNULL(input);
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(filter);
+ CHECK_NOTNULL(conv);
+
+ // Specify workspace limit directly
+ size_t memoryLimitBytes =
+ (1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb;
+
+ // cudnn convolution forward configuration
+ cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input);
+ cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output);
+ cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter);
+ cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
+
+ CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm(
+ t_resource.cudnn_handle,
+ fwd_src_desc,
+ fwd_filter_desc,
+ fwd_conv_desc,
+ fwd_dest_desc,
+ CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
+ memoryLimitBytes,
+ reinterpret_cast<cudnnConvolutionFwdAlgo_t*>(convFwdAlgo)));
+
+ CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize(
+ t_resource.cudnn_handle,
+ fwd_src_desc,
+ fwd_filter_desc,
+ fwd_conv_desc,
+ fwd_dest_desc,
+ static_cast<cudnnConvolutionFwdAlgo_t>(*convFwdAlgo),
+ fwdLimitBytes));
+
+ // cudnn convolution backward data configuration
+ cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter);
+ cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output);
+ cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input);
+ cudnnConvolutionDescriptor_t bwd_data_conv_desc =
+ GET_CONVOLUTION_DESCRIPTOR(conv);
+
+ CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm(
+ t_resource.cudnn_handle,
+ bwd_data_filter_desc,
+ bwd_data_diff_desc,
+ bwd_data_conv_desc,
+ bwd_data_grad_desc,
+ CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
+ memoryLimitBytes,
+ reinterpret_cast<cudnnConvolutionBwdDataAlgo_t*>(convBwdDataAlgo)));
+
+ CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
+ t_resource.cudnn_handle,
+ bwd_data_filter_desc,
+ bwd_data_diff_desc,
+ bwd_data_conv_desc,
+ bwd_data_grad_desc,
+ static_cast<cudnnConvolutionBwdDataAlgo_t>(*convBwdDataAlgo),
+ bwdDataLimitBytes));
+
+ // cudnn convolution backward filter configuration
+ cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input);
+ cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output);
+ cudnnConvolutionDescriptor_t bwd_filter_conv_desc =
+ GET_CONVOLUTION_DESCRIPTOR(conv);
+ cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter);
+
+ CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
+ t_resource.cudnn_handle,
+ bwd_filter_src_desc,
+ bwd_filter_diff_desc,
+ bwd_filter_conv_desc,
+ bwd_filter_grad_desc,
+ CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
+ memoryLimitBytes,
+ reinterpret_cast<cudnnConvolutionBwdFilterAlgo_t*>(convBwdFilterAlgo)));
+
+ CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
+ t_resource.cudnn_handle,
+ bwd_filter_src_desc,
+ bwd_filter_diff_desc,
+ bwd_filter_conv_desc,
+ bwd_filter_grad_desc,
+ static_cast<cudnnConvolutionBwdFilterAlgo_t>(*convBwdFilterAlgo),
+ bwdFilterLimitBytes));
#endif
}
@@ -302,78 +294,75 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc,
int batch_size,
int feature_maps,
int height,
- int width)
-{
- CHECK_NOTNULL(image_desc);
+ int width) {
+ CHECK_NOTNULL(image_desc);
- cudnn_tensor_descriptor hl_desc =
- (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor));
- CHECK_NOTNULL(hl_desc);
+ cudnn_tensor_descriptor hl_desc =
+ (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor));
+ CHECK_NOTNULL(hl_desc);
#ifndef PADDLE_TYPE_DOUBLE
- cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
+ cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else
- cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
+ cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
#endif
- CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(&hl_desc->desc));
-
- CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(
- hl_desc->desc,
- CUDNN_TENSOR_NCHW,
- data_type,
- batch_size,
- feature_maps,
- height,
- width));
-
- hl_desc->format = CUDNN_TENSOR_NCHW;
- hl_desc->data_type = data_type;
- hl_desc->batch_size = batch_size;
- hl_desc->feature_maps = feature_maps;
- hl_desc->height = height;
- hl_desc->width = width;
-
- *image_desc = (hl_tensor_descriptor)hl_desc;
+ CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(&hl_desc->desc));
+
+ CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(hl_desc->desc,
+ CUDNN_TENSOR_NCHW,
+ data_type,
+ batch_size,
+ feature_maps,
+ height,
+ width));
+
+ hl_desc->format = CUDNN_TENSOR_NCHW;
+ hl_desc->data_type = data_type;
+ hl_desc->batch_size = batch_size;
+ hl_desc->feature_maps = feature_maps;
+ hl_desc->height = height;
+ hl_desc->width = width;
+
+ *image_desc = (hl_tensor_descriptor)hl_desc;
}
void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc) {
- CHECK_NOTNULL(image_desc);
+ CHECK_NOTNULL(image_desc);
- cudnn_tensor_descriptor hl_desc =
- (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor));
- CHECK_NOTNULL(hl_desc);
+ cudnn_tensor_descriptor hl_desc =
+ (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor));
+ CHECK_NOTNULL(hl_desc);
#ifndef PADDLE_TYPE_DOUBLE
- cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
+ cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else
- cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
+ cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
#endif
- CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(&hl_desc->desc));
+ CHECK_CUDNN(dynload::cudnnCreateTensorDescriptor(&hl_desc->desc));
- hl_desc->data_type = data_type;
+ hl_desc->data_type = data_type;
- *image_desc = (hl_tensor_descriptor)hl_desc;
+ *image_desc = (hl_tensor_descriptor)hl_desc;
}
void hl_tensor_reshape(hl_tensor_descriptor image_desc,
int batch_size,
int feature_maps,
int height,
- int width)
-{
- const int stride_w = 1;
- const int stride_h = width * stride_w;
- const int stride_c = height * stride_h;
- const int stride_n = feature_maps * stride_c;
- return hl_tensor_reshape(image_desc,
- batch_size,
- feature_maps,
- height,
- width,
- stride_n,
- stride_c,
- stride_h,
- stride_w);
+ int width) {
+ const int stride_w = 1;
+ const int stride_h = width * stride_w;
+ const int stride_c = height * stride_h;
+ const int stride_n = feature_maps * stride_c;
+ return hl_tensor_reshape(image_desc,
+ batch_size,
+ feature_maps,
+ height,
+ width,
+ stride_n,
+ stride_c,
+ stride_h,
+ stride_w);
}
void hl_tensor_reshape(hl_tensor_descriptor image_desc,
@@ -384,45 +373,42 @@ void hl_tensor_reshape(hl_tensor_descriptor image_desc,
int nStride,
int cStride,
int hStride,
- int wStride)
-{
- CHECK_NOTNULL(image_desc);
-
- cudnn_tensor_descriptor hl_desc = (cudnn_tensor_descriptor)image_desc;
- CHECK_NOTNULL(hl_desc->desc);
-
- CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptorEx(hl_desc->desc,
- hl_desc->data_type,
- batch_size,
- feature_maps,
- height,
- width,
- nStride,
- cStride,
- hStride,
- wStride));
-
- hl_desc->batch_size = batch_size;
- hl_desc->feature_maps = feature_maps;
- hl_desc->height = height;
- hl_desc->width = width;
+ int wStride) {
+ CHECK_NOTNULL(image_desc);
+
+ cudnn_tensor_descriptor hl_desc = (cudnn_tensor_descriptor)image_desc;
+ CHECK_NOTNULL(hl_desc->desc);
+
+ CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptorEx(hl_desc->desc,
+ hl_desc->data_type,
+ batch_size,
+ feature_maps,
+ height,
+ width,
+ nStride,
+ cStride,
+ hStride,
+ wStride));
+
+ hl_desc->batch_size = batch_size;
+ hl_desc->feature_maps = feature_maps;
+ hl_desc->height = height;
+ hl_desc->width = width;
}
-void hl_destroy_tensor_descriptor(hl_tensor_descriptor image_desc)
-{
- CHECK_NOTNULL(image_desc);
+void hl_destroy_tensor_descriptor(hl_tensor_descriptor image_desc) {
+ CHECK_NOTNULL(image_desc);
- cudnn_tensor_descriptor hl_desc = (cudnn_tensor_descriptor)image_desc;
- CHECK_NOTNULL(hl_desc->desc);
+ cudnn_tensor_descriptor hl_desc = (cudnn_tensor_descriptor)image_desc;
+ CHECK_NOTNULL(hl_desc->desc);
- CHECK_CUDNN(dynload::cudnnDestroyTensorDescriptor(hl_desc->desc));
+ CHECK_CUDNN(dynload::cudnnDestroyTensorDescriptor(hl_desc->desc));
- hl_desc->desc = NULL;
+ hl_desc->desc = NULL;
- free(image_desc);
+ free(image_desc);
}
-
void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
hl_pooling_mode_t mode,
int height,
@@ -430,99 +416,93 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
int height_padding,
int width_padding,
int stride_height,
- int stride_width)
-{
- cudnnPoolingMode_t cudnn_mode;
- switch (mode)
- {
- case HL_POOLING_MAX:
- cudnn_mode = CUDNN_POOLING_MAX;
- break;
- case HL_POOLING_AVERAGE:
- cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
- break;
- case HL_POOLING_AVERAGE_EXCLUDE_PADDING:
- cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
- break;
- default:
- LOG(FATAL) << "parameter mode error";
- }
-
- CHECK_NOTNULL(pooling_desc);
-
- cudnn_pooling_descriptor hl_pooling_desc =
- (cudnn_pooling_descriptor)malloc(sizeof(_cudnn_pooling_descriptor));
- CHECK_NOTNULL(hl_pooling_desc);
-
- CHECK_CUDNN(dynload::cudnnCreatePoolingDescriptor(&hl_pooling_desc->desc));
-
- CHECK_CUDNN(dynload::cudnnSetPooling2dDescriptor(
- hl_pooling_desc->desc,
- cudnn_mode,
+ int stride_width) {
+ cudnnPoolingMode_t cudnn_mode;
+ switch (mode) {
+ case HL_POOLING_MAX:
+ cudnn_mode = CUDNN_POOLING_MAX;
+ break;
+ case HL_POOLING_AVERAGE:
+ cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
+ break;
+ case HL_POOLING_AVERAGE_EXCLUDE_PADDING:
+ cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
+ break;
+ default:
+ LOG(FATAL) << "parameter mode error";
+ }
+
+ CHECK_NOTNULL(pooling_desc);
+
+ cudnn_pooling_descriptor hl_pooling_desc =
+ (cudnn_pooling_descriptor)malloc(sizeof(_cudnn_pooling_descriptor));
+ CHECK_NOTNULL(hl_pooling_desc);
+
+ CHECK_CUDNN(dynload::cudnnCreatePoolingDescriptor(&hl_pooling_desc->desc));
+
+ CHECK_CUDNN(dynload::cudnnSetPooling2dDescriptor(hl_pooling_desc->desc,
+ cudnn_mode,
#if CUDNN_VERSION >= 5000
- CUDNN_PROPAGATE_NAN,
+ CUDNN_PROPAGATE_NAN,
#endif
- height,
- width,
- height_padding,
- width_padding,
- stride_height,
- stride_width));
-
- hl_pooling_desc->mode = cudnn_mode;
- hl_pooling_desc->window_height = height;
- hl_pooling_desc->window_width = width;
- hl_pooling_desc->stride_height = stride_height;
- hl_pooling_desc->stride_width = stride_width;
-
- *pooling_desc = (hl_pooling_descriptor)hl_pooling_desc;
+ height,
+ width,
+ height_padding,
+ width_padding,
+ stride_height,
+ stride_width));
+
+ hl_pooling_desc->mode = cudnn_mode;
+ hl_pooling_desc->window_height = height;
+ hl_pooling_desc->window_width = width;
+ hl_pooling_desc->stride_height = stride_height;
+ hl_pooling_desc->stride_width = stride_width;
+
+ *pooling_desc = (hl_pooling_descriptor)hl_pooling_desc;
}
-void hl_destroy_pooling_descriptor(hl_pooling_descriptor pooling_desc)
-{
- CHECK_NOTNULL(pooling_desc);
+void hl_destroy_pooling_descriptor(hl_pooling_descriptor pooling_desc) {
+ CHECK_NOTNULL(pooling_desc);
- cudnn_pooling_descriptor hl_pooling = (cudnn_pooling_descriptor)pooling_desc;
- CHECK_NOTNULL(hl_pooling->desc);
+ cudnn_pooling_descriptor hl_pooling = (cudnn_pooling_descriptor)pooling_desc;
- CHECK_CUDNN(dynload::cudnnDestroyPoolingDescriptor(hl_pooling->desc));
+ CHECK_NOTNULL(hl_pooling->desc);
+ CHECK_CUDNN(dynload::cudnnDestroyPoolingDescriptor(hl_pooling->desc));
- hl_pooling->desc = NULL;
+ hl_pooling->desc = NULL;
- free(pooling_desc);
+ free(pooling_desc);
}
void hl_pooling_forward(hl_tensor_descriptor input,
real* input_image,
hl_tensor_descriptor output,
real* output_image,
- hl_pooling_descriptor pooling)
-{
- cudnnPoolingDescriptor_t pooling_desc;
- cudnnTensorDescriptor_t input_desc;
- cudnnTensorDescriptor_t output_desc;
-
- CHECK_NOTNULL(input);
- CHECK_NOTNULL(output);
- CHECK_NOTNULL(pooling);
- CHECK_NOTNULL(input_image);
- CHECK_NOTNULL(output_image);
-
- real alpha = 1.0f;
- real beta = 1.0f;
- input_desc = ((cudnn_tensor_descriptor)input)->desc;
- output_desc = ((cudnn_tensor_descriptor)output)->desc;
- pooling_desc = ((cudnn_pooling_descriptor)pooling)->desc;
- CHECK_CUDNN(dynload::cudnnPoolingForward(
- t_resource.cudnn_handle,
- pooling_desc,
- &alpha,
- input_desc,
- input_image,
- &beta,
- output_desc,
- output_image));
- CHECK_SYNC("hl_pooling_forward failed");
+ hl_pooling_descriptor pooling) {
+ cudnnPoolingDescriptor_t pooling_desc;
+ cudnnTensorDescriptor_t input_desc;
+ cudnnTensorDescriptor_t output_desc;
+
+ CHECK_NOTNULL(input);
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(pooling);
+ CHECK_NOTNULL(input_image);
+ CHECK_NOTNULL(output_image);
+
+ real alpha = 1.0f;
+ real beta = 1.0f;
+ input_desc = ((cudnn_tensor_descriptor)input)->desc;
+ output_desc = ((cudnn_tensor_descriptor)output)->desc;
+ pooling_desc = ((cudnn_pooling_descriptor)pooling)->desc;
+ CHECK_CUDNN(dynload::cudnnPoolingForward(t_resource.cudnn_handle,
+ pooling_desc,
+ &alpha,
+ input_desc,
+ input_image,
+ &beta,
+ output_desc,
+ output_image));
+ CHECK_SYNC("hl_pooling_forward failed");
}
void hl_pooling_backward(hl_tensor_descriptor input,
@@ -531,94 +511,87 @@ void hl_pooling_backward(hl_tensor_descriptor input,
hl_tensor_descriptor output,
real* output_image,
real* output_image_grad,
- hl_pooling_descriptor pooling)
-{
- cudnnPoolingDescriptor_t pooling_desc;
- cudnnTensorDescriptor_t input_desc;
- cudnnTensorDescriptor_t output_desc;
-
- CHECK_NOTNULL(input);
- CHECK_NOTNULL(output);
- CHECK_NOTNULL(pooling);
- CHECK_NOTNULL(input_image);
- CHECK_NOTNULL(input_image_grad);
- CHECK_NOTNULL(output_image);
- CHECK_NOTNULL(output_image_grad);
-
- real alpha = 1.0f;
- real beta = 1.0f;
- input_desc = ((cudnn_tensor_descriptor)input)->desc;
- output_desc = ((cudnn_tensor_descriptor)output)->desc;
- pooling_desc = ((cudnn_pooling_descriptor)pooling)->desc;
- CHECK_CUDNN(dynload::cudnnPoolingBackward(
- t_resource.cudnn_handle,
- pooling_desc,
- &alpha,
- output_desc,
- output_image,
- output_desc,
- output_image_grad,
- input_desc,
- input_image,
- &beta,
- input_desc,
- input_image_grad));
+ hl_pooling_descriptor pooling) {
+ cudnnPoolingDescriptor_t pooling_desc;
+ cudnnTensorDescriptor_t input_desc;
+ cudnnTensorDescriptor_t output_desc;
+
+ CHECK_NOTNULL(input);
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(pooling);
+ CHECK_NOTNULL(input_image);
+ CHECK_NOTNULL(input_image_grad);
+ CHECK_NOTNULL(output_image);
+ CHECK_NOTNULL(output_image_grad);
+
+ real alpha = 1.0f;
+ real beta = 1.0f;
+ input_desc = ((cudnn_tensor_descriptor)input)->desc;
+ output_desc = ((cudnn_tensor_descriptor)output)->desc;
+ pooling_desc = ((cudnn_pooling_descriptor)pooling)->desc;
+ CHECK_CUDNN(dynload::cudnnPoolingBackward(t_resource.cudnn_handle,
+ pooling_desc,
+ &alpha,
+ output_desc,
+ output_image,
+ output_desc,
+ output_image_grad,
+ input_desc,
+ input_image,
+ &beta,
+ input_desc,
+ input_image_grad));
CHECK_SYNC("hl_pooling_backward failed");
}
-
void hl_create_filter_descriptor(hl_filter_descriptor* filter,
int input_feature_maps,
int output_feature_maps,
int height,
- int width)
-{
- CHECK_NOTNULL(filter);
+ int width) {
+ CHECK_NOTNULL(filter);
- cudnn_filter_descriptor hl_filter =
- (cudnn_filter_descriptor)malloc(sizeof(_cudnn_filter_descriptor));
- CHECK_NOTNULL(hl_filter);
+ cudnn_filter_descriptor hl_filter =
+ (cudnn_filter_descriptor)malloc(sizeof(_cudnn_filter_descriptor));
+ CHECK_NOTNULL(hl_filter);
- CHECK_CUDNN(dynload::cudnnCreateFilterDescriptor(&hl_filter->desc));
+ CHECK_CUDNN(dynload::cudnnCreateFilterDescriptor(&hl_filter->desc));
#ifndef PADDLE_TYPE_DOUBLE
- cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
+ cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else
- cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
+ cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
#endif
- CHECK_CUDNN(dynload::cudnnSetFilter4dDescriptor(
- hl_filter->desc,
- data_type,
+ CHECK_CUDNN(dynload::cudnnSetFilter4dDescriptor(hl_filter->desc,
+ data_type,
#if CUDNN_VERSION >= 5000
- CUDNN_TENSOR_NCHW,
+ CUDNN_TENSOR_NCHW,
#endif
- output_feature_maps,
- input_feature_maps,
- height,
- width));
-
- hl_filter->data_type = data_type;
- hl_filter->output_feature_maps = output_feature_maps;
- hl_filter->input_feature_maps = input_feature_maps;
- hl_filter->filter_height = height;
- hl_filter->filter_width = width;
-
- *filter = (hl_filter_descriptor)hl_filter;
+ output_feature_maps,
+ input_feature_maps,
+ height,
+ width));
+
+ hl_filter->data_type = data_type;
+ hl_filter->output_feature_maps = output_feature_maps;
+ hl_filter->input_feature_maps = input_feature_maps;
+ hl_filter->filter_height = height;
+ hl_filter->filter_width = width;
+
+ *filter = (hl_filter_descriptor)hl_filter;
}
+void hl_destroy_filter_descriptor(hl_filter_descriptor filter) {
+ CHECK_NOTNULL(filter);
-void hl_destroy_filter_descriptor(hl_filter_descriptor filter)
-{
- CHECK_NOTNULL(filter);
+ cudnn_filter_descriptor hl_filter = (cudnn_filter_descriptor)filter;
+ CHECK_NOTNULL(hl_filter->desc);
- cudnn_filter_descriptor hl_filter = (cudnn_filter_descriptor)filter;
- CHECK_NOTNULL(hl_filter->desc);
+ CHECK_CUDNN(dynload::cudnnDestroyFilterDescriptor(hl_filter->desc));
- CHECK_CUDNN(dynload::cudnnDestroyFilterDescriptor(hl_filter->desc));
+ hl_filter->desc = NULL;
- hl_filter->desc = NULL;
-
- free(filter);
+ free(filter);
}
void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
@@ -627,38 +600,36 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
int padding_height,
int padding_width,
int stride_height,
- int stride_width)
-{
- CHECK_NOTNULL(conv);
-
- cudnn_convolution_descriptor hl_conv =
- (cudnn_convolution_descriptor)malloc(sizeof(_cudnn_convolution_descriptor));
- CHECK_NOTNULL(hl_conv);
-
- CHECK_CUDNN(dynload::cudnnCreateConvolutionDescriptor(&hl_conv->desc));
-
- cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
- CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(
- hl_conv->desc,
- padding_height,
- padding_width,
- stride_height,
- stride_width,
- 1,
- 1,
- mode));
-
- hl_conv->input_image = image;
- hl_conv->filter = filter;
- hl_conv->padding_height = padding_height;
- hl_conv->padding_width = padding_width;
- hl_conv->stride_height = stride_height;
- hl_conv->stride_width = stride_width;
- hl_conv->upscalex = 1;
- hl_conv->upscaley = 1;
- hl_conv->mode = mode;
-
- *conv = (hl_convolution_descriptor)hl_conv;
+ int stride_width) {
+ CHECK_NOTNULL(conv);
+
+ cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)malloc(
+ sizeof(_cudnn_convolution_descriptor));
+
+ CHECK_NOTNULL(hl_conv);
+ CHECK_CUDNN(dynload::cudnnCreateConvolutionDescriptor(&hl_conv->desc));
+
+ cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
+ CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc,
+ padding_height,
+ padding_width,
+ stride_height,
+ stride_width,
+ 1,
+ 1,
+ mode));
+
+ hl_conv->input_image = image;
+ hl_conv->filter = filter;
+ hl_conv->padding_height = padding_height;
+ hl_conv->padding_width = padding_width;
+ hl_conv->stride_height = stride_height;
+ hl_conv->stride_width = stride_width;
+ hl_conv->upscalex = 1;
+ hl_conv->upscaley = 1;
+ hl_conv->mode = mode;
+
+ *conv = (hl_convolution_descriptor)hl_conv;
}
void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
@@ -667,47 +638,44 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
int padding_height,
int padding_width,
int stride_height,
- int stride_width)
-{
- CHECK_NOTNULL(conv);
- CHECK_NOTNULL(image);
- CHECK_NOTNULL(filter);
-
- cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
- cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
- CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(
- conv_desc,
- padding_height,
- padding_width,
- stride_height,
- stride_width,
- 1,
- 1,
- mode));
-
- cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)conv;
- hl_conv->input_image = image;
- hl_conv->filter = filter;
- hl_conv->padding_height = padding_height;
- hl_conv->padding_width = padding_width;
- hl_conv->stride_height = stride_height;
- hl_conv->stride_width = stride_width;
- hl_conv->upscalex = 1;
- hl_conv->upscaley = 1;
- hl_conv->mode = mode;
+ int stride_width) {
+ CHECK_NOTNULL(conv);
+ CHECK_NOTNULL(image);
+ CHECK_NOTNULL(filter);
+
+ cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
+ cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
+ CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(conv_desc,
+ padding_height,
+ padding_width,
+ stride_height,
+ stride_width,
+ 1,
+ 1,
+ mode));
+
+ cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)conv;
+ hl_conv->input_image = image;
+ hl_conv->filter = filter;
+ hl_conv->padding_height = padding_height;
+ hl_conv->padding_width = padding_width;
+ hl_conv->stride_height = stride_height;
+ hl_conv->stride_width = stride_width;
+ hl_conv->upscalex = 1;
+ hl_conv->upscaley = 1;
+ hl_conv->mode = mode;
}
-void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv)
-{
- CHECK_NOTNULL(conv);
+void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {
+ CHECK_NOTNULL(conv);
- cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)conv;
- CHECK_NOTNULL(hl_conv->desc);
+ cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)conv;
+ CHECK_NOTNULL(hl_conv->desc);
- CHECK_CUDNN(dynload::cudnnDestroyConvolutionDescriptor(hl_conv->desc));
- hl_conv->desc = NULL;
+ CHECK_CUDNN(dynload::cudnnDestroyConvolutionDescriptor(hl_conv->desc));
+ hl_conv->desc = NULL;
- free(conv);
+ free(conv);
}
void hl_convolution_forward(hl_tensor_descriptor input,
@@ -720,87 +688,83 @@ void hl_convolution_forward(hl_tensor_descriptor input,
void* gpuWorkSpace,
size_t sizeInBytes,
int convFwdAlgo) {
- CHECK_NOTNULL(input);
- CHECK_NOTNULL(output);
- CHECK_NOTNULL(filter);
- CHECK_NOTNULL(conv);
- CHECK_NOTNULL(input_data);
- CHECK_NOTNULL(output_data);
- CHECK_NOTNULL(filter_data);
- cudnnTensorDescriptor_t src_desc = GET_TENSOR_DESCRIPTOR(input);
- cudnnTensorDescriptor_t dest_desc = GET_TENSOR_DESCRIPTOR(output);
- cudnnFilterDescriptor_t filter_desc = GET_FILTER_DESCRIPTOR(filter);
- cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
- real alpha = 1.0f;
- real beta = 1.0f;
- CHECK_CUDNN(dynload::cudnnConvolutionForward(
- t_resource.cudnn_handle,
- &alpha,
- src_desc,
- input_data,
- filter_desc,
- filter_data,
- conv_desc,
- static_cast(convFwdAlgo),
- static_cast<cudnnConvolutionFwdAlgo_t>(convFwdAlgo),
- gpuWorkSpace,
- sizeInBytes,
- &beta,
- dest_desc,
- output_data));
+ CHECK_NOTNULL(input);
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(filter);
+ CHECK_NOTNULL(conv);
+ CHECK_NOTNULL(input_data);
+ CHECK_NOTNULL(output_data);
+ CHECK_NOTNULL(filter_data);
+ cudnnTensorDescriptor_t src_desc = GET_TENSOR_DESCRIPTOR(input);
+ cudnnTensorDescriptor_t dest_desc = GET_TENSOR_DESCRIPTOR(output);
+ cudnnFilterDescriptor_t filter_desc = GET_FILTER_DESCRIPTOR(filter);
+ cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
+ real alpha = 1.0f;
+ real beta = 1.0f;
+ CHECK_CUDNN(dynload::cudnnConvolutionForward(
+ t_resource.cudnn_handle,
+ &alpha,
+ src_desc,
+ input_data,
+ filter_desc,
+ filter_data,
+ conv_desc,
+ static_cast<cudnnConvolutionFwdAlgo_t>(convFwdAlgo),
+ gpuWorkSpace,
+ sizeInBytes,
+ &beta,
+ dest_desc,
+ output_data));
CHECK_SYNC("hl_convolution_forward failed");
}
void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
real* bias_data,
hl_tensor_descriptor output,
- real* output_data)
-{
- CHECK_NOTNULL(bias);
- CHECK_NOTNULL(output);
- CHECK_NOTNULL(bias_data);
- CHECK_NOTNULL(output_data);
-
- cudnnTensorDescriptor_t output_desc = GET_TENSOR_DESCRIPTOR(output);
- cudnnTensorDescriptor_t bias_desc = GET_TENSOR_DESCRIPTOR(bias);
- real alpha = 1.0f;
- real beta = 1.0f;
-
- CHECK_CUDNN(dynload::cudnnAddTensor(
- t_resource.cudnn_handle,
+ real* output_data) {
+ CHECK_NOTNULL(bias);
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(bias_data);
+ CHECK_NOTNULL(output_data);
+
+ cudnnTensorDescriptor_t output_desc = GET_TENSOR_DESCRIPTOR(output);
+ cudnnTensorDescriptor_t bias_desc = GET_TENSOR_DESCRIPTOR(bias);
+ real alpha = 1.0f;
+ real beta = 1.0f;
+
+ CHECK_CUDNN(dynload::cudnnAddTensor(t_resource.cudnn_handle,
#if CUDNN_VERSION < 4000
- CUDNN_ADD_SAME_C,
+ CUDNN_ADD_SAME_C,
#endif
- &alpha,
- bias_desc,
- bias_data,
- &beta,
- output_desc,
- output_data));
+ &alpha,
+ bias_desc,
+ bias_data,
+ &beta,
+ output_desc,
+ output_data));
CHECK_SYNC("hl_convolution_forward_add_bias failed");
}
void hl_convolution_backward_bias(hl_tensor_descriptor bias,
real* bias_grad_data,
hl_tensor_descriptor output,
- real* output_grad_data)
-{
- CHECK_NOTNULL(bias);
- CHECK_NOTNULL(output);
- CHECK_NOTNULL(bias_grad_data);
- CHECK_NOTNULL(output_grad_data);
-
- real alpha = 1.0f;
- real beta = 1.0f;
- cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output);
- cudnnTensorDescriptor_t bias_desc = GET_TENSOR_DESCRIPTOR(bias);
- CHECK_CUDNN(dynload::cudnnConvolutionBackwardBias(
- t_resource.cudnn_handle,
- &alpha,
- diff_desc,
- output_grad_data,
- &beta,
- bias_desc,
- bias_grad_data));
+ real* output_grad_data) {
+ CHECK_NOTNULL(bias);
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(bias_grad_data);
+ CHECK_NOTNULL(output_grad_data);
+
+ real alpha = 1.0f;
+ real beta = 1.0f;
+ cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output);
+ cudnnTensorDescriptor_t bias_desc = GET_TENSOR_DESCRIPTOR(bias);
+ CHECK_CUDNN(dynload::cudnnConvolutionBackwardBias(t_resource.cudnn_handle,
+ &alpha,
+ diff_desc,
+ output_grad_data,
+ &beta,
+ bias_desc,
+ bias_grad_data));
CHECK_SYNC("hl_convolution_backward_bias failed");
}
@@ -814,38 +778,37 @@ void hl_convolution_backward_filter(hl_tensor_descriptor input,
void* gpuWorkSpace,
size_t sizeInBytes,
int convBwdFilterAlgo) {
+ CHECK_NOTNULL(input);
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(filter);
+ CHECK_NOTNULL(conv);
+ CHECK_NOTNULL(input_data);
+ CHECK_NOTNULL(output_grad_data);
+ CHECK_NOTNULL(filter_grad_data);
- CHECK_NOTNULL(input);
- CHECK_NOTNULL(output);
- CHECK_NOTNULL(filter);
- CHECK_NOTNULL(conv);
- CHECK_NOTNULL(input_data);
- CHECK_NOTNULL(output_grad_data);
- CHECK_NOTNULL(filter_grad_data);
-
- real alpha = 1.0f;
- real beta = 1.0f;
- cudnnTensorDescriptor_t src_desc = GET_TENSOR_DESCRIPTOR(input);
- cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output);
- cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
- cudnnFilterDescriptor_t grad_desc = GET_FILTER_DESCRIPTOR(filter);
-
- CHECK_CUDNN(dynload::cudnnConvolutionBackwardFilter(
- t_resource.cudnn_handle,
- &alpha,
- src_desc,
- input_data,
- diff_desc,
- output_grad_data,
- conv_desc,
+ real alpha = 1.0f;
+ real beta = 1.0f;
+ cudnnTensorDescriptor_t src_desc = GET_TENSOR_DESCRIPTOR(input);
+ cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output);
+ cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
+ cudnnFilterDescriptor_t grad_desc = GET_FILTER_DESCRIPTOR(filter);
+
+ CHECK_CUDNN(dynload::cudnnConvolutionBackwardFilter(
+ t_resource.cudnn_handle,
+ &alpha,
+ src_desc,
+ input_data,
+ diff_desc,
+ output_grad_data,
+ conv_desc,
#if CUDNN_VERSION >= 4000
- static_cast<cudnnConvolutionBwdFilterAlgo_t>(convBwdFilterAlgo),
- gpuWorkSpace,
- sizeInBytes,
+ static_cast<cudnnConvolutionBwdFilterAlgo_t>(convBwdFilterAlgo),
+ gpuWorkSpace,
+ sizeInBytes,
#endif
- &beta,
- grad_desc,
- filter_grad_data));
+ &beta,
+ grad_desc,
+ filter_grad_data));
CHECK_SYNC("hl_convolution_backward_filter failed");
}
@@ -859,121 +822,111 @@ void hl_convolution_backward_data(hl_tensor_descriptor input,
void* gpuWorkSpace,
size_t sizeInBytes,
int convBwdDataAlgo) {
- real alpha = 1.0f;
- real beta = 1.0f;
- cudnnFilterDescriptor_t filter_desc = GET_FILTER_DESCRIPTOR(filter);
- cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output);
- cudnnTensorDescriptor_t grad_desc = GET_TENSOR_DESCRIPTOR(input);
- cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
-
- CHECK_CUDNN(dynload::cudnnConvolutionBackwardData(
- t_resource.cudnn_handle,
- &alpha,
- filter_desc,
- filter_data,
- diff_desc,
- output_grad_data,
- conv_desc,
+ real alpha = 1.0f;
+ real beta = 1.0f;
+ cudnnFilterDescriptor_t filter_desc = GET_FILTER_DESCRIPTOR(filter);
+ cudnnTensorDescriptor_t diff_desc = GET_TENSOR_DESCRIPTOR(output);
+ cudnnTensorDescriptor_t grad_desc = GET_TENSOR_DESCRIPTOR(input);
+ cudnnConvolutionDescriptor_t conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
+
+ CHECK_CUDNN(dynload::cudnnConvolutionBackwardData(
+ t_resource.cudnn_handle,
+ &alpha,
+ filter_desc,
+ filter_data,
+ diff_desc,
+ output_grad_data,
+ conv_desc,
#if CUDNN_VERSION >= 4000
- static_cast<cudnnConvolutionBwdDataAlgo_t>(convBwdDataAlgo),
- gpuWorkSpace,
- sizeInBytes,
+ static_cast<cudnnConvolutionBwdDataAlgo_t>(convBwdDataAlgo),
+ gpuWorkSpace,
+ sizeInBytes,
#endif
- &beta,
- grad_desc,
- input_data_grad));
+ &beta,
+ grad_desc,
+ input_data_grad));
CHECK_SYNC("hl_convolution_backward_data failed");
}
-
-void hl_softmax_forward(real *input,
- real *output,
- int height,
- int width)
-{
+void hl_softmax_forward(real* input, real* output, int height, int width) {
#ifndef PADDLE_TYPE_DOUBLE
- cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
+ cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else
- cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
+ cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
#endif
- CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(
- t_resource.cudnn_desc,
- CUDNN_TENSOR_NCHW,
- data_type,
- height,
- width,
- 1,
- 1));
-
- real alpha = 1.0f;
- real beta = 0.0f;
- CHECK_CUDNN(dynload::cudnnSoftmaxForward(
- t_resource.cudnn_handle,
- CUDNN_SOFTMAX_ACCURATE,
- CUDNN_SOFTMAX_MODE_CHANNEL,
- &alpha,
- t_resource.cudnn_desc,
- input,
- &beta,
- t_resource.cudnn_desc,
- output));
+ CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(t_resource.cudnn_desc,
+ CUDNN_TENSOR_NCHW,
+ data_type,
+ height,
+ width,
+ 1,
+ 1));
+
+ real alpha = 1.0f;
+ real beta = 0.0f;
+ CHECK_CUDNN(dynload::cudnnSoftmaxForward(t_resource.cudnn_handle,
+ CUDNN_SOFTMAX_ACCURATE,
+ CUDNN_SOFTMAX_MODE_CHANNEL,
+ &alpha,
+ t_resource.cudnn_desc,
+ input,
+ &beta,
+ t_resource.cudnn_desc,
+ output));
CHECK_SYNC("hl_softmax_forward failed");
}
-void hl_softmax_backward(real *output_value,
- real *output_grad,
+void hl_softmax_backward(real* output_value,
+ real* output_grad,
int height,
- int width)
-{
+ int width) {
#ifndef PADDLE_TYPE_DOUBLE
- cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
+ cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else
- cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
+ cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
#endif
- CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(
- t_resource.cudnn_desc,
- CUDNN_TENSOR_NCHW,
- data_type,
- height,
- width,
- 1,
- 1));
-
- real alpha = 1.0f;
- real beta = 0.0f;
- CHECK_CUDNN(dynload::cudnnSoftmaxBackward(
- t_resource.cudnn_handle,
- CUDNN_SOFTMAX_ACCURATE,
- CUDNN_SOFTMAX_MODE_CHANNEL,
- &alpha,
- t_resource.cudnn_desc,
- output_value,
- t_resource.cudnn_desc,
- output_grad,
- &beta,
- t_resource.cudnn_desc,
- output_grad));
+ CHECK_CUDNN(dynload::cudnnSetTensor4dDescriptor(t_resource.cudnn_desc,
+ CUDNN_TENSOR_NCHW,
+ data_type,
+ height,
+ width,
+ 1,
+ 1));
+
+ real alpha = 1.0f;
+ real beta = 0.0f;
+ CHECK_CUDNN(dynload::cudnnSoftmaxBackward(t_resource.cudnn_handle,
+ CUDNN_SOFTMAX_ACCURATE,
+ CUDNN_SOFTMAX_MODE_CHANNEL,
+ &alpha,
+ t_resource.cudnn_desc,
+ output_value,
+ t_resource.cudnn_desc,
+ output_grad,
+ &beta,
+ t_resource.cudnn_desc,
+ output_grad));
CHECK_SYNC("hl_softmax_backward failed");
}
void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
- real *input,
+ real* input,
hl_tensor_descriptor outputDesc,
- real *output,
+ real* output,
hl_tensor_descriptor bnParamDesc,
- real *scale,
- real *bias,
+ real* scale,
+ real* bias,
double factor,
- real *runningMean,
- real *runningInvVar,
+ real* runningMean,
+ real* runningInvVar,
double epsilon,
- real *savedMean,
- real *savedVar) {
+ real* savedMean,
+ real* savedVar) {
#if CUDNN_VERSION >= 4007
if ((NULL != runningMean && NULL == runningInvVar) ||
(NULL == runningMean && NULL != runningInvVar)) {
LOG(FATAL) << "runningMean and runningInvVar can be NULL "
- << "but only at the same time.";
+ << "but only at the same time.";
}
if ((NULL != savedMean && NULL == savedVar) ||
(NULL == savedMean && NULL != savedVar)) {
@@ -987,10 +940,24 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
real alpha = 1.0f;
real beta = 1.0f;
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
- CHECK_CUDNN(dynload::cudnnBatchNormalizationForwardTraining(
- t_resource.cudnn_handle, mode, &alpha, &beta, xDesc,
- input, yDesc, output, bnDesc, scale, bias, factor,
- runningMean, runningInvVar, epsilon, savedMean, savedVar));
+ CHECK_CUDNN(
+ dynload::cudnnBatchNormalizationForwardTraining(t_resource.cudnn_handle,
+ mode,
+ &alpha,
+ &beta,
+ xDesc,
+ input,
+ yDesc,
+ output,
+ bnDesc,
+ scale,
+ bias,
+ factor,
+ runningMean,
+ runningInvVar,
+ epsilon,
+ savedMean,
+ savedVar));
CHECK_SYNC("hl_batch_norm_forward_training failed");
#else
@@ -1000,15 +967,15 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
}
void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
- real *input,
- hl_tensor_descriptor outputDesc,
- real *output,
- hl_tensor_descriptor bnParamDesc,
- real *scale,
- real *bias,
- real *estimatedMean,
- real *estimatedInvVar,
- double epsilon) {
+ real* input,
+ hl_tensor_descriptor outputDesc,
+ real* output,
+ hl_tensor_descriptor bnParamDesc,
+ real* scale,
+ real* bias,
+ real* estimatedMean,
+ real* estimatedInvVar,
+ double epsilon) {
#if CUDNN_VERSION >= 4007
cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc);
cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc);
@@ -1016,10 +983,21 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real alpha = 1.0f;
real beta = 1.0f;
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
- CHECK_CUDNN(dynload::cudnnBatchNormalizationForwardInference(
- t_resource.cudnn_handle, mode, &alpha, &beta, xDesc,
- input, yDesc, output, bnDesc, scale, bias,
- estimatedMean, estimatedInvVar, epsilon));
+ CHECK_CUDNN(
+ dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle,
+ mode,
+ &alpha,
+ &beta,
+ xDesc,
+ input,
+ yDesc,
+ output,
+ bnDesc,
+ scale,
+ bias,
+ estimatedMean,
+ estimatedInvVar,
+ epsilon));
CHECK_SYNC("hl_batch_norm_forward_inference failed");
#else
@@ -1029,18 +1007,18 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
}
void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
- real *input,
+ real* input,
hl_tensor_descriptor outGradDesc,
- real *outGrad,
+ real* outGrad,
hl_tensor_descriptor inGradDesc,
- real *inGrad,
+ real* inGrad,
hl_tensor_descriptor dBnParamDesc,
- real *scale,
- real *scaleGrad,
- real *biasGrad,
+ real* scale,
+ real* scaleGrad,
+ real* biasGrad,
double epsilon,
- real *savedMean,
- real *savedInvVar) {
+ real* savedMean,
+ real* savedInvVar) {
#if CUDNN_VERSION >= 4007
if ((NULL != savedMean && NULL == savedInvVar) ||
(NULL == savedMean && NULL != savedInvVar)) {
@@ -1055,12 +1033,25 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
real alpha = 1.0f;
real beta = 1.0f;
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
- CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward(
- t_resource.cudnn_handle, mode, &alpha, &beta,
- &alpha, &beta,
- xDesc, input, dyDesc, outGrad, dxDesc, inGrad,
- bnDesc, scale, scaleGrad, biasGrad, epsilon,
- savedMean, savedInvVar));
+ CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward(t_resource.cudnn_handle,
+ mode,
+ &alpha,
+ &beta,
+ &alpha,
+ &beta,
+ xDesc,
+ input,
+ dyDesc,
+ outGrad,
+ dxDesc,
+ inGrad,
+ bnDesc,
+ scale,
+ scaleGrad,
+ biasGrad,
+ epsilon,
+ savedMean,
+ savedInvVar));
CHECK_SYNC("hl_batch_norm_backward failed");
#else
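
Not part of the patch: the hunks above only reflow the cuDNN wrapper calls, but the gradient the wrapped cudnnSoftmaxBackward call produces is easy to state. A minimal CPU reference sketch, assuming single-precision `real` and one softmax group per row (names such as softmax_backward_ref are illustrative only):

#include <cstdio>
#include <vector>

// Reference for dx_i = y_i * (dy_i - sum_j dy_j * y_j), the per-group gradient
// computed by the wrapped softmax backward call; y is the forward softmax output.
void softmax_backward_ref(const std::vector<float>& y,
                          const std::vector<float>& dy,
                          std::vector<float>& dx) {
  float dot = 0.0f;
  for (std::size_t j = 0; j < y.size(); ++j) dot += dy[j] * y[j];
  dx.assign(y.size(), 0.0f);
  for (std::size_t i = 0; i < y.size(); ++i) dx[i] = y[i] * (dy[i] - dot);
}

int main() {
  std::vector<float> y = {0.7f, 0.2f, 0.1f}, dy = {1.0f, 0.0f, 0.0f}, dx;
  softmax_backward_ref(y, dy, dx);
  for (float v : dx) std::printf("%f ", v);
  std::printf("\n");
}

Note the patch passes output_grad as both the gradient input and output, i.e. the backward pass is performed in place.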
diff --git a/paddle/cuda/src/hl_cuda_device.cc b/paddle/cuda/src/hl_cuda_device.cc
index 3ea2c91bd5a41e0cd6ece0605a25e645676faa40..745be35b56278ed2e0033d5fd2806320d3164d7c 100644
--- a/paddle/cuda/src/hl_cuda_device.cc
+++ b/paddle/cuda/src/hl_cuda_device.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include
#include
#include
@@ -27,7 +26,7 @@ limitations under the License. */
namespace dynload {
std::once_flag curand_dso_flag;
-void* curand_dso_handle = nullptr;
+void *curand_dso_handle = nullptr;
/**
* The following macro definition can generate structs
@@ -37,34 +36,35 @@ void* curand_dso_handle = nullptr;
* note: default dynamic linked libs
*/
#ifdef PADDLE_USE_DSO
-#define DYNAMIC_LOAD_CURAND_WRAP(__name) \
- struct DynLoad__##__name { \
- template <typename... Args> \
- curandStatus_t operator()(Args... args) { \
- typedef curandStatus_t (*curandFunc)(Args...); \
- std::call_once(curand_dso_flag, GetCurandDsoHandle, \
- &curand_dso_handle); \
- void* p_##__name = dlsym(curand_dso_handle, #__name); \
- return reinterpret_cast<curandFunc>(p_##__name)(args...); \
- } \
- } __name; /* struct DynLoad__##__name */
+#define DYNAMIC_LOAD_CURAND_WRAP(__name) \
+ struct DynLoad__##__name { \
+ template <typename... Args> \
+ curandStatus_t operator()(Args... args) { \
+ typedef curandStatus_t (*curandFunc)(Args...); \
+ std::call_once(curand_dso_flag, GetCurandDsoHandle, &curand_dso_handle); \
+ void *p_##__name = dlsym(curand_dso_handle, #__name); \
+ return reinterpret_cast<curandFunc>(p_##__name)(args...); \
+ } \
+ } __name; /* struct DynLoad__##__name */
#else
-#define DYNAMIC_LOAD_CURAND_WRAP(__name) \
- struct DynLoad__##__name { \
- template <typename... Args> \
- curandStatus_t operator()(Args... args) { \
- return __name(args...); \
- } \
- } __name; /* struct DynLoad__##__name */
+#define DYNAMIC_LOAD_CURAND_WRAP(__name) \
+ struct DynLoad__##__name { \
+ template <typename... Args> \
+ curandStatus_t operator()(Args... args) { \
+ return __name(args...); \
+ } \
+ } __name; /* struct DynLoad__##__name */
#endif
/* include all needed curand functions in HPPL */
+// clang-format off
#define CURAND_RAND_ROUTINE_EACH(__macro) \
__macro(curandCreateGenerator) \
__macro(curandSetStream) \
__macro(curandSetPseudoRandomGeneratorSeed)\
__macro(curandGenerateUniform) \
__macro(curandGenerateUniformDouble)
+// clang-format on
CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP)
@@ -72,7 +72,7 @@ CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP)
#undef DYNAMIC_LOAD_CURAND_WRAP
std::once_flag cudart_dso_flag;
-void* cudart_dso_handle = nullptr;
+void *cudart_dso_handle = nullptr;
/**
* The following macro definition can generate structs
@@ -82,28 +82,28 @@ void* cudart_dso_handle = nullptr;
* note: default dynamic linked libs
*/
#ifdef PADDLE_USE_DSO
-#define DYNAMIC_LOAD_CUDART_WRAP(__name) \
- struct DynLoad__##__name { \
- template <typename... Args> \
- auto operator()(Args... args) -> decltype(__name(args...)) { \
- using cudart_func = decltype(__name(args...))(*)(Args...); \
- std::call_once(cudart_dso_flag, GetCudartDsoHandle, \
- &cudart_dso_handle); \
- void* p_##__name = dlsym(cudart_dso_handle, #__name); \
- return reinterpret_cast<cudart_func>(p_##__name)(args...); \
- } \
- } __name; /* struct DynLoad__##__name */
+#define DYNAMIC_LOAD_CUDART_WRAP(__name) \
+ struct DynLoad__##__name { \
+ template <typename... Args> \
+ auto operator()(Args... args) -> decltype(__name(args...)) { \
+ using cudart_func = decltype(__name(args...)) (*)(Args...); \
+ std::call_once(cudart_dso_flag, GetCudartDsoHandle, &cudart_dso_handle); \
+ void *p_##__name = dlsym(cudart_dso_handle, #__name); \
+ return reinterpret_cast<cudart_func>(p_##__name)(args...); \
+ } \
+ } __name; /* struct DynLoad__##__name */
#else
-#define DYNAMIC_LOAD_CUDART_WRAP(__name) \
- struct DynLoad__##__name { \
- template <typename... Args> \
- auto operator()(Args... args) -> decltype(__name(args...)) { \
- return __name(args...); \
- } \
- } __name; /* struct DynLoad__##__name */
+#define DYNAMIC_LOAD_CUDART_WRAP(__name) \
+ struct DynLoad__##__name { \
+ template <typename... Args> \
+ auto operator()(Args... args) -> decltype(__name(args...)) { \
+ return __name(args...); \
+ } \
+ } __name; /* struct DynLoad__##__name */
#endif
/* include all needed cuda functions in HPPL */
+// clang-format off
#define CUDA_ROUTINE_EACH(__macro) \
__macro(cudaMalloc) \
__macro(cudaHostAlloc) \
@@ -134,57 +134,57 @@ void* cudart_dso_handle = nullptr;
__macro(cudaFuncSetCacheConfig) \
__macro(cudaRuntimeGetVersion) \
__macro(cudaGetErrorString)
+// clang-format on
CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP)
#undef CUDA_ROUNTINE_EACH
#undef DYNAMIC_LOAD_CUDART_WRAP
-} /* namespace dynload */
+} /* namespace dynload */
/**
* @brief global resource.
*/
-int g_system_device_num = 0; /* system device number */
-int device_num = 0; /* use device number */
-hl_device_prop *g_device; /* device info table */
-__thread thread_device_resources *t_device; /* device resources table */
+int g_system_device_num = 0; /* system device number */
+int device_num = 0; /* use device number */
+hl_device_prop *g_device; /* device info table */
+__thread thread_device_resources *t_device; /* device resources table */
int g_cuda_lib_version = 0;
/* number of global stream */
-#define NUMBER_OF_GLOBAL_STREAM (HPPL_THREAD_STREAM_1)
+#define NUMBER_OF_GLOBAL_STREAM (HPPL_THREAD_STREAM_1)
/* number of thread stream */
-#define NUMBER_OF_THREAD_STREAM (HPPL_STREAM_END - HPPL_THREAD_STREAM_1)
+#define NUMBER_OF_THREAD_STREAM (HPPL_STREAM_END - HPPL_THREAD_STREAM_1)
/* sizeof of device memory */
-#define HPPL_GPU_MEMORY_SIZE (256*4)
+#define HPPL_GPU_MEMORY_SIZE (256 * 4)
/**
* Check build-in cuda function using glog and it **does not**
* support << operator for more details error info.
*/
-#define CHECK_CUDA(cudaFunc) \
- do { \
- cudaError_t cudaStat = cudaFunc; \
- CHECK_EQ(cudaSuccess, cudaStat) << "Cuda Error: " \
- << dynload::cudaGetErrorString(cudaStat); \
+#define CHECK_CUDA(cudaFunc) \
+ do { \
+ cudaError_t cudaStat = cudaFunc; \
+ CHECK_EQ(cudaSuccess, cudaStat) << "Cuda Error: " \
+ << dynload::cudaGetErrorString(cudaStat); \
} while (0)
/**
* @brief thread resource.
*/
-__thread _hl_thread_resource t_resource = {
- {0}, /* stream */
- 0, /* handle */
- 0, /* gen */
- 0, /* cudnn_handle */
- 0, /* cudnn_desc */
- NULL, /* gen_mutex */
- NULL, /* gpu_mem */
- NULL, /* cpu_mem */
- 0, /* event */
- -1, /* device */
- 0, /* major */
- false}; /* is_init */
+__thread _hl_thread_resource t_resource = {{0}, /* stream */
+ 0, /* handle */
+ 0, /* gen */
+ 0, /* cudnn_handle */
+ 0, /* cudnn_desc */
+ NULL, /* gen_mutex */
+ NULL, /* gpu_mem */
+ NULL, /* cpu_mem */
+ 0, /* event */
+ -1, /* device */
+ 0, /* major */
+ false}; /* is_init */
__thread cudaStream_t default_stream = 0;
__thread bool g_sync_flag = true;
@@ -198,18 +198,17 @@ inline pid_t gettid() {
uint64_t tid;
pthread_threadid_np(NULL, &tid);
#else
- #ifndef __NR_gettid
- #define __NR_gettid 224
- #endif
+#ifndef __NR_gettid
+#define __NR_gettid 224
+#endif
pid_t tid = syscall(__NR_gettid);
#endif
- CHECK_NE(tid, -1);
- return tid;
+ CHECK_NE((int)tid, -1);
+ return tid;
}
void hl_init(int device) {
- CHECK(hl_start_flag)
- << "[Init failed] hl_start() did not succeed.";
+ CHECK(hl_start_flag) << "[Init failed] hl_start() did not succeed.";
/* thread has been initialized */
if (true == t_resource.is_init) {
@@ -220,16 +219,16 @@ void hl_init(int device) {
/* create thread devcie resources */
char *tmp;
thread_device_resources device_res;
- tmp = (char *)malloc(g_system_device_num*sizeof(thread_device_resources*) +
- device_num*sizeof(_thread_device_resources));
+ tmp = (char *)malloc(g_system_device_num * sizeof(thread_device_resources *) +
+ device_num * sizeof(_thread_device_resources));
CHECK_NOTNULL(tmp);
- t_device = (thread_device_resources*)tmp;
- device_res = (thread_device_resources)((char*)tmp +
- g_system_device_num*sizeof(thread_device_resources*));
- memset(t_device, 0, g_system_device_num*sizeof(thread_device_resources*));
+ t_device = (thread_device_resources *)tmp;
+ device_res = (thread_device_resources)(
+ (char *)tmp + g_system_device_num * sizeof(thread_device_resources *));
+ memset(t_device, 0, g_system_device_num * sizeof(thread_device_resources *));
- char *tmp_stream = (char *)
- malloc(device_num*NUMBER_OF_THREAD_STREAM*sizeof(cudaStream_t));
+ char *tmp_stream = (char *)malloc(device_num * NUMBER_OF_THREAD_STREAM *
+ sizeof(cudaStream_t));
CHECK_NOTNULL(tmp_stream);
int num = 0;
@@ -239,8 +238,9 @@ void hl_init(int device) {
}
t_device[dev] = &device_res[num];
- t_device[dev]->stream = (cudaStream_t*)(tmp_stream +
- num*NUMBER_OF_THREAD_STREAM*sizeof(cudaStream_t));
+ t_device[dev]->stream =
+ (cudaStream_t *)(tmp_stream +
+ num * NUMBER_OF_THREAD_STREAM * sizeof(cudaStream_t));
hl_create_thread_resources(dev, t_device[dev]);
num++;
@@ -266,14 +266,14 @@ void hl_fini() {
t_resource.stream[i] = 0;
}
- char* tmp = (char*)t_device;
- char* tmp_stream = NULL;
+ char *tmp = (char *)t_device;
+ char *tmp_stream = NULL;
for (int dev = 0; dev < g_system_device_num; dev++) {
if (!t_device[dev]) {
continue;
}
if (!tmp_stream) {
- tmp_stream = (char*)t_device[dev]->stream;
+ tmp_stream = (char *)t_device[dev]->stream;
}
for (int j = 0; j < NUMBER_OF_THREAD_STREAM; j++) {
CHECK_CUDA(dynload::cudaStreamDestroy(t_device[dev]->stream[j]));
@@ -290,9 +290,7 @@ void hl_fini() {
t_resource.is_init = false;
}
-int hl_get_device_count() {
- return device_num;
-}
+int hl_get_device_count() { return device_num; }
void hl_set_device(int device) {
if (device == t_resource.device) {
@@ -300,7 +298,7 @@ void hl_set_device(int device) {
}
CHECK(device >= 0 && device < g_system_device_num && g_device[device])
- << "Device: " << device << " is not specified in startup.";
+ << "Device: " << device << " is not specified in startup.";
CHECK_CUDA(dynload::cudaSetDevice(device));
@@ -312,11 +310,11 @@ void hl_set_device(int device) {
if (true == t_resource.is_init) {
for (int i = NUMBER_OF_GLOBAL_STREAM; i < HPPL_STREAM_END; i++) {
t_resource.stream[i] =
- t_device[device]->stream[i - NUMBER_OF_GLOBAL_STREAM];
+ t_device[device]->stream[i - NUMBER_OF_GLOBAL_STREAM];
}
t_resource.gpu_mem = t_device[device]->gpu_mem;
t_resource.cpu_mem = t_device[device]->cpu_mem;
- t_resource.event = t_device[device]->mem_event;
+ t_resource.event = t_device[device]->mem_event;
}
t_resource.handle = g_device[device]->device_resources->handle;
@@ -334,11 +332,11 @@ int hl_get_device() {
return device;
}
-void* hl_malloc_device(size_t size) {
+void *hl_malloc_device(size_t size) {
void *dest_d;
CHECK(size) << __func__ << ": the size for device memory is 0, please check.";
- CHECK_CUDA(dynload::cudaMalloc((void**)&dest_d, size));
+ CHECK_CUDA(dynload::cudaMalloc((void **)&dest_d, size));
return dest_d;
}
@@ -348,14 +346,15 @@ void hl_free_mem_device(void *dest_d) {
cudaError_t err = dynload::cudaFree(dest_d);
CHECK(cudaSuccess == err || cudaErrorCudartUnloading == err)
- << hl_get_device_error_string();
+ << hl_get_device_error_string();
}
-void* hl_malloc_host(size_t size) {
+void *hl_malloc_host(size_t size) {
void *dest_h;
CHECK(size) << __func__ << ": the size for device memory is 0, please check.";
- CHECK_CUDA(dynload::cudaHostAlloc((void**)&dest_h, size, cudaHostAllocDefault));
+ CHECK_CUDA(
+ dynload::cudaHostAlloc((void **)&dest_h, size, cudaHostAllocDefault));
return dest_h;
}
@@ -364,8 +363,8 @@ void hl_free_mem_host(void *dest_h) {
CHECK_NOTNULL(dest_h);
cudaError_t err = dynload::cudaFreeHost(dest_h);
- CHECK (cudaSuccess == err || cudaErrorCudartUnloading == err)
- << hl_get_device_error_string();
+ CHECK(cudaSuccess == err || cudaErrorCudartUnloading == err)
+ << hl_get_device_error_string();
}
void hl_memcpy(void *dst, void *src, size_t size) {
@@ -387,8 +386,7 @@ void hl_memcpy_host2device(void *dest_d, void *src_h, size_t size) {
}
CHECK_NOTNULL(src_h);
CHECK_NOTNULL(dest_d);
- CHECK_CUDA(dynload::cudaMemcpy(dest_d, src_h, size,
- cudaMemcpyHostToDevice));
+ CHECK_CUDA(dynload::cudaMemcpy(dest_d, src_h, size, cudaMemcpyHostToDevice));
}
void hl_memcpy_device2host(void *dest_h, void *src_d, size_t size) {
@@ -397,8 +395,7 @@ void hl_memcpy_device2host(void *dest_h, void *src_d, size_t size) {
}
CHECK_NOTNULL(dest_h);
CHECK_NOTNULL(src_d);
- CHECK_CUDA(dynload::cudaMemcpy(dest_h, src_d, size,
- cudaMemcpyDeviceToHost));
+ CHECK_CUDA(dynload::cudaMemcpy(dest_h, src_d, size, cudaMemcpyDeviceToHost));
}
void hl_memcpy_device2device(void *dest_d, void *src_d, size_t size) {
@@ -407,8 +404,8 @@ void hl_memcpy_device2device(void *dest_d, void *src_d, size_t size) {
}
CHECK_NOTNULL(dest_d);
CHECK_NOTNULL(src_d);
- CHECK_CUDA(dynload::cudaMemcpy(dest_d, src_d, size,
- cudaMemcpyDeviceToDevice));
+ CHECK_CUDA(
+ dynload::cudaMemcpy(dest_d, src_d, size, cudaMemcpyDeviceToDevice));
}
void hl_memcpy_async(void *dst, void *src, size_t size, hl_stream_t stream) {
@@ -422,8 +419,8 @@ void hl_memcpy_async(void *dst, void *src, size_t size, hl_stream_t stream) {
CHECK_LT(stream, HPPL_STREAM_END);
cu_stream = t_resource.stream[stream];
- CHECK_CUDA(dynload::cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault,
- cu_stream));
+ CHECK_CUDA(
+ dynload::cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault, cu_stream));
}
void hl_start() {
@@ -434,8 +431,8 @@ void hl_start() {
bool hl_device_can_access_peer(int device, int peerDevice) {
int canAccessPeer;
- CHECK_CUDA(dynload::cudaDeviceCanAccessPeer(&canAccessPeer, device,
- peerDevice));
+ CHECK_CUDA(
+ dynload::cudaDeviceCanAccessPeer(&canAccessPeer, device, peerDevice));
if (canAccessPeer == 1) {
return true;
@@ -477,32 +474,32 @@ void hl_create_global_resources(hl_device_prop device_prop) {
/* create curand gen */
CHECK_EQ(dynload::curandCreateGenerator(&device_res->gen,
- CURAND_RNG_PSEUDO_DEFAULT), CURAND_STATUS_SUCCESS)
- << "[Start failed] Curand init failed.";
+ CURAND_RNG_PSEUDO_DEFAULT),
+ CURAND_STATUS_SUCCESS)
+ << "[Start failed] Curand init failed.";
- CHECK_EQ(dynload::curandSetStream(device_res->gen,
- device_res->stream[0]), CURAND_STATUS_SUCCESS)
- << "[Start failed] Curand set stream failed!";
+ CHECK_EQ(dynload::curandSetStream(device_res->gen, device_res->stream[0]),
+ CURAND_STATUS_SUCCESS)
+ << "[Start failed] Curand set stream failed!";
/* create cudnn handle */
hl_cudnn_init(&device_res->cudnn_handle, device_res->stream[0]);
int seed = gettid();
- CHECK_EQ(dynload::curandSetPseudoRandomGeneratorSeed(
- device_res->gen, seed+device), CURAND_STATUS_SUCCESS);
+ CHECK_EQ(dynload::curandSetPseudoRandomGeneratorSeed(device_res->gen,
+ seed + device),
+ CURAND_STATUS_SUCCESS);
- device_res->gen_mutex =
- (pthread_mutex_t*)(malloc(sizeof (pthread_mutex_t)));
+ device_res->gen_mutex = (pthread_mutex_t *)(malloc(sizeof(pthread_mutex_t)));
pthread_mutex_init(device_res->gen_mutex, NULL);
CHECK_CUDA(dynload::cudaRuntimeGetVersion(&g_cuda_lib_version));
}
-int hl_get_cuda_version() {
- return g_cuda_lib_version;
-}
+int hl_get_cuda_version() { return g_cuda_lib_version; }
-void hl_create_thread_resources(int device, thread_device_resources device_res) {
+void hl_create_thread_resources(int device,
+ thread_device_resources device_res) {
CHECK_CUDA(dynload::cudaSetDevice(device));
/* create thread stream */
@@ -511,15 +508,15 @@ void hl_create_thread_resources(int device, thread_device_resources device_res)
}
/* allocation device memory */
- device_res->gpu_mem = (real*)hl_malloc_device(HPPL_GPU_MEMORY_SIZE);
+ device_res->gpu_mem = (real *)hl_malloc_device(HPPL_GPU_MEMORY_SIZE);
/* allocation host memory */
- device_res->cpu_mem = (real*)hl_malloc_host(HPPL_GPU_MEMORY_SIZE);
+ device_res->cpu_mem = (real *)hl_malloc_host(HPPL_GPU_MEMORY_SIZE);
CHECK_CUDA(dynload::cudaEventCreate(&device_res->mem_event));
}
-void hl_specify_devices_start(int* device, int number) {
+void hl_specify_devices_start(int *device, int number) {
if (hl_start_flag) return;
/* 1. get the number of devices */
@@ -531,20 +528,19 @@ void hl_specify_devices_start(int* device, int number) {
/* 2. check device & create device property table */
CHECK_LE(number, g_system_device_num)
- << "[Start failed] System does not have enough device. "
- << "Device number: " << g_system_device_num
- << "Input number: " << number;
+ << "[Start failed] System does not have enough device. "
+ << "Device number: " << g_system_device_num << "Input number: " << number;
char *tmp;
hl_device_prop device_prop;
- tmp = (char *)malloc(g_system_device_num*sizeof(hl_device_prop*) +
- number*sizeof(_hl_device_prop));
+ tmp = (char *)malloc(g_system_device_num * sizeof(hl_device_prop *) +
+ number * sizeof(_hl_device_prop));
CHECK(tmp) << "[Start failed] System memory is not enough.";
- g_device = (hl_device_prop*)tmp;
- device_prop = (hl_device_prop)((char*)tmp +
- g_system_device_num*sizeof(hl_device_prop*));
- memset(g_device, 0, g_system_device_num*sizeof(hl_device_prop*));
+ g_device = (hl_device_prop *)tmp;
+ device_prop = (hl_device_prop)(
+ (char *)tmp + g_system_device_num * sizeof(hl_device_prop *));
+ memset(g_device, 0, g_system_device_num * sizeof(hl_device_prop *));
int num = 0;
for (int i = 0; i < number; i++) {
int dev;
@@ -555,13 +551,13 @@ void hl_specify_devices_start(int* device, int number) {
}
CHECK_LT(dev, g_system_device_num)
- << "[Start failed] The specified device number is "
- << "out of range. Max device number: " << g_system_device_num - 1
- << " Specified devcie number: "<< dev;
+ << "[Start failed] The specified device number is "
+ << "out of range. Max device number: " << g_system_device_num - 1
+ << " Specified devcie number: " << dev;
if (g_device[dev]) {
/* Warning */
- LOG(WARNING) <<"[Warning] Repeat specify device: " << dev;
+ LOG(WARNING) << "[Warning] Repeat specify device: " << dev;
continue;
}
@@ -572,11 +568,11 @@ void hl_specify_devices_start(int* device, int number) {
device_num = num;
/* 3. create global device resources */
- char *tmp_res = (char *)malloc(device_num*sizeof(_global_device_resources));
+ char *tmp_res = (char *)malloc(device_num * sizeof(_global_device_resources));
CHECK_NOTNULL(tmp_res);
- char *tmp_stream =
- (char *)malloc(device_num*NUMBER_OF_GLOBAL_STREAM*sizeof(cudaStream_t));
+ char *tmp_stream = (char *)malloc(device_num * NUMBER_OF_GLOBAL_STREAM *
+ sizeof(cudaStream_t));
CHECK_NOTNULL(tmp_stream);
num = 0;
@@ -585,10 +581,11 @@ void hl_specify_devices_start(int* device, int number) {
continue;
}
- g_device[i]->device_resources = (global_device_resources)(tmp_res +
- num*sizeof(_global_device_resources));
- g_device[i]->device_resources->stream = (cudaStream_t*)(tmp_stream +
- num*NUMBER_OF_GLOBAL_STREAM*sizeof(cudaStream_t));
+ g_device[i]->device_resources = (global_device_resources)(
+ tmp_res + num * sizeof(_global_device_resources));
+ g_device[i]->device_resources->stream =
+ (cudaStream_t *)(tmp_stream +
+ num * NUMBER_OF_GLOBAL_STREAM * sizeof(cudaStream_t));
hl_create_global_resources(g_device[i]);
num++;
@@ -598,9 +595,9 @@ void hl_specify_devices_start(int* device, int number) {
hl_start_flag = true;
/* set default device */
if (device == NULL) {
- hl_set_device(0);
+ hl_set_device(0);
} else {
- hl_set_device(device[0]);
+ hl_set_device(device[0]);
}
}
@@ -608,35 +605,31 @@ void hl_rand(real *dest_d, size_t num) {
pthread_mutex_lock(t_resource.gen_mutex);
CHECK_EQ(
#ifndef PADDLE_TYPE_DOUBLE
- dynload::curandGenerateUniform(t_resource.gen, dest_d, num),
+ dynload::curandGenerateUniform(t_resource.gen, dest_d, num),
#else
- dynload::curandGenerateUniformDouble(t_resource.gen, dest_d, num),
+ dynload::curandGenerateUniformDouble(t_resource.gen, dest_d, num),
#endif
- CURAND_STATUS_SUCCESS);
+ CURAND_STATUS_SUCCESS);
pthread_mutex_unlock(t_resource.gen_mutex);
CHECK_SYNC("hl_rand failed");
}
void hl_srand(unsigned int seed) {
pthread_mutex_lock(t_resource.gen_mutex);
- CHECK_EQ(dynload::curandSetPseudoRandomGeneratorSeed(
- t_resource.gen, seed), CURAND_STATUS_SUCCESS);
+ CHECK_EQ(dynload::curandSetPseudoRandomGeneratorSeed(t_resource.gen, seed),
+ CURAND_STATUS_SUCCESS);
pthread_mutex_unlock(t_resource.gen_mutex);
}
-void hl_set_sync_flag(bool flag) {
- g_sync_flag = flag;
-}
+void hl_set_sync_flag(bool flag) { g_sync_flag = flag; }
-bool hl_get_sync_flag() {
- return g_sync_flag;
-}
+bool hl_get_sync_flag() { return g_sync_flag; }
void hl_stream_synchronize(hl_stream_t stream) {
cudaStream_t cu_stream;
- CHECK_LT(stream, HPPL_STREAM_END)
- << __func__ <<": the parameter stream is error.";
+ CHECK_LT(stream, HPPL_STREAM_END) << __func__
+ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream];
CHECK_CUDA(dynload::cudaStreamSynchronize(cu_stream));
@@ -645,8 +638,8 @@ void hl_stream_synchronize(hl_stream_t stream) {
void hl_create_event(hl_event_t *event) {
CHECK_NOTNULL(event);
- struct _hl_event_st* st_event =
- (struct _hl_event_st*)malloc(sizeof(struct _hl_event_st));
+ struct _hl_event_st *st_event =
+ (struct _hl_event_st *)malloc(sizeof(struct _hl_event_st));
CHECK_CUDA(dynload::cudaEventCreate(&st_event->cu_event));
@@ -658,8 +651,8 @@ float hl_event_elapsed_time(hl_event_t start, hl_event_t end) {
CHECK_NOTNULL(start);
CHECK_NOTNULL(end);
- CHECK_CUDA(dynload::cudaEventElapsedTime(&time,
- start->cu_event, end->cu_event));
+ CHECK_CUDA(
+ dynload::cudaEventElapsedTime(&time, start->cu_event, end->cu_event));
return time;
}
@@ -667,24 +660,22 @@ void hl_stream_record_event(hl_stream_t stream, hl_event_t event) {
cudaStream_t cu_stream;
CHECK_NOTNULL(event);
- CHECK_LT(stream, HPPL_STREAM_END)
- << __func__ <<": the parameter stream is error.";
+ CHECK_LT(stream, HPPL_STREAM_END) << __func__
+ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream];
- CHECK_CUDA(dynload::cudaEventRecord(
- event->cu_event, cu_stream));
+ CHECK_CUDA(dynload::cudaEventRecord(event->cu_event, cu_stream));
}
void hl_stream_wait_event(hl_stream_t stream, hl_event_t event) {
cudaStream_t cu_stream;
CHECK_NOTNULL(event);
- CHECK_LT(stream, HPPL_STREAM_END)
- << __func__ <<": the parameter stream is error.";
+ CHECK_LT(stream, HPPL_STREAM_END) << __func__
+ << ": the parameter stream is error.";
cu_stream = t_resource.stream[stream];
- CHECK_CUDA(dynload::cudaStreamWaitEvent(
- cu_stream, event->cu_event, 0));
+ CHECK_CUDA(dynload::cudaStreamWaitEvent(cu_stream, event->cu_event, 0));
}
void hl_destroy_event(hl_event_t event) {
@@ -703,15 +694,15 @@ void hl_event_synchronize(hl_event_t event) {
void hl_get_device_name(char *name, int len, int device) {
CHECK_NOTNULL(name);
CHECK(device >= 0 && device < g_system_device_num && g_device[device])
- << "Device("<< device <<") is not specified in startup.";
+ << "Device(" << device << ") is not specified in startup.";
- strncpy(name, g_device[device]->device_name , len);
+ strncpy(name, g_device[device]->device_name, len);
}
void hl_get_device_memory(size_t *mem_size, int device) {
CHECK_NOTNULL(mem_size);
CHECK(device >= 0 && device < g_system_device_num && g_device[device])
- << "Device("<< device <<") is not specified in startup.";
+ << "Device(" << device << ") is not specified in startup.";
*mem_size = g_device[device]->device_mem;
}
@@ -720,31 +711,26 @@ void hl_get_device_compute_capability(int *major, int *minor, int device) {
CHECK_NOTNULL(major);
CHECK_NOTNULL(minor);
CHECK(device >= 0 && device < g_system_device_num && g_device[device])
- << "Device("<< device << ") is not specified in startup.";
+ << "Device(" << device << ") is not specified in startup.";
*major = g_device[device]->major;
*minor = g_device[device]->minor;
}
-int hl_get_device_last_error() {
- return (int)dynload::cudaGetLastError();
-}
+int hl_get_device_last_error() { return (int)dynload::cudaGetLastError(); }
-const char* hl_get_device_error_string() {
+const char *hl_get_device_error_string() {
cudaError_t err = dynload::cudaGetLastError();
return dynload::cudaGetErrorString(err);
}
-const char* hl_get_device_error_string(size_t err) {
+const char *hl_get_device_error_string(size_t err) {
return dynload::cudaGetErrorString((cudaError_t)err);
}
-void hl_device_synchronize() {
- CHECK_CUDA(dynload::cudaDeviceSynchronize());
-}
+void hl_device_synchronize() { CHECK_CUDA(dynload::cudaDeviceSynchronize()); }
void hl_set_device_flags_block() {
- CHECK_CUDA(dynload::cudaSetDeviceFlags(
- cudaDeviceScheduleBlockingSync));
+ CHECK_CUDA(dynload::cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
}
bool hl_cuda_event_is_ready(hl_event_t event) {
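
Not part of the patch: the DYNAMIC_LOAD_CURAND_WRAP / DYNAMIC_LOAD_CUDART_WRAP macros reformatted above all expand to the same lazy dlopen/dlsym pattern. A standalone sketch of that pattern, assuming Linux and a link against -ldl (the library name libm.so.6 and the helper load_symbol are illustrative only):

#include <dlfcn.h>
#include <cstdio>
#include <mutex>

static std::once_flag g_dso_flag;
static void* g_dso_handle = nullptr;

// Open the shared library exactly once (mirroring std::call_once on
// curand_dso_flag / cudart_dso_flag), then resolve the requested symbol.
template <typename Func>
Func load_symbol(const char* name) {
  std::call_once(g_dso_flag, [] {
    g_dso_handle = dlopen("libm.so.6", RTLD_LAZY | RTLD_LOCAL);
  });
  return g_dso_handle ? reinterpret_cast<Func>(dlsym(g_dso_handle, name))
                      : nullptr;
}

int main() {
  using cos_t = double (*)(double);
  cos_t my_cos = load_symbol<cos_t>("cos");
  if (my_cos) std::printf("cos(0) = %f\n", my_cos(0.0));
  return 0;
}

In the patch each generated struct's operator() does the same resolve-and-forward step, so call sites keep the original cuRAND/cudart function names.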
diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu
index 3df9f63f9e4b79d61a818b2af49a4d9dfd84a9ab..0b7cd3375671d58464dac93458ec6659add8b730 100644
--- a/paddle/cuda/src/hl_cuda_matrix.cu
+++ b/paddle/cuda/src/hl_cuda_matrix.cu
@@ -18,6 +18,7 @@ limitations under the License. */
#include "hl_matrix_ops.cuh"
#include "hl_matrix_apply.cuh"
#include "hl_sequence.h"
+#include "hl_sparse.ph"
#include "paddle/utils/Logging.h"
#include "hl_device_functions.cuh"
#include "hl_gpu_matrix_kernel.cuh"
@@ -317,6 +318,85 @@ void hl_matrix_classification_error(real* A_d,
CHECK_SYNC("hl_matrix_classification_error");
}
+__global__ void KeMatrixMultiBinaryCrossEntropy(real* output,
+ real* entropy,
+ int* row,
+ int* col,
+ int dimM,
+ int dimN) {
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index < dimM) {
+ for (int i = 0; i < dimN; i ++) {
+ entropy[index] -= log(1 - output[index * dimN + i]);
+ }
+ int *row_col = col + row[index];
+ int col_num = row[index + 1] - row[index];
+ for (int i = 0; i < col_num; i ++) {
+ real o = output[index * dimN + row_col[i]];
+ entropy[index] -= log(o / (1 - o));
+ }
+ }
+}
+
+void hl_matrix_multi_binary_cross_entropy(real* output,
+ real* entropy,
+ hl_sparse_matrix_s csr_mat,
+ int dimM,
+ int dimN) {
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(entropy);
+ CHECK_NOTNULL(csr_mat);
+ CHECK_EQ(csr_mat->format, HL_SPARSE_CSR);
+ int n_threads = 1024;
+ int blocks = (dimM + n_threads - 1) / n_threads;
+ dim3 threads(n_threads);
+ dim3 grid(blocks);
+ hl_csr_matrix mat = (hl_csr_matrix)(csr_mat->matrix);
+ KeMatrixMultiBinaryCrossEntropy<<< grid, threads, 0, STREAM_DEFAULT >>>
+ (output, entropy, mat->csr_row, mat->csr_col, dimM, dimN);
+ CHECK_SYNC("hl_matrix_multi_binary_cross_entropy failed");
+}
+
+__global__ void KeMatrixMultiBinaryCrossEntropyBp(real* output,
+ real* grad,
+ int* row,
+ int* col,
+ int dimM,
+ int dimN) {
+ int row_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (row_idx < dimM) {
+ for (int i = 0; i < dimN; i ++) {
+ int index = row_idx * dimN + i;
+ grad[index] += 1.0 / (1 - output[index]);
+ }
+ int col_num = row[row_idx + 1] - row[row_idx];
+ int *row_col = col + row[row_idx];
+ for (int i = 0; i < col_num; i ++) {
+ int index = row_idx * dimN + row_col[i];
+ grad[index] -= 1.0 / (output[index] * (1 - output[index]));
+ }
+ }
+}
+
+void hl_matrix_multi_binary_cross_entropy_bp(real* output,
+ real* grad,
+ hl_sparse_matrix_s csr_mat,
+ int dimM,
+ int dimN) {
+ CHECK_NOTNULL(output);
+ CHECK_NOTNULL(grad);
+ CHECK_NOTNULL(csr_mat);
+ CHECK_EQ(csr_mat->format, HL_SPARSE_CSR);
+ int n_threads = 1024;
+ int blocks = (dimM + n_threads - 1) / n_threads;
+ dim3 threads(n_threads);
+ dim3 grid(blocks);
+ hl_csr_matrix mat = (hl_csr_matrix)(csr_mat->matrix);
+ KeMatrixMultiBinaryCrossEntropyBp<<< grid, threads, 0, STREAM_DEFAULT >>>
+ (output, grad, mat->csr_row, mat->csr_col, dimM, dimN);
+ CHECK_SYNC("hl_matrix_multi_binary_cross_entropy_bp failed");
+}
+
__global__ void KeMatrixCrossEntropy(real* O,
real* E,
int* label,
@@ -685,7 +765,7 @@ __global__ void KeMatrixAddSharedBias(real* A,
int dim = N / channel;
if (index < M * N) {
int i = index % N;
- i = i / dim;
+ i = i / dim;
A[index] += scale * B[i];
}
}
@@ -713,7 +793,7 @@ __global__ void KeMatrixCollectSharedBias(real *B,
const int dim,
const int limit,
real scale) {
- if (dim < limit) {
+ if (dim < limit) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < channel) {
real sum = 0.0;
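
Not part of the patch: the new KeMatrixMultiBinaryCrossEntropy kernel above accumulates, per row, loss(r) = -sum over label columns of log(o) - sum over the remaining columns of log(1 - o), folded into -sum_j log(1 - o_j) - sum over labels of log(o / (1 - o)). A CPU reference sketch, assuming sigmoid outputs in (0, 1) (the function name is illustrative only):

#include <cmath>
#include <cstdio>
#include <vector>

float multi_binary_xent_row(const std::vector<float>& out,        // sigmoid outputs, one row
                            const std::vector<int>& label_cols) { // CSR columns of this row
  float loss = 0.0f;
  for (float o : out) loss -= std::log(1.0f - o);   // first treat every class as negative
  for (int c : label_cols) {
    float o = out[c];
    loss -= std::log(o / (1.0f - o));               // then correct the positive classes
  }
  return loss;
}

int main() {
  std::vector<float> out = {0.9f, 0.2f, 0.7f};
  std::vector<int> labels = {0, 2};  // classes 0 and 2 are positive
  std::printf("loss = %f\n", multi_binary_xent_row(out, labels));
}

The backward kernel follows the same split: a dense +1/(1 - o) term for every column, corrected by -1/(o * (1 - o)) on the label columns.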
diff --git a/paddle/cuda/src/hl_cudart_wrap.cc b/paddle/cuda/src/hl_cudart_wrap.cc
index 27bbd03bc328293d978867c6badddc13a754ece2..ff6b830b7addc5c87af0d55070260c279a046a75 100644
--- a/paddle/cuda/src/hl_cudart_wrap.cc
+++ b/paddle/cuda/src/hl_cudart_wrap.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifdef PADDLE_USE_DSO
#include
@@ -29,26 +28,26 @@ limitations under the License. */
namespace dynload {
extern std::once_flag cudart_dso_flag;
-extern void* cudart_dso_handle;
+extern void *cudart_dso_handle;
/**
* The following macro definition can generate structs
* (for each function) to dynamic load cuda routine
* via operator overloading.
**/
-#define DYNAMIC_LOAD_CUDART_WRAP(__name, __type) \
- struct DynLoad__##__name { \
- template <typename... Args> \
- __type operator()(Args... args) { \
- typedef __type (*cudartFunc)(Args...); \
- std::call_once(cudart_dso_flag, GetCudartDsoHandle, \
- &cudart_dso_handle); \
- void* p_##__name = dlsym(cudart_dso_handle, #__name); \
- return reinterpret_cast<cudartFunc>(p_##__name)(args...); \
- } \
- } __name; /* struct DynLoad__##__name */
+#define DYNAMIC_LOAD_CUDART_WRAP(__name, __type) \
+ struct DynLoad__##__name { \
+ template <typename... Args> \
+ __type operator()(Args... args) { \
+ typedef __type (*cudartFunc)(Args...); \
+ std::call_once(cudart_dso_flag, GetCudartDsoHandle, &cudart_dso_handle); \
+ void *p_##__name = dlsym(cudart_dso_handle, #__name); \
+ return reinterpret_cast<cudartFunc>(p_##__name)(args...); \
+ } \
+ } __name; /* struct DynLoad__##__name */
/* include all needed cuda functions in HPPL */
+// clang-format off
#define CUDA_ROUTINE_EACH(__macro) \
__macro(cudaLaunch, cudaError_t) \
__macro(cudaSetupArgument, cudaError_t) \
@@ -61,16 +60,17 @@ extern void* cudart_dso_handle;
__macro(__cudaInitModule, char) \
__macro(__cudaRegisterTexture, void) \
__macro(__cudaRegisterSurface, void)
+// clang-format on
CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP)
#if CUDART_VERSION >= 7000
- DYNAMIC_LOAD_CUDART_WRAP(cudaLaunchKernel, cudaError_t)
+DYNAMIC_LOAD_CUDART_WRAP(cudaLaunchKernel, cudaError_t)
#endif
#undef CUDA_ROUNTINE_EACH
-} /* namespace dynload */
+} /* namespace dynload */
#if CUDART_VERSION >= 7000
__host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func,
@@ -78,131 +78,120 @@ __host__ cudaError_t CUDARTAPI cudaLaunchKernel(const void *func,
dim3 blockDim,
void **args,
size_t sharedMem,
- cudaStream_t stream)
-{
- return dynload::cudaLaunchKernel(func, gridDim, blockDim, args, sharedMem, stream);
+ cudaStream_t stream) {
+ return dynload::cudaLaunchKernel(
+ func, gridDim, blockDim, args, sharedMem, stream);
}
#endif /* CUDART_VERSION >= 7000 */
-
-__host__ cudaError_t CUDARTAPI cudaLaunch(const void *func)
-{
+__host__ cudaError_t CUDARTAPI cudaLaunch(const void *func) {
return dynload::cudaLaunch(func);
}
__host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg,
size_t size,
- size_t offset)
-{
+ size_t offset) {
return dynload::cudaSetupArgument(arg, size, offset);
}
__host__ cudaError_t CUDARTAPI cudaConfigureCall(dim3 gridDim,
dim3 blockDim,
size_t sharedMem,
- cudaStream_t stream)
-{
- return dynload::cudaConfigureCall(gridDim, blockDim,
- sharedMem, stream);
+ cudaStream_t stream) {
+ return dynload::cudaConfigureCall(gridDim, blockDim, sharedMem, stream);
}
extern "C" {
-void** CUDARTAPI __cudaRegisterFatBinary(
- void *fatCubin
-)
-{
+void **CUDARTAPI __cudaRegisterFatBinary(void *fatCubin) {
return dynload::__cudaRegisterFatBinary(fatCubin);
-
}
-void CUDARTAPI __cudaUnregisterFatBinary(
- void **fatCubinHandle
-)
-{
+void CUDARTAPI __cudaUnregisterFatBinary(void **fatCubinHandle) {
return dynload::__cudaUnregisterFatBinary(fatCubinHandle);
}
-void CUDARTAPI __cudaRegisterFunction(
- void **fatCubinHandle,
- const char *hostFun,
- char *deviceFun,
- const char *deviceName,
- int thread_limit,
- uint3 *tid,
- uint3 *bid,
- dim3 *bDim,
- dim3 *gDim,
- int *wSize
-) {
- return dynload::__cudaRegisterFunction(
- fatCubinHandle, hostFun, deviceFun, deviceName,
- thread_limit, tid, bid, bDim, gDim, wSize);
+void CUDARTAPI __cudaRegisterFunction(void **fatCubinHandle,
+ const char *hostFun,
+ char *deviceFun,
+ const char *deviceName,
+ int thread_limit,
+ uint3 *tid,
+ uint3 *bid,
+ dim3 *bDim,
+ dim3 *gDim,
+ int *wSize) {
+ return dynload::__cudaRegisterFunction(fatCubinHandle,
+ hostFun,
+ deviceFun,
+ deviceName,
+ thread_limit,
+ tid,
+ bid,
+ bDim,
+ gDim,
+ wSize);
}
-void CUDARTAPI __cudaRegisterVar(
- void **fatCubinHandle,
- char *hostVar,
- char *deviceAddress,
- const char *deviceName,
- int ext,
- int size,
- int constant,
- int global
-) {
- return dynload::__cudaRegisterVar(
- fatCubinHandle, hostVar, deviceAddress,
- deviceName, ext, size, constant, global);
+void CUDARTAPI __cudaRegisterVar(void **fatCubinHandle,
+ char *hostVar,
+ char *deviceAddress,
+ const char *deviceName,
+ int ext,
+ int size,
+ int constant,
+ int global) {
+ return dynload::__cudaRegisterVar(fatCubinHandle,
+ hostVar,
+ deviceAddress,
+ deviceName,
+ ext,
+ size,
+ constant,
+ global);
}
-
-
-extern void CUDARTAPI __cudaRegisterManagedVar(
- void **fatCubinHandle,
- void **hostVarPtrAddress,
- char *deviceAddress,
- const char *deviceName,
- int ext,
- int size,
- int constant,
- int global
-) {
- return dynload::__cudaRegisterManagedVar(
- fatCubinHandle, hostVarPtrAddress, deviceAddress,
- deviceName, ext, size, constant, global);
+extern void CUDARTAPI __cudaRegisterManagedVar(void **fatCubinHandle,
+ void **hostVarPtrAddress,
+ char *deviceAddress,
+ const char *deviceName,
+ int ext,
+ int size,
+ int constant,
+ int global) {
+ return dynload::__cudaRegisterManagedVar(fatCubinHandle,
+ hostVarPtrAddress,
+ deviceAddress,
+ deviceName,
+ ext,
+ size,
+ constant,
+ global);
}
-char CUDARTAPI __cudaInitModule(
- void **fatCubinHandle
-) {
+char CUDARTAPI __cudaInitModule(void **fatCubinHandle) {
return dynload::__cudaInitModule(fatCubinHandle);
}
-void CUDARTAPI __cudaRegisterTexture(
- void **fatCubinHandle,
- const struct textureReference *hostVar,
- const void **deviceAddress,
- const char *deviceName,
- int dim,
- int norm,
- int ext
-) {
+void CUDARTAPI __cudaRegisterTexture(void **fatCubinHandle,
+ const struct textureReference *hostVar,
+ const void **deviceAddress,
+ const char *deviceName,
+ int dim,
+ int norm,
+ int ext) {
return dynload::__cudaRegisterTexture(
- fatCubinHandle, hostVar, deviceAddress,
- deviceName, dim, norm, ext);
+ fatCubinHandle, hostVar, deviceAddress, deviceName, dim, norm, ext);
}
-void CUDARTAPI __cudaRegisterSurface(
- void **fatCubinHandle,
- const struct surfaceReference *hostVar,
- const void **deviceAddress,
- const char *deviceName,
- int dim,
- int ext
-) {
+void CUDARTAPI __cudaRegisterSurface(void **fatCubinHandle,
+ const struct surfaceReference *hostVar,
+ const void **deviceAddress,
+ const char *deviceName,
+ int dim,
+ int ext) {
return dynload::__cudaRegisterSurface(
- fatCubinHandle, hostVar, deviceAddress,
- deviceName, dim, ext);
+ fatCubinHandle, hostVar, deviceAddress, deviceName, dim, ext);
}
} /* extern "C" */
diff --git a/paddle/cuda/src/hl_dso_loader.cc b/paddle/cuda/src/hl_dso_loader.cc
index c0b5d6e357fc70ed17180ab38458164918b13878..1a3ce08619fc3a5787576b30e9f4c13336990e74 100644
--- a/paddle/cuda/src/hl_dso_loader.cc
+++ b/paddle/cuda/src/hl_dso_loader.cc
@@ -12,27 +12,28 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "hl_dso_loader.h"
-#include "paddle/utils/Logging.h"
#include "paddle/utils/CommandLineParser.h"
+#include "paddle/utils/Logging.h"
-P_DEFINE_string(cudnn_dir, "",
+P_DEFINE_string(cudnn_dir,
+ "",
"Specify path for loading libcudnn.so. For instance, "
- "/usr/local/cudnn/lib64. If empty [default], dlopen will search "
- "cudnn from LD_LIBRARY_PATH");
+ "/usr/local/cudnn/lib. If empty [default], dlopen "
+ "will search cudnn from LD_LIBRARY_PATH");
-P_DEFINE_string(cuda_dir, "",
+P_DEFINE_string(cuda_dir,
+ "",
"Specify path for loading cuda library, such as libcublas, "
- "libcurand. For instance, /usr/local/cuda/lib64. "
- "(Note: libcudart can not be specified by cuda_dir, since some "
+ "libcurand. For instance, /usr/local/cuda/lib64. (Note: "
+ "libcudart can not be specified by cuda_dir, since some "
"build-in function in cudart already ran before main entry). "
- "If empty [default], dlopen will search cuda from LD_LIBRARY_PATH");
+ "If default, dlopen will search cuda from LD_LIBRARY_PATH");
-static inline std::string join(const std::string& part1, const std::string& part2) {
+static inline std::string join(const std::string& part1,
+ const std::string& part2) {
// directory separator
const char sep = '/';
-
if (!part2.empty() && part2.front() == sep) {
return part2;
}
@@ -46,100 +47,115 @@ static inline std::string join(const std::string& part1, const std::string& part
return ret;
}
-static inline void GetDsoHandleFromDefaultPath(
- std::string& dso_path, void** dso_handle, int dynload_flags) {
- LOG(INFO) << "Try to find cuda library: " << dso_path
- << " from default system path.";
- // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
+static inline void GetDsoHandleFromDefaultPath(std::string& dso_path,
+ void** dso_handle,
+ int dynload_flags) {
+ VLOG(3) << "Try to find cuda library: " << dso_path
+ << " from default system path.";
+ // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
+ *dso_handle = dlopen(dso_path.c_str(), dynload_flags);
+
+// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
+// bring System Integrity Projection (SIP), if dso_handle
+// is null, search from default package path in Mac OS.
+#if defined(__APPLE__) || defined(__OSX__)
+ if (nullptr == *dso_handle) {
+ dso_path = join("/usr/local/cuda/lib/", dso_path);
*dso_handle = dlopen(dso_path.c_str(), dynload_flags);
-
- // DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
- // bring System Integrity Projection (SIP), if dso_handle
- // is null, search from default package path in Mac OS.
- #if defined(__APPLE__) || defined(__OSX__)
if (nullptr == *dso_handle) {
- dso_path = join("/usr/local/cuda/lib/", dso_path);
- *dso_handle = dlopen(dso_path.c_str(), dynload_flags);
- if (nullptr == *dso_handle) {
- if (dso_path == "libcudnn.dylib") {
- LOG(FATAL) << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n"
- << "For instance, sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C "
- << "/usr/local \n sudo chmod a+r /usr/local/cuda/include/cudnn.h "
- << "/usr/local/cuda/lib/libcudnn*";
- }
- }
- }
- #endif
+ if (dso_path == "libcudnn.dylib") {
+ LOG(FATAL)
+ << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n" // NOLINT
+ << "For instance, sudo tar -xzf "
+ "cudnn-7.5-osx-x64-v5.0-ga.tgz -C " // NOLINT
+ << "/usr/local \n sudo chmod a+r "
+ "/usr/local/cuda/include/cudnn.h " // NOLINT
+ << "/usr/local/cuda/lib/libcudnn*";
+ }
+ }
+ }
+#endif
}
-static inline void GetDsoHandleFromSearchPath(
- const std::string& search_root,
- const std::string& dso_name,
- void** dso_handle) {
- int dynload_flags = RTLD_LAZY | RTLD_LOCAL;
- *dso_handle = nullptr;
-
- std::string dlPath = dso_name;
- if (search_root.empty()) {
- GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags);
- } else {
- // search xxx.so from custom path
- dlPath = join(search_root, dso_name);
- *dso_handle = dlopen(dlPath.c_str(), dynload_flags);
- // if not found, search from default path
- if (nullptr == dso_handle) {
- LOG(WARNING) << "Failed to find cuda library: " << dlPath;
- dlPath = dso_name;
- GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags);
- }
+static inline void GetDsoHandleFromSearchPath(const std::string& search_root,
+ const std::string& dso_name,
+ void** dso_handle) {
+ int dynload_flags = RTLD_LAZY | RTLD_LOCAL;
+ *dso_handle = nullptr;
+
+ std::string dlPath = dso_name;
+ if (search_root.empty()) {
+ GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags);
+ } else {
+ // search xxx.so from custom path
+ dlPath = join(search_root, dso_name);
+ *dso_handle = dlopen(dlPath.c_str(), dynload_flags);
+ // if not found, search from default path
+ if (nullptr == *dso_handle) {
+ LOG(WARNING) << "Failed to find cuda library: " << dlPath;
+ dlPath = dso_name;
+ GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags);
}
+ }
- CHECK(nullptr != *dso_handle)
- << "Failed to find cuda library: " << dlPath << std::endl
- << "Please specify its path correctly using one of the following ideas: \n"
-
- << "Idea 1. set cuda and cudnn lib path at runtime. "
- << "http://www.paddlepaddle.org/doc/ui/cmd_argument/argument_outline.html \n"
- << "For instance, issue command: paddle train --use_gpu=1 "
- << "--cuda_dir=/usr/local/cudnn/lib --cudnn_dir=/usr/local/cudnn/lib ...\n"
-
- << "Idea 2. set environment variable LD_LIBRARY_PATH on Linux or "
- << "DYLD_LIBRARY_PATH on Mac OS. \n"
- << "For instance, issue command: export LD_LIBRARY_PATH=... \n"
-
- << "Note: After Mac OS 10.11, using the DYLD_LIBRARY_PATH is impossible "
- << "unless System Integrity Protection (SIP) is disabled. However, @Idea 1"
- << "always work well.";
+ CHECK(nullptr != *dso_handle) << "Failed to find cuda library: " << dlPath
+ << std::endl
+ << "Please specify its path correctly using "
+ "one of the following ways: \n" // NOLINT
+
+ << "Method 1. set cuda and cudnn lib path at "
+ "runtime. "
+ << "http://www.paddlepaddle.org/doc/ui/"
+ "cmd_argument/"
+ "argument_outline.html \n" // NOLINT
+ << "For instance, issue command: paddle train "
+ "--use_gpu=1 "
+ << "--cuda_dir=/usr/local/cuda/lib64 "
+ "--cudnn_dir=/usr/local/cudnn/lib "
+ "...\n" // NOLINT
+
+ << "Method 2. set environment variable "
+ "LD_LIBRARY_PATH on Linux or "
+ << "DYLD_LIBRARY_PATH on Mac OS. \n"
+ << "For instance, issue command: export "
+ "LD_LIBRARY_PATH=... \n"
+
+ << "Note: After Mac OS 10.11, using the "
+ "DYLD_LIBRARY_PATH is impossible "
+ << "unless System Integrity Protection (SIP) "
+ "is disabled. However, "
+ "method 1 " // NOLINT
+ << "always work well.";
}
void GetCublasDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
- GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib", dso_handle);
+ GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib", dso_handle);
#else
- GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so", dso_handle);
+ GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so", dso_handle);
#endif
}
void GetCudnnDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
- GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle);
+ GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle);
#else
- GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", dso_handle);
+ GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", dso_handle);
#endif
}
void GetCudartDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
- GetDsoHandleFromSearchPath("", "libcudart.dylib", dso_handle);
+ GetDsoHandleFromSearchPath("", "libcudart.dylib", dso_handle);
#else
- GetDsoHandleFromSearchPath("", "libcudart.so", dso_handle);
+ GetDsoHandleFromSearchPath("", "libcudart.so", dso_handle);
#endif
}
void GetCurandDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
- GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib", dso_handle);
+ GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib", dso_handle);
#else
- GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so", dso_handle);
+ GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so", dso_handle);
#endif
}
diff --git a/paddle/cuda/src/hl_math.cc b/paddle/cuda/src/hl_math.cc
index 76d48c4a9b94d402cf84c57bd240e03a1a83b1a0..f4bf888bab4e92dd940714ef1b7aeee9242eb817 100644
--- a/paddle/cuda/src/hl_math.cc
+++ b/paddle/cuda/src/hl_math.cc
@@ -12,24 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "avx_mathfun.h"
namespace hppl {
-__m256 exp(__m256 a) {
- return exp256_ps(a);
-}
+__m256 exp(__m256 a) { return exp256_ps(a); }
-__m256 log(__m256 a) {
- return log256_ps(a);
-}
+__m256 log(__m256 a) { return log256_ps(a); }
-__m256 sin(__m256 a) {
- return sin256_ps(a);
-}
+__m256 sin(__m256 a) { return sin256_ps(a); }
-__m256 cos(__m256 a) {
- return cos256_ps(a);
-}
+__m256 cos(__m256 a) { return cos256_ps(a); }
} // namespace hppl
diff --git a/paddle/cuda/src/hl_time.cc b/paddle/cuda/src/hl_time.cc
index adc88d60dd8d547cedcae5fd088b2fa581d8e5be..d52b2a1df07374f632def12eb52e10e10ca86028 100644
--- a/paddle/cuda/src/hl_time.cc
+++ b/paddle/cuda/src/hl_time.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include
#include
#include
@@ -21,8 +20,7 @@ limitations under the License. */
using std::chrono::high_resolution_clock;
int64_t getCurrentTimeStick() {
- high_resolution_clock::time_point tp = high_resolution_clock::now();
- high_resolution_clock::duration dtn = tp.time_since_epoch();
- return dtn.count();
+ high_resolution_clock::time_point tp = high_resolution_clock::now();
+ high_resolution_clock::duration dtn = tp.time_since_epoch();
+ return dtn.count();
}
-
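
Not part of the patch: getCurrentTimeStick() above returns raw high_resolution_clock ticks since the epoch, so callers who need a fixed unit must convert explicitly. A minimal usage sketch:

#include <chrono>
#include <cstdint>
#include <cstdio>

int main() {
  using clock = std::chrono::high_resolution_clock;
  int64_t raw_ticks = clock::now().time_since_epoch().count();
  int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                   clock::now().time_since_epoch())
                   .count();
  std::printf("raw ticks: %lld, nanoseconds: %lld\n",
              static_cast<long long>(raw_ticks), static_cast<long long>(ns));
  return 0;
}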
diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt
index 9ac4d210f6d376639df20800b6782f1f8c03d6aa..a066f80c221ee8ab4383ee6463f7b111984b58ff 100644
--- a/paddle/gserver/CMakeLists.txt
+++ b/paddle/gserver/CMakeLists.txt
@@ -50,7 +50,7 @@ if(NOT WITH_PYTHON)
endif()
if(WITH_GPU)
- add_paddle_culib(paddle_gserver ${GSERVER_SOURCES})
+ cuda_add_library(paddle_gserver ${GSERVER_SOURCES})
else()
add_library(paddle_gserver STATIC
${GSERVER_SOURCES})
diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index 27eed75d4d76c351e381a3b71dc44a3254fb1a4d..f1bb94216c44b3e915f87a3ae49bdfd3ef812916 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -51,12 +51,14 @@ static ClassRegistrar gActivationRegistrar;
* @brief Macro for registering a derived activation class
*/
#define END_DEFINE_ACTIVATION(ACTIVATION_NAME) \
- }; \
+ } \
+ ; \
const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \
#ACTIVATION_NAME; \
static InitFunction __reg_activation__##ACTIVATION_NAME([] { \
- gActivationRegistrar.registerClass< \
- ACTIVATION_CLASS_NAME(ACTIVATION_NAME)>(#ACTIVATION_NAME); \
+ gActivationRegistrar \
+ .registerClass( \
+ #ACTIVATION_NAME); \
});
/**
@@ -111,14 +113,22 @@ void backward(Argument& act) {
outputG->softmaxBackward(*outputV);
} else {
SetDevice device(act.deviceId);
- Matrix::resizeOrCreate(sftMaxDot_, outputG->getHeight(),
+ Matrix::resizeOrCreate(sftMaxDot_,
+ outputG->getHeight(),
outputG->getWidth(),
- /* trans */ false, useGpu(act.deviceId));
- Matrix::resizeOrCreate(sftMaxSum_, outputG->getHeight(), 1,
- /* trans */ false, useGpu(act.deviceId));
+ /* trans */ false,
+ useGpu(act.deviceId));
+ Matrix::resizeOrCreate(sftMaxSum_,
+ outputG->getHeight(),
+ 1,
+ /* trans */ false,
+ useGpu(act.deviceId));
if (!one_ || one_->getWidth() != outputG->getWidth()) {
- Matrix::resizeOrCreate(one_, 1, outputG->getWidth(),
- /* trans */ false, useGpu(act.deviceId));
+ Matrix::resizeOrCreate(one_,
+ 1,
+ outputG->getWidth(),
+ /* trans */ false,
+ useGpu(act.deviceId));
one_->one();
}
@@ -130,7 +140,6 @@ void backward(Argument& act) {
}
END_DEFINE_ACTIVATION(softmax)
-
/**
* @brief Sequence_softmax Activation
* @note Softmax on all frames of one sequence.
@@ -146,10 +155,16 @@ void forward(Argument& act) {
CHECK_EQ(act.value->getWidth(), 1UL);
if (!argument_.value) {
- argument_.value = Matrix::create(nullptr, /* height= */ 1, 1,
- /* trans= */ false, useGpu(act.deviceId));
- argument_.grad = Matrix::create(nullptr, /* height= */ 1, 1,
- /* trans= */ false, useGpu(act.deviceId));
+ argument_.value = Matrix::create(nullptr,
+ /* height= */ 1,
+ 1,
+ /* trans= */ false,
+ useGpu(act.deviceId));
+ argument_.grad = Matrix::create(nullptr,
+ /* height= */ 1,
+ 1,
+ /* trans= */ false,
+ useGpu(act.deviceId));
}
auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
@@ -267,8 +282,11 @@ END_DEFINE_ACTIVATION(softrelu)
BEGIN_DEFINE_ACTIVATION(abs)
void forward(Argument& act) {
SetDevice device(act.deviceId);
- Matrix::resizeOrCreate(act.in, act.value->getHeight(), act.value->getWidth(),
- /* trans */ false, useGpu(act.deviceId));
+ Matrix::resizeOrCreate(act.in,
+ act.value->getHeight(),
+ act.value->getWidth(),
+ /* trans */ false,
+ useGpu(act.deviceId));
act.in->copyFrom(*act.value);
act.value->abs(*act.value);
@@ -286,8 +304,11 @@ END_DEFINE_ACTIVATION(abs)
BEGIN_DEFINE_ACTIVATION(square)
void forward(Argument& act) {
SetDevice device(act.deviceId);
- Matrix::resizeOrCreate(act.in, act.value->getHeight(), act.value->getWidth(),
- /* trans */ false, useGpu(act.deviceId));
+ Matrix::resizeOrCreate(act.in,
+ act.value->getHeight(),
+ act.value->getWidth(),
+ /* trans */ false,
+ useGpu(act.deviceId));
act.in->copyFrom(*act.value);
act.value->square(*act.value);
@@ -317,8 +338,11 @@ END_DEFINE_ACTIVATION(exponential)
BEGIN_DEFINE_ACTIVATION(log)
void forward(Argument& act) {
SetDevice device(act.deviceId);
- Matrix::resizeOrCreate(act.in, act.value->getHeight(), act.value->getWidth(),
- /* trans */ false, useGpu(act.deviceId));
+ Matrix::resizeOrCreate(act.in,
+ act.value->getHeight(),
+ act.value->getWidth(),
+ /* trans */ false,
+ useGpu(act.deviceId));
act.in->copyFrom(*act.value);
act.value->log(*act.value);
@@ -333,11 +357,9 @@ ActivationFunction* ActivationFunction::create(const std::string& type) {
std::vector<std::string> ActivationFunction::getAllRegisteredTypes() {
std::vector<std::string> types;
- gActivationRegistrar.forEachType([&](const std::string& type) {
- types.push_back(type);
- });
+ gActivationRegistrar.forEachType(
+ [&](const std::string& type) { types.push_back(type); });
return types;
}
-
} // namespace paddle
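
Not part of the patch: END_DEFINE_ACTIVATION reformatted above relies on a registrar that is filled by static initializers before main(), mapping an activation name to a factory. A simplified sketch of that pattern (Activation, TanhActivation and registry are illustrative stand-ins, not Paddle types):

#include <cstdio>
#include <functional>
#include <map>
#include <memory>
#include <string>

struct Activation {
  virtual ~Activation() = default;
  virtual const char* name() const = 0;
};

// Name -> factory map, the analogue of gActivationRegistrar.
static std::map<std::string, std::function<std::unique_ptr<Activation>()>>& registry() {
  static std::map<std::string, std::function<std::unique_ptr<Activation>()>> r;
  return r;
}

struct TanhActivation : Activation {
  const char* name() const override { return "tanh"; }
};

// Runs during static initialization, like InitFunction __reg_activation__##NAME.
static bool reg_tanh = (registry()["tanh"] = [] {
  return std::unique_ptr<Activation>(new TanhActivation());
}, true);

int main() {
  auto act = registry()["tanh"]();
  std::printf("created activation: %s\n", act->name());
  return 0;
}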
diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h
index c483372256c035e39bfdbcaa4193a1a2e7fd80b8..e9ed5c619ab5e4dd9c52c0dac24478c2a57aa1bf 100644
--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include
#include
diff --git a/paddle/gserver/dataproviders/DataProvider.cpp b/paddle/gserver/dataproviders/DataProvider.cpp
index 2cfb5a3a18c8a63d69bf0598eeee2807376340bc..e6cc4a246a8494d287f8638674f4ae213f38f657 100644
--- a/paddle/gserver/dataproviders/DataProvider.cpp
+++ b/paddle/gserver/dataproviders/DataProvider.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "DataProvider.h"
#include "paddle/utils/Util.h"
@@ -57,7 +56,7 @@ void BufferBatch::clone(DataBatch* srcBatch, bool useGpu) {
}
}
-DoubleBuffer::DoubleBuffer(DataProvider *dataPool,
+DoubleBuffer::DoubleBuffer(DataProvider* dataPool,
bool useGpu,
int64_t batchSize) {
batchSize_ = batchSize;
@@ -155,7 +154,7 @@ void DoubleBuffer::startAsyncLoad() {
}
ClassRegistrar
-DataProvider::registrar_;
+ DataProvider::registrar_;
DataProvider* DataProvider::create(const DataConfig& config,
const ModelConfig& modelConfig,
@@ -182,7 +181,8 @@ int64_t DataProvider::getNextBatch(int64_t size, DataBatch* batch) {
for (int i = 0; i < config_.constant_slots_size(); ++i) {
MemoryHandlePtr handle =
constantSlots[i] ? constantSlots[i]->getMemoryHandle() : nullptr;
- Matrix::resizeOrCreate(constantSlots[i], batchSize,
+ Matrix::resizeOrCreate(constantSlots[i],
+ batchSize,
1, // = width
false, // = trans
useGpu_); // = useGpu
@@ -216,7 +216,8 @@ void DataProvider::initAsyncLoader() {
}
SimpleDataProviderBase::SimpleDataProviderBase(const DataConfig& config,
- bool useGpu, bool withInfo)
+ bool useGpu,
+ bool withInfo)
: DataProvider(config, useGpu) {
/* initialize the size of a sample, and the buffer */
sampleDim_ = config_.feat_dim() * (2 * config_.context_len() + 1);
@@ -337,7 +338,8 @@ int64_t SimpleDataProviderBase::fillBuffer() {
sampleNumInBuf_ =
n + fillBufferImp(hInputDataBuf_->getData() + n * sampleDim_,
hInputLabelBuf_->getData() + n,
- hInputInfoBuf_->getData() + n, bufferCapacity_ - n);
+ hInputInfoBuf_->getData() + n,
+ bufferCapacity_ - n);
/* for stachastic gradient training */
if (!skipShuffle_) {
@@ -357,11 +359,14 @@ SimpleDataProvider::SimpleDataProvider(const DataConfig& config, bool useGpu)
SimpleDataProvider::~SimpleDataProvider() {}
-int64_t SimpleDataProvider::fillBufferImp(real* data, int* label, int* info,
+int64_t SimpleDataProvider::fillBufferImp(real* data,
+ int* label,
+ int* info,
int64_t size) {
(void)info;
int64_t n = std::min(labels_.size() - currentSampleIndex_, size);
- memcpy(data, &data_[currentSampleIndex_ * sampleDim_],
+ memcpy(data,
+ &data_[currentSampleIndex_ * sampleDim_],
n * sampleDim_ * sizeof(real));
memcpy(label, &labels_[currentSampleIndex_], sizeof(int) * n);
currentSampleIndex_ += n;
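
Not part of the patch: the DoubleBuffer touched above overlaps data loading with training, one thread fills the next batch while the trainer consumes the current one. A simplified producer/consumer sketch of that idea (Batch, loader and the queue are placeholders, not the Paddle classes):

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

struct Batch { std::vector<float> data; };

std::queue<Batch> ready;  // batches the loader thread has prepared
std::mutex mu;
std::condition_variable cv;
bool done = false;

void loader() {  // plays the role of DoubleBuffer's async loading thread
  for (int i = 0; i < 3; ++i) {
    Batch b{std::vector<float>(4, static_cast<float>(i))};
    { std::lock_guard<std::mutex> lk(mu); ready.push(std::move(b)); }
    cv.notify_one();
  }
  { std::lock_guard<std::mutex> lk(mu); done = true; }
  cv.notify_one();
}

int main() {
  std::thread t(loader);
  while (true) {  // trainer side: remove one batch at a time
    std::unique_lock<std::mutex> lk(mu);
    cv.wait(lk, [] { return !ready.empty() || done; });
    if (ready.empty()) break;
    Batch b = std::move(ready.front());
    ready.pop();
    lk.unlock();
    std::printf("consumed batch of %zu samples\n", b.data.size());
  }
  t.join();
  return 0;
}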
diff --git a/paddle/gserver/dataproviders/DataProvider.h b/paddle/gserver/dataproviders/DataProvider.h
index 112e45de1cb232097ed63b120d5ac631b37952e9..8b7fb27f821a47d830413eced79b3352a6969c90 100644
--- a/paddle/gserver/dataproviders/DataProvider.h
+++ b/paddle/gserver/dataproviders/DataProvider.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include
@@ -44,15 +43,15 @@ namespace paddle {
* @brief Macro for registering a data provider. The class type should contain
* a consturctor with parameter (DataConfig, bool).
*/
-#define REGISTER_DATA_PROVIDER(__type_name, __class_name)\
- static InitFunction __reg_type_##__type_name([]() {\
- DataProvider::registrar_.registerClass(\
- #__type_name, \
- [](DataConfig conf, ModelConfig, bool useGpu) -> DataProvider* { \
- DataProvider* dp = new __class_name (conf, useGpu);\
- return dp;\
- });\
-})
+#define REGISTER_DATA_PROVIDER(__type_name, __class_name) \
+ static InitFunction __reg_type_##__type_name([]() { \
+ DataProvider::registrar_.registerClass( \
+ #__type_name, \
+ [](DataConfig conf, ModelConfig, bool useGpu) -> DataProvider* { \
+ DataProvider* dp = new __class_name(conf, useGpu); \
+ return dp; \
+ }); \
+ })
/**
* @def REGISTER_DATA_PROVIDER_EX
@@ -61,8 +60,8 @@ namespace paddle {
*/
#define REGISTER_DATA_PROVIDER_EX(__type_name, __class_name) \
static InitFunction __reg_type_##__type_name([] { \
- DataProvider::registrar_.registerClass<__class_name>(#__type_name); \
-})
+ DataProvider::registrar_.registerClass<__class_name>(#__type_name); \
+ })
class DataBatch;
class BufferBatch;
@@ -181,7 +180,8 @@ public:
* @param[in] size DataBatch.getSize()
* @param[in] dataId sub dataprovider id (in MultiDataProvider)
*/
- void appendArguments(const std::vector& argus, int size,
+ void appendArguments(const std::vector<Argument>& argus,
+ int size,
int dataId) {
size_ += size;
for (const auto& argu : argus) {
@@ -259,9 +259,7 @@ typedef Queue BufferBatchQueue;
class DoubleBuffer {
public:
- DoubleBuffer(DataProvider* dataPool,
- bool useGpu,
- int64_t batchSize = 0);
+ DoubleBuffer(DataProvider* dataPool, bool useGpu, int64_t batchSize = 0);
virtual ~DoubleBuffer();
void removeOneBatch(DataBatch* dataBatch);
@@ -310,7 +308,7 @@ public:
/**
* @brief create only used for unittest.
*/
- inline static DataProvider* create(const DataConfig &config,
+ inline static DataProvider* create(const DataConfig& config,
bool useGpu = FLAGS_use_gpu) {
return create(config, ModelConfig(), useGpu);
}
@@ -462,7 +460,9 @@ protected:
*
* label[n] is the label for the n-th sample.
*/
- virtual int64_t fillBufferImp(real* data, int* label, int* info,
+ virtual int64_t fillBufferImp(real* data,
+ int* label,
+ int* info,
int64_t size) = 0;
};
@@ -475,7 +475,9 @@ public:
protected:
void loadData(const std::string& fileName);
void loadDataFile(const std::string& fileName);
- virtual int64_t fillBufferImp(real* data, int* label, int* info,
+ virtual int64_t fillBufferImp(real* data,
+ int* label,
+ int* info,
int64_t size);
protected:
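
A brief illustration to accompany the macro rewrap above: per the header comment, a registered class needs a (DataConfig, bool) constructor. The DummyDataProvider below and its trivial method bodies are hypothetical, shown only to make the macro's intended use concrete; the virtual method names mirror ones visible elsewhere in this patch.

namespace paddle {

// Hypothetical provider used only to illustrate REGISTER_DATA_PROVIDER.
class DummyDataProvider : public DataProvider {
public:
  DummyDataProvider(const DataConfig& config, bool useGpu)
      : DataProvider(config, useGpu) {}

  virtual void reset() { DataProvider::reset(); }
  virtual void shuffle() {}
  virtual int64_t getSize() { return -1; }  // size not known in advance
  virtual int64_t getNextBatchInternal(int64_t /*size*/, DataBatch* batch) {
    batch->setSize(0);  // this sketch never produces data
    return 0;
  }
};

// Makes the class constructible from a DataConfig whose type is "dummy".
REGISTER_DATA_PROVIDER(dummy, DummyDataProvider);

}  // namespace paddle
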
diff --git a/paddle/gserver/dataproviders/DataProviderGroup.h b/paddle/gserver/dataproviders/DataProviderGroup.h
index 0689f90f3e7dd3d3e1df19f3958c821d53e69700..6c178e29ee714a6bd7f58861d7cf15716fee848d 100644
--- a/paddle/gserver/dataproviders/DataProviderGroup.h
+++ b/paddle/gserver/dataproviders/DataProviderGroup.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include "DataProvider.h"
@@ -65,8 +64,8 @@ void DataProviderGroup::reset() {
provider_ = nullptr;
// shuffle file list
- std::shuffle(fileList_.begin(), fileList_.end(),
- ThreadLocalRandomEngine::get());
+ std::shuffle(
+ fileList_.begin(), fileList_.end(), ThreadLocalRandomEngine::get());
startLoader();
DataProvider::reset();
@@ -113,8 +112,9 @@ void DataProviderGroup::startLoader() {
size_t endPos = std::min(fileList_.size(), startPos + loadFileCount);
std::vector<std::string> fileVec(fileList_.begin() + startPos,
fileList_.begin() + endPos);
- loader_->addJob([this, fileVec]()
- -> ProviderPtrType { return this->loadFile(fileVec); });
+ loader_->addJob([this, fileVec]() -> ProviderPtrType {
+ return this->loadFile(fileVec);
+ });
}
loader_->stopAddJob();
}
diff --git a/paddle/gserver/dataproviders/MultiDataProvider.cpp b/paddle/gserver/dataproviders/MultiDataProvider.cpp
index 8e4f53978a0451f3bb6cd5da30f017708448f9ac..51fb1f26668c55dc1c2aecd5389f327e2569a52f 100644
--- a/paddle/gserver/dataproviders/MultiDataProvider.cpp
+++ b/paddle/gserver/dataproviders/MultiDataProvider.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "paddle/utils/Util.h"
#include "MultiDataProvider.h"
#include "paddle/utils/Logging.h"
@@ -59,10 +58,8 @@ MultiDataProvider::MultiDataProvider(const DataConfig& config,
"MultiDataProvider";
subConfig.set_async_load_data(false);
}
- subDataProviders_[i] =
- std::unique_ptr<DataProvider>(DataProvider::create(subConfig,
- modelConfig,
- useGpu_));
+ subDataProviders_[i] = std::unique_ptr<DataProvider>(
+ DataProvider::create(subConfig, modelConfig, useGpu_));
}
}
diff --git a/paddle/gserver/dataproviders/MultiDataProvider.h b/paddle/gserver/dataproviders/MultiDataProvider.h
index b498ba6516c4320566b1b3cc2bd557ae016d7c39..876467c04f074cf37e48fdfa9b24f236fcfe8ba1 100644
--- a/paddle/gserver/dataproviders/MultiDataProvider.h
+++ b/paddle/gserver/dataproviders/MultiDataProvider.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include "DataProvider.h"
diff --git a/paddle/gserver/dataproviders/ProtoDataProvider.cpp b/paddle/gserver/dataproviders/ProtoDataProvider.cpp
index 344644755f24045443b8cb3ebd08004a4b1cdcb5..0a7ff802461f2ded0e6e842c088bddf218361f79 100644
--- a/paddle/gserver/dataproviders/ProtoDataProvider.cpp
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "ProtoDataProvider.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/StringUtil.h"
@@ -23,7 +22,8 @@ limitations under the License. */
#include "paddle/utils/Logging.h"
#include "DataProviderGroup.h"
-P_DEFINE_double(memory_threshold_on_load_data, 1.0,
+P_DEFINE_double(memory_threshold_on_load_data,
+ 1.0,
"stop loading data when memory is not sufficient");
namespace paddle {
@@ -32,7 +32,8 @@ REGISTER_DATA_PROVIDER(proto_group, DataProviderGroup<ProtoDataProvider>);
REGISTER_DATA_PROVIDER(proto_sequence_group,
DataProviderGroup<ProtoSequenceDataProvider>);
-ProtoDataProvider::ProtoDataProvider(const DataConfig& config, bool useGpu,
+ProtoDataProvider::ProtoDataProvider(const DataConfig& config,
+ bool useGpu,
bool loadDataAll)
: DataProvider(config, useGpu), sampleNums_(0), currentSequenceIndex_(0) {
if (loadDataAll) {
@@ -279,7 +280,8 @@ void ProtoDataProvider::fillSlots(const DataSample& sample) {
}
slot.sparseNonValueData.resize(slot.indices.back() + slotSize);
const unsigned int* ids = sample.vector_slots(i).ids().data();
- memcpy(slot.sparseNonValueData.data() + slot.indices.back(), ids,
+ memcpy(slot.sparseNonValueData.data() + slot.indices.back(),
+ ids,
sizeof(*ids) * slotSize);
slot.indices.push_back(slot.indices.back() + slotSize);
if (subSlotSize) {
@@ -318,10 +320,11 @@ void ProtoDataProvider::fillSlots(const DataSample& sample) {
slot.varDenseData[oldSize].data.resize(varDim);
const float* values = sample.vector_slots(i).values().data();
#ifdef PADDLE_TYPE_DOUBLE
- std::copy(values, values + varDim,
- slot.varDenseData[oldSize].data.data());
+ std::copy(
+ values, values + varDim, slot.varDenseData[oldSize].data.data());
#else
- memcpy(slot.varDenseData[oldSize].data.data(), values,
+ memcpy(slot.varDenseData[oldSize].data.data(),
+ values,
sizeof(real) * varDim);
#endif
slot.varDenseData[oldSize].dims.resize(
@@ -374,8 +377,9 @@ void ProtoDataProvider::reset() {
}
void ProtoDataProvider::shuffle() {
- std::shuffle(shuffledSequenceIds_.begin(), shuffledSequenceIds_.end(),
- ThreadLocalRandomEngine::get());
+ std::shuffle(shuffledSequenceIds_.begin(),
+ shuffledSequenceIds_.end(),
+ ThreadLocalRandomEngine::get());
}
/*
@@ -502,7 +506,8 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
if (!iidData()) {
ICpuGpuVector::resizeOrCreate(cpuArguments[0].sequenceStartPositions,
- numSequences + 1, /* useGpu= */ false);
+ numSequences + 1,
+ /* useGpu= */ false);
int* buf = cpuArguments[0].sequenceStartPositions->getMutableData(false);
int pos = 0;
int i = 0;
@@ -530,7 +535,9 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
switch (slotType) {
case SlotDef::VECTOR_DENSE: {
- Matrix::resizeOrCreate(cpuArguments[slot].value, size, dim,
+ Matrix::resizeOrCreate(cpuArguments[slot].value,
+ size,
+ dim,
false, // trans = false
false); // useGpu = false
real* buf = cpuArguments[slot].value->getData();
@@ -543,19 +550,27 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
}
case SlotDef::VECTOR_SPARSE_NON_VALUE: {
if (!(cpuArguments[slot].value)) {
- cpuArguments[slot].value = Matrix::createSparseMatrix(
- size, dim, size /*DEFAULT_AVG_WIDTH = 1*/, NO_VALUE, SPARSE_CSR,
- false, useGpu_);
+ cpuArguments[slot].value =
+ Matrix::createSparseMatrix(size,
+ dim,
+ size /*DEFAULT_AVG_WIDTH = 1*/,
+ NO_VALUE,
+ SPARSE_CSR,
+ false,
+ useGpu_);
}
auto mat = cpuArguments[slot].value;
mat->resize(size, dim);
if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)
- ->copyFrom(dataPos.data(), slots_[slot].indices.data(),
- slots_[slot].sparseNonValueData.data(), HPPL_STREAM_1);
+ ->copyFrom(dataPos.data(),
+ slots_[slot].indices.data(),
+ slots_[slot].sparseNonValueData.data(),
+ HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
- ->copyFrom(dataPos.data(), slots_[slot].indices.data(),
+ ->copyFrom(dataPos.data(),
+ slots_[slot].indices.data(),
slots_[slot].sparseNonValueData.data());
} else {
LOG(FATAL) << "Not Supported";
@@ -571,19 +586,27 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
}
case SlotDef::VECTOR_SPARSE_VALUE: {
if (!(cpuArguments[slot].value)) {
- cpuArguments[slot].value = Matrix::createSparseMatrix(
- size, dim, size /*DEFAULT_AVG_WIDTH = 1*/, FLOAT_VALUE,
- SPARSE_CSR, false, useGpu_);
+ cpuArguments[slot].value =
+ Matrix::createSparseMatrix(size,
+ dim,
+ size /*DEFAULT_AVG_WIDTH = 1*/,
+ FLOAT_VALUE,
+ SPARSE_CSR,
+ false,
+ useGpu_);
}
auto mat = cpuArguments[slot].value;
mat->resize(size, dim);
if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
- std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
- dataPos.data(), slots_[slot].indices.data(),
- slots_[slot].sparseFloatValueData.data(), HPPL_STREAM_1);
+ std::dynamic_pointer_cast<GpuSparseMatrix>(mat)
+ ->copyFrom(dataPos.data(),
+ slots_[slot].indices.data(),
+ slots_[slot].sparseFloatValueData.data(),
+ HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
- ->copyFrom(dataPos.data(), slots_[slot].indices.data(),
+ ->copyFrom(dataPos.data(),
+ slots_[slot].indices.data(),
slots_[slot].sparseFloatValueData.data());
} else {
LOG(FATAL) << "Not Supported";
@@ -591,7 +614,8 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
break;
}
case SlotDef::INDEX: {
- IVector::resizeOrCreate(cpuArguments[slot].ids, size,
+ IVector::resizeOrCreate(cpuArguments[slot].ids,
+ size,
/* useGpu= */ false);
int* buf = cpuArguments[slot].ids->getData();
for (int i = 0; i < size; ++i) {
@@ -621,7 +645,9 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
if (oldWidth < height) {
totalDim = width * height * depth;
}
- Matrix::resizeOrCreate(cpuArguments[slot].value, size, totalDim,
+ Matrix::resizeOrCreate(cpuArguments[slot].value,
+ size,
+ totalDim,
false, // trans = false
false); // useGpu = false
real* buf = cpuArguments[slot].value->getData();
@@ -637,13 +663,13 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
}
}
} else {
- memcpy(buf, slots_[slot].varDenseData[dataPos[0]].data.data(),
+ memcpy(buf,
+ slots_[slot].varDenseData[dataPos[0]].data.data(),
sizeof(real) * totalDim);
}
- ICpuGpuVector::resizeOrCreate(
- cpuArguments[slot].sequenceStartPositions,
- size + 1, /* size == 1 currently */
- /* useGpu= */ false);
+ ICpuGpuVector::resizeOrCreate(cpuArguments[slot].sequenceStartPositions,
+ size + 1, /* size == 1 currently */
+ /* useGpu= */ false);
int* bufStarts =
cpuArguments[slot].sequenceStartPositions->getMutableData(false);
bufStarts[0] = 0;
@@ -653,16 +679,17 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
case SlotDef::VAR_MDIM_INDEX: {
CHECK_EQ(size, 1);
size_t totalDim = slots_[slot].varIndices[dataPos[0]].size();
- IVector::resizeOrCreate(cpuArguments[slot].ids, totalDim,
+ IVector::resizeOrCreate(cpuArguments[slot].ids,
+ totalDim,
/* useGpu= */ false);
int* buf = cpuArguments[slot].ids->getData();
- memcpy(buf, slots_[slot].varIndices[dataPos[0]].data(),
+ memcpy(buf,
+ slots_[slot].varIndices[dataPos[0]].data(),
sizeof(int) * totalDim);
- ICpuGpuVector::resizeOrCreate(
- cpuArguments[slot].sequenceStartPositions,
- size + 1, /* size == 1 currently */
- /* useGpu= */ false);
+ ICpuGpuVector::resizeOrCreate(cpuArguments[slot].sequenceStartPositions,
+ size + 1, /* size == 1 currently */
+ /* useGpu= */ false);
int* bufStarts =
cpuArguments[slot].sequenceStartPositions->getMutableData(false);
bufStarts[0] = 0;
@@ -700,8 +727,8 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
gpuArguments[i].sequenceStartPositions =
cpuArguments[i].sequenceStartPositions;
} else {
- gpuArguments[i].resizeAndCopyFrom(cpuArguments[i], useGpu_,
- HPPL_STREAM_1);
+ gpuArguments[i].resizeAndCopyFrom(
+ cpuArguments[i], useGpu_, HPPL_STREAM_1);
}
}
hl_stream_synchronize(HPPL_STREAM_1);
@@ -746,10 +773,9 @@ int64_t ProtoSequenceDataProvider::getNextBatchInternal(int64_t size,
sampleLoop(op, size);
// current slot: sequenceStartPositions
- ICpuGpuVector::resizeOrCreate(
- cpuArguments[slot].sequenceStartPositions,
- size + 1,
- /* useGpu= */ false);
+ ICpuGpuVector::resizeOrCreate(cpuArguments[slot].sequenceStartPositions,
+ size + 1,
+ /* useGpu= */ false);
switch (slotType) {
case SlotDef::VECTOR_SPARSE_VALUE:
@@ -821,10 +847,10 @@ int64_t ProtoSequenceDataProvider::getNextBatchInternal(int64_t size,
};
int subSize = subSampleLoop(op, size, slot);
ICpuGpuVector::resizeOrCreate(
- cpuArguments[slot].subSequenceStartPositions, subSize + 1,
- false);
+ cpuArguments[slot].subSequenceStartPositions, subSize + 1, false);
int* currPosOfArgumentSubSeqStart =
- cpuArguments[slot].subSequenceStartPositions->getMutableData(false);
+ cpuArguments[slot].subSequenceStartPositions->getMutableData(
+ false);
int64_t* subSeqs = dataSubPos.data();
int64_t* subIndexs = slots_[slot].subIndices.data();
int allSubSequenceLength = 0;
@@ -849,7 +875,8 @@ int64_t ProtoSequenceDataProvider::getNextBatchInternal(int64_t size,
}
case SlotDef::INDEX: {
// label slot
- IVector::resizeOrCreate(cpuArguments[slot].ids, size,
+ IVector::resizeOrCreate(cpuArguments[slot].ids,
+ size,
/* useGpu= */ false);
// fill labels
int* buf = cpuArguments[slot].ids->getData();
@@ -863,7 +890,9 @@ int64_t ProtoSequenceDataProvider::getNextBatchInternal(int64_t size,
case SlotDef::VECTOR_DENSE: {
// copy values
size_t dim = header_.slot_defs(slot).dim();
- Matrix::resizeOrCreate(cpuArguments[slot].value, size, dim,
+ Matrix::resizeOrCreate(cpuArguments[slot].value,
+ size,
+ dim,
false, // trans = false
false); // useGpu = false
real* buf = cpuArguments[slot].value->getData();
@@ -887,8 +916,8 @@ int64_t ProtoSequenceDataProvider::getNextBatchInternal(int64_t size,
gpuArguments.resize(cpuArguments.size());
gpuBatch.setSize(size);
for (size_t i = 0; i < cpuArguments.size(); ++i) {
- gpuArguments[i].resizeAndCopyFrom(cpuArguments[i], useGpu_,
- HPPL_STREAM_1);
+ gpuArguments[i].resizeAndCopyFrom(
+ cpuArguments[i], useGpu_, HPPL_STREAM_1);
}
hl_stream_synchronize(HPPL_STREAM_1);
*batch = gpuBatch;
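
One note on the memory_threshold_on_load_data flag rewrapped near the top of this file: P_DEFINE_double produces a FLAGS_ prefixed global (the same convention as the FLAGS_use_gpu default in DataProvider.h), so other code can consult it as in the sketch below. P_DECLARE_double is assumed to mirror the usual gflags-style DECLARE macro, and the helper function is hypothetical.

// Sketch only. Assumes P_DECLARE_double exposes the FLAGS_ global defined
// via P_DEFINE_double in ProtoDataProvider.cpp.
P_DECLARE_double(memory_threshold_on_load_data);

namespace paddle {

// Hypothetical helper: report whether data loading should stop because
// memory usage has crossed the configured threshold.
inline bool shouldStopLoading(double memoryUsedFraction) {
  return memoryUsedFraction >= FLAGS_memory_threshold_on_load_data;
}

}  // namespace paddle
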
diff --git a/paddle/gserver/dataproviders/ProtoDataProvider.h b/paddle/gserver/dataproviders/ProtoDataProvider.h
index 846dd7673abe8b836be1b728bb690daa0e8acc20..ffdcc8fdc977f53e29dc9f03fa3cf7af56acb92f 100644
--- a/paddle/gserver/dataproviders/ProtoDataProvider.h
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include
@@ -48,7 +47,8 @@ namespace paddle {
*/
class ProtoDataProvider : public DataProvider {
public:
- ProtoDataProvider(const DataConfig& config, bool useGpu,
+ ProtoDataProvider(const DataConfig& config,
+ bool useGpu,
bool loadDataAll = true);
virtual void reset();
@@ -161,14 +161,16 @@ protected:
};
/**
- * @brief Special use for Proto data: instances should contain sparse-non-value slots
+ * @brief Special use for Proto data: instances should contain sparse-non-value
+ * slots
* and label.
*
* @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
*/
class ProtoSequenceDataProvider : public ProtoDataProvider {
public:
- ProtoSequenceDataProvider(const DataConfig& config, bool useGpu,
+ ProtoSequenceDataProvider(const DataConfig& config,
+ bool useGpu,
bool loadDataAll = true);
~ProtoSequenceDataProvider() {}
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
diff --git a/paddle/gserver/dataproviders/ProtoReader.h b/paddle/gserver/dataproviders/ProtoReader.h
index 3b1eb7e9ef03c42df31c6efc9f0e0240d64e78df..b8fca3cd7f3c5efaea35dc8e09f7ca0ec250830f 100644
--- a/paddle/gserver/dataproviders/ProtoReader.h
+++ b/paddle/gserver/dataproviders/ProtoReader.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include
@@ -138,7 +137,8 @@ protected:
*
* @note this code depends on protobuf 2.4.0. There is nothing like
* CodedInputStream::CurrentPosition() in protobuf 2.5.0 to tell us how many
- * bytes has the object readed so far. Therefore, we calculated bytes ourselves.
+ * bytes the object has read so far. Therefore, we calculate the bytes
+ * ourselves.
*/
int approximateReadedBytes_;
};
diff --git a/paddle/gserver/dataproviders/PyDataProvider.cpp b/paddle/gserver/dataproviders/PyDataProvider.cpp
index 1332c0ab635b6ebec05f25fd77b9703b39227bc1..bee6ca14a2ec3995a3b432fc5a39419a5dd8a8ce 100644
--- a/paddle/gserver/dataproviders/PyDataProvider.cpp
+++ b/paddle/gserver/dataproviders/PyDataProvider.cpp
@@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PyDataProvider.h"
#include "paddle/utils/PythonUtil.h"
#include
#include "paddle/utils/Util.h"
#include "paddle/utils/Excepts.h"
-
namespace paddle {
#ifndef PADDLE_NO_PYTHON
REGISTER_DATA_PROVIDER(py, PyDataProvider);
#endif
-PyDataProvider::PyDataProvider(const DataConfig& config, bool useGpu,
+PyDataProvider::PyDataProvider(const DataConfig& config,
+ bool useGpu,
bool loadDataAll)
: DataProvider(config, useGpu), batchSize_(0) {
PyGuard guard;
@@ -50,8 +49,8 @@ void PyDataProvider::loadData(const std::vector& fileList) {
classInstance_ =
createPythonClass(pyModuleName_, pyClassName_, fileList, pyUserArgs_);
CHECK(classInstance_) << "Create class instance failed.";
- PyObjectPtr obj(PyObject_CallMethod(classInstance_.get(),
- const_cast("getHeader"), NULL));
+ PyObjectPtr obj(PyObject_CallMethod(
+ classInstance_.get(), const_cast("getHeader"), NULL));
CHECK_PY(obj) << "Call function getHeader failed.";
std::string headerInfo =
std::string(PyString_AsString(obj.get()), PyString_Size(obj.get()));
@@ -90,7 +89,8 @@ void PyDataProvider::resetSlots() {
}
}
-void PyDataProvider::fillDenseSlot(ProtoSlot& slot, char*& data,
+void PyDataProvider::fillDenseSlot(ProtoSlot& slot,
+ char*& data,
const char* dataEnd) {
unsigned int dim = slot.dim;
slot.sampleNum = readT(data, dataEnd);
@@ -102,14 +102,17 @@ void PyDataProvider::fillDenseSlot(ProtoSlot& slot, char*& data,
float* dat = reinterpret_cast(data);
std::copy(dat, dat + slot.sampleNum * dim, slot.denseData.begin());
#else
- memcpyWithCheck(slot.denseData.data(), data,
- sizeof(real) * dim * slot.sampleNum, dataEnd);
+ memcpyWithCheck(slot.denseData.data(),
+ data,
+ sizeof(real) * dim * slot.sampleNum,
+ dataEnd);
#endif
// PyDataProvider always provide data in float
data += sizeof(float) * dim * slot.sampleNum;
}
-void PyDataProvider::fillSparseNonValueSlot(ProtoSlot& slot, char*& data,
+void PyDataProvider::fillSparseNonValueSlot(ProtoSlot& slot,
+ char*& data,
const char* dataEnd) {
slot.sampleNum = readT(data, dataEnd);
unsigned int* indexPtr = (unsigned int*)data;
@@ -121,12 +124,15 @@ void PyDataProvider::fillSparseNonValueSlot(ProtoSlot& slot, char*& data,
length = readT(data, dataEnd);
slot.indices.push_back(length);
slot.sparseNonValueData.resize(length);
- memcpyWithCheck(slot.sparseNonValueData.data(), data,
- sizeof(unsigned int) * length, dataEnd);
+ memcpyWithCheck(slot.sparseNonValueData.data(),
+ data,
+ sizeof(unsigned int) * length,
+ dataEnd);
data += sizeof(unsigned int) * length;
}
-void PyDataProvider::fillSparseValueSlot(ProtoSlot& slot, char*& data,
+void PyDataProvider::fillSparseValueSlot(ProtoSlot& slot,
+ char*& data,
const char* dataEnd) {
slot.sampleNum = readT(data, dataEnd);
unsigned int* indexPtr = (unsigned int*)data;
@@ -153,7 +159,8 @@ void PyDataProvider::fillSparseValueSlot(ProtoSlot& slot, char*& data,
}
}
-void PyDataProvider::fillIndexSlot(ProtoSlot& slot, char*& data,
+void PyDataProvider::fillIndexSlot(ProtoSlot& slot,
+ char*& data,
const char* dataEnd) {
slot.sampleNum = readT(data, dataEnd);
CHECK_LE(data + sizeof(unsigned int) * slot.sampleNum, dataEnd)
@@ -163,7 +170,8 @@ void PyDataProvider::fillIndexSlot(ProtoSlot& slot, char*& data,
data += sizeof(unsigned int) * slot.sampleNum;
}
-void PyDataProvider::fillStringSlot(ProtoSlot& slot, char*& data,
+void PyDataProvider::fillStringSlot(ProtoSlot& slot,
+ char*& data,
const char* dataEnd) {
slot.sampleNum = readT(data, dataEnd);
for (unsigned int i = 0; i < slot.sampleNum; ++i) {
@@ -225,9 +233,8 @@ void PyDataProvider::fillSlotsByStr(const std::string& samples) {
}
for (size_t i = 0; i < sequenceNum; ++i) {
size_t begin = slot.sequenceStartPositions[i];
- size_t end = (i < sequenceNum - 1)
- ? slot.sequenceStartPositions[i + 1]
- : slot.sampleNum;
+ size_t end = (i < sequenceNum - 1) ? slot.sequenceStartPositions[i + 1]
+ : slot.sampleNum;
for (size_t ii = begin; ii < end; ++ii) {
slot.sampleSequenceIdVec.push_back(ii);
}
@@ -255,8 +262,8 @@ void PyDataProvider::fillSlotsByStr(const std::string& samples) {
void PyDataProvider::reset() {
{ // Invoke PyDataProvider Reset
PyGuard guard;
- PyObjectPtr obj(PyObject_CallMethod(classInstance_.get(),
- const_cast("reset"), NULL));
+ PyObjectPtr obj(PyObject_CallMethod(
+ classInstance_.get(), const_cast("reset"), NULL));
CHECK_PY(obj) << "Call function reset failed.";
}
@@ -270,15 +277,18 @@ void PyDataProvider::reset() {
void PyDataProvider::shuffle() {
// py shuffle
PyGuard guard;
- PyObjectPtr obj(PyObject_CallMethod(classInstance_.get(),
- const_cast("shuffle"), NULL));
+ PyObjectPtr obj(PyObject_CallMethod(
+ classInstance_.get(), const_cast("shuffle"), NULL));
CHECK_PY(obj) << "Call function shuffle failed.";
}
-void PyDataProvider::handleDenseSlot(ProtoSlot& slot, size_t slotIndex,
+void PyDataProvider::handleDenseSlot(ProtoSlot& slot,
+ size_t slotIndex,
std::vector& cpuArguments) {
unsigned int dim = slot.dim;
- Matrix::resizeOrCreate(cpuArguments[slotIndex].value, slot.sampleNum, dim,
+ Matrix::resizeOrCreate(cpuArguments[slotIndex].value,
+ slot.sampleNum,
+ dim,
false, // trans = false
false); // useGpu = false
real* buf = cpuArguments[slotIndex].value->getData();
@@ -294,19 +304,27 @@ void PyDataProvider::handleSparseNonValueSlot(
ProtoSlot& slot, size_t slotIndex, std::vector& cpuArguments) {
unsigned int dim = slot.dim;
if (!(cpuArguments[slotIndex].value)) {
- cpuArguments[slotIndex].value = Matrix::createSparseMatrix(
- slot.sampleNum, dim, slot.sampleNum /*DEFAULT_AVG_WIDTH = 1*/, NO_VALUE,
- SPARSE_CSR, false, useGpu_);
+ cpuArguments[slotIndex].value =
+ Matrix::createSparseMatrix(slot.sampleNum,
+ dim,
+ slot.sampleNum /*DEFAULT_AVG_WIDTH = 1*/,
+ NO_VALUE,
+ SPARSE_CSR,
+ false,
+ useGpu_);
}
auto mat = cpuArguments[slotIndex].value;
mat->resize(slot.sampleNum, dim, slot.sampleNum, NO_VALUE, SPARSE_CSR);
if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)
- ->copyFrom(slot.sampleSequenceIdVec.data(), slot.indices.data(),
- slot.sparseNonValueData.data(), HPPL_STREAM_1);
+ ->copyFrom(slot.sampleSequenceIdVec.data(),
+ slot.indices.data(),
+ slot.sparseNonValueData.data(),
+ HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
- ->copyFrom(slot.sampleSequenceIdVec.data(), slot.indices.data(),
+ ->copyFrom(slot.sampleSequenceIdVec.data(),
+ slot.indices.data(),
slot.sparseNonValueData.data());
} else {
LOG(FATAL) << "Not Supported";
@@ -317,28 +335,38 @@ void PyDataProvider::handleSparseValueSlot(
ProtoSlot& slot, size_t slotIndex, std::vector& cpuArguments) {
unsigned int dim = slot.dim;
if (!(cpuArguments[slotIndex].value)) {
- cpuArguments[slotIndex].value = Matrix::createSparseMatrix(
- slot.sampleNum, dim, slot.sampleNum /*DEFAULT_AVG_WIDTH = 1*/,
- FLOAT_VALUE, SPARSE_CSR, false, useGpu_);
+ cpuArguments[slotIndex].value =
+ Matrix::createSparseMatrix(slot.sampleNum,
+ dim,
+ slot.sampleNum /*DEFAULT_AVG_WIDTH = 1*/,
+ FLOAT_VALUE,
+ SPARSE_CSR,
+ false,
+ useGpu_);
}
auto mat = cpuArguments[slotIndex].value;
mat->resize(slot.sampleNum, dim, slot.sampleNum, FLOAT_VALUE, SPARSE_CSR);
if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)
- ->copyFrom(slot.sampleSequenceIdVec.data(), slot.indices.data(),
- slot.sparseFloatValueData.data(), HPPL_STREAM_DEFAULT);
+ ->copyFrom(slot.sampleSequenceIdVec.data(),
+ slot.indices.data(),
+ slot.sparseFloatValueData.data(),
+ HPPL_STREAM_DEFAULT);
} else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
- ->copyFrom(slot.sampleSequenceIdVec.data(), slot.indices.data(),
+ ->copyFrom(slot.sampleSequenceIdVec.data(),
+ slot.indices.data(),
slot.sparseFloatValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
}
-void PyDataProvider::handleIndexSlot(ProtoSlot& slot, size_t slotIndex,
+void PyDataProvider::handleIndexSlot(ProtoSlot& slot,
+ size_t slotIndex,
std::vector& cpuArguments) {
- IVector::resizeOrCreate(cpuArguments[slotIndex].ids, slot.sampleNum,
+ IVector::resizeOrCreate(cpuArguments[slotIndex].ids,
+ slot.sampleNum,
/*useGpu_*/ false);
int* buf = cpuArguments[slotIndex].ids->getData();
for (size_t i = 0; i < slot.sampleNum; ++i) {
@@ -346,7 +374,8 @@ void PyDataProvider::handleIndexSlot(ProtoSlot& slot, size_t slotIndex,
}
}
-void PyDataProvider::handleStringSlot(ProtoSlot& slot, size_t slotIndex,
+void PyDataProvider::handleStringSlot(ProtoSlot& slot,
+ size_t slotIndex,
std::vector& cpuArguments) {
if (cpuArguments[slotIndex].strs) {
cpuArguments[slotIndex].strs->resize(slot.sampleNum);
@@ -364,7 +393,8 @@ int64_t PyDataProvider::getNextBatchInternal(int64_t size, DataBatch* batch) {
PyGuard guard;
PyObjectPtr obj(PyObject_CallMethod(classInstance_.get(),
const_cast("getNextBatch"),
- const_cast("i"), size));
+ const_cast("i"),
+ size));
CHECK_PY(obj) << "Call function getNextBatch failed.";
const std::string& samples =
std::string(PyString_AsString(obj.get()), PyString_Size(obj.get()));
@@ -381,23 +411,24 @@ int64_t PyDataProvider::getNextBatchInternal(int64_t size, DataBatch* batch) {
if (!iidData()) {
for (size_t j = 0; j < slotNum_; ++j) {
auto& slot = slots_[j];
- ICpuGpuVector::resizeOrCreate(
- cpuArguments[j].sequenceStartPositions,
- slot.sequenceNum + 1, /* useGpu= */ false);
+ ICpuGpuVector::resizeOrCreate(cpuArguments[j].sequenceStartPositions,
+ slot.sequenceNum + 1,
+ /* useGpu= */ false);
int* buf = cpuArguments[j].sequenceStartPositions->getMutableData(false);
std::copy(slot.sequenceStartPositions.begin(),
- slot.sequenceStartPositions.end(), buf);
+ slot.sequenceStartPositions.end(),
+ buf);
buf[slot.sequenceStartPositions.size()] = slot.sampleNum;
if (slot.subSequenceStartPositions.size()) {
- ICpuGpuVector::resizeOrCreate(
- cpuArguments[j].subSequenceStartPositions,
- slot.subSequenceNum + 1,
- /* useGpu= */ false);
+ ICpuGpuVector::resizeOrCreate(cpuArguments[j].subSequenceStartPositions,
+ slot.subSequenceNum + 1,
+ /* useGpu= */ false);
int* buf =
- cpuArguments[j].subSequenceStartPositions->getMutableData(false);
+ cpuArguments[j].subSequenceStartPositions->getMutableData(false);
std::copy(slot.subSequenceStartPositions.begin(),
- slot.subSequenceStartPositions.end(), buf);
+ slot.subSequenceStartPositions.end(),
+ buf);
buf[slot.subSequenceNum] = slot.sampleNum;
// check subSequenceStartPositions and sequenceStartPositions
cpuArguments[j].checkSubset();
@@ -452,8 +483,8 @@ int64_t PyDataProvider::getNextBatchInternal(int64_t size, DataBatch* batch) {
cpuArguments[i].subSequenceStartPositions;
}
} else {
- gpuArguments[i].resizeAndCopyFrom(cpuArguments[i], useGpu_,
- HPPL_STREAM_1);
+ gpuArguments[i].resizeAndCopyFrom(
+ cpuArguments[i], useGpu_, HPPL_STREAM_1);
}
}
hl_stream_synchronize(HPPL_STREAM_1);
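
The call sites reformatted in this file all follow one pattern: take PyGuard, call a method on the Python provider object through PyObject_CallMethod, wrap the result in PyObjectPtr, and validate it with CHECK_PY. A condensed sketch of that pattern follows; the callProviderMethod wrapper itself is hypothetical, while the types and macros are the ones used above.

namespace paddle {

// Sketch only: invoke a no-argument method on the Python data provider
// instance, mirroring the reset()/shuffle()/getHeader() calls above.
inline PyObjectPtr callProviderMethod(const PyObjectPtr& instance,
                                      const char* method) {
  PyGuard guard;  // serialize access to the Python interpreter
  PyObjectPtr result(PyObject_CallMethod(
      instance.get(), const_cast<char*>(method), NULL));
  CHECK_PY(result) << "Call function " << method << " failed.";
  return result;
}

}  // namespace paddle
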
diff --git a/paddle/gserver/dataproviders/PyDataProvider.h b/paddle/gserver/dataproviders/PyDataProvider.h
index 939d9cf725c2fe6e4989c17e1e768c9f8aedfc95..6bb7c831fdd451abc5241199d6a4d1b1ad814517 100644
--- a/paddle/gserver/dataproviders/PyDataProvider.h
+++ b/paddle/gserver/dataproviders/PyDataProvider.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include
@@ -25,7 +24,8 @@ namespace paddle {
class PyDataProvider : public DataProvider {
public:
- PyDataProvider(const DataConfig& config, bool useGpu,
+ PyDataProvider(const DataConfig& config,
+ bool useGpu,
bool loadDataAll = true);
virtual void reset();
@@ -48,21 +48,27 @@ protected:
void parseHeaderData(const std::string& headerData);
void fillDenseSlot(ProtoSlot& slot, char*& data, const char* dataEnd);
- void fillSparseNonValueSlot(ProtoSlot& slot, char*& data,
+ void fillSparseNonValueSlot(ProtoSlot& slot,
+ char*& data,
const char* dataEnd);
void fillSparseValueSlot(ProtoSlot& slot, char*& data, const char* dataEnd);
void fillIndexSlot(ProtoSlot& slot, char*& data, const char* dataEnd);
void fillStringSlot(ProtoSlot& slot, char*& data, const char* dataEnd);
void fillSlotsByStr(const std::string& samples);
- void handleDenseSlot(ProtoSlot& slot, size_t slotIndex,
+ void handleDenseSlot(ProtoSlot& slot,
+ size_t slotIndex,
std::vector& cpuArguments);
- void handleSparseNonValueSlot(ProtoSlot& slot, size_t slotIndex,
+ void handleSparseNonValueSlot(ProtoSlot& slot,
+ size_t slotIndex,
std::vector& cpuArguments);
- void handleSparseValueSlot(ProtoSlot& slot, size_t slotIndex,
+ void handleSparseValueSlot(ProtoSlot& slot,
+ size_t slotIndex,
std::vector& cpuArguments);
- void handleIndexSlot(ProtoSlot& slot, size_t slotIndex,
+ void handleIndexSlot(ProtoSlot& slot,
+ size_t slotIndex,
std::vector& cpuArguments);
- void handleStringSlot(ProtoSlot& slot, size_t slotIndex,
+ void handleStringSlot(ProtoSlot& slot,
+ size_t slotIndex,
std::vector& cpuArguments);
void resetSlots();
void loadData(const std::vector& fileList);
diff --git a/paddle/gserver/dataproviders/PyDataProvider2.cpp b/paddle/gserver/dataproviders/PyDataProvider2.cpp
index 90391a7c307d8dff7e289d445cafd27dc5008547..967fc9026a39967477d606862e060b680512901a 100644
--- a/paddle/gserver/dataproviders/PyDataProvider2.cpp
+++ b/paddle/gserver/dataproviders/PyDataProvider2.cpp
@@ -34,7 +34,7 @@ namespace paddle {
namespace unittest {
static std::unique_ptr<std::function<void(size_t)>>
- OnPoolFilled;
+ OnPoolFilled;
namespace pydp2 {
@@ -43,15 +43,11 @@ void setOnPoolFilledHook(const std::function& callback) {
*OnPoolFilled = callback;
}
-void clearOnPoolFilledHook() {
- OnPoolFilled.reset();
-}
+void clearOnPoolFilledHook() { OnPoolFilled.reset(); }
} // namespace pydp2
} // namespace unittest
-
-
/**
* Slot type
*/
@@ -65,17 +61,13 @@ enum SlotType {
/**
* Sequence type
*/
-enum SeqType {
- SQT_NONE = 0,
- SQT_SEQ,
- SQT_SUBSEQ
-};
+enum SeqType { SQT_NONE = 0, SQT_SEQ, SQT_SUBSEQ };
/**
* Cache Type.
*/
enum CacheType {
- NO_CACHE = 0, // Each pass will load data from PyDataProvider2.
+ NO_CACHE = 0, // Each pass will load data from PyDataProvider2.
CACHE_PASS_IN_MEM = 1, // First pass will load data from PyDataProvider2,
// then cache all data in memory. Load data from
// memory in rest passes.
@@ -87,8 +79,8 @@ struct SlotHeader { // Slot Header will parse from python object's slots field.
SeqType seqType;
};
-inline std::ostream& operator << (std::ostream& os, const SlotHeader& header) {
- os <<"Dim = " << header.dim << " Type = " << header.slotType
+inline std::ostream& operator<<(std::ostream& os, const SlotHeader& header) {
+ os << "Dim = " << header.dim << " Type = " << header.slotType
<< " SeqType = " << header.seqType;
return os;
}
@@ -158,7 +150,6 @@ protected:
SlotHeader* headerPtr_;
};
-
/**
* Py Data Provider Cache Interface.
*/
@@ -209,17 +200,13 @@ public:
PyDataProvider2(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu)
- :DataProvider(config, useGpu),
- callingContextCreated_(2) {
- if (PyArray_API == NULL)
- import_array();
+ : DataProvider(config, useGpu), callingContextCreated_(2) {
+ if (PyArray_API == NULL) import_array();
auto& args = config.load_data_args();
PyObjectPtr kwargs = PyObjectPtr(PyDict_New());
if (!args.empty()) {
kwargs = callPythonFuncRetPyObj(
- "paddle.trainer.PyDataProvider2",
- "deserialize_args",
- {args});
+ "paddle.trainer.PyDataProvider2", "deserialize_args", {args});
}
py::DictHelper kwargsDict(kwargs);
@@ -245,40 +232,38 @@ public:
* Dtor
* @note will stop loading thread when destructing
*/
- virtual ~PyDataProvider2() {
- resetImpl(false);
- }
+ virtual ~PyDataProvider2() { resetImpl(false); }
private:
void createPyDataObj(const std::string& model,
const std::string& className,
const std::string& fileListName,
- PyObjectPtr && kwargs) {
- LOG(INFO) << "loading dataprovider " << model <<"::" << className;
+ PyObjectPtr&& kwargs // NOLINT
+ ) {
+ LOG(INFO) << "loading dataprovider " << model << "::" << className;
PyObjectPtr module = py::import(model);
PyObjectPtr moduleDict(PyModule_GetDict(module.get()));
CHECK_PY(moduleDict) << "Invoke module.__dict__ error";
- PyObjectPtr cls(PyDict_GetItemString(moduleDict.get(),
- className.c_str()));
+ PyObjectPtr cls(PyDict_GetItemString(moduleDict.get(), className.c_str()));
CHECK_PY(cls) << "load class " << className.c_str() << "error";
// If there are multiple python instance share same module, the PyObjectPtr
// only for instance will make python reference-count error.
//
// So here, we increase reference count manually.
- if (gModuleClsPtrs_.find((uintptr_t) module.get())
- != gModuleClsPtrs_.end()) {
+ if (gModuleClsPtrs_.find((uintptr_t)module.get()) !=
+ gModuleClsPtrs_.end()) {
// Multi instance use same module
Py_XINCREF(module.get());
Py_XINCREF(moduleDict.get());
} else {
- gModuleClsPtrs_.insert((uintptr_t) module.get());
+ gModuleClsPtrs_.insert((uintptr_t)module.get());
}
- if (gModuleClsPtrs_.find((uintptr_t) cls.get()) != gModuleClsPtrs_.end()) {
+ if (gModuleClsPtrs_.find((uintptr_t)cls.get()) != gModuleClsPtrs_.end()) {
Py_XINCREF(cls.get());
} else {
- gModuleClsPtrs_.insert((uintptr_t) cls.get());
+ gModuleClsPtrs_.insert((uintptr_t)cls.get());
}
PyObjectPtr fileListInPy = loadPyFileLists(fileListName);
@@ -294,8 +279,8 @@ private:
py::ObjectHelper self(this->instance_);
bool ok;
- this->skipShuffle_ = !self.getBoolAttr("should_shuffle",
- &ok /*isBoolType*/);
+ this->skipShuffle_ =
+ !self.getBoolAttr("should_shuffle", &ok /*isBoolType*/);
if (!ok) {
this->skipShuffle_ = testing; // shuffle when training, skip shuffle
// when testing.
@@ -335,12 +320,12 @@ private:
PyObjectPtr headerPtrWrap(hdPtr);
py::ObjectHelper hd(headerPtrWrap);
header.dim = hd.getIntAttrWithError("dim");
- header.seqType = (SeqType) hd.getIntAttrWithError("seq_type");
- header.slotType = (SlotType) hd.getIntAttrWithError("type");
+ header.seqType = (SeqType)hd.getIntAttrWithError("seq_type");
+ header.slotType = (SlotType)hd.getIntAttrWithError("type");
}
DBG << "Data header size " << headers_.size();
- for (auto & header : headers_) {
+ for (auto& header : headers_) {
DBG << header;
}
cache_.reset(IPyDataProviderCache::create(
@@ -351,8 +336,7 @@ private:
loadFileList(fileListName, fileLists_);
PyObject* lst = PyList_New(fileLists_.size());
for (size_t i = 0; i < fileLists_.size(); ++i) {
- PyList_SET_ITEM(lst, i,
- PyString_FromString(fileLists_[i].c_str()));
+ PyList_SET_ITEM(lst, i, PyString_FromString(fileLists_[i].c_str()));
}
return PyObjectPtr(lst);
}
@@ -414,11 +398,12 @@ private:
CHECK(ok) << "CalcBatchSize must return int or long";
}
- if (this->loadThread_){ // wait poolActualSize < poolSize;
+ if (this->loadThread_) { // wait poolActualSize < poolSize;
std::unique_lock<std::mutex> l(mtx_);
- pushCV_.wait(l, [this, additionalBatchSize] {
- return this->poolActualSize_ < poolSize_;
- });
+ pushCV_.wait(l,
+ [this, additionalBatchSize] {
+ return this->poolActualSize_ < poolSize_;
+ });
}
{
@@ -487,14 +472,14 @@ private:
std::vector fileLists_;
std::vector headers_;
static PyObjectPtr zeroTuple_;
- static std::unordered_set<uintptr_t > gModuleClsPtrs_;
+ static std::unordered_set<uintptr_t> gModuleClsPtrs_;
class PositionRandom {
public:
- inline explicit PositionRandom(bool skipRand):
- eng_(ThreadLocalRandomEngine::get()), skipRand_(skipRand) {}
+ inline explicit PositionRandom(bool skipRand)
+ : eng_(ThreadLocalRandomEngine::get()), skipRand_(skipRand) {}
- inline size_t operator() (size_t len) {
+ inline size_t operator()(size_t len) {
if (!skipRand_) {
if (!dist_ || dist_->b() != len - 1) {
dist_.reset(new std::uniform_int_distribution(0, len - 1));
@@ -525,32 +510,31 @@ public:
* Shuffle. Do nothing because PyDataProvider does shuffling implicitly by
* randomly selecting data from the data pool.
*/
- void shuffle() {
- }
+ void shuffle() {}
/**
* Not limited size.
*/
- int64_t getSize() {
- return -1;
- }
+ int64_t getSize() { return -1; }
/**
* Loading a batch of data.
*/
- int64_t getNextBatchInternal(int64_t size_, DataBatch *batch) {
+ int64_t getNextBatchInternal(int64_t size_, DataBatch* batch) {
std::lock_guard<std::mutex> guard(mutexForReset_);
REGISTER_TIMER("PyDP2.getNextBatchInternal")
CHECK_GE(size_, 0);
- size_t size = (size_t) size_;
+ size_t size = (size_t)size_;
if (loadThread_) { // loading from thread should wait for data pool ready.
// but, loading from cache, cache object should ensure
// data pool ready.
std::unique_lock<std::mutex> l(mtx_);
- pullCV_.wait(l, [this, &size] {
- return this->poolActualSize_ >= std::max(size, this->minPoolSize_)
- || callingContexts_.empty();
- });
+ pullCV_.wait(l,
+ [this, &size] {
+ return this->poolActualSize_ >=
+ std::max(size, this->minPoolSize_) ||
+ callingContexts_.empty();
+ });
if (unittest::OnPoolFilled) {
(*unittest::OnPoolFilled)(this->poolActualSize_);
@@ -633,35 +617,35 @@ public:
cpuBatch.setSize(bsize);
auto& inArgs = cpuBatch.getStreams();
inArgs.resize(headers_.size());
- std::vector<std::unique_ptr<IFieldScanner> > scanners;
+ std::vector<std::unique_ptr<IFieldScanner>> scanners;
scanners.reserve(headers_.size());
for (auto& header : headers_) {
scanners.emplace_back(IFieldScanner::create(&header));
}
DBG << "Scanner created.";
- for (size_t i=0; i < headers_.size(); ++i) {
+ for (size_t i = 0; i < headers_.size(); ++i) {
scanners[i]->startPrepare(inArgs[i]);
}
- for (auto & d : data) {
+ for (auto& d : data) {
py::SequenceHelper s(d);
- for (size_t i=0; i < headers_.size(); ++i) {
+ for (size_t i = 0; i < headers_.size(); ++i) {
scanners[i]->prepare(inArgs[i], s[i]);
}
}
- for (size_t i=0; i < headers_.size(); ++i) {
+ for (size_t i = 0; i < headers_.size(); ++i) {
scanners[i]->finishPrepare(inArgs[i]);
}
- for (size_t i=0; i < headers_.size(); ++i) {
+ for (size_t i = 0; i < headers_.size(); ++i) {
scanners[i]->startFill(inArgs[i]);
}
- for (auto & d : data) {
+ for (auto& d : data) {
py::SequenceHelper s(d);
for (size_t i = 0; i < headers_.size(); ++i) {
scanners[i]->fill(inArgs[i], s[i]);
}
}
- for (size_t i=0; i < headers_.size(); ++i) {
+ for (size_t i = 0; i < headers_.size(); ++i) {
scanners[i]->finishFill(inArgs[i]);
}
@@ -679,8 +663,8 @@ public:
gpuArguments.resize(cpuArguments.size());
gpuBatch.setSize(size);
for (size_t i = 0; i < headers_.size(); ++i) {
- gpuArguments[i].resizeAndCopyFrom(cpuArguments[i], useGpu_,
- HPPL_STREAM_1);
+ gpuArguments[i].resizeAndCopyFrom(
+ cpuArguments[i], useGpu_, HPPL_STREAM_1);
}
hl_stream_synchronize(HPPL_STREAM_1);
} else {
@@ -690,31 +674,28 @@ public:
}
};
-std::unordered_set<uintptr_t > PyDataProvider2::gModuleClsPtrs_;
+std::unordered_set<uintptr_t> PyDataProvider2::gModuleClsPtrs_;
PyObjectPtr PyDataProvider2::zeroTuple_(PyTuple_New(0));
REGISTER_DATA_PROVIDER_EX(py2, PyDataProvider2);
-
/**
* Scanner for dense slot.
*/
-class DenseScanner: public IFieldScanner {
+class DenseScanner : public IFieldScanner {
public:
- explicit DenseScanner(SlotHeader* ptr):IFieldScanner(ptr), height_(0) {}
+ explicit DenseScanner(SlotHeader* ptr) : IFieldScanner(ptr), height_(0) {}
/**
* Prepare.
* @param argument target argument
* @param obj each timestep of a sample.
*/
- virtual void prepare(Argument &argument, PyObject *obj) {
- ++height_;
- }
+ virtual void prepare(Argument& argument, PyObject* obj) { ++height_; }
- virtual void finishPrepare(Argument &argument) {
- Matrix::resizeOrCreate(argument.value, height_, headerPtr_->dim,
- false, false);
+ virtual void finishPrepare(Argument& argument) {
+ Matrix::resizeOrCreate(
+ argument.value, height_, headerPtr_->dim, false, false);
height_ = 0;
}
@@ -723,24 +704,23 @@ public:
* @param argument
* @param obj
*/
- virtual void fill(Argument &argument, PyObject *obj) {
+ virtual void fill(Argument& argument, PyObject* obj) {
real* dat = argument.value->getData() + height_ * headerPtr_->dim;
if (PyArray_Check(obj)) {
- auto dtype = PyArray_DTYPE((PyArrayObject*)obj);
- if (dtype->type == 'f' && dtype->elsize == sizeof(real)) {
- real * data = (real*)PyArray_DATA((PyArrayObject*)obj);
- auto sz = PyArray_SIZE((PyArrayObject*)obj);
- std::copy(data, data + sz, dat);
- } else {
- LOG(FATAL) << "You should yield float" << sizeof(real) * 8
- << " array";
- }
- } else {
- py::SequenceHelper s(obj);
- // TODO(yuyang18): Here we can use AVX or SSE to accelerate memory copy.
- for (size_t i=0; i < headerPtr_->dim; ++i) {
- dat[i] = (real) s.getDouble(i);
- }
+ auto dtype = PyArray_DTYPE((PyArrayObject*)obj);
+ if (dtype->type == 'f' && dtype->elsize == sizeof(real)) {
+ real* data = (real*)PyArray_DATA((PyArrayObject*)obj);
+ auto sz = PyArray_SIZE((PyArrayObject*)obj);
+ std::copy(data, data + sz, dat);
+ } else {
+ LOG(FATAL) << "You should yield float" << sizeof(real) * 8 << " array";
+ }
+ } else {
+ py::SequenceHelper s(obj);
+ // TODO(yuyang18): Here we can use AVX or SSE to accelerate memory copy.
+ for (size_t i = 0; i < headerPtr_->dim; ++i) {
+ dat[i] = (real)s.getDouble(i);
+ }
}
++height_;
}
@@ -752,20 +732,18 @@ private:
/**
* Scanner for index slot
*/
-class IndexScanner: public IFieldScanner {
+class IndexScanner : public IFieldScanner {
public:
- explicit IndexScanner(SlotHeader* ptr):IFieldScanner(ptr), cnt_(0) {}
+ explicit IndexScanner(SlotHeader* ptr) : IFieldScanner(ptr), cnt_(0) {}
/**
* Prepare memory space.
*
* @note obj is a single timestep of sample
*/
- virtual void prepare(Argument &argument, PyObject *obj) {
- ++cnt_;
- }
+ virtual void prepare(Argument& argument, PyObject* obj) { ++cnt_; }
- virtual void finishPrepare(Argument &argument) {
+ virtual void finishPrepare(Argument& argument) {
IVector::resizeOrCreate(argument.ids, cnt_, false);
cnt_ = 0;
}
@@ -773,9 +751,9 @@ public:
/**
* Fill one index to argument.
*/
- virtual void fill(Argument &argument, PyObject *obj) {
+ virtual void fill(Argument& argument, PyObject* obj) {
bool ok;
- argument.ids->getData()[cnt_++] = py::castInt(obj, &ok);
+ argument.ids->getData()[cnt_++] = py::castInt(obj, &ok);
CHECK(ok) << "Cannot cast int " << py::repr(obj);
}
@@ -785,27 +763,25 @@ private:
class SparseNonValueScanner : public IFieldScanner {
public:
- explicit SparseNonValueScanner(SlotHeader* ptr): IFieldScanner(ptr),
- nnz_(0),
- height_(0) {}
+ explicit SparseNonValueScanner(SlotHeader* ptr)
+ : IFieldScanner(ptr), nnz_(0), height_(0) {}
/**
* Prepare memory space
* @note obj is a timestep of one sample.
*/
- virtual void prepare(Argument &argument, PyObject *obj) {
+ virtual void prepare(Argument& argument, PyObject* obj) {
++height_;
nnz_ += py::SequenceHelper(obj).size();
}
- virtual void finishPrepare(Argument &argument) {
- Matrix::resizeOrCreateSparseMatrix(argument.value, height_,
- headerPtr_->dim,
- nnz_, NO_VALUE);
+ virtual void finishPrepare(Argument& argument) {
+ Matrix::resizeOrCreateSparseMatrix(
+ argument.value, height_, headerPtr_->dim, nnz_, NO_VALUE);
}
- virtual void startFill(Argument & argument) {
- auto smat = (CpuSparseMatrix*) (argument.value.get());
+ virtual void startFill(Argument& argument) {
+ auto smat = (CpuSparseMatrix*)(argument.value.get());
smat->getRows()[0] = 0;
nnz_ = 0;
height_ = 1;
@@ -818,14 +794,14 @@ public:
virtual void fill(Argument& argument, PyObject* obj) {
py::SequenceHelper s(obj);
auto sz = s.size();
- auto smat = (CpuSparseMatrix*) (argument.value.get());
+ auto smat = (CpuSparseMatrix*)(argument.value.get());
int* row = smat->getRows();
int* col = smat->getCols();
real* dat = smat->getData();
- row[height_] = row[height_-1] + (int)sz;
+ row[height_] = row[height_ - 1] + (int)sz;
for (decltype(sz) i = 0; i < sz; ++i) {
- setData(col+nnz_, dat+nnz_, s[i]);
+ setData(col + nnz_, dat + nnz_, s[i]);
++nnz_;
}
++height_;
@@ -839,7 +815,7 @@ protected:
* @param [in] obj Python Object. For sparse_non_value is a PyInt or PyLong.
* For sparse_value is a Tuple (int, float).
*/
- virtual void setData(int* col, real * dat, PyObject* obj) {
+ virtual void setData(int* col, real* dat, PyObject* obj) {
bool ok;
*col = py::castInt(obj, &ok);
CHECK(ok);
@@ -851,26 +827,25 @@ protected:
class SparseValueScanner : public SparseNonValueScanner {
public:
- explicit SparseValueScanner(SlotHeader *ptr) : SparseNonValueScanner(ptr) {}
+ explicit SparseValueScanner(SlotHeader* ptr) : SparseNonValueScanner(ptr) {}
- virtual void finishPrepare(Argument &argument) {
- Matrix::resizeOrCreateSparseMatrix(argument.value, height_,
- headerPtr_->dim,
- nnz_, FLOAT_VALUE);
+ virtual void finishPrepare(Argument& argument) {
+ Matrix::resizeOrCreateSparseMatrix(
+ argument.value, height_, headerPtr_->dim, nnz_, FLOAT_VALUE);
}
protected:
- virtual void setData(int *col, real *dat, PyObject *obj) {
+ virtual void setData(int* col, real* dat, PyObject* obj) {
py::SequenceHelper s(obj);
SparseNonValueScanner::setData(col, dat, s[0]);
- *dat = (real) s.getDouble(1);
+ *dat = (real)s.getDouble(1);
}
};
/**
* Sequence Scanner. Scanner for sequence or sub-sequence.
*/
-class SequenceScanner: public IFieldScanner {
+class SequenceScanner : public IFieldScanner {
public:
/**
* Ctor
@@ -879,15 +854,18 @@ public:
* return a sequence start position or a sub-sequence
* start position.
*/
- SequenceScanner(std::unique_ptr<IFieldScanner>&& innerScanner,
- const std::function<ICpuGpuVectorPtr&(Argument&)>& getSeqStartPos)
- : IFieldScanner(nullptr), inner_(std::move(innerScanner)),
- cnt_(0), getSeqStartPos_(getSeqStartPos) {}
+ SequenceScanner(
+ std::unique_ptr<IFieldScanner>&& innerScanner,
+ const std::function<ICpuGpuVectorPtr&(Argument&)>& getSeqStartPos)
+ : IFieldScanner(nullptr),
+ inner_(std::move(innerScanner)),
+ cnt_(0),
+ getSeqStartPos_(getSeqStartPos) {}
/**
* Start prepare. Invoke inner->startPrepare too.
*/
- virtual void startPrepare(Argument &argument) {
+ virtual void startPrepare(Argument& argument) {
inner_->startPrepare(argument);
}
@@ -895,10 +873,10 @@ public:
* Prepare. obj is a list or tuple. it will invoke inner_->prepare for each
* element of sequence obj.
*/
- virtual void prepare(Argument &argument, PyObject *obj) {
+ virtual void prepare(Argument& argument, PyObject* obj) {
py::SequenceHelper s(obj);
++cnt_;
- for (size_t i=0; i < s.size(); ++i) {
+ for (size_t i = 0; i < s.size(); ++i) {
inner_->prepare(argument, s[i]);
}
}
@@ -906,7 +884,7 @@ public:
/**
* Finish prepare. invoke inner_->finishPrepare too.
*/
- virtual void finishPrepare(Argument &argument) {
+ virtual void finishPrepare(Argument& argument) {
ICpuGpuVector::resizeOrCreate(getSeqStartPos_(argument), cnt_ + 1, false);
inner_->finishPrepare(argument);
}
@@ -914,7 +892,7 @@ public:
/**
* Start fill. invoke inner->startFill too.
*/
- virtual void startFill(Argument &argument) {
+ virtual void startFill(Argument& argument) {
getSeqStartPos_(argument)->getMutableData(false)[0] = 0;
cnt_ = 1;
inner_->startFill(argument);
@@ -925,13 +903,13 @@ public:
* sequence obj. And set seqStartPos at same time. The seqStartPos will be
* calculated by getSeqStartPos callback passed in ctor.
*/
- virtual void fill(Argument &argument, PyObject *obj) {
+ virtual void fill(Argument& argument, PyObject* obj) {
getSeqStartPos_(argument)->getMutableData(false)[cnt_] =
- getSeqStartPos_(argument)->getMutableData(false)[cnt_ - 1] +
- (int)getSize(obj);
+ getSeqStartPos_(argument)->getMutableData(false)[cnt_ - 1] +
+ (int)getSize(obj);
py::SequenceHelper s(obj);
++cnt_;
- for (size_t i=0; i < s.size(); ++i) {
+ for (size_t i = 0; i < s.size(); ++i) {
inner_->fill(argument, s[i]);
}
}
@@ -939,9 +917,7 @@ public:
/**
* Finish fill. will invoke inner->finishFill too.
*/
- virtual void finishFill(Argument &argument) {
- inner_->finishFill(argument);
- }
+ virtual void finishFill(Argument& argument) { inner_->finishFill(argument); }
protected:
size_t getSize(PyObject* obj) {
@@ -949,7 +925,7 @@ protected:
auto sc = dynamic_cast<SequenceScanner*>(inner_.get());
if (sc) {
size_t sum = 0;
- for (size_t i=0; i < s.size(); ++i) {
+ for (size_t i = 0; i < s.size(); ++i) {
sum += sc->getSize(s[i]);
}
return sum;
@@ -964,8 +940,7 @@ private:
std::function<ICpuGpuVectorPtr&(Argument&)> getSeqStartPos_;
};
-
-IFieldScanner* IFieldScanner::create(SlotHeader *header) {
+IFieldScanner* IFieldScanner::create(SlotHeader* header) {
IFieldScanner* retv = nullptr;
switch (header->slotType) {
case ST_DENSE:
@@ -989,15 +964,15 @@ IFieldScanner* IFieldScanner::create(SlotHeader *header) {
break;
case SQT_SUBSEQ:
retv = new SequenceScanner(std::unique_ptr<IFieldScanner>(retv),
- [](Argument& arg) -> ICpuGpuVectorPtr& {
- return arg.subSequenceStartPositions;
- });
- // fall through, not break;
+ [](Argument& arg) -> ICpuGpuVectorPtr& {
+ return arg.subSequenceStartPositions;
+ });
+ // fall through, not break;
case SQT_SEQ:
retv = new SequenceScanner(std::unique_ptr<IFieldScanner>(retv),
- [](Argument& arg) -> ICpuGpuVectorPtr& {
- return arg.sequenceStartPositions;
- });
+ [](Argument& arg) -> ICpuGpuVectorPtr& {
+ return arg.sequenceStartPositions;
+ });
break;
default:
LOG(FATAL) << "Not implemented";
@@ -1010,19 +985,13 @@ IFieldScanner* IFieldScanner::create(SlotHeader *header) {
* No Cache Strategy. Will destruct old data immediately and load data from
* python every pass.
*/
-class NoCacheStrategy: public IPyDataProviderCache {
+class NoCacheStrategy : public IPyDataProviderCache {
public:
- virtual bool reset() {
- return true;
- }
+ virtual bool reset() { return true; }
- virtual void drop(std::deque