Commit c9baf824 authored by zhouti

Merge remote-tracking branch 'upstream/develop' into develop

@@ -13,8 +13,6 @@
 # The document of clang-format is
 # http://clang.llvm.org/docs/ClangFormat.html
 # http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
 ---
 Language: Cpp
 BasedOnStyle: Google
@@ -22,8 +20,9 @@ IndentWidth: 2
 TabWidth: 2
 ContinuationIndentWidth: 4
 AccessModifierOffset: -2 # The private/protected/public has no indent in class
-PointerAlignment: Left # int* p/int& p, not int *p/int &p
 Standard: Cpp11
 AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
 ...
@@ -5,4 +5,6 @@ build/
 .vscode
 .idea
 .project
+.cproject
 .pydevproject
+Makefile
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
sha: c25201a00e6b0514370501050cf2a8538ac12270
hooks:
- id: remove-crlf
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
hooks:
- id: check-added-large-files
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
- id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
hooks:
- id: clang-formater
[style]
based_on_style = pep8
column_limit = 80
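
Most of the Python churn in the rest of this merge is mechanical reformatting by yapf under the style above (pep8 base, 80-column limit). As a rough illustration only, the hypothetical snippet below (not code from this repository) shows the kind of rewrap yapf produces with these settings:

# A minimal, hypothetical sketch (not from this commit) of what yapf does with
# based_on_style = pep8 and column_limit = 80.

def process_batch(input_path, label_map, output_dir, split_name, shuffle=True):
    # Stand-in helper so the example is self-contained and runnable.
    return (input_path, label_map, output_dir, split_name, shuffle)

# Before formatting: one call longer than 80 columns, keyword written as `shuffle = True`.
# result = process_batch("data/batch_1", {0: "airplane"}, "out", "train", shuffle = True)

# After running yapf with the style above: arguments are wrapped under the
# 80-column limit and keyword arguments lose the spaces around '=',
# which is exactly the pattern seen in the Python hunks below.
result = process_batch(
    "data/batch_1", {0: "airplane"}, "out", "train", shuffle=True)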
@@ -38,10 +38,19 @@ addons:
     - curl
     - lcov
     - graphviz
+    - swig
 before_install:
+  - |
+    if [ ${JOB} == "BUILD_AND_TEST" ]; then
+      if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
+      then
+        echo "Only markdown docs were updated, stopping build process."
+        exit
+      fi
+    fi
   - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
-  - pip install wheel protobuf sphinx breathe recommonmark
+  - pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy
 script:
   - paddle/scripts/travis/main.sh
 notifications:
...
@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8)
 project(paddle CXX C)
 set(PADDLE_MAJOR_VERSION 0)
-set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b3)
+set(PADDLE_MINOR_VERSION 9)
+set(PADDLE_PATCH_VERSION 0a0)
 set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
@@ -95,11 +95,24 @@ if(NOT WITH_GPU)
     add_definitions(-DHPPL_STUB_FUNC)
     list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
 else()
+    if(${CUDA_VERSION_MAJOR} GREATER 6)
+        if(COMPILER_SUPPORT_CXX11)
+            LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
+        endif()
+    endif()
+
     # TODO(yuyang18): Change it to remove std=c++11 in cuda compile.
     set(CUDA_PROPAGATE_HOST_FLAGS OFF)
     if(NOT CUDNN_FOUND)
         message(FATAL_ERROR "Paddle need cudnn to compile")
     endif()
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")
+
+    if(WITH_AVX)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
+    else(WITH_AVX)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
+    endif(WITH_AVX)
+
     if(WITH_DSO)
         set(CUDA_LIBRARIES "")
@@ -123,11 +136,11 @@ if(NOT WITH_TIMER)
 endif(NOT WITH_TIMER)

 if(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
 else(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
 endif(WITH_AVX)

 if(WITH_PYTHON)
...
@@ -7,7 +7,7 @@ Before submitting the issue, look over the following criteria before handing you
 - [ ] Was there a similar issue submitted or resolved before ? You could search issue in the github.
 - [ ] Did you retrieve your issue from widespread search engines ?
 - [ ] Is my description of the issue clear enough to reproduce this problem?
-   * If some errors occured, we need details about `how do you run your code?`, `what system do you use?`, `Are you using GPU or not?`, etc.
+   * If some errors occurred, we need details about `how do you run your code?`, `what system do you use?`, `Are you using GPU or not?`, etc.
    * If you use an recording [asciinema](https://asciinema.org/) to show what you are doing to make it happen, that's awesome! We could help you solve the problem more quickly.
 - [ ] Is my description of the issue use the github markdown correctly?
    * Please use the proper markdown syntaxes for styling all forms of writing, e.g, source code, error information, etc.
...
# PaddlePaddle

-[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)
-[![Coverage Status](https://coveralls.io/repos/github/baidu/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/baidu/Paddle?branch=develop)
-[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
+[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
+[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
+[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)

 Welcome to the PaddlePaddle GitHub.
@@ -14,7 +17,7 @@ developed by Baidu scientists and engineers for the purpose of applying deep
 learning to many products at Baidu.

 Our vision is to enable deep learning for everyone via PaddlePaddle.
-Please refer to our [release log](https://github.com/baidu/Paddle/releases) to track the latest feature of PaddlePaddle.
+Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.

 ## Features
@@ -26,15 +29,15 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
   connection.

 - **Efficiency**

   In order to unleash the power of heterogeneous computing resource,
   optimization occurs at different levels of PaddlePaddle, including
   computing, memory, architecture and communication. The following are some
   examples:

   - Optimized math operations through SSE/AVX intrinsics, BLAS libraries
     (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
   - Highly optimized recurrent networks which can handle **variable-length**
     sequence without padding.
   - Optimized local and distributed training for models with high dimensional
     sparse data.
@@ -57,41 +60,39 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
 ## Installation

 Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
 pre-built packages (**docker image**, **deb package**) or
 directly build on **Linux** and **Mac OS X** from the source code.

 ## Documentation

 Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.

 - [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
   You can follow the quick start tutorial to learn how use PaddlePaddle
   step-by-step.

 - [Example and Demo](http://paddlepaddle.org/doc/demo/) <br>
   We provide five demos, including: image classification, sentiment analysis,
   sequence to sequence model, recommendation, semantic role labeling.

 - [Distributed Training](http://paddlepaddle.org/doc/cluster) <br>
   This system supports training deep learning models on multiple machines
   with data parallelism.

 - [Python API](http://paddlepaddle.org/doc/ui/) <br>
   PaddlePaddle supports using either Python interface or C++ to build your
   system. We also use SWIG to wrap C++ source code to create a user friendly
   interface for Python. You can also use SWIG to create interface for your
   favorite programming language.

 - [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br>
   We sincerely appreciate your interest and contributions. If you would like to
   contribute, please read the contribution guide.

 - [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>

 ## Ask Questions

-Please join the [**gitter chat**](https://gitter.im/PaddlePaddle/Deep_Learning) or send email to
-**paddle-dev@baidu.com** to ask questions and talk about methods and models.
-Framework development discussions and
-bug reports are collected on [Issues](https://github.com/baidu/paddle/issues).
+You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues).

 ## Copyright and License
 PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
@@ -3,36 +3,55 @@
 INCLUDE(CheckCXXSourceRuns)

-SET(FIND_AVX_10)
-SET(FIND_AVX_20)
-SET(AVX_FLAGS)
-SET(AVX_FOUND)
-
-# Check AVX 2
-SET(CMAKE_REQUIRED_FLAGS)
 IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  SET(CMAKE_REQUIRED_FLAGS "-mavx2")
-ELSEIF(MSVC AND NOT CMAKE_CL_64) # reserve for WINDOWS
-  SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
+  set(MMX_FLAG "-mmmx")
+  set(SSE2_FLAG "-msse2")
+  set(SSE3_FLAG "-msse3")
+  SET(AVX_FLAG "-mavx")
+  SET(AVX2_FLAG "-mavx2")
+ELSEIF(MSVC)
+  set(MMX_FLAG "/arch:MMX")
+  set(SSE2_FLAG "/arch:SSE2")
+  set(SSE3_FLAG "/arch:SSE3")
+  SET(AVX_FLAG "/arch:AVX")
+  SET(AVX2_FLAG "/arch:AVX2")
 ENDIF()

+# Check MMX
+set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
+#include <mmintrin.h>
 int main()
 {
-    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
-    __m256i result = _mm256_abs_epi32 (a);
+    _mm_setzero_si64();
     return 0;
-}" FIND_AVX_20)
+}" MMX_FOUND)

-# Check AVX
-SET(CMAKE_REQUIRED_FLAGS)
-IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  SET(CMAKE_REQUIRED_FLAGS "-mavx")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)
-  SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
-endif()
+# Check SSE2
+set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <emmintrin.h>
+int main()
+{
+    _mm_setzero_si128();
+    return 0;
+}" SSE2_FOUND)

+# Check SSE3
+set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <pmmintrin.h>
+int main()
+{
+    __m128d a = _mm_set1_pd(6.28);
+    __m128d b = _mm_set1_pd(3.14);
+    __m128d result = _mm_addsub_pd(a, b);
+    result = _mm_movedup_pd(result);
+    return 0;
+}" SSE3_FOUND)

+# Check AVX
+set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
 #include <immintrin.h>
 int main()
@@ -41,25 +60,17 @@ int main()
     __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
     __m256 result = _mm256_add_ps (a, b);
     return 0;
-}" FIND_AVX_10)
+}" AVX_FOUND)

-IF(${FIND_AVX_20})
-  IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
-  ELSEIF(MSVC)
-    SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
-  ENDIF()
-ENDIF()
-
-IF(${FIND_AVX_10})
-  IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
-  ELSEIF(MSVC)
-    SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
-  ENDIF()
-ENDIF()
-
-IF(${FIND_AVX_10})
-  SET(AVX_FOUND TRUE)
-  MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
-ENDIF()
+# Check AVX 2
+set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    __m256i result = _mm256_abs_epi32 (a);
+    return 0;
+}" AVX2_FOUND)
+
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
@@ -188,14 +188,6 @@ macro(add_simple_unittest TARGET_NAME)
     add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp)
 endmacro()

-macro(add_paddle_culib TARGET_NAME)
-    set(NVCC_FLAG ${CUDA_NVCC_FLAGS})
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--use_fast_math)
-    cuda_add_library(${TARGET_NAME} STATIC ${ARGN})
-    set(CUDA_NVCC_FLAGS ${NVCC_FLAG})
-endmacro()
-
 # Creates C resources file from files in given resource file
 function(create_resources res_file output)
     # Create empty output file
...
@@ -5,3 +5,5 @@ plot.png
 train.log
 image_provider_copy_1.py
 *pyc
+train.list
+test.list
File mode changed from 100644 to 100755
@@ -16,7 +16,6 @@ import numpy as np
 import sys
 import os
 import PIL.Image as Image
 """
 Usage: python process_cifar input_dir output_dir
 """
@@ -30,6 +29,7 @@ def mkdir_not_exist(path):
     if not os.path.exists(path):
         os.mkdir(path)

 def create_dir_structure(output_dir):
     """
     Create the directory structure for the directory.
@@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
     mkdir_not_exist(os.path.join(output_dir, "train"))
     mkdir_not_exist(os.path.join(output_dir, "test"))

-def convert_batch(batch_path, label_set, label_map,
-                  output_dir, data_split):
+def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
     """
     Convert CIFAR batch to the structure of Paddle format.
     batch_path: the batch to be converted.
@@ -67,11 +67,23 @@ if __name__ == '__main__':
     output_dir = sys.argv[2]
     num_batch = 5
     create_dir_structure(output_dir)
-    label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
-                 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+    label_map = {
+        0: "airplane",
+        1: "automobile",
+        2: "bird",
+        3: "cat",
+        4: "deer",
+        5: "dog",
+        6: "frog",
+        7: "horse",
+        8: "ship",
+        9: "truck"
+    }
     labels = {}
     for i in range(1, num_batch + 1):
-        convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
-                      label_map, output_dir, "train")
-    convert_batch(os.path.join(input_dir, "test_batch"), {},
-                  label_map, output_dir, "test")
\ No newline at end of file
+        convert_batch(
+            os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
+            output_dir, "train")
+    convert_batch(
+        os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
+        "test")
...@@ -46,36 +46,41 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg, ...@@ -46,36 +46,41 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.img_mean = image_util.load_meta(settings.meta_path, settings.img_mean = image_util.load_meta(settings.meta_path,
settings.mean_img_size, settings.mean_img_size,
settings.img_size, settings.img_size, settings.color)
settings.color)
settings.logger.info('Image size: %s', settings.img_size) settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path) settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = [ settings.input_types = [
dense_vector(settings.img_raw_size), # image feature dense_vector(settings.img_raw_size), # image feature
integer_value(settings.num_classes)] # labels integer_value(settings.num_classes)
] # labels
settings.logger.info('DataProvider Initialization finished') settings.logger.info('DataProvider Initialization finished')
@provider(init_hook=hook) @provider(init_hook=hook, min_pool_size=0)
def processData(settings, file_name): def processData(settings, file_list):
""" """
The main function for loading data. The main function for loading data.
Load the batch, iterate all the images and labels in this batch. Load the batch, iterate all the images and labels in this batch.
file_name: the batch file name. file_list: the batch file list.
""" """
data = cPickle.load(io.open(file_name, 'rb')) with open(file_list, 'r') as fdata:
indexes = list(range(len(data['images']))) lines = [line.strip() for line in fdata]
if settings.is_train: random.shuffle(lines)
random.shuffle(indexes) for file_name in lines:
for i in indexes: with io.open(file_name.strip(), 'rb') as file:
if settings.use_jpeg == 1: data = cPickle.load(file)
img = image_util.decode_jpeg(data['images'][i]) indexes = list(range(len(data['images'])))
else: if settings.is_train:
img = data['images'][i] random.shuffle(indexes)
img_feat = image_util.preprocess_img(img, settings.img_mean, for i in indexes:
settings.img_size, settings.is_train, if settings.use_jpeg == 1:
settings.color) img = image_util.decode_jpeg(data['images'][i])
label = data['labels'][i] else:
yield img_feat.tolist(), int(label) img = data['images'][i]
img_feat = image_util.preprocess_img(
img, settings.img_mean, settings.img_size,
settings.is_train, settings.color)
label = data['labels'][i]
yield img_feat.astype('float32'), int(label)
...@@ -16,17 +16,20 @@ import numpy as np ...@@ -16,17 +16,20 @@ import numpy as np
from PIL import Image from PIL import Image
from cStringIO import StringIO from cStringIO import StringIO
def resize_image(img, target_size): def resize_image(img, target_size):
""" """
Resize an image so that the shorter edge has length target_size. Resize an image so that the shorter edge has length target_size.
img: the input image to be resized. img: the input image to be resized.
target_size: the target resized image size. target_size: the target resized image size.
""" """
percent = (target_size/float(min(img.size[0], img.size[1]))) percent = (target_size / float(min(img.size[0], img.size[1])))
resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent)) resized_size = int(round(img.size[0] * percent)), int(
round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS) img = img.resize(resized_size, Image.ANTIALIAS)
return img return img
def flip(im): def flip(im):
""" """
Return the flipped image. Return the flipped image.
...@@ -38,6 +41,7 @@ def flip(im): ...@@ -38,6 +41,7 @@ def flip(im):
else: else:
return im[:, ::-1] return im[:, ::-1]
def crop_img(im, inner_size, color=True, test=True): def crop_img(im, inner_size, color=True, test=True):
""" """
Return cropped image. Return cropped image.
...@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True): ...@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
If True, crop the center of images. If True, crop the center of images.
""" """
if color: if color:
height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2]) height, width = max(inner_size, im.shape[1]), max(inner_size,
im.shape[2])
padded_im = np.zeros((3, height, width)) padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2 startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2 startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2] endY, endX = startY + im.shape[1], startX + im.shape[2]
padded_im[:, startY: endY, startX: endX] = im padded_im[:, startY:endY, startX:endX] = im
else: else:
im = im.astype('float32') im = im.astype('float32')
height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1]) height, width = max(inner_size, im.shape[0]), max(inner_size,
im.shape[1])
padded_im = np.zeros((height, width)) padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2 startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2 startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1] endY, endX = startY + im.shape[0], startX + im.shape[1]
padded_im[startY: endY, startX: endX] = im padded_im[startY:endY, startX:endX] = im
if test: if test:
startY = (height - inner_size) / 2 startY = (height - inner_size) / 2
startX = (width - inner_size) / 2 startX = (width - inner_size) / 2
...@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True): ...@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
startX = np.random.randint(0, width - inner_size + 1) startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size endY, endX = startY + inner_size, startX + inner_size
if color: if color:
pic = padded_im[:, startY: endY, startX: endX] pic = padded_im[:, startY:endY, startX:endX]
else: else:
pic = padded_im[startY: endY, startX: endX] pic = padded_im[startY:endY, startX:endX]
if (not test) and (np.random.randint(2) == 0): if (not test) and (np.random.randint(2) == 0):
pic = flip(pic) pic = flip(pic)
return pic return pic
def decode_jpeg(jpeg_string): def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string))) np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3: if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1)) np_array = np.transpose(np_array, (2, 0, 1))
return np_array return np_array
def preprocess_img(im, img_mean, crop_size, is_train, color=True): def preprocess_img(im, img_mean, crop_size, is_train, color=True):
""" """
Does data augmentation for images. Does data augmentation for images.
...@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True): ...@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
pic -= img_mean pic -= img_mean
return pic.flatten() return pic.flatten()
def load_meta(meta_path, mean_img_size, crop_size, color=True): def load_meta(meta_path, mean_img_size, crop_size, color=True):
""" """
Return the loaded meta file. Return the loaded meta file.
...@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True): ...@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
mean = np.load(meta_path)['data_mean'] mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2 border = (mean_img_size - crop_size) / 2
if color: if color:
assert(mean_img_size * mean_img_size * 3 == mean.shape[0]) assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size) mean = mean.reshape(3, mean_img_size, mean_img_size)
mean = mean[:, border: border + crop_size, mean = mean[:, border:border + crop_size, border:border +
border: border + crop_size].astype('float32') crop_size].astype('float32')
else: else:
assert(mean_img_size * mean_img_size == mean.shape[0]) assert (mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size) mean = mean.reshape(mean_img_size, mean_img_size)
mean = mean[border: border + crop_size, mean = mean[border:border + crop_size, border:border +
border: border + crop_size].astype('float32') crop_size].astype('float32')
return mean return mean
def load_image(img_path, is_color=True): def load_image(img_path, is_color=True):
""" """
Load image and return. Load image and return.
...@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True): ...@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
img.load() img.load()
return img return img
def oversample(img, crop_dims): def oversample(img, crop_dims):
""" """
image : iterable of (H x W x K) ndarrays image : iterable of (H x W x K) ndarrays
...@@ -152,50 +163,53 @@ def oversample(img, crop_dims): ...@@ -152,50 +163,53 @@ def oversample(img, crop_dims):
for j in w_indices: for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1 curr += 1
crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([ crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
-crop_dims / 2.0, [-crop_dims / 2.0, crop_dims / 2.0])
crop_dims / 2.0
])
crops_ix = np.tile(crops_ix, (2, 1)) crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops # Extract crops
crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1], crops = np.empty(
im_shape[-1]), dtype=np.float32) (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
dtype=np.float32)
ix = 0 ix = 0
for im in img: for im in img:
for crop in crops_ix: for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :] crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1 ix += 1
crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors
return crops return crops
class ImageTransformer: class ImageTransformer:
def __init__(self, transpose = None, def __init__(self,
channel_swap = None, mean = None, is_color = True): transpose=None,
channel_swap=None,
mean=None,
is_color=True):
self.transpose = transpose self.transpose = transpose
self.channel_swap = None self.channel_swap = None
self.mean = None self.mean = None
self.is_color = is_color self.is_color = is_color
def set_transpose(self, order): def set_transpose(self, order):
if self.is_color: if self.is_color:
assert 3 == len(order) assert 3 == len(order)
self.transpose = order self.transpose = order
def set_channel_swap(self, order): def set_channel_swap(self, order):
if self.is_color: if self.is_color:
assert 3 == len(order) assert 3 == len(order)
self.channel_swap = order self.channel_swap = order
def set_mean(self, mean): def set_mean(self, mean):
# mean value, may be one value per channel # mean value, may be one value per channel
if mean.ndim == 1: if mean.ndim == 1:
mean = mean[:, np.newaxis, np.newaxis] mean = mean[:, np.newaxis, np.newaxis]
else: else:
# elementwise mean # elementwise mean
if self.is_color: if self.is_color:
assert len(mean.shape) == 3 assert len(mean.shape) == 3
self.mean = mean self.mean = mean
def transformer(self, data): def transformer(self, data):
if self.transpose is not None: if self.transpose is not None:
......
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os,sys
+import os, sys
 import numpy as np
 import logging
 from PIL import Image
@@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config

-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)
class ImageClassifier(): class ImageClassifier():
def __init__(self, def __init__(self,
train_conf, train_conf,
...@@ -58,18 +60,19 @@ class ImageClassifier(): ...@@ -58,18 +60,19 @@ class ImageClassifier():
self.oversample = oversample self.oversample = oversample
self.is_color = is_color self.is_color = is_color
self.transformer = image_util.ImageTransformer(is_color = is_color) self.transformer = image_util.ImageTransformer(is_color=is_color)
self.transformer.set_transpose((2,0,1)) self.transformer.set_transpose((2, 0, 1))
self.mean_file = mean_file self.mean_file = mean_file
mean = np.load(self.mean_file)['data_mean'] mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1]) mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
self.transformer.set_mean(mean) # mean pixel self.transformer.set_mean(mean) # mean pixel
gpu = 1 if use_gpu else 0 gpu = 1 if use_gpu else 0
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu) conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
conf = parse_config(train_conf, conf_args) conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (gpu)) swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine) assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir) self.network.loadParameters(self.model_dir)
...@@ -90,14 +93,14 @@ class ImageClassifier(): ...@@ -90,14 +93,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim # image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim) image = image_util.resize_image(image, self.resize_dim)
image = np.array(image) image = np.array(image)
input = np.zeros((1, image.shape[0], image.shape[1], 3), input = np.zeros(
dtype=np.float32) (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32) input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims) input = image_util.oversample(input, self.crop_dims)
else: else:
image = image.resize(self.crop_dims, Image.ANTIALIAS) image = image.resize(self.crop_dims, Image.ANTIALIAS)
input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3), input = np.zeros(
dtype=np.float32) (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32) input[0] = np.array(image).astype(np.float32)
data_in = [] data_in = []
...@@ -133,22 +136,24 @@ class ImageClassifier(): ...@@ -133,22 +136,24 @@ class ImageClassifier():
lab = np.argsort(-prob) lab = np.argsort(-prob)
logging.info("Label of %s is: %d", image, lab[0]) logging.info("Label of %s is: %d", image, lab[0])
 if __name__ == '__main__':
-    image_size=32
-    crop_size=32
-    multi_crop=True
-    config="vgg_16_cifar.py"
-    output_layer="__fc_layer_1__"
-    mean_path="data/cifar-out/batches/batches.meta"
-    model_path=sys.argv[1]
-    image=sys.argv[2]
-    use_gpu=bool(int(sys.argv[3]))
+    image_size = 32
+    crop_size = 32
+    multi_crop = True
+    config = "vgg_16_cifar.py"
+    output_layer = "__fc_layer_1__"
+    mean_path = "data/cifar-out/batches/batches.meta"
+    model_path = sys.argv[1]
+    image = sys.argv[2]
+    use_gpu = bool(int(sys.argv[3]))

-    obj = ImageClassifier(train_conf=config,
-                          model_dir=model_path,
-                          resize_dim=image_size,
-                          crop_dim=crop_size,
-                          mean_file=mean_path,
-                          use_gpu=use_gpu,
-                          oversample=multi_crop)
+    obj = ImageClassifier(
+        train_conf=config,
+        model_dir=model_path,
+        resize_dim=image_size,
+        crop_dim=crop_size,
+        mean_file=mean_path,
+        use_gpu=use_gpu,
+        oversample=multi_crop)
     obj.predict(image, output_layer)
...@@ -19,22 +19,36 @@ from optparse import OptionParser ...@@ -19,22 +19,36 @@ from optparse import OptionParser
def option_parser(): def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\ parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]") "-i data_dir [options]")
parser.add_option("-i", "--input", action="store", parser.add_option(
dest="input", help="Input data directory.") "-i",
parser.add_option("-s", "--size", action="store", "--input",
dest="size", help="Processed image size.") action="store",
parser.add_option("-c", "--color", action="store", dest="input",
dest="color", help="whether to use color images.") help="Input data directory.")
parser.add_option(
"-s",
"--size",
action="store",
dest="size",
help="Processed image size.")
parser.add_option(
"-c",
"--color",
action="store",
dest="color",
help="whether to use color images.")
return parser.parse_args() return parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
options, args = option_parser() options, args = option_parser()
data_dir = options.input data_dir = options.input
processed_image_size = int(options.size) processed_image_size = int(options.size)
color = options.color == "1" color = options.color == "1"
data_creator = ImageClassificationDatasetCreater(data_dir, data_creator = ImageClassificationDatasetCreater(
processed_image_size, data_dir, processed_image_size, color)
color) data_creator.train_list_name = "train.txt"
data_creator.num_per_batch = 1000 data_creator.test_list_name = "test.txt"
data_creator.overwrite = True data_creator.num_per_batch = 1000
data_creator.create_batches() data_creator.overwrite = True
data_creator.create_batches()
@@ -17,3 +17,6 @@ set -e
 data_dir=./data/cifar-out
 python preprocess.py -i $data_dir -s 32 -c 1
+
+echo "data/cifar-out/batches/train.txt" > train.list
+echo "data/cifar-out/batches/test.txt" > test.list
@@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False)
 ####################Data Configuration ##################
 if not is_predict:
-    data_dir='data/cifar-out/batches/'
-    meta_path=data_dir+'batches.meta'
-    args = {'meta':meta_path,'mean_img_size': 32,
-            'img_size': 32,'num_classes': 10,
-            'use_jpeg': 1,'color': "color"}
-
-    define_py_data_sources2(train_list=data_dir+"train.list",
-                            test_list=data_dir+'test.list',
-                            module='image_provider',
-                            obj='processData',
-                            args=args)
+    data_dir = 'data/cifar-out/batches/'
+    meta_path = data_dir + 'batches.meta'
+    args = {
+        'meta': meta_path,
+        'mean_img_size': 32,
+        'img_size': 32,
+        'num_classes': 10,
+        'use_jpeg': 1,
+        'color': "color"
+    }
+
+    define_py_data_sources2(
+        train_list="train.list",
+        test_list="train.list",
+        module='image_provider',
+        obj='processData',
+        args=args)

 ######################Algorithm Configuration #############
 settings(
-    batch_size = 128,
-    learning_rate = 0.1 / 128.0,
-    learning_method = MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
-)
+    batch_size=128,
+    learning_rate=0.1 / 128.0,
+    learning_method=MomentumOptimizer(0.9),
+    regularization=L2Regularization(0.0005 * 128))

 #######################Network Configuration #############
-data_size=3*32*32
-label_size=10
-img = data_layer(name='image',
-                 size=data_size)
+data_size = 3 * 32 * 32
+label_size = 10
+img = data_layer(name='image', size=data_size)
 # small_vgg is predefined in trainer_config_helpers.networks
-predict = small_vgg(input_image=img,
-                    num_channels=3,
-                    num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
 if not is_predict:
     lbl = data_layer(name="label", size=label_size)
...
This folder contains scripts used in PaddlePaddle introduction.
- use `bash train.sh` to train a simple linear regression model
- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
@@ -15,10 +15,10 @@
 from paddle.trainer.PyDataProvider2 import *
 import random

 # define data types of input: 2 real numbers
-@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
+@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
 def process(settings, input_file):
     for i in xrange(2000):
         x = random.random()
-        yield [x], [2*x+0.3]
+        yield [x], [2 * x + 0.3]
@@ -23,14 +23,17 @@ Usage:
 import numpy as np
 import os

 def load(file_name):
     with open(file_name, 'rb') as f:
         f.read(16)  # skip header for float type.
         return np.fromfile(f, dtype=np.float32)

 def main():
     print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
                                            load('output/pass-00029/b'))

 if __name__ == '__main__':
     main()
@@ -16,9 +16,14 @@ from paddle.trainer_config_helpers import *
 # 1. read data. Suppose you saved above python code as dataprovider.py
 data_file = 'empty.list'
-with open(data_file, 'w') as f: f.writelines(' ')
-define_py_data_sources2(train_list=data_file, test_list=None,
-        module='dataprovider', obj='process',args={})
+with open(data_file, 'w') as f:
+    f.writelines(' ')
+define_py_data_sources2(
+    train_list=data_file,
+    test_list=None,
+    module='dataprovider',
+    obj='process',
+    args={})

 # 2. learning algorithm
 settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
@@ -26,7 +31,11 @@ settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
 # 3. Network configuration
 x = data_layer(name='x', size=1)
 y = data_layer(name='y', size=1)
-y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+y_predict = fc_layer(
+    input=x,
+    param_attr=ParamAttr(name='w'),
+    size=1,
+    act=LinearActivation(),
+    bias_attr=ParamAttr(name='b'))
 cost = regression_cost(input=y_predict, label=y)
 outputs(cost)
@@ -13,9 +13,9 @@
 # limitations under the License.
 o = open("./" + "train.list", "w")
-o.write("./data/raw_data/train" +"\n")
+o.write("./data/raw_data/train" + "\n")
 o.close()
 o = open("./" + "test.list", "w")
-o.write("./data/raw_data/t10k" +"\n")
+o.write("./data/raw_data/t10k" + "\n")
 o.close()
\ No newline at end of file
@@ -19,4 +19,3 @@ done
 cd $DIR
 rm -f *.list
 python generate_list.py
@@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import *

 # Define a py data provider
-@provider(input_types={
-    'pixel': dense_vector(28 * 28),
-    'label': integer_value(10)
-})
+@provider(
+    input_types={'pixel': dense_vector(28 * 28),
+                 'label': integer_value(10)})
 def process(settings, filename):  # settings is not used currently.
     imgf = filename + "-images-idx3-ubyte"
     labelf = filename + "-labels-idx1-ubyte"
...
@@ -18,32 +18,29 @@ is_predict = get_config_arg("is_predict", bool, False)
 ####################Data Configuration ##################
 if not is_predict:
-    data_dir='./data/'
-    define_py_data_sources2(train_list= data_dir + 'train.list',
-                            test_list= data_dir + 'test.list',
-                            module='mnist_provider',
-                            obj='process')
+    data_dir = './data/'
+    define_py_data_sources2(
+        train_list=data_dir + 'train.list',
+        test_list=data_dir + 'test.list',
+        module='mnist_provider',
+        obj='process')

 ######################Algorithm Configuration #############
 settings(
-    batch_size = 128,
-    learning_rate = 0.1 / 128.0,
-    learning_method = MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
-)
+    batch_size=128,
+    learning_rate=0.1 / 128.0,
+    learning_method=MomentumOptimizer(0.9),
+    regularization=L2Regularization(0.0005 * 128))

 #######################Network Configuration #############
-data_size=1*28*28
-label_size=10
+data_size = 1 * 28 * 28
+label_size = 10
 img = data_layer(name='pixel', size=data_size)

 # small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img,
-                    num_channels=1,
-                    num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)
 if not is_predict:
     lbl = data_layer(name="label", size=label_size)
...
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Example: Example:
python extract_para.py --preModel PREMODEL --preDict PREDICT \ python extract_para.py --preModel PREMODEL --preDict PREDICT \
...@@ -29,6 +28,7 @@ Options: ...@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser from optparse import OptionParser
import struct import struct
def get_row_index(preDict, usrDict): def get_row_index(preDict, usrDict):
""" """
Get the row positions for all words in user dictionary from pre-trained dictionary. Get the row positions for all words in user dictionary from pre-trained dictionary.
...@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict): ...@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
pos.append(index[word]) pos.append(index[word])
return pos return pos
def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
paraDim):
""" """
Extract desired parameters from a pretrained embedding model based on user dictionary Extract desired parameters from a pretrained embedding model based on user dictionary
""" """
...@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim) ...@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
print "extract parameters finish, total", len(rowIndex), "lines" print "extract parameters finish, total", len(rowIndex), "lines"
fi.close() fi.close()
def main(): def main():
""" """
Main entry for running paraconvert.py Main entry for running paraconvert.py
...@@ -78,19 +81,33 @@ def main(): ...@@ -78,19 +81,33 @@ def main():
"python %prog --preModel PREMODEL --preDict PREDICT" \ "python %prog --preModel PREMODEL --preDict PREDICT" \
" --usrModel USRMODEL --usrDict USRDICT -d DIM" " --usrModel USRMODEL --usrDict USRDICT -d DIM"
parser = OptionParser(usage) parser = OptionParser(usage)
parser.add_option("--preModel", action="store", dest="preModel", parser.add_option(
help="the name of pretrained embedding model") "--preModel",
parser.add_option("--preDict", action="store", dest="preDict", action="store",
help="the name of pretrained dictionary") dest="preModel",
parser.add_option("--usrModel", action="store", dest="usrModel", help="the name of pretrained embedding model")
help="the name of output usr embedding model") parser.add_option(
parser.add_option("--usrDict", action="store", dest="usrDict", "--preDict",
help="the name of user specified dictionary") action="store",
parser.add_option("-d", action="store", dest="dim", dest="preDict",
help="dimension of parameter") help="the name of pretrained dictionary")
parser.add_option(
"--usrModel",
action="store",
dest="usrModel",
help="the name of output usr embedding model")
parser.add_option(
"--usrDict",
action="store",
dest="usrDict",
help="the name of user specified dictionary")
parser.add_option(
"-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
extract_parameters_by_usrDict(options.preModel, options.preDict, extract_parameters_by_usrDict(options.preModel, options.preDict,
options.usrModel, options.usrDict, int(options.dim)) options.usrModel, options.usrDict,
int(options.dim))
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Example: Example:
python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
...@@ -29,6 +28,7 @@ Options: ...@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser from optparse import OptionParser
import struct import struct
def binary2text(input, output, paraDim): def binary2text(input, output, paraDim):
""" """
Convert a binary parameter file of embedding model to be a text file. Convert a binary parameter file of embedding model to be a text file.
...@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim): ...@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim):
fo.close() fo.close()
print "binary2text finish, total", line, "lines" print "binary2text finish, total", line, "lines"
def get_para_count(input): def get_para_count(input):
""" """
Compute the total number of embedding parameters in input text file. Compute the total number of embedding parameters in input text file.
input: the name of input text file input: the name of input text file
""" """
numRows = 1 numRows = 1
paraDim = 0 paraDim = 0
with open(input) as f: with open(input) as f:
line = f.readline() line = f.readline()
...@@ -90,6 +91,7 @@ def get_para_count(input): ...@@ -90,6 +91,7 @@ def get_para_count(input):
numRows += 1 numRows += 1
return numRows * paraDim return numRows * paraDim
def text2binary(input, output, paddle_head=True): def text2binary(input, output, paddle_head=True):
""" """
Convert a text parameter file of embedding model to be a binary file. Convert a text parameter file of embedding model to be a binary file.
...@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True): ...@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
fo.close() fo.close()
print "text2binary finish, total", count, "lines" print "text2binary finish, total", count, "lines"
def main(): def main():
""" """
Main entry for running paraconvert.py Main entry for running paraconvert.py
...@@ -131,21 +134,26 @@ def main(): ...@@ -131,21 +134,26 @@ def main():
"python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \ "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
"python %prog --t2b -i INPUT -o OUTPUT" "python %prog --t2b -i INPUT -o OUTPUT"
parser = OptionParser(usage) parser = OptionParser(usage)
parser.add_option("--b2t", action="store_true", parser.add_option(
help="convert parameter file of embedding model from binary to text") "--b2t",
parser.add_option("--t2b", action="store_true", action="store_true",
help="convert parameter file of embedding model from text to binary") help="convert parameter file of embedding model from binary to text")
parser.add_option("-i", action="store", dest="input", parser.add_option(
help="input parameter file name") "--t2b",
parser.add_option("-o", action="store", dest="output", action="store_true",
help="output parameter file name") help="convert parameter file of embedding model from text to binary")
parser.add_option("-d", action="store", dest="dim", parser.add_option(
help="dimension of parameter") "-i", action="store", dest="input", help="input parameter file name")
parser.add_option(
"-o", action="store", dest="output", help="output parameter file name")
parser.add_option(
"-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
if options.b2t: if options.b2t:
binary2text(options.input, options.output, options.dim) binary2text(options.input, options.output, options.dim)
if options.t2b: if options.t2b:
text2binary(options.input, options.output) text2binary(options.input, options.output)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter ...@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config from paddle.trainer.config_parser import parse_config
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO) logging.getLogger().setLevel(logging.INFO)
class ImageClassifier(): class ImageClassifier():
def __init__(self, train_conf, model_dir=None, def __init__(self,
resize_dim=256, crop_dim=224, train_conf,
model_dir=None,
resize_dim=256,
crop_dim=224,
use_gpu=True, use_gpu=True,
mean_file=None, mean_file=None,
output_layer=None, output_layer=None,
oversample=False, is_color=True): oversample=False,
is_color=True):
""" """
train_conf: network configure. train_conf: network configure.
model_dir: string, directory of model. model_dir: string, directory of model.
...@@ -62,24 +68,25 @@ class ImageClassifier(): ...@@ -62,24 +68,25 @@ class ImageClassifier():
assert isinstance(self.output_layer, basestring) assert isinstance(self.output_layer, basestring)
self.output_layer = self.output_layer.split(",") self.output_layer = self.output_layer.split(",")
self.transformer = image_util.ImageTransformer(is_color = is_color) self.transformer = image_util.ImageTransformer(is_color=is_color)
self.transformer.set_transpose((2,0,1)) self.transformer.set_transpose((2, 0, 1))
self.transformer.set_channel_swap((2,1,0)) self.transformer.set_channel_swap((2, 1, 0))
self.mean_file = mean_file self.mean_file = mean_file
if self.mean_file is not None: if self.mean_file is not None:
mean = np.load(self.mean_file)['data_mean'] mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1]) mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
self.transformer.set_mean(mean) # mean pixel self.transformer.set_mean(mean) # mean pixel
else: else:
# if you use three mean values, set like: # if you use three mean values, set like:
# these three mean values are calculated from ImageNet. # these three mean values are calculated from ImageNet.
self.transformer.set_mean(np.array([103.939,116.779,123.68])) self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu)) conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
conf = parse_config(train_conf, conf_args) conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu))) swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine) assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir) self.network.loadParameters(self.model_dir)
...@@ -105,14 +112,14 @@ class ImageClassifier(): ...@@ -105,14 +112,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim # image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim) image = image_util.resize_image(image, self.resize_dim)
image = np.array(image) image = np.array(image)
input = np.zeros((1, image.shape[0], image.shape[1], 3), input = np.zeros(
dtype=np.float32) (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32) input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims) input = image_util.oversample(input, self.crop_dims)
else: else:
image = image.resize(self.crop_dims, Image.ANTIALIAS) image = image.resize(self.crop_dims, Image.ANTIALIAS)
input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3), input = np.zeros(
dtype=np.float32) (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32) input[0] = np.array(image).astype(np.float32)
data_in = [] data_in = []
...@@ -172,7 +179,7 @@ class ImageClassifier(): ...@@ -172,7 +179,7 @@ class ImageClassifier():
logging.info("Label of %s is: %d", image, lab[0]) logging.info("Label of %s is: %d", image, lab[0])
return results return results
def extract(self, data_file, output_dir, batch_size = 10000): def extract(self, data_file, output_dir, batch_size=10000):
""" """
extract and save features of output layers, which are extract and save features of output layers, which are
specified in Outputs() in the network config. specified in Outputs() in the network config.
...@@ -197,7 +204,7 @@ class ImageClassifier(): ...@@ -197,7 +204,7 @@ class ImageClassifier():
image_feature[file_name] = feature image_feature[file_name] = feature
sample_num += 1 sample_num += 1
if sample_num == batch_size: if sample_num == batch_size:
batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num)) batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name) self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num) logging.info('Finish batch %d', batch_num)
batch_num += 1 batch_num += 1
...@@ -206,7 +213,7 @@ class ImageClassifier(): ...@@ -206,7 +213,7 @@ class ImageClassifier():
if idx % 1000 == 0: if idx % 1000 == 0:
logging.info('%d/%d, %s', idx, len(image_files), file_name) logging.info('%d/%d, %s', idx, len(image_files), file_name)
if sample_num > 0: if sample_num > 0:
batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num)) batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name) self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num) logging.info('Finish batch %d', batch_num)
logging.info('Done: make image feature batch') logging.info('Done: make image feature batch')
...@@ -215,38 +222,64 @@ class ImageClassifier(): ...@@ -215,38 +222,64 @@ class ImageClassifier():
of = open(file, 'wb') of = open(file, 'wb')
cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
def option_parser(): def option_parser():
""" """
Main entry for predicting Main entry for predicting
""" """
usage = "%prog -c config -i data_list -w model_dir [options]" usage = "%prog -c config -i data_list -w model_dir [options]"
parser = OptionParser(usage="usage: %s" % usage) parser = OptionParser(usage="usage: %s" % usage)
parser.add_option("-j", "--job", parser.add_option(
action="store", dest="job_type", "-j",
help="job type: predict, extract\ "--job",
action="store",
dest="job_type",
help="job type: predict, extract\
predict: predicting,\ predict: predicting,\
extract: extract features") extract: extract features")
parser.add_option("-c", "--conf", parser.add_option(
action="store", dest="train_conf", "-c",
help="network config") "--conf",
parser.add_option("-i", "--data", action="store",
action="store", dest="data_file", dest="train_conf",
help="image list") help="network config")
parser.add_option("-w", "--model", parser.add_option(
action="store", dest="model_path", "-i", "--data", action="store", dest="data_file", help="image list")
default=None, help="model path") parser.add_option(
parser.add_option("-g", "--use_gpu", action="store", "-w",
dest="use_gpu", default=True, "--model",
help="Whether to use gpu mode.") action="store",
parser.add_option("-o", "--output_dir", dest="model_path",
action="store", dest="output_dir", default=None,
default="output", help="output path") help="model path")
parser.add_option("-m", "--mean", action="store", parser.add_option(
dest="mean", default=None, "-g",
help="mean file.") "--use_gpu",
parser.add_option("-p", "--multi_crop", action="store_true", action="store",
dest="multi_crop", default=False, dest="use_gpu",
help="Wether to use multiple crops on image.") default=True,
help="Whether to use gpu mode.")
parser.add_option(
"-o",
"--output_dir",
action="store",
dest="output_dir",
default="output",
help="output path")
parser.add_option(
"-m",
"--mean",
action="store",
dest="mean",
default=None,
help="mean file.")
parser.add_option(
"-p",
"--multi_crop",
action="store_true",
dest="multi_crop",
default=False,
help="Wether to use multiple crops on image.")
parser.add_option("-l", "--output_layer", action="store", parser.add_option("-l", "--output_layer", action="store",
dest="output_layer", default=None, dest="output_layer", default=None,
help="--job=extract, specify layers to extract "\ help="--job=extract, specify layers to extract "\
...@@ -254,24 +287,26 @@ def option_parser(): ...@@ -254,24 +287,26 @@ def option_parser():
"classification probability, output in resnet.py.") "classification probability, output in resnet.py.")
return parser.parse_args() return parser.parse_args()
def main(): def main():
""" """
1. parse input arguments. 1. parse input arguments.
2. predict or extract features according to the job type. 2. predict or extract features according to the job type.
""" """
options, args = option_parser() options, args = option_parser()
obj = ImageClassifier(options.train_conf, obj = ImageClassifier(
options.model_path, options.train_conf,
use_gpu=options.use_gpu, options.model_path,
mean_file=options.mean, use_gpu=options.use_gpu,
output_layer=options.output_layer, mean_file=options.mean,
oversample=options.multi_crop) output_layer=options.output_layer,
oversample=options.multi_crop)
if options.job_type == "predict": if options.job_type == "predict":
obj.predict(options.data_file) obj.predict(options.data_file)
elif options.job_type == "extract": elif options.job_type == "extract":
obj.extract(options.data_file, obj.extract(options.data_file, options.output_dir)
options.output_dir)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
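For readers skimming this diff, a minimal programmatic sketch of the same flow as main() above; the paths and the "output" layer name are placeholders chosen for illustration, and PaddlePaddle's python bindings (py_paddle / swig_paddle) must be installed for it to run.

# Placeholder paths -- substitute your own config, model directory and image list.
classifier = ImageClassifier(
    train_conf="resnet.py",                      # placeholder network config path
    model_dir="model/resnet_50",                 # placeholder model directory
    use_gpu=False,
    mean_file="model/mean_meta_224/mean.meta",   # placeholder mean file
    output_layer="output",                       # comma-separated layer names, as parsed above
    oversample=False)

classifier.predict("data/test.list")             # classify every image in the list ...
classifier.extract("data/test.list", "output")   # ... or dump output-layer features in cPickle batches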
...@@ -11,4 +11,3 @@ ...@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
...@@ -16,8 +16,7 @@ from paddle.utils.image_util import * ...@@ -16,8 +16,7 @@ from paddle.utils.image_util import *
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
def hook(settings, image_size, crop_size, color, file_list, def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
is_train, **kwargs):
""" """
Description: Init with a list of data files. Description: Init with a list of data files.
file_list is the list of input file names. file_list is the list of input file names.
...@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list, ...@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list,
sz = settings.crop_size * settings.crop_size sz = settings.crop_size * settings.crop_size
settings.img_mean = np.zeros(sz * 3, dtype=np.single) settings.img_mean = np.zeros(sz * 3, dtype=np.single)
for idx, value in enumerate(settings.mean_value): for idx, value in enumerate(settings.mean_value):
settings.img_mean[idx * sz: (idx + 1) * sz] = value settings.img_mean[idx * sz:(idx + 1) * sz] = value
settings.img_mean = settings.img_mean.reshape(3, settings.crop_size, settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
settings.crop_size) settings.crop_size)
...@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list, ...@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list,
settings.input_types = [ settings.input_types = [
dense_vector(settings.img_input_size), # image feature dense_vector(settings.img_input_size), # image feature
integer_value(1)] # labels integer_value(1)
] # labels
settings.logger.info('Image short side: %s', settings.img_size) settings.logger.info('Image short side: %s', settings.img_size)
settings.logger.info('Crop size: %s', settings.crop_size) settings.logger.info('Crop size: %s', settings.crop_size)
...@@ -97,9 +97,6 @@ def processData(settings, file_list): ...@@ -97,9 +97,6 @@ def processData(settings, file_list):
# swap channel # swap channel
if settings.is_swap_channel: if settings.is_swap_channel:
img = img[settings.swap_channel, :, :] img = img[settings.swap_channel, :, :]
img_feat = preprocess_img(img, img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
settings.img_mean, settings.is_train, settings.color)
settings.crop_size,
settings.is_train,
settings.color)
yield img_feat.tolist(), int(lab.strip()) yield img_feat.tolist(), int(lab.strip())
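As a standalone illustration of how the hook above expands the three per-channel mean values into a full mean image: only numpy is needed, and the crop size and mean values are taken from the comments in this demo.

import numpy as np

crop_size = 224
mean_value = [103.939, 116.779, 123.68]    # the ImageNet means quoted above

sz = crop_size * crop_size
img_mean = np.zeros(sz * 3, dtype=np.single)
for idx, value in enumerate(mean_value):
    img_mean[idx * sz:(idx + 1) * sz] = value      # fill one channel plane
img_mean = img_mean.reshape(3, crop_size, crop_size)

# Every pixel of channel idx now holds mean_value[idx].
assert img_mean[2, 100, 100] == np.float32(123.68)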
...@@ -17,9 +17,11 @@ import sys ...@@ -17,9 +17,11 @@ import sys
import cPickle import cPickle
import logging import logging
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO) logging.getLogger().setLevel(logging.INFO)
def load_feature_c(file): def load_feature_c(file):
""" """
Load feature extracted by C++ interface. Load feature extracted by C++ interface.
...@@ -30,14 +32,15 @@ def load_feature_c(file): ...@@ -30,14 +32,15 @@ def load_feature_c(file):
f = open(file, 'r') f = open(file, 'r')
for line in f: for line in f:
sample = [] sample = []
for slot in line.strip().split(";"): for slot in line.strip().split(";"):
fea = [float(val) for val in slot.strip().split()] fea = [float(val) for val in slot.strip().split()]
if fea: if fea:
sample.append(fea) sample.append(fea)
features.append(sample) features.append(sample)
f.close() f.close()
return features return features
def load_feature_py(feature_dir): def load_feature_py(feature_dir):
""" """
Load feature extracted by python interface. Load feature extracted by python interface.
...@@ -54,6 +57,7 @@ def load_feature_py(feature_dir): ...@@ -54,6 +57,7 @@ def load_feature_py(feature_dir):
logging.info('Load feature file %s', file_name) logging.info('Load feature file %s', file_name)
return features return features
if __name__ == '__main__': if __name__ == '__main__':
print load_feature_py(sys.argv[1]) print load_feature_py(sys.argv[1])
#print load_feature_c(sys.argv[1]) #print load_feature_c(sys.argv[1])
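A toy example of the text layout that load_feature_c above parses: one sample per line, one ';'-separated block per output layer, values separated by spaces. The numbers here are made up.

# One made-up line in the C++ feature-dump format.
line = "0.1 0.9 ; 1.0 2.0 3.0"

sample = []
for slot in line.strip().split(";"):
    fea = [float(val) for val in slot.strip().split()]
    if fea:
        sample.append(fea)

print(sample)    # [[0.1, 0.9], [1.0, 2.0, 3.0]]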
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# limitations under the License. # limitations under the License.
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
""" """
paper: https://arxiv.org/abs/1512.03385 paper: https://arxiv.org/abs/1512.03385
""" """
...@@ -28,15 +27,19 @@ if not is_predict and data_provider: ...@@ -28,15 +27,19 @@ if not is_predict and data_provider:
# mean.meta size : 3 x 224 x 224. # mean.meta size : 3 x 224 x 224.
# If you use three mean values, set like: # If you use three mean values, set like:
# "mean_value:103.939,116.779,123.68;" # "mean_value:103.939,116.779,123.68;"
args={ args = {
'mean_meta': "model/mean_meta_224/mean.meta", 'mean_meta': "model/mean_meta_224/mean.meta",
'image_size': 224, 'crop_size': 224, 'image_size': 224,
'color': True,'swap_channel:': [2, 1, 0]} 'crop_size': 224,
define_py_data_sources2(train_list, 'color': True,
'example/test.list', 'swap_channel:': [2, 1, 0]
module="example.image_list_provider", }
obj="processData", define_py_data_sources2(
args=args) train_list,
'example/test.list',
module="example.image_list_provider",
obj="processData",
args=args)
batch_size = 1 batch_size = 1
learning_rate = 0.1 / batch_size learning_rate = 0.1 / batch_size
...@@ -54,12 +57,16 @@ Settings( ...@@ -54,12 +57,16 @@ Settings(
learning_method='momentum', learning_method='momentum',
learning_rate_decay_a=0.5, learning_rate_decay_a=0.5,
learning_rate_decay_b=1200000 * 10, learning_rate_decay_b=1200000 * 10,
learning_rate_schedule="discexp", learning_rate_schedule="discexp", )
)
def conv_bn_layer(name, input, filter_size, num_filters, def conv_bn_layer(name,
stride, padding, channels=None, input,
filter_size,
num_filters,
stride,
padding,
channels=None,
active_type=ReluActivation()): active_type=ReluActivation()):
""" """
A wrapper for conv layer with batch normalization layers. A wrapper for conv layer with batch normalization layers.
...@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters, ...@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters,
conv layer has no activation. conv layer has no activation.
""" """
tmp = img_conv_layer(name=name + "_conv", tmp = img_conv_layer(
input=input, name=name + "_conv",
filter_size=filter_size, input=input,
num_channels=channels, filter_size=filter_size,
num_filters=num_filters, num_channels=channels,
stride=stride, num_filters=num_filters,
padding=padding, stride=stride,
act=LinearActivation(), padding=padding,
bias_attr=False) act=LinearActivation(),
return batch_norm_layer(name=name + "_bn", bias_attr=False)
input=tmp, return batch_norm_layer(
act=active_type, name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
use_global_stats=is_test)
def bottleneck_block(name, input, num_filters1, num_filters2): def bottleneck_block(name, input, num_filters1, num_filters2):
...@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2): ...@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2):
Last conv_bn_layer has no activation. Last conv_bn_layer has no activation.
Addto layer has relu activation. Addto layer has relu activation.
""" """
last_name = conv_bn_layer(name=name + '_branch2a', last_name = conv_bn_layer(
input=input, name=name + '_branch2a',
filter_size=1, input=input,
num_filters=num_filters1, filter_size=1,
stride=1, num_filters=num_filters1,
padding=0) stride=1,
last_name = conv_bn_layer(name=name + '_branch2b', padding=0)
input=last_name, last_name = conv_bn_layer(
filter_size=3, name=name + '_branch2b',
num_filters=num_filters1, input=last_name,
stride=1, filter_size=3,
padding=1) num_filters=num_filters1,
last_name = conv_bn_layer(name=name + '_branch2c', stride=1,
input=last_name, padding=1)
filter_size=1, last_name = conv_bn_layer(
num_filters=num_filters2, name=name + '_branch2c',
stride=1, input=last_name,
padding=0, filter_size=1,
active_type=LinearActivation()) num_filters=num_filters2,
stride=1,
return addto_layer(name=name + "_addto", padding=0,
input=[input, last_name], active_type=LinearActivation())
act=ReluActivation())
return addto_layer(
name=name + "_addto", input=[input, last_name], act=ReluActivation())
def mid_projection(name, input, num_filters1, num_filters2, stride=2): def mid_projection(name, input, num_filters1, num_filters2, stride=2):
...@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2): ...@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2):
branch2x: bottleneck building block, shortcuts are identity. branch2x: bottleneck building block, shortcuts are identity.
""" """
# stride = 2 # stride = 2
branch1 = conv_bn_layer(name=name + '_branch1', branch1 = conv_bn_layer(
input=input, name=name + '_branch1',
filter_size=1, input=input,
num_filters=num_filters2, filter_size=1,
stride=stride, num_filters=num_filters2,
padding=0, stride=stride,
active_type=LinearActivation()) padding=0,
active_type=LinearActivation())
last_name = conv_bn_layer(name=name + '_branch2a',
input=input, last_name = conv_bn_layer(
filter_size=1, name=name + '_branch2a',
num_filters=num_filters1, input=input,
stride=stride, filter_size=1,
padding=0) num_filters=num_filters1,
last_name = conv_bn_layer(name=name + '_branch2b', stride=stride,
input=last_name, padding=0)
filter_size=3, last_name = conv_bn_layer(
num_filters=num_filters1, name=name + '_branch2b',
stride=1, input=last_name,
padding=1) filter_size=3,
num_filters=num_filters1,
last_name = conv_bn_layer(name=name + '_branch2c', stride=1,
input=last_name, padding=1)
filter_size=1,
num_filters=num_filters2, last_name = conv_bn_layer(
stride=1, name=name + '_branch2c',
padding=0, input=last_name,
active_type=LinearActivation()) filter_size=1,
num_filters=num_filters2,
return addto_layer(name=name + "_addto", stride=1,
input=[branch1, last_name], padding=0,
act=ReluActivation()) active_type=LinearActivation())
return addto_layer(
name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3): def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
...@@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3): ...@@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
# For ImageNet # For ImageNet
# conv1: 112x112 # conv1: 112x112
img = data_layer(name='input', size=224 * 224 * 3) img = data_layer(name='input', size=224 * 224 * 3)
tmp = conv_bn_layer("conv1", img, tmp = conv_bn_layer(
filter_size=7, "conv1",
channels=3, img,
num_filters=64, filter_size=7,
stride=2, channels=3,
padding=3) num_filters=64,
stride=2,
padding=3)
tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2) tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
# conv2_x: 56x56 # conv2_x: 56x56
tmp = mid_projection(name="res2_1", tmp = mid_projection(
input=tmp, name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
num_filters1=64,
num_filters2=256,
stride=1)
for i in xrange(2, res2_num + 1, 1): for i in xrange(2, res2_num + 1, 1):
tmp = bottleneck_block(name="res2_" + str(i), tmp = bottleneck_block(
input=tmp, name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
num_filters1=64,
num_filters2=256)
# conv3_x: 28x28 # conv3_x: 28x28
tmp = mid_projection(name="res3_1", tmp = mid_projection(
input=tmp, name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
num_filters1=128,
num_filters2=512)
for i in xrange(2, res3_num + 1, 1): for i in xrange(2, res3_num + 1, 1):
tmp = bottleneck_block(name="res3_" + str(i), tmp = bottleneck_block(
input=tmp, num_filters1=128, name="res3_" + str(i),
num_filters2=512) input=tmp,
num_filters1=128,
num_filters2=512)
# conv4_x: 14x14 # conv4_x: 14x14
tmp = mid_projection(name="res4_1", input=tmp, tmp = mid_projection(
num_filters1=256, num_filters2=1024) name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
for i in xrange(2, res4_num + 1, 1): for i in xrange(2, res4_num + 1, 1):
tmp = bottleneck_block(name="res4_" + str(i), tmp = bottleneck_block(
input=tmp, name="res4_" + str(i),
num_filters1=256, input=tmp,
num_filters2=1024) num_filters1=256,
num_filters2=1024)
# conv5_x: 7x7 # conv5_x: 7x7
tmp = mid_projection(name="res5_1", input=tmp, tmp = mid_projection(
num_filters1=512, num_filters2=2048) name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
for i in xrange(2, res5_num + 1, 1): for i in xrange(2, res5_num + 1, 1):
tmp = bottleneck_block(name="res5_" + str(i), tmp = bottleneck_block(
input=tmp, num_filters1=512, name="res5_" + str(i),
num_filters2=2048) input=tmp,
num_filters1=512,
tmp = img_pool_layer(name='avgpool', num_filters2=2048)
input=tmp,
pool_size=7, tmp = img_pool_layer(
stride=1, name='avgpool',
pool_type=AvgPooling()) input=tmp,
pool_size=7,
output = fc_layer(name='output', stride=1,
input=tmp, pool_type=AvgPooling())
size=1000,
act=SoftmaxActivation()) output = fc_layer(
name='output', input=tmp, size=1000, act=SoftmaxActivation())
if not is_predict: if not is_predict:
classification_cost(input=output, label=data_layer(name='label', classification_cost(
size=1)) input=output, label=data_layer(
name='label', size=1))
def res_net_50(): def res_net_50():
......
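As a quick sanity check of the res_net_50 name: with the default block counts above (res2_num=3, res3_num=4, res4_num=6, res5_num=3), each bottleneck block contributes three convolutions, and adding conv1 and the final fc layer gives the 50 weighted layers of ResNet-50. A back-of-the-envelope count, not code from this config:

block_counts = [3, 4, 6, 3]        # res2_num, res3_num, res4_num, res5_num
convs_per_block = 3                # branch2a, branch2b, branch2c
depth = 1 + convs_per_block * sum(block_counts) + 1   # conv1 + bottlenecks + fc
print(depth)                       # 50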
...@@ -22,27 +22,32 @@ from py_paddle import DataProviderConverter ...@@ -22,27 +22,32 @@ from py_paddle import DataProviderConverter
from paddle.trainer.PyDataProvider2 \ from paddle.trainer.PyDataProvider2 \
import integer_value, integer_value_sequence, sparse_binary_vector import integer_value, integer_value_sequence, sparse_binary_vector
def parse_arguments(): def parse_arguments():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--train_data", parser.add_argument(
type=str, required=False, help="train data file") "--train_data", type=str, required=False, help="train data file")
parser.add_argument("--test_data", type=str, help="test data file") parser.add_argument("--test_data", type=str, help="test data file")
parser.add_argument("--config", parser.add_argument(
type=str, required=True, help="config file name") "--config", type=str, required=True, help="config file name")
parser.add_argument("--dict_file", required=True, help="dictionary file") parser.add_argument("--dict_file", required=True, help="dictionary file")
parser.add_argument("--seq", parser.add_argument(
default=1, type=int, "--seq", default=1, type=int, help="whether use sequence training")
help="whether use sequence training") parser.add_argument(
parser.add_argument("--use_gpu", default=0, type=int, "--use_gpu", default=0, type=int, help="whether use GPU for training")
help="whether use GPU for training") parser.add_argument(
parser.add_argument("--trainer_count", default=1, type=int, "--trainer_count",
help="Number of threads for training") default=1,
parser.add_argument("--num_passes", default=5, type=int, type=int,
help="Number of training passes") help="Number of threads for training")
parser.add_argument(
"--num_passes", default=5, type=int, help="Number of training passes")
return parser.parse_args() return parser.parse_args()
UNK_IDX = 0 UNK_IDX = 0
def load_data(file_name, word_dict): def load_data(file_name, word_dict):
with open(file_name, 'r') as f: with open(file_name, 'r') as f:
for line in f: for line in f:
...@@ -51,6 +56,7 @@ def load_data(file_name, word_dict): ...@@ -51,6 +56,7 @@ def load_data(file_name, word_dict):
word_slot = [word_dict.get(w, UNK_IDX) for w in words] word_slot = [word_dict.get(w, UNK_IDX) for w in words]
yield word_slot, int(label) yield word_slot, int(label)
def load_dict(dict_file): def load_dict(dict_file):
word_dict = dict() word_dict = dict()
with open(dict_file, 'r') as f: with open(dict_file, 'r') as f:
...@@ -59,6 +65,7 @@ def load_dict(dict_file): ...@@ -59,6 +65,7 @@ def load_dict(dict_file):
word_dict[w] = i word_dict[w] = i
return word_dict return word_dict
def main(): def main():
options = parse_arguments() options = parse_arguments()
api.initPaddle("--use_gpu=%s" % options.use_gpu, api.initPaddle("--use_gpu=%s" % options.use_gpu,
...@@ -86,9 +93,9 @@ def main(): ...@@ -86,9 +93,9 @@ def main():
# create a data converter which converts data to PaddlePaddle # create a data converter which converts data to PaddlePaddle
# internal format # internal format
input_types = [ input_types = [
integer_value_sequence(len(word_dict)) if options.seq integer_value_sequence(len(word_dict)) if options.seq else
else sparse_binary_vector(len(word_dict)), sparse_binary_vector(len(word_dict)), integer_value(2)
integer_value(2)] ]
converter = DataProviderConverter(input_types) converter = DataProviderConverter(input_types)
batch_size = trainer_config.opt_config.batch_size batch_size = trainer_config.opt_config.batch_size
...@@ -102,7 +109,7 @@ def main(): ...@@ -102,7 +109,7 @@ def main():
trainer.trainOneDataBatch(size, converter(batch)) trainer.trainOneDataBatch(size, converter(batch))
trainer.finishTrainPass() trainer.finishTrainPass()
if test_dataset: if test_dataset:
trainer.startTestPeriod(); trainer.startTestPeriod()
for pos in xrange(0, len(test_dataset), batch_size): for pos in xrange(0, len(test_dataset), batch_size):
batch = itertools.islice(test_dataset, pos, pos + batch_size) batch = itertools.islice(test_dataset, pos, pos + batch_size)
size = min(batch_size, len(test_dataset) - pos) size = min(batch_size, len(test_dataset) - pos)
...@@ -110,5 +117,6 @@ def main(): ...@@ -110,5 +117,6 @@ def main():
trainer.finishTestPeriod() trainer.finishTestPeriod()
trainer.finishTrain() trainer.finishTrain()
if __name__ == '__main__': if __name__ == '__main__':
main() main()
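To make the converter comment above concrete, this is roughly what one mini-batch looks like before conversion when --seq=1; the three-word dictionary and the two samples are invented for illustration.

# Samples in the (word_slot, label) shape that load_data() above yields.
word_dict = {"good": 0, "bad": 1, "movie": 2}
batch = [
    ([0, 2], 1),    # "good movie" -> positive
    ([1, 2], 0),    # "bad movie"  -> negative
]
# With --seq=1 the matching input_types are
#     [integer_value_sequence(len(word_dict)), integer_value(2)]
# and converter(batch) packs this nested-list form into the internal
# Arguments object that trainer.trainOneDataBatch() consumes.
for word_slot, label in batch:
    print(word_slot, label)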
...@@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import * ...@@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import *
# id of the word not in dictionary # id of the word not in dictionary
UNK_IDX = 0 UNK_IDX = 0
# initializer is called by the framework during initialization. # initializer is called by the framework during initialization.
# It allows the user to describe the data types and set up the # It allows the user to describe the data types and set up the
# necessary data structure for later use. # necessary data structure for later use.
...@@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs): ...@@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs):
# The second input is an integer. It represents the category id of the # The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset. # sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative) # (1 for positive and 0 for negative)
integer_value(2)] integer_value(2)
]
# Declaring a data provider. It has an initializer 'data_initialzer'. # Declaring a data provider. It has an initializer 'data_initialzer'.
# It will cache the generated data of the first pass in memory, so that # It will cache the generated data of the first pass in memory, so that
...@@ -69,9 +72,8 @@ def process(settings, file_name): ...@@ -69,9 +72,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs): def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary settings.word_dict = dictionary
settings.input_types = [ settings.input_types = [sparse_binary_vector(len(dictionary))]
sparse_binary_vector(len(dictionary))
]
# Declaring a data provider for prediction. The difference from process # Declaring a data provider for prediction. The difference from process
# is that no label is generated. # is that no label is generated.
......
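Assembling the comments above into one place, a minimal bag-of-words provider would look roughly like this; the exact line format of the data files is elided in this diff, so the "<label>\t<comment>" split below is an assumption for illustration only.

from paddle.trainer.PyDataProvider2 import *

UNK_IDX = 0    # id of the word not in dictionary, as above

def initializer(settings, dictionary, **kwargs):
    settings.word_dict = dictionary
    # one sparse binary slot for the words, one integer slot for the two labels
    settings.input_types = [sparse_binary_vector(len(dictionary)), integer_value(2)]

@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
    with open(file_name, 'r') as f:
        for line in f:
            # assumed format: "<label>\t<space-separated words>"
            label, comment = line.strip().split('\t')
            words = comment.split()
            # a sparse_binary_vector slot is fed the indices of its non-zeros
            word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words]
            yield word_vector, int(label)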
...@@ -24,7 +24,8 @@ def initializer(settings, dictionary, **kwargs): ...@@ -24,7 +24,8 @@ def initializer(settings, dictionary, **kwargs):
# The values of the integers range from 0 to len(dictionary)-1 # The values of the integers range from 0 to len(dictionary)-1
integer_value_sequence(len(dictionary)), integer_value_sequence(len(dictionary)),
# Define the second input for label id # Define the second input for label id
integer_value(2)] integer_value(2)
]
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM) @provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
...@@ -40,7 +41,8 @@ def process(settings, file_name): ...@@ -40,7 +41,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs): def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary settings.word_dict = dictionary
settings.input_types = [ settings.input_types = [
integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE) integer_value(
len(dictionary), seq_type=SequenceType.SEQUENCE)
] ]
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
1. (remove HTML before or not) tokenizing 1. (remove HTML before or not) tokenizing
2. pos sample : rating score 5; neg sample: rating score 1-2. 2. pos sample : rating score 5; neg sample: rating score 1-2.
...@@ -35,7 +34,8 @@ import multiprocessing ...@@ -35,7 +34,8 @@ import multiprocessing
batch_size = 5000 batch_size = 5000
word_count = {} word_count = {}
num_tokenize = max(1, multiprocessing.cpu_count() - 2) # parse + tokenize + save num_tokenize = max(1,
multiprocessing.cpu_count() - 2) # parse + tokenize + save
max_queue_size = 8 max_queue_size = 8
parse_queue = Queue(maxsize=max_queue_size + num_tokenize) parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize) tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)
......
...@@ -21,14 +21,21 @@ ...@@ -21,14 +21,21 @@
set -e set -e
export LC_ALL=C export LC_ALL=C
UNAME_STR=`uname`
if [ ${UNAME_STR} == 'Linux' ]; then
SHUF_PROG='shuf'
else
SHUF_PROG='gshuf'
fi
mkdir -p data/tmp mkdir -p data/tmp
python preprocess.py -i data/reviews_Electronics_5.json.gz python preprocess.py -i data/reviews_Electronics_5.json.gz
# uniq and shuffle # uniq and shuffle
cd data/tmp cd data/tmp
echo 'uniq and shuffle...' echo 'uniq and shuffle...'
cat pos_*|sort|uniq|shuf> pos.shuffed cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
cat neg_*|sort|uniq|shuf> neg.shuffed cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
min_len=`sed -n '$=' neg.shuffed` min_len=`sed -n '$=' neg.shuffed`
test_num=$((min_len/10)) test_num=$((min_len/10))
...@@ -42,8 +49,8 @@ head -n$train_num neg.shuffed >train.neg ...@@ -42,8 +49,8 @@ head -n$train_num neg.shuffed >train.neg
tail -n$test_num pos.shuffed >test.pos tail -n$test_num pos.shuffed >test.pos
tail -n$test_num neg.shuffed >test.neg tail -n$test_num neg.shuffed >test.neg
cat train.pos train.neg|shuf>../train.txt cat train.pos train.neg | ${SHUF_PROG} >../train.txt
cat test.pos test.neg|shuf>../test.txt cat test.pos test.neg | ${SHUF_PROG} >../test.txt
cd - cd -
echo 'data/train.txt' > data/train.list echo 'data/train.txt' > data/train.list
......
...@@ -20,6 +20,7 @@ cfg=trainer_config.lr.py ...@@ -20,6 +20,7 @@ cfg=trainer_config.lr.py
#cfg=trainer_config.lstm.py #cfg=trainer_config.lstm.py
#cfg=trainer_config.bidi-lstm.py #cfg=trainer_config.bidi-lstm.py
#cfg=trainer_config.db-lstm.py #cfg=trainer_config.db-lstm.py
#cfg=trainer_config.resnet-lstm.py
paddle train \ paddle train \
--config=$cfg \ --config=$cfg \
--save_dir=./output \ --save_dir=./output \
......
...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False) ...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
...@@ -39,19 +40,17 @@ settings( ...@@ -39,19 +40,17 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.) bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128) emb = embedding_layer(input=data, size=128)
bi_lstm = bidirectional_lstm(input=emb, size=128) bi_lstm = bidirectional_lstm(input=emb, size=128)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5) dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
output = fc_layer(input=dropout, size=2, output = fc_layer(
bias_attr=bias_attr, input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
act=SoftmaxActivation())
if is_predict: if is_predict:
maxid = maxid_layer(output) maxid = maxid_layer(output)
......
...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False) ...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
...@@ -39,8 +40,7 @@ settings( ...@@ -39,8 +40,7 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128) embedding = embedding_layer(input=data, size=128)
......
...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False) ...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
...@@ -39,10 +40,9 @@ settings( ...@@ -39,10 +40,9 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.) bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128) emb = embedding_layer(input=data, size=128)
...@@ -52,17 +52,18 @@ lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1)) ...@@ -52,17 +52,18 @@ lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
input_layers = [hidden_0, lstm_0] input_layers = [hidden_0, lstm_0]
for i in range(1,8): for i in range(1, 8):
fc = fc_layer(input=input_layers, size=128) fc = fc_layer(input=input_layers, size=128)
lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1), lstm = lstmemory(
reverse=(i % 2) == 1,) input=fc,
layer_attr=ExtraAttr(drop_rate=0.1),
reverse=(i % 2) == 1, )
input_layers = [fc, lstm] input_layers = [fc, lstm]
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling()) lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2, output = fc_layer(
bias_attr=bias_attr, input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
act=SoftmaxActivation())
if is_predict: if is_predict:
maxid = maxid_layer(output) maxid = maxid_layer(output)
......
...@@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False) ...@@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
batch_size=batch_size, batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer())
learning_rate=2e-3,
learning_method=AdamOptimizer()
)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128) embedding = embedding_layer(input=data, size=128)
......
...@@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict' ...@@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict'
# We need to use a different process function for training and prediction. # We need to use a different process function for training and prediction.
# For training, the input data includes both word IDs and labels. # For training, the input data includes both word IDs and labels.
# For prediction, the input data only includes word IDs. # For prediction, the input data only includes word IDs.
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_bow", test_list=tst,
obj=process, module="dataprovider_bow",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
...@@ -44,8 +45,7 @@ settings( ...@@ -44,8 +45,7 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
# Define the data for text features. The size of the data layer is the number # Define the data for text features. The size of the data layer is the number
# of words in the dictionary. # of words in the dictionary.
......
...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False) ...@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
...@@ -39,17 +40,14 @@ settings( ...@@ -39,17 +40,14 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128) emb = embedding_layer(input=data, size=128)
lstm = simple_lstm(input=emb, size=128, lstm = simple_lstm(
lstm_cell_attr=ExtraAttr(drop_rate=0.25)) input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25))
lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling()) lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_max, size=2, output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation())
act=SoftmaxActivation())
if is_predict: if is_predict:
maxid = maxid_layer(output) maxid = maxid_layer(output)
outputs([maxid, output]) outputs([maxid, output])
......
# edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This configuration is a demonstration of how to implement the stacked LSTM
with residual connections, i.e. an LSTM layer takes the sum of the hidden states
and inputs of the previous LSTM layer instead of only the hidden states.
This architecture is from:
Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa,
Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016.
Google's Neural Machine Translation System: Bridging the Gap between Human and
Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf
Different from the architecture described in the paper, we use a stack of
single-direction LSTM layers as the first layer instead of a bi-directional LSTM.
Also, since this is demo code, we stack 4 layers instead of 8 to reduce
computation time.
"""
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
lstm = simple_lstm(input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = emb, lstm
for i in range(3):
# The input to the current layer is the sum of the hidden state
# and input of the previous layer.
current_input = addto_layer(input=[previous_input, previous_hidden_state])
hidden_state = simple_lstm(input=current_input, size=128,
lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = current_input, hidden_state
lstm = previous_hidden_state
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2,
bias_attr=bias_attr,
act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
else:
label = data_layer(name="label", size=2)
cls = classification_cost(input=output, label=label)
outputs(cls)
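The for-loop above is the whole residual trick; stripped of the PaddlePaddle layer calls, the recurrence it implements can be run with a stand-in for the LSTM (fake_lstm below is a placeholder, not a real layer).

def fake_lstm(x):
    """Placeholder for simple_lstm(input=..., size=128)."""
    return 0.5 * x

emb = 1.0                                    # stand-in for the embedding output
previous_input, previous_hidden = emb, fake_lstm(emb)

for i in range(3):
    current_input = previous_input + previous_hidden    # addto_layer([...])
    hidden = fake_lstm(current_input)                    # simple_lstm(...)
    previous_input, previous_hidden = current_input, hidden

# Each LSTM sees the sum of the previous layer's input and hidden state,
# i.e. the residual connection described in the docstring above.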
...@@ -21,8 +21,9 @@ def meta_to_header(meta, name): ...@@ -21,8 +21,9 @@ def meta_to_header(meta, name):
yield integer_value(each_meta['max']) yield integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding': elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence' is_seq = each_meta['seq'] == 'sequence'
yield integer_value(len(each_meta['dict']), yield integer_value(
seq_type=SequenceType.SEQUENCE if is_seq len(each_meta['dict']),
else SequenceType.NO_SEQUENCE) seq_type=SequenceType.SEQUENCE
if is_seq else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense': elif each_meta['type'] == 'one_hot_dense':
yield dense_vector(len(each_meta['dict'])) yield dense_vector(len(each_meta['dict']))
...@@ -14,4 +14,3 @@ ...@@ -14,4 +14,3 @@
"fields": ["id", "title", "genres"] "fields": ["id", "title", "genres"]
} }
} }
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
config_generator.py config_generator.py
...@@ -29,10 +28,7 @@ import json ...@@ -29,10 +28,7 @@ import json
import docopt import docopt
import copy import copy
DEFAULT_FILE = { DEFAULT_FILE = {"type": "split", "delimiter": ","}
"type": "split",
"delimiter": ","
}
DEFAULT_FIELD = { DEFAULT_FIELD = {
"id": { "id": {
...@@ -107,19 +103,16 @@ def main(filename, fmt): ...@@ -107,19 +103,16 @@ def main(filename, fmt):
field = copy.deepcopy(DEFAULT_FIELD[field_key]) field = copy.deepcopy(DEFAULT_FIELD[field_key])
field['pos'] = pos field['pos'] = pos
fields.append(field) fields.append(field)
obj[k] = { obj[k] = {"file": file_dict, "fields": fields}
"file": file_dict, meta = {"meta": obj}
"fields": fields
}
meta = {
"meta": obj
}
# print meta # print meta
if fmt == 'json': if fmt == 'json':
def formatter(x): def formatter(x):
import json import json
return json.dumps(x, indent=2) return json.dumps(x, indent=2)
elif fmt == 'yaml': elif fmt == 'yaml':
def formatter(x): def formatter(x):
import yaml import yaml
return yaml.safe_dump(x, default_flow_style=False) return yaml.safe_dump(x, default_flow_style=False)
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Preprocess Movielens dataset, to get movie/user object. Preprocess Movielens dataset, to get movie/user object.
...@@ -66,8 +65,8 @@ class SortedIDGenerator(object): ...@@ -66,8 +65,8 @@ class SortedIDGenerator(object):
self.__key_set__.add(key) self.__key_set__.add(key)
def finish_scan(self, compare=None, key=None, reverse=False): def finish_scan(self, compare=None, key=None, reverse=False):
self.__key_set__ = sorted(list(self.__key_set__), cmp=compare, self.__key_set__ = sorted(
key=key, reverse=reverse) list(self.__key_set__), cmp=compare, key=key, reverse=reverse)
self.dict = dict() self.dict = dict()
for idx, each_key in enumerate(self.__key_set__): for idx, each_key in enumerate(self.__key_set__):
self.dict[each_key] = idx self.dict[each_key] = idx
...@@ -207,11 +206,10 @@ class EmbeddingFieldParser(object): ...@@ -207,11 +206,10 @@ class EmbeddingFieldParser(object):
self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict( self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict(
self.seq_type == EmbeddingFieldParser.SEQUENCE) self.seq_type == EmbeddingFieldParser.SEQUENCE)
elif config['dict']['type'] == 'split': elif config['dict']['type'] == 'split':
self.dict = SplitEmbeddingDict( self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ','))
config['dict'].get('delimiter', ','))
elif config['dict']['type'] == 'whole_content': elif config['dict']['type'] == 'whole_content':
self.dict = EmbeddingFieldParser.WholeContentDict( self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][
config['dict']['sort']) 'sort'])
else: else:
print config print config
assert False assert False
...@@ -333,8 +331,8 @@ class ContentExtractorFactory(object): ...@@ -333,8 +331,8 @@ class ContentExtractorFactory(object):
return PositionContentExtractor(config['pos']) return PositionContentExtractor(config['pos'])
else: else:
extra_args = config['regex'] extra_args = config['regex']
return RegexPositionContentExtractor(pos=config['pos'], return RegexPositionContentExtractor(
**extra_args) pos=config['pos'], **extra_args)
class MetaFile(object): class MetaFile(object):
...@@ -364,9 +362,10 @@ class MetaFile(object): ...@@ -364,9 +362,10 @@ class MetaFile(object):
metas = map(lambda x: x.meta_field(), field_parsers) metas = map(lambda x: x.meta_field(), field_parsers)
# print metas # print metas
key_index = filter(lambda x: x is not None, map( key_index = filter(
lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] lambda x: x is not None,
else None, enumerate(metas)))[0] map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None,
enumerate(metas)))[0]
key_map = [] key_map = []
for i in range(min(key_index, len(metas))): for i in range(min(key_index, len(metas))):
...@@ -374,12 +373,7 @@ class MetaFile(object): ...@@ -374,12 +373,7 @@ class MetaFile(object):
for i in range(key_index + 1, len(metas)): for i in range(key_index + 1, len(metas)):
key_map.append(i) key_map.append(i)
obj = { obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}}
'__meta__': {
'raw_meta': metas,
'feature_map': key_map
}
}
for each_block in reader.read(): for each_block in reader.read():
idx = field_parsers[key_index].parse(each_block) idx = field_parsers[key_index].parse(each_block)
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Separate movielens 1m dataset to train/test file. Separate movielens 1m dataset to train/test file.
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
import common_utils # parse import common_utils # parse
def hook(settings, meta, **kwargs): def hook(settings, meta, **kwargs):
""" """
Init hook is invoked before processing data. It will set obj.slots and store Init hook is invoked before processing data. It will set obj.slots and store
...@@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs): ...@@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs):
settings.input_types = headers settings.input_types = headers
settings.meta = meta settings.meta = meta
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM) @provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename): def process(settings, filename):
with open(filename, 'r') as f: with open(filename, 'r') as f:
......
...@@ -28,7 +28,8 @@ if __name__ == '__main__': ...@@ -28,7 +28,8 @@ if __name__ == '__main__':
model_path = sys.argv[1] model_path = sys.argv[1]
swig_paddle.initPaddle('--use_gpu=0') swig_paddle.initPaddle('--use_gpu=0')
conf = parse_config("trainer_config.py", "is_predict=1") conf = parse_config("trainer_config.py", "is_predict=1")
network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine) assert isinstance(network, swig_paddle.GradientMachine)
network.loadParameters(model_path) network.loadParameters(model_path)
with open('./data/meta.bin', 'rb') as f: with open('./data/meta.bin', 'rb') as f:
...@@ -39,11 +40,12 @@ if __name__ == '__main__': ...@@ -39,11 +40,12 @@ if __name__ == '__main__':
while True: while True:
movie_id = int(raw_input("Input movie_id: ")) movie_id = int(raw_input("Input movie_id: "))
user_id = int(raw_input("Input user_id: ")) user_id = int(raw_input("Input user_id: "))
movie_meta = meta['movie'][movie_id] # Query Data From Meta. movie_meta = meta['movie'][movie_id] # Query Data From Meta.
user_meta = meta['user'][user_id] user_meta = meta['user'][user_id]
data = [movie_id - 1] data = [movie_id - 1]
data.extend(movie_meta) data.extend(movie_meta)
data.append(user_id - 1) data.append(user_id - 1)
data.extend(user_meta) data.extend(user_meta)
print "Prediction Score is %.2f" % ((network.forwardTest( print "Prediction Score is %.2f" % (
cvt.convert([data]))[0]['value'][0][0] + 5) / 2) (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5)
/ 2)
...@@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f: ...@@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f:
# load meta file # load meta file
meta = pickle.load(f) meta = pickle.load(f)
settings(batch_size=1600, learning_rate=1e-3, settings(
learning_method=RMSPropOptimizer()) batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer())
def construct_feature(name): def construct_feature(name):
...@@ -59,11 +59,10 @@ def construct_feature(name): ...@@ -59,11 +59,10 @@ def construct_feature(name):
slot_name = each_meta.get('name', '%s_id' % name) slot_name = each_meta.get('name', '%s_id' % name)
if type_name == 'id': if type_name == 'id':
slot_dim = each_meta['max'] slot_dim = each_meta['max']
embedding = embedding_layer(input=data_layer(slot_name, embedding = embedding_layer(
size=slot_dim), input=data_layer(
size=256) slot_name, size=slot_dim), size=256)
fusion.append(fc_layer(input=embedding, fusion.append(fc_layer(input=embedding, size=256))
size=256))
elif type_name == 'embedding': elif type_name == 'embedding':
is_seq = each_meta['seq'] == 'sequence' is_seq = each_meta['seq'] == 'sequence'
slot_dim = len(each_meta['dict']) slot_dim = len(each_meta['dict'])
...@@ -71,17 +70,14 @@ def construct_feature(name): ...@@ -71,17 +70,14 @@ def construct_feature(name):
embedding = embedding_layer(input=din, size=256) embedding = embedding_layer(input=din, size=256)
if is_seq: if is_seq:
fusion.append(
    text_conv_pool(input=embedding, context_len=5,
                   hidden_size=256))
fusion.append(
    text_conv_pool(
        input=embedding, context_len=5, hidden_size=256))
else: else:
fusion.append(fc_layer(input=embedding,
                       size=256))
fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'one_hot_dense': elif type_name == 'one_hot_dense':
slot_dim = len(each_meta['dict']) slot_dim = len(each_meta['dict'])
hidden = fc_layer(input=data_layer(slot_name, slot_dim),
                  size=256)
hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256)
fusion.append(fc_layer(input=hidden,
                       size=256))
fusion.append(fc_layer(input=hidden, size=256))
return fc_layer(name="%s_fusion" % name, input=fusion, size=256) return fc_layer(name="%s_fusion" % name, input=fusion, size=256)
...@@ -90,10 +86,16 @@ movie_feature = construct_feature("movie") ...@@ -90,10 +86,16 @@ movie_feature = construct_feature("movie")
user_feature = construct_feature("user") user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature) similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict: if not is_predict:
outputs(regression_cost(input=similarity,
                        label=data_layer('rating', size=1)))
outputs(
    regression_cost(
        input=similarity, label=data_layer(
            'rating', size=1)))

define_py_data_sources2('data/train.list', 'data/test.list', module='dataprovider',
                        obj='process', args={'meta': meta})
define_py_data_sources2(
    'data/train.list',
    'data/test.list',
    module='dataprovider',
    obj='process',
    args={'meta': meta})
else: else:
outputs(similarity) outputs(similarity)
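construct_feature builds one sub-network per field described in the movie/user meta: 'id' fields go through an embedding_layer, 'embedding' fields through an embedding followed by text_conv_pool for sequences or fc_layer otherwise, and 'one_hot_dense' fields through two stacked fc_layers. As a rough, purely illustrative sketch of what such a meta list is assumed to contain (the type key name is assumed, since its lookup sits outside this hunk, and all values are invented; the real meta comes from data/meta.bin):

    # Hypothetical meta entries, for illustration only.
    example_movie_meta = [
        {'type': 'id', 'name': 'movie_id', 'max': 4000},
        {'type': 'embedding', 'name': 'title', 'seq': 'sequence',
         'dict': ['a', 'few', 'title', 'words']},
        {'type': 'one_hot_dense', 'name': 'genre',
         'dict': ['action', 'comedy', 'drama']},
    ]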
...@@ -17,24 +17,15 @@ import os ...@@ -17,24 +17,15 @@ import os
from optparse import OptionParser from optparse import OptionParser
def extract_dict_features(pair_file, feature_file, src_dict_file, def extract_dict_features(pair_file, feature_file):
tgt_dict_file):
src_dict = set() with open(pair_file) as fin, open(feature_file, 'w') as feature_out:
tgt_dict = set()
with open(pair_file) as fin, open(feature_file, 'w') as feature_out, open(
src_dict_file, 'w') as src_dict_out, open(tgt_dict_file,
'w') as tgt_dict_out:
for line in fin: for line in fin:
sentence, labels = line.strip().split('\t') sentence, predicate, labels = line.strip().split('\t')
sentence_list = sentence.split() sentence_list = sentence.split()
labels_list = labels.split() labels_list = labels.split()
src_dict.update(sentence_list)
tgt_dict.update(labels_list)
verb_index = labels_list.index('B-V') verb_index = labels_list.index('B-V')
verb_feature = sentence_list[verb_index]
mark = [0] * len(labels_list) mark = [0] * len(labels_list)
if verb_index > 0: if verb_index > 0:
...@@ -42,47 +33,50 @@ def extract_dict_features(pair_file, feature_file, src_dict_file, ...@@ -42,47 +33,50 @@ def extract_dict_features(pair_file, feature_file, src_dict_file,
ctx_n1 = sentence_list[verb_index - 1] ctx_n1 = sentence_list[verb_index - 1]
else: else:
ctx_n1 = 'bos' ctx_n1 = 'bos'
ctx_n1_feature = ctx_n1
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence_list[verb_index - 2]
else:
ctx_n2 = 'bos'
mark[verb_index] = 1 mark[verb_index] = 1
ctx_0_feature = sentence_list[verb_index] ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 2: if verb_index < len(labels_list) - 2:
mark[verb_index + 1] = 1 mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1] ctx_p1 = sentence_list[verb_index + 1]
else: else:
ctx_p1 = 'eos' ctx_p1 = 'eos'
ctx_p1_feature = ctx_p1
if verb_index < len(labels_list) - 3:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
else:
ctx_p2 = 'eos'
feature_str = sentence + '\t' \ feature_str = sentence + '\t' \
+ verb_feature + '\t' \ + predicate + '\t' \
+ ctx_n1_feature + '\t' \ + ctx_n2 + '\t' \
+ ctx_0_feature + '\t' \ + ctx_n1 + '\t' \
+ ctx_p1_feature + '\t' \ + ctx_0 + '\t' \
+ ctx_p1 + '\t' \
+ ctx_p2 + '\t' \
+ ' '.join([str(i) for i in mark]) + '\t' \ + ' '.join([str(i) for i in mark]) + '\t' \
+ labels + labels
feature_out.write(feature_str + '\n') feature_out.write(feature_str + '\n')
src_dict_out.write('<unk>\n')
src_dict_out.write('\n'.join(list(src_dict)))
tgt_dict_out.write('\n'.join(list(tgt_dict)))
if __name__ == '__main__': if __name__ == '__main__':
usage = '-p pair_file -f feature_file -s source dictionary -t target dictionary ' usage = '-p pair_file -f feature_file'
parser = OptionParser(usage) parser = OptionParser(usage)
parser.add_option('-p', dest='pair_file', help='the pair file') parser.add_option('-p', dest='pair_file', help='the pair file')
parser.add_option( parser.add_option('-f', dest='feature_file', help='the feature file')
'-f', dest='feature_file', help='the file to store feature')
parser.add_option(
'-s', dest='src_dict', help='the file to store source dictionary')
parser.add_option(
'-t', dest='tgt_dict', help='the file to store target dictionary')
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
extract_dict_features(options.pair_file, options.feature_file, extract_dict_features(options.pair_file, options.feature_file)
options.src_dict, options.tgt_dict)
...@@ -51,7 +51,7 @@ def read_sentences(words_file): ...@@ -51,7 +51,7 @@ def read_sentences(words_file):
for line in fin: for line in fin:
line = line.strip() line = line.strip()
if line == '': if line == '':
sentences.append(s.lower()) sentences.append(s)
s = '' s = ''
else: else:
s += line + ' ' s += line + ' '
...@@ -64,6 +64,11 @@ def transform_labels(sentences, labels): ...@@ -64,6 +64,11 @@ def transform_labels(sentences, labels):
if len(labels[i]) == 1: if len(labels[i]) == 1:
continue continue
else: else:
verb_list = []
for x in labels[i][0]:
if x !='-':
verb_list.append(x)
for j in xrange(1, len(labels[i])): for j in xrange(1, len(labels[i])):
label_list = labels[i][j] label_list = labels[i][j]
current_tag = 'O' current_tag = 'O'
...@@ -88,8 +93,7 @@ def transform_labels(sentences, labels): ...@@ -88,8 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True is_in_bracket = True
else: else:
print 'error:', ll print 'error:', ll
sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq))
sen_lab_pair.append((sentences[i], label_seq))
return sen_lab_pair return sen_lab_pair
...@@ -97,9 +101,9 @@ def write_file(sen_lab_pair, output_file): ...@@ -97,9 +101,9 @@ def write_file(sen_lab_pair, output_file):
with open(output_file, 'w') as fout: with open(output_file, 'w') as fout:
for x in sen_lab_pair: for x in sen_lab_pair:
sentence = x[0] sentence = x[0]
label_seq = ' '.join(x[1]) label_seq = ' '.join(x[2])
assert len(sentence.split()) == len(x[1]) assert len(sentence.split()) == len(x[2])
fout.write(sentence + '\t' + label_seq + '\n') fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n')
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
# limitations under the License. # limitations under the License.
set -e set -e
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/verbDict.txt --no-check-certificate
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/targetDict.txt --no-check-certificate
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/wordDict.txt --no-check-certificate
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/emb --no-check-certificate
tar -xzvf conll05st-tests.tar.gz tar -xzvf conll05st-tests.tar.gz
rm conll05st-tests.tar.gz rm conll05st-tests.tar.gz
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz . cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .
...@@ -22,4 +26,4 @@ gunzip test.wsj.words.gz ...@@ -22,4 +26,4 @@ gunzip test.wsj.words.gz
gunzip test.wsj.props.gz gunzip test.wsj.props.gz
python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair
python extract_dict_feature.py -p test.wsj.seq_pair -f feature -s src.dict -t tgt.dict python extract_dict_feature.py -p test.wsj.seq_pair -f feature
...@@ -17,41 +17,51 @@ from paddle.trainer.PyDataProvider2 import * ...@@ -17,41 +17,51 @@ from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0 UNK_IDX = 0
def hook(settings, word_dict, label_dict, **kwargs): def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict settings.word_dict = word_dict
settings.label_dict = label_dict settings.label_dict = label_dict
settings.predicate_dict = predicate_dict
#all inputs are integral and sequential type #all inputs are integral and sequential type
settings.slots = [ settings.slots = [
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(2), integer_value_sequence(len(word_dict)), integer_value_sequence(2),
integer_value_sequence(len(label_dict))] integer_value_sequence(len(label_dict))
]
@provider(init_hook=hook) def get_batch_size(yeild_data):
def process(obj, file_name): return len(yeild_data[0])
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata: with open(file_name, 'r') as fdata:
for line in fdata: for line in fdata:
sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = \ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t') line.strip().split('\t')
words = sentence.split() words = sentence.split()
sen_len = len(words) sen_len = len(words)
word_slot = [obj.word_dict.get(w, UNK_IDX) for w in words] word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
predicate_slot = [obj.word_dict.get(predicate, UNK_IDX)] * sen_len predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
ctx_n1_slot = [obj.word_dict.get(ctx_n1, UNK_IDX)] * sen_len ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_0_slot = [obj.word_dict.get(ctx_0, UNK_IDX)] * sen_len ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_p1_slot = [obj.word_dict.get(ctx_p1, UNK_IDX)] * sen_len ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split() marks = mark.split()
mark_slot = [int(w) for w in marks] mark_slot = [int(w) for w in marks]
label_list = label.split() label_list = label.split()
label_slot = [obj.label_dict.get(w) for w in label_list] label_slot = [settings.label_dict.get(w) for w in label_list]
yield word_slot, predicate_slot, ctx_n1_slot, \
      ctx_0_slot, ctx_p1_slot, mark_slot, label_slot
yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
      ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
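After this change each input line carries nine tab-separated fields: the sentence, the predicate, two words of left context, the verb itself, two words of right context, the 0/1 mark sequence, and the label sequence. A minimal, made-up record matching that layout (the sentence and labels below are invented for illustration):

    # Invented example record; real records are produced by extract_dict_feature.py.
    record = '\t'.join([
        'The cats sat on the mat',        # sentence
        'sat',                            # predicate
        'The',                            # ctx_n2
        'cats',                           # ctx_n1
        'sat',                            # ctx_0
        'on',                             # ctx_p1
        'the',                            # ctx_p2
        '1 1 1 1 1 0',                    # mark: 1 within two words of the verb
        'B-A0 I-A0 B-V B-A1 I-A1 I-A1',   # labels
    ])
    assert len(record.split('\t')) == 9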
...@@ -12,15 +12,15 @@ ...@@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math import math
import os import os
import sys import sys
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
#file paths #file paths
word_dict_file = './data/src.dict' word_dict_file = './data/wordDict.txt'
label_dict_file = './data/tgt.dict' label_dict_file = './data/targetDict.txt'
predicate_file= './data/verbDict.txt'
train_list_file = './data/train.list' train_list_file = './data/train.list'
test_list_file = './data/test.list' test_list_file = './data/test.list'
...@@ -31,8 +31,10 @@ if not is_predict: ...@@ -31,8 +31,10 @@ if not is_predict:
#load dictionaries #load dictionaries
word_dict = dict() word_dict = dict()
label_dict = dict() label_dict = dict()
predicate_dict = dict()
with open(word_dict_file, 'r') as f_word, \ with open(word_dict_file, 'r') as f_word, \
open(label_dict_file, 'r') as f_label: open(label_dict_file, 'r') as f_label, \
open(predicate_file, 'r') as f_pre:
for i, line in enumerate(f_word): for i, line in enumerate(f_word):
w = line.strip() w = line.strip()
word_dict[w] = i word_dict[w] = i
...@@ -41,8 +43,13 @@ if not is_predict: ...@@ -41,8 +43,13 @@ if not is_predict:
w = line.strip() w = line.strip()
label_dict[w] = i label_dict[w] = i
for i, line in enumerate(f_pre):
w = line.strip()
predicate_dict[w] = i
if is_test: if is_test:
train_list_file = None train_list_file = None
#define data provider #define data provider
define_py_data_sources2( define_py_data_sources2(
...@@ -51,91 +58,157 @@ if not is_predict: ...@@ -51,91 +58,157 @@ if not is_predict:
module='dataprovider', module='dataprovider',
obj='process', obj='process',
args={'word_dict': word_dict, args={'word_dict': word_dict,
'label_dict': label_dict}) 'label_dict': label_dict,
'predicate_dict': predicate_dict })
word_dict_len = len(word_dict) word_dict_len = len(word_dict)
label_dict_len = len(label_dict) label_dict_len = len(label_dict)
pred_len = len(predicate_dict)
else: else:
word_dict_len = get_config_arg('dict_len', int) word_dict_len = get_config_arg('dict_len', int)
label_dict_len = get_config_arg('label_len', int) label_dict_len = get_config_arg('label_len', int)
pred_len = get_config_arg('pred_len', int)
############################## Hyper-parameters ##################################
mark_dict_len = 2 mark_dict_len = 2
word_dim = 32 word_dim = 32
mark_dim = 5 mark_dim = 5
hidden_dim = 128 hidden_dim = 512
depth = 8 depth = 8
emb_lr = 1e-2
fc_lr = 1e-2
lstm_lr = 2e-2
########################### Optimizer #######################################
settings( settings(
batch_size=150, batch_size=150,
learning_method=AdamOptimizer(), learning_method=MomentumOptimizer(momentum=0),
learning_rate=1e-3, learning_rate=2e-2,
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25) is_async=False,
model_average=ModelAverage(average_window=0.5,
max_average_window=10000),
)
#6 features
####################################### network ##############################
#8 features and 1 target
word = data_layer(name='word_data', size=word_dict_len) word = data_layer(name='word_data', size=word_dict_len)
predicate = data_layer(name='verb_data', size=word_dict_len) predicate = data_layer(name='verb_data', size=pred_len)
ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len)
ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len) ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len)
ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len) ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len)
ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len) ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len) mark = data_layer(name='mark_data', size=mark_dict_len)
if not is_predict: if not is_predict:
target = data_layer(name='target', size=label_dict_len) target = data_layer(name='target', size=label_dict_len)
ptt = ParameterAttribute(name='src_emb', learning_rate=emb_lr)
layer_attr = ExtraLayerAttribute(drop_rate=0.5)
fc_para_attr = ParameterAttribute(learning_rate=fc_lr)
lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=lstm_lr)
para_attr = [fc_para_attr, lstm_para_attr]
word_embedding = embedding_layer(size=word_dim, input=word, param_attr=ptt) default_std=1/math.sqrt(hidden_dim)/3.0
predicate_embedding = embedding_layer(
size=word_dim, input=predicate, param_attr=ptt) emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
ctx_n1_embedding = embedding_layer(size=word_dim, input=ctx_n1, param_attr=ptt) std_0 = ParameterAttribute(initial_std=0.)
ctx_0_embedding = embedding_layer(size=word_dim, input=ctx_0, param_attr=ptt) std_default = ParameterAttribute(initial_std=default_std)
ctx_p1_embedding = embedding_layer(size=word_dim, input=ctx_p1, param_attr=ptt)
mark_embedding = embedding_layer(size=mark_dim, input=mark) predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std))
mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input=[word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = mixed_layer(
    size=hidden_dim,
    input=[
        full_matrix_projection(input=word_embedding),
        full_matrix_projection(input=predicate_embedding),
        full_matrix_projection(input=ctx_n1_embedding),
        full_matrix_projection(input=ctx_0_embedding),
        full_matrix_projection(input=ctx_p1_embedding),
        full_matrix_projection(input=mark_embedding),
    ])
hidden_0 = mixed_layer(
    name='hidden0',
    size=hidden_dim,
    bias_attr=std_default,
    input=[full_matrix_projection(input=emb, param_attr=std_default) for emb in emb_layers])
lstm_0 = lstmemory(input=hidden_0, layer_attr=layer_attr) mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges #stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0] input_tmp = [hidden_0, lstm_0]
for i in range(1, depth): for i in range(1, depth):
fc = fc_layer(input=input_tmp, size=hidden_dim, param_attr=para_attr) mix_hidden = mixed_layer(name='hidden'+str(i),
size=hidden_dim,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
]
)
lstm = lstmemory(name='lstm'+str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2)==1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = mixed_layer(name='output',
size=label_dict_len,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
],
)
lstm = lstmemory(
input=fc,
act=ReluActivation(),
reverse=(i % 2) == 1,
layer_attr=layer_attr)
input_tmp = [fc, lstm]
prob = fc_layer(
input=input_tmp,
size=label_dict_len,
act=SoftmaxActivation(),
param_attr=para_attr)
if not is_predict: if not is_predict:
    cls = classification_cost(input=prob, label=target)
    outputs(cls)
    crf_l = crf_layer(name='crf',
                      size=label_dict_len,
                      input=feature_out,
                      label=target,
                      param_attr=ParameterAttribute(name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))

    crf_dec_l = crf_decoding_layer(name='crf_dec_l',
                                   size=label_dict_len,
                                   input=feature_out,
                                   label=target,
                                   param_attr=ParameterAttribute(name='crfw'))

    eval = sum_evaluator(input=crf_dec_l)

    outputs(crf_l)
else: else:
    outputs(prob)
    crf_dec_l = crf_decoding_layer(name='crf_dec_l',
                                   size=label_dict_len,
                                   input=feature_out,
                                   param_attr=ParameterAttribute(name='crfw'))
    outputs(crf_dec_l)
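For reference, the new output path trains a linear-chain CRF on top of the per-position scores produced by feature_out. In the usual formulation, crf_layer minimises the negative log-likelihood of

    P(y | x) = exp( sum_t [ s_t(y_t) + A(y_{t-1}, y_t) ] ) / Z(x)

where s_t(.) are the label scores emitted by feature_out at position t, A is the label-transition matrix held in the shared 'crfw' parameter, and Z(x) sums over all possible label sequences. crf_decoding_layer reuses the same 'crfw' weights: given a label input (the training/test branch above) it reports decoding errors that sum_evaluator accumulates, and without one (the predict branch) it emits the Viterbi-decoded tag sequence.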
...@@ -26,7 +26,7 @@ UNK_IDX = 0 ...@@ -26,7 +26,7 @@ UNK_IDX = 0
class Prediction(): class Prediction():
def __init__(self, train_conf, dict_file, model_dir, label_file): def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file):
""" """
train_conf: trainer configure. train_conf: trainer configure.
dict_file: word dictionary file name. dict_file: word dictionary file name.
...@@ -35,16 +35,19 @@ class Prediction(): ...@@ -35,16 +35,19 @@ class Prediction():
self.dict = {} self.dict = {}
self.labels = {} self.labels = {}
self.predicate_dict={}
self.labels_reverse = {} self.labels_reverse = {}
self.load_dict_label(dict_file, label_file) self.load_dict_label(dict_file, label_file, predicate_dict_file)
len_dict = len(self.dict) len_dict = len(self.dict)
len_label = len(self.labels) len_label = len(self.labels)
len_pred = len(self.predicate_dict)
conf = parse_config( conf = parse_config(
train_conf, train_conf,
'dict_len=' + str(len_dict) + 'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) + ',label_len=' + str(len_label) +
',pred_len=' + str(len_pred) +
',is_predict=True') ',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto( self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config) conf.model_config)
...@@ -52,15 +55,21 @@ class Prediction(): ...@@ -52,15 +55,21 @@ class Prediction():
slots = [ slots = [
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_pred),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(2) integer_value_sequence(2)
]
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(2)
] ]
self.converter = DataProviderConverter(slots) self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file): def load_dict_label(self, dict_file, label_file, predicate_dict_file):
""" """
Load dictionary from self.dict_file. Load dictionary from self.dict_file.
""" """
...@@ -71,52 +80,55 @@ class Prediction(): ...@@ -71,52 +80,55 @@ class Prediction():
self.labels[line.strip()] = line_count self.labels[line.strip()] = line_count
self.labels_reverse[line_count] = line.strip() self.labels_reverse[line_count] = line.strip()
for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file): def get_data(self, data_file):
""" """
Get input data of paddle format. Get input data of paddle format.
""" """
with open(data_file, 'r') as fdata: with open(data_file, 'r') as fdata:
for line in fdata: for line in fdata:
sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = line.strip( sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip(
).split('\t') ).split('\t')
words = sentence.split() words = sentence.split()
sen_len = len(words) sen_len = len(words)
word_slot = [self.dict.get(w, UNK_IDX) for w in words] word_slot = [self.dict.get(w, UNK_IDX) for w in words]
predicate_slot = [self.dict.get(predicate, UNK_IDX)] * sen_len predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split() marks = mark.split()
mark_slot = [int(w) for w in marks] mark_slot = [int(w) for w in marks]
yield word_slot, predicate_slot, ctx_n1_slot, \
      ctx_0_slot, ctx_p1_slot, mark_slot
yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
      ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot

def predict(self, data_file): def predict(self, data_file, output_file):
""" """
data_file: file name of input data. data_file: file name of input data.
""" """
input = self.converter(self.get_data(data_file)) input = self.converter(self.get_data(data_file))
output = self.network.forwardTest(input) output = self.network.forwardTest(input)
prob = output[0]["value"] lab = output[0]["id"].tolist()
lab = list(np.argsort(-prob)[:, 0])
with open(data_file, 'r') as fin, open('predict.res', 'w') as fout: with open(data_file, 'r') as fin, open(output_file, 'w') as fout:
index = 0 index = 0
for line in fin: for line in fin:
sen = line.split('\t')[0] sen = line.split('\t')[0]
len_sen = len(sen.split()) len_sen = len(sen.split())
line_labels = lab[index:index + len_sen] line_labels = lab[index:index + len_sen]
index += len_sen index += len_sen
fout.write(sen + '\t' + ' '.join([self.labels_reverse[ fout.write(sen + '\t' + ' '.join(
i] for i in line_labels]) + '\n') [self.labels_reverse[i] for i in line_labels]) + '\n')
def option_parser(): def option_parser():
usage = ("python predict.py -c config -w model_dir " usage = ("python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file") "-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage) parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option( parser.add_option(
"-c", "-c",
...@@ -137,6 +149,13 @@ def option_parser(): ...@@ -137,6 +149,13 @@ def option_parser():
dest="label_file", dest="label_file",
default=None, default=None,
help="label file") help="label file")
parser.add_option(
"-p",
"--predict_dict_file",
action="store",
dest="predict_dict_file",
default=None,
help="predict_dict_file")
parser.add_option( parser.add_option(
"-i", "-i",
"--data", "--data",
...@@ -150,6 +169,14 @@ def option_parser(): ...@@ -150,6 +169,14 @@ def option_parser():
dest="model_path", dest="model_path",
default=None, default=None,
help="model path") help="model path")
parser.add_option(
"-o",
"--output_file",
action="store",
dest="output_file",
default=None,
help="output file")
return parser.parse_args() return parser.parse_args()
...@@ -160,10 +187,12 @@ def main(): ...@@ -160,10 +187,12 @@ def main():
dict_file = options.dict_file dict_file = options.dict_file
model_path = options.model_path model_path = options.model_path
label_file = options.label_file label_file = options.label_file
predict_dict_file = options.predict_dict_file
output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0") swig_paddle.initPaddle("--use_gpu=0")
predict = Prediction(train_conf, dict_file, model_path, label_file) predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file)
predict.predict(data_file) predict.predict(data_file,output_file)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -26,15 +26,18 @@ LOG=`get_best_pass $log` ...@@ -26,15 +26,18 @@ LOG=`get_best_pass $log`
LOG=(${LOG}) LOG=(${LOG})
best_model_path="output/pass-${LOG[1]}" best_model_path="output/pass-${LOG[1]}"
config_file=db_lstm.py config_file=db_lstm.py
dict_file=./data/src.dict dict_file=./data/wordDict.txt
label_file=./data/tgt.dict label_file=./data/targetDict.txt
predicate_dict_file=./data/verbDict.txt
input_file=./data/feature input_file=./data/feature
output_file=predict.res
python predict.py \ python predict.py \
-c $config_file \ -c $config_file \
-w $best_model_path \ -w $best_model_path \
-l $label_file \ -l $label_file \
-p $predicate_dict_file \
-d $dict_file \ -d $dict_file \
-i $input_file -i $input_file \
-o $output_file
...@@ -36,5 +36,5 @@ paddle train \ ...@@ -36,5 +36,5 @@ paddle train \
--job=test \ --job=test \
--use_gpu=false \ --use_gpu=false \
--config_args=is_test=1 \ --config_args=is_test=1 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'test.log' 2>&1 | tee 'test.log'
...@@ -16,12 +16,14 @@ ...@@ -16,12 +16,14 @@
set -e set -e
paddle train \ paddle train \
--config=./db_lstm.py \ --config=./db_lstm.py \
--use_gpu=0 \
--log_period=5000 \
--trainer_count=1 \
--show_parameter_stats_period=5000 \
--save_dir=./output \ --save_dir=./output \
--trainer_count=4 \ --num_passes=10000 \
--log_period=10 \ --average_test_period=10000000 \
--num_passes=500 \ --init_model_path=./data \
--use_gpu=false \ --load_missing_parameter_strategy=rand \
--show_parameter_stats_period=10 \
--test_all_data_in_one_period=1 \ --test_all_data_in_one_period=1 \
2>&1 | tee 'train.log' 2>&1 | tee 'train.log'
...@@ -38,11 +38,11 @@ unzip master.zip ...@@ -38,11 +38,11 @@ unzip master.zip
mkdir -p imdb/train mkdir -p imdb/train
mkdir -p imdb/test mkdir -p imdb/test
cp -r aclImdb/train/pos/ imdb/train/ cp -r aclImdb/train/pos/ imdb/train/pos
cp -r aclImdb/train/neg/ imdb/train/ cp -r aclImdb/train/neg/ imdb/train/neg
cp -r aclImdb/test/pos/ imdb/test/ cp -r aclImdb/test/pos/ imdb/test/pos
cp -r aclImdb/test/neg/ imdb/test/ cp -r aclImdb/test/neg/ imdb/test/neg
#remove compressed package #remove compressed package
rm aclImdb_v1.tar.gz rm aclImdb_v1.tar.gz
......
...@@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import * ...@@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import *
def hook(settings, dictionary, **kwargs): def hook(settings, dictionary, **kwargs):
settings.word_dict = dictionary settings.word_dict = dictionary
settings.input_types = [ settings.input_types = [
integer_value_sequence(len(settings.word_dict)), integer_value_sequence(len(settings.word_dict)), integer_value(2)
integer_value(2)] ]
settings.logger.info('dict len : %d' % (len(settings.word_dict))) settings.logger.info('dict len : %d' % (len(settings.word_dict)))
...@@ -29,6 +29,7 @@ def process(settings, file_name): ...@@ -29,6 +29,7 @@ def process(settings, file_name):
label, comment = line.strip().split('\t\t') label, comment = line.strip().split('\t\t')
label = int(label) label = int(label)
words = comment.split() words = comment.split()
word_slot = [settings.word_dict[w] for w in words if w in word_slot = [
settings.word_dict] settings.word_dict[w] for w in words if w in settings.word_dict
]
yield word_slot, label yield word_slot, label
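Behaviour is unchanged by the reformatting: the provider yields one (word id sequence, label) pair per review, silently dropping words that are missing from the dictionary. A toy walk-through (dictionary and review text invented for illustration):

    # Toy dictionary and record, for illustration only.
    word_dict = {'this': 0, 'movie': 1, 'is': 2, 'great': 3}
    line = '1\t\tthis movie is truly great'
    label, comment = line.strip().split('\t\t')
    word_slot = [word_dict[w] for w in comment.split() if w in word_dict]
    # 'truly' is not in the dictionary, so it is skipped.
    assert (word_slot, int(label)) == ([0, 1, 2, 3], 1)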
...@@ -18,14 +18,14 @@ from optparse import OptionParser ...@@ -18,14 +18,14 @@ from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config from paddle.trainer.config_parser import parse_config
""" """
Usage: run following command to show help message. Usage: run following command to show help message.
python predict.py -h python predict.py -h
""" """
class SentimentPrediction(): class SentimentPrediction():
def __init__(self, train_conf, dict_file, model_dir=None, label_file = None): def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
""" """
train_conf: trainer configure. train_conf: trainer configure.
dict_file: word dictionary file name. dict_file: word dictionary file name.
...@@ -44,7 +44,8 @@ class SentimentPrediction(): ...@@ -44,7 +44,8 @@ class SentimentPrediction():
self.load_label(label_file) self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1") conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(self.model_dir) self.network.loadParameters(self.model_dir)
input_types = [integer_value_sequence(self.dict_dim)] input_types = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(input_types) self.converter = DataProviderConverter(input_types)
...@@ -61,7 +62,7 @@ class SentimentPrediction(): ...@@ -61,7 +62,7 @@ class SentimentPrediction():
""" """
Load label. Load label.
""" """
self.label={} self.label = {}
for v in open(label_file, 'r'): for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0] self.label[int(v.split('\t')[1])] = v.split('\t')[0]
...@@ -72,7 +73,9 @@ class SentimentPrediction(): ...@@ -72,7 +73,9 @@ class SentimentPrediction():
with open(data_file, 'r') as fdata: with open(data_file, 'r') as fdata:
for line in fdata: for line in fdata:
words = line.strip().split() words = line.strip().split()
word_slot = [self.word_dict[w] for w in words if w in self.word_dict] word_slot = [
self.word_dict[w] for w in words if w in self.word_dict
]
if not word_slot: if not word_slot:
print "all words are not in dictionary: %s", line print "all words are not in dictionary: %s", line
continue continue
...@@ -89,25 +92,48 @@ class SentimentPrediction(): ...@@ -89,25 +92,48 @@ class SentimentPrediction():
if self.label is None: if self.label is None:
print("%s: predicting label is %d" % (data_file, lab[0][0])) print("%s: predicting label is %d" % (data_file, lab[0][0]))
else: else:
print("%s: predicting label is %s" % (data_file, self.label[lab[0][0]])) print("%s: predicting label is %s" %
(data_file, self.label[lab[0][0]]))
def option_parser(): def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file " usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage) parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option("-n", "--tconf", action="store", parser.add_option(
dest="train_conf", help="network config") "-n",
parser.add_option("-d", "--dict", action="store", "--tconf",
dest="dict_file",help="dictionary file") action="store",
parser.add_option("-b", "--label", action="store", dest="train_conf",
dest="label", default=None, help="network config")
help="dictionary file") parser.add_option(
parser.add_option("-i", "--data", action="store", "-d",
dest="data", help="data file to predict") "--dict",
parser.add_option("-w", "--model", action="store", action="store",
dest="model_path", default=None, dest="dict_file",
help="model path") help="dictionary file")
parser.add_option(
"-b",
"--label",
action="store",
dest="label",
default=None,
help="dictionary file")
parser.add_option(
"-i",
"--data",
action="store",
dest="data",
help="data file to predict")
parser.add_option(
"-w",
"--model",
action="store",
dest="model_path",
default=None,
help="model path")
return parser.parse_args() return parser.parse_args()
def main(): def main():
options, args = option_parser() options, args = option_parser()
train_conf = options.train_conf train_conf = options.train_conf
...@@ -119,5 +145,6 @@ def main(): ...@@ -119,5 +145,6 @@ def main():
predict = SentimentPrediction(train_conf, dict_file, model_path, label) predict = SentimentPrediction(train_conf, dict_file, model_path, label)
predict.predict(data) predict.predict(data)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -22,13 +22,13 @@ from os.path import join as join_path ...@@ -22,13 +22,13 @@ from os.path import join as join_path
from optparse import OptionParser from optparse import OptionParser
from paddle.utils.preprocess_util import * from paddle.utils.preprocess_util import *
""" """
Usage: run following command to show help message. Usage: run following command to show help message.
python preprocess.py -h python preprocess.py -h
""" """
def save_dict(dict, filename, is_reverse = True):
def save_dict(dict, filename, is_reverse=True):
""" """
Save dictionary into file. Save dictionary into file.
dict: input dictionary. dict: input dictionary.
...@@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True): ...@@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True):
f = open(filename, 'w') f = open(filename, 'w')
for k, v in sorted(dict.items(), key=operator.itemgetter(1),\ for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
reverse=is_reverse): reverse=is_reverse):
f.write('%s\t%s\n'%(k, v)) f.write('%s\t%s\n' % (k, v))
f.close() f.close()
def tokenize(sentences): def tokenize(sentences):
""" """
Use tokenizer.perl to tokenize input sentences. Use tokenizer.perl to tokenize input sentences.
...@@ -58,6 +59,7 @@ def tokenize(sentences): ...@@ -58,6 +59,7 @@ def tokenize(sentences):
toks = tok_text.split('\n')[:-1] toks = tok_text.split('\n')[:-1]
return toks return toks
def read_lines(path): def read_lines(path):
""" """
path: String, file path. path: String, file path.
...@@ -71,12 +73,17 @@ def read_lines(path): ...@@ -71,12 +73,17 @@ def read_lines(path):
seqs.append(line) seqs.append(line)
return seqs return seqs
class SentimentDataSetCreate(): class SentimentDataSetCreate():
""" """
A class to process data for sentiment analysis task. A class to process data for sentiment analysis task.
""" """
def __init__(self, data_path, output_path,
use_okenizer = True, multi_lines = False): def __init__(self,
data_path,
output_path,
use_okenizer=True,
multi_lines=False):
""" """
data_path: string, traing and testing dataset path data_path: string, traing and testing dataset path
output_path: string, output path, store processed dataset output_path: string, output path, store processed dataset
...@@ -164,23 +171,17 @@ class SentimentDataSetCreate(): ...@@ -164,23 +171,17 @@ class SentimentDataSetCreate():
# Preprocess train data. # Preprocess train data.
train_data, train_lab_set = self.data_list(self.train_dir) train_data, train_lab_set = self.data_list(self.train_dir)
print "processing train set..." print "processing train set..."
file_lists = self.save_data(train_data, file_lists = self.save_data(train_data, "train", self.batch_size, True,
"train", True)
self.batch_size,
True,
True)
save_list(file_lists, self.train_list) save_list(file_lists, self.train_list)
# If have test data path, preprocess test data. # If have test data path, preprocess test data.
if os.path.exists(self.test_dir): if os.path.exists(self.test_dir):
test_data, test_lab_set = self.data_list(self.test_dir) test_data, test_lab_set = self.data_list(self.test_dir)
assert(train_lab_set == test_lab_set) assert (train_lab_set == test_lab_set)
print "processing test set..." print "processing test set..."
file_lists = self.save_data(test_data, file_lists = self.save_data(test_data, "test", self.batch_size,
"test", False, self.dict_with_test)
self.batch_size,
False,
self.dict_with_test)
save_list(file_lists, self.test_list) save_list(file_lists, self.test_list)
# save labels set. # save labels set.
...@@ -191,7 +192,9 @@ class SentimentDataSetCreate(): ...@@ -191,7 +192,9 @@ class SentimentDataSetCreate():
save_dict(self.word_count, self.dict_file, True) save_dict(self.word_count, self.dict_file, True)
self.dict_size = len(self.word_count) self.dict_size = len(self.word_count)
def save_data(self, data, prefix = "", def save_data(self,
data,
prefix="",
batch_size=50000, batch_size=50000,
is_shuffle=False, is_shuffle=False,
build_dict=False): build_dict=False):
...@@ -205,7 +208,8 @@ class SentimentDataSetCreate(): ...@@ -205,7 +208,8 @@ class SentimentDataSetCreate():
return: list of batch names return: list of batch names
""" """
if is_shuffle and self.multi_lines: if is_shuffle and self.multi_lines:
return self.save_data_multi_lines(data, prefix, batch_size, build_dict) return self.save_data_multi_lines(data, prefix, batch_size,
build_dict)
if is_shuffle: if is_shuffle:
random.shuffle(data) random.shuffle(data)
...@@ -213,7 +217,7 @@ class SentimentDataSetCreate(): ...@@ -213,7 +217,7 @@ class SentimentDataSetCreate():
batch_names = [] batch_names = []
for i in range(num_batches): for i in range(num_batches):
batch_name = join_path(self.output_path, batch_name = join_path(self.output_path,
"%s_part_%03d" %(prefix, i)) "%s_part_%03d" % (prefix, i))
begin = i * batch_size begin = i * batch_size
end = min((i + 1) * batch_size, len(data)) end = min((i + 1) * batch_size, len(data))
# read a batch of data # read a batch of data
...@@ -246,7 +250,9 @@ class SentimentDataSetCreate(): ...@@ -246,7 +250,9 @@ class SentimentDataSetCreate():
data_list = tokenize(data_list) data_list = tokenize(data_list)
return label_list, data_list return label_list, data_list
def save_data_multi_lines(self, data, prefix = "", def save_data_multi_lines(self,
data,
prefix="",
batch_size=50000, batch_size=50000,
build_dict=False): build_dict=False):
""" """
...@@ -274,14 +280,14 @@ class SentimentDataSetCreate(): ...@@ -274,14 +280,14 @@ class SentimentDataSetCreate():
self.create_dict(data_list) self.create_dict(data_list)
length = len(label_list) length = len(label_list)
perm_list = np.array([ i for i in xrange(length) ]) perm_list = np.array([i for i in xrange(length)])
random.shuffle(perm_list) random.shuffle(perm_list)
num_batches = int(math.ceil(length / float(batch_size))) num_batches = int(math.ceil(length / float(batch_size)))
batch_names = [] batch_names = []
for i in range(num_batches): for i in range(num_batches):
batch_name = join_path(self.output_path, batch_name = join_path(self.output_path,
"%s_part_%03d" %(prefix, i)) "%s_part_%03d" % (prefix, i))
begin = i * batch_size begin = i * batch_size
end = min((i + 1) * batch_size, length) end = min((i + 1) * batch_size, length)
sub_label = [label_list[perm_list[i]] for i in range(begin, end)] sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
...@@ -304,35 +310,50 @@ class SentimentDataSetCreate(): ...@@ -304,35 +310,50 @@ class SentimentDataSetCreate():
f.write('%s\t\t%s\n' % (lab, seq)) f.write('%s\t\t%s\n' % (lab, seq))
f.close() f.close()
def option_parser(): def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\ parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]") "-i data_dir [options]")
parser.add_option("-i", "--data", action="store", parser.add_option(
dest="input", help="Input data directory.") "-i",
parser.add_option("-o", "--output", action="store", "--data",
dest="output", default=None, action="store",
help="Output directory.") dest="input",
parser.add_option("-t", "--tokenizer", action="store", help="Input data directory.")
dest="use_tokenizer", default=True, parser.add_option(
help="Whether to use tokenizer.") "-o",
"--output",
action="store",
dest="output",
default=None,
help="Output directory.")
parser.add_option(
"-t",
"--tokenizer",
action="store",
dest="use_tokenizer",
default=True,
help="Whether to use tokenizer.")
parser.add_option("-m", "--multi_lines", action="store", parser.add_option("-m", "--multi_lines", action="store",
dest="multi_lines", default=False, dest="multi_lines", default=False,
help="If input text files have multi lines and they "\ help="If input text files have multi lines and they "\
"need to be shuffled, you should set -m True,") "need to be shuffled, you should set -m True,")
return parser.parse_args() return parser.parse_args()
def main(): def main():
options, args = option_parser() options, args = option_parser()
data_dir=options.input data_dir = options.input
output_dir=options.output output_dir = options.output
use_tokenizer=options.use_tokenizer use_tokenizer = options.use_tokenizer
multi_lines=options.multi_lines multi_lines = options.multi_lines
if output_dir is None: if output_dir is None:
outname = os.path.basename(options.input) outname = os.path.basename(options.input)
output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname) output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
data_creator = SentimentDataSetCreate(data_dir, output_dir, data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
use_tokenizer, multi_lines) multi_lines)
data_creator.create_dataset() data_creator.create_dataset()
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -47,10 +47,12 @@ def sentiment_data(data_dir=None, ...@@ -47,10 +47,12 @@ def sentiment_data(data_dir=None,
for i, line in enumerate(open(dict_file, 'r')): for i, line in enumerate(open(dict_file, 'r')):
word_dict[line.split('\t')[0]] = i word_dict[line.split('\t')[0]] = i
define_py_data_sources2(train_list, test_list, define_py_data_sources2(
module="dataprovider", train_list,
obj="process", test_list,
args={'dictionary': word_dict}) module="dataprovider",
obj="process",
args={'dictionary': word_dict})
return dict_dim, class_dim return dict_dim, class_dim
...@@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim, ...@@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim,
emb = embedding_layer(input=data, size=emb_dim) emb = embedding_layer(input=data, size=emb_dim)
bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim) bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5) dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
output = fc_layer(input=dropout, size=class_dim, output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
act=SoftmaxActivation())
if not is_predict: if not is_predict:
lbl = data_layer("label", 1) lbl = data_layer("label", 1)
...@@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim, ...@@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim,
data = data_layer("word", input_dim) data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim) emb = embedding_layer(input=data, size=emb_dim)
fc1 = fc_layer(input=emb, size=hid_dim, act=linear, fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
bias_attr=bias_attr) lstm1 = lstmemory(
lstm1 = lstmemory(input=fc1, act=relu, bias_attr=bias_attr, input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
layer_attr=layer_attr)
inputs = [fc1, lstm1] inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1): for i in range(2, stacked_num + 1):
fc = fc_layer(input=inputs, size=hid_dim, act=linear, fc = fc_layer(
param_attr=para_attr, bias_attr=bias_attr) input=inputs,
lstm = lstmemory(input=fc, reverse=(i % 2) == 0, act=relu, size=hid_dim,
bias_attr=bias_attr, layer_attr=layer_attr) act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm] inputs = [fc, lstm]
fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling()) fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling())
lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling()) lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling())
output = fc_layer(input=[fc_last, lstm_last], size=class_dim, output = fc_layer(
act=SoftmaxActivation(), input=[fc_last, lstm_last],
bias_attr=bias_attr, param_attr=para_attr) size=class_dim,
act=SoftmaxActivation(),
bias_attr=bias_attr,
param_attr=para_attr)
if is_predict: if is_predict:
outputs(output) outputs(output)
else: else:
outputs( outputs(classification_cost(input=output, label=data_layer('label', 1)))
classification_cost(input=output, label=data_layer('label', 1)))
...@@ -20,20 +20,20 @@ is_test = get_config_arg('is_test', bool, False) ...@@ -20,20 +20,20 @@ is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction # whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False) is_predict = get_config_arg('is_predict', bool, False)
data_dir = "./data/pre-imdb" data_dir = "./data/pre-imdb"
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict) dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config ##################### ################## Algorithm Config #####################
settings( settings(
batch_size=128, batch_size=128,
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), average_window=0.5,
gradient_clipping_threshold=25 regularization=L2Regularization(8e-4),
) gradient_clipping_threshold=25)
#################### Network Config ###################### #################### Network Config ######################
stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_lstm_net(
stacked_num=3, is_predict=is_predict) dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict)
# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict) # bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
...@@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs): ...@@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs):
if settings.job_mode: if settings.job_mode:
settings.trg_dict = trg_dict settings.trg_dict = trg_dict
settings.slots = [ settings.slots = [
integer_value_sequence(len(settings.src_dict)), integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(settings.trg_dict)), integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict)) integer_value_sequence(len(settings.trg_dict))
] ]
settings.logger.info("trg dict len : %d" % (len(settings.trg_dict))) settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
else: else:
settings.slots = [ settings.slots = [
integer_value_sequence(len(settings.src_dict)), integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(open(file_list[0], "r").readlines())) integer_value_sequence(len(open(file_list[0], "r").readlines()))
] ]
...@@ -62,8 +62,7 @@ def process(settings, file_name): ...@@ -62,8 +62,7 @@ def process(settings, file_name):
if settings.job_mode: if settings.job_mode:
trg_seq = line_split[1] # one target sequence trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split() trg_words = trg_seq.split()
trg_ids = [settings.trg_dict.get(w, UNK_IDX) trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
for w in trg_words]
# remove sequence whose length > 80 in training mode # remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80: if len(src_ids) > 80 or len(trg_ids) > 80:
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Example: Example:
python preprocess.py -i INPUT [-d DICTSIZE] [-m] python preprocess.py -i INPUT [-d DICTSIZE] [-m]
...@@ -24,12 +23,13 @@ Options: ...@@ -24,12 +23,13 @@ Options:
-m --mergeDict merge source and target dictionary -m --mergeDict merge source and target dictionary
""" """
import os import os
import sys import sys
import string import string
from optparse import OptionParser from optparse import OptionParser
from paddle.utils.preprocess_util import save_list, DatasetCreater from paddle.utils.preprocess_util import save_list, DatasetCreater
class SeqToSeqDatasetCreater(DatasetCreater): class SeqToSeqDatasetCreater(DatasetCreater):
""" """
A class to process data for sequence to sequence application. A class to process data for sequence to sequence application.
...@@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater):
if not os.path.exists(output): if not os.path.exists(output):
os.system(cmd + '> ' + output) os.system(cmd + '> ' + output)
def build_dict(self, file_path, dict_path, dict_size = -1): def build_dict(self, file_path, dict_path, dict_size=-1):
""" """
Create the dictionary for the file, Note that Create the dictionary for the file, Note that
1. Valid characters include all printable characters 1. Valid characters include all printable characters
...@@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater):
for word in words: for word in words:
if word not in dictory: if word not in dictory:
dictory[word] = 1 dictory[word] = 1
else: else:
dictory[word] += 1 dictory[word] += 1
output = open(dict_path, "w+") output = open(dict_path, "w+")
output.write('<s>\n<e>\n<unk>\n') output.write('<s>\n<e>\n<unk>\n')
count = 3 count = 3
for key, value in sorted(dictory.items(), key = lambda d:d[1], reverse = True): for key, value in sorted(
dictory.items(), key=lambda d: d[1], reverse=True):
output.write(key + "\n") output.write(key + "\n")
count += 1 count += 1
if count == dict_size: if count == dict_size:
break break
self.dict_size = count self.dict_size = count
def create_dataset(self, dict_size = -1, mergeDict = False, def create_dataset(self,
suffixes = ['.src', '.trg']): dict_size=-1,
mergeDict=False,
suffixes=['.src', '.trg']):
""" """
Create seqToseq dataset Create seqToseq dataset
""" """
...@@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater):
# checkout dataset should be parallel corpora # checkout dataset should be parallel corpora
suffix_len = len(suffixes[0]) suffix_len = len(suffixes[0])
for dataset in dataset_list: for dataset in dataset_list:
file_list = os.listdir(dataset) file_list = os.listdir(dataset)
if len(file_list) % 2 == 1: if len(file_list) % 2 == 1:
raise RuntimeError("dataset should be parallel corpora") raise RuntimeError("dataset should be parallel corpora")
file_list.sort() file_list.sort()
for i in range(0, len(file_list), 2): for i in range(0, len(file_list), 2):
if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]: if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
raise RuntimeError("source and target file name should be equal") raise RuntimeError(
"source and target file name should be equal")
# cat all the files with the same suffix in dataset # cat all the files with the same suffix in dataset
for suffix in suffixes: for suffix in suffixes:
...@@ -155,16 +159,18 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -155,16 +159,18 @@ class SeqToSeqDatasetCreater(DatasetCreater):
list = ['train.list', 'test.list', 'gen.list'] list = ['train.list', 'test.list', 'gen.list']
for dataset in dataset_list: for dataset in dataset_list:
outname = os.path.basename(dataset) outname = os.path.basename(dataset)
self.concat_file(dataset, outname + suffixes[0], self.concat_file(dataset, outname + suffixes[0],
outname + suffixes[1], dir_list[id], outname) outname + suffixes[1], dir_list[id], outname)
save_list([os.path.join(dir_list[id], outname)], save_list([os.path.join(dir_list[id], outname)],
os.path.join(self.output_path, list[id])) os.path.join(self.output_path, list[id]))
id += 1 id += 1
# build dictionary for train data # build dictionary for train data
dict = ['src.dict', 'trg.dict'] dict = ['src.dict', 'trg.dict']
dict_path = [os.path.join(self.output_path, dict[0]), dict_path = [
os.path.join(self.output_path, dict[1])] os.path.join(self.output_path, dict[0]),
os.path.join(self.output_path, dict[1])
]
if mergeDict: if mergeDict:
outname = os.path.join(train_dir, train_dataset.split('/')[-1]) outname = os.path.join(train_dir, train_dataset.split('/')[-1])
print 'build src dictionary for train data' print 'build src dictionary for train data'
...@@ -173,22 +179,30 @@ class SeqToSeqDatasetCreater(DatasetCreater): ...@@ -173,22 +179,30 @@ class SeqToSeqDatasetCreater(DatasetCreater):
os.system('cp ' + dict_path[0] + ' ' + dict_path[1]) os.system('cp ' + dict_path[0] + ' ' + dict_path[1])
else: else:
outname = os.path.join(train_dataset, self.train_dir_name) outname = os.path.join(train_dataset, self.train_dir_name)
for id in range(0,2): for id in range(0, 2):
suffix = suffixes[id] suffix = suffixes[id]
print 'build ' + suffix[1:] + ' dictionary for train data' print 'build ' + suffix[1:] + ' dictionary for train data'
self.build_dict(outname + suffix, dict_path[id], dict_size) self.build_dict(outname + suffix, dict_path[id], dict_size)
print 'dictionary size is', self.dict_size print 'dictionary size is', self.dict_size
def main():
    usage = "usage: \n" \
            "python %prog -i INPUT [-d DICTSIZE] [-m]"
    parser = OptionParser(usage)
    parser.add_option(
        "-i", action="store", dest="input", help="input original dataset path")
    parser.add_option(
        "-d",
        action="store",
        dest="dictsize",
        help="specified word count of dictionary")
    parser.add_option(
        "-m",
        "--mergeDict",
        action="store_true",
        dest="mergeDict",
        help="merge source and target dictionary")
    (options, args) = parser.parse_args()
    if options.input[-1] == os.path.sep:
        options.input = options.input[:-1]
...@@ -200,5 +214,6 @@ def main():
    data_creator = SeqToSeqDatasetCreater(options.input, output_path)
    data_creator.create_dataset(dictsize, options.mergeDict)
if __name__ == "__main__":
    main()
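For context, here is a minimal sketch of driving the same dataset creator directly from Python instead of through the command line. It assumes the snippet above lives in a module named `preprocess`; the input/output paths and the dictionary size are hypothetical examples, not values taken from this repository.

```python
# Hypothetical direct use of the dataset creator defined above.
from preprocess import SeqToSeqDatasetCreater

data_creator = SeqToSeqDatasetCreater('data/wmt14', 'data/pre-wmt14')
# 30000 is an example dictionary size; False keeps separate src/trg dicts.
data_creator.create_dataset(30000, False)
```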
...@@ -50,16 +50,21 @@ def seq_to_seq_data(data_dir,
        trg_dict = None
    else:
        train_list = os.path.join(data_dir, train_list)
        test_list = os.path.join(data_dir, test_list)
    define_py_data_sources2(
        train_list,
        test_list,
        module="dataprovider",
        obj="process",
        args={"src_dict": src_dict,
              "trg_dict": trg_dict})
    return {
        "src_dict_path": src_lang_dict,
        "trg_dict_path": trg_lang_dict,
        "gen_result": gen_result
    }
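A hedged sketch of how the two helpers in this config file appear to fit together: the dict returned by `seq_to_seq_data` is what `gru_encoder_decoder` below receives as `data_conf`. The argument value is a hypothetical example, and it is assumed that the parameters not shown here have defaults.

```python
# Assumed wiring between the helpers above and below; the path is an example.
data_conf = seq_to_seq_data(data_dir="./data/pre-wmt14")
gru_encoder_decoder(data_conf)
```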
def gru_encoder_decoder(data_conf,
...@@ -90,51 +95,55 @@ def gru_encoder_decoder(data_conf,
        size=word_vector_dim,
        param_attr=ParamAttr(name='_source_language_embedding'))
    src_forward = simple_gru(input=src_embedding, size=encoder_size)
    src_backward = simple_gru(
        input=src_embedding, size=encoder_size, reverse=True)
    encoded_vector = concat_layer(input=[src_forward, src_backward])
    with mixed_layer(size=decoder_size) as encoded_proj:
        encoded_proj += full_matrix_projection(input=encoded_vector)
    backward_first = first_seq(input=src_backward)
    with mixed_layer(
            size=decoder_size,
            act=TanhActivation(), ) as decoder_boot:
        decoder_boot += full_matrix_projection(input=backward_first)
    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
        decoder_mem = memory(
            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
        context = simple_attention(
            encoded_sequence=enc_vec,
            encoded_proj=enc_proj,
            decoder_state=decoder_mem, )
        with mixed_layer(size=decoder_size * 3) as decoder_inputs:
            decoder_inputs += full_matrix_projection(input=context)
            decoder_inputs += full_matrix_projection(input=current_word)
        gru_step = gru_step_layer(
            name='gru_decoder',
            input=decoder_inputs,
            output_mem=decoder_mem,
            size=decoder_size)
        with mixed_layer(
                size=target_dict_dim, bias_attr=True,
                act=SoftmaxActivation()) as out:
            out += full_matrix_projection(input=gru_step)
        return out
    decoder_group_name = "decoder_group"
    group_inputs = [
        StaticInput(
            input=encoded_vector, is_seq=True), StaticInput(
                input=encoded_proj, is_seq=True)
    ]
    if not is_generating:
        trg_embedding = embedding_layer(
            input=data_layer(
                name='target_language_word', size=target_dict_dim),
            size=word_vector_dim,
            param_attr=ParamAttr(name='_target_language_embedding'))
        group_inputs.append(trg_embedding)
...@@ -144,12 +153,12 @@ def gru_encoder_decoder(data_conf,
        # while encoded source sequence is accessed to as an unbounded memory.
        # Here, the StaticInput defines a read-only memory
        # for the recurrent_group.
        decoder = recurrent_group(
            name=decoder_group_name,
            step=gru_decoder_with_attention,
            input=group_inputs)
        lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
        cost = classification_cost(input=decoder, label=lbl)
        outputs(cost)
    else:
...@@ -168,16 +177,19 @@ def gru_encoder_decoder(data_conf,
            embedding_size=word_vector_dim)
        group_inputs.append(trg_embedding)
        beam_gen = beam_search(
            name=decoder_group_name,
            step=gru_decoder_with_attention,
            input=group_inputs,
            bos_id=0,
            eos_id=1,
            beam_size=beam_size,
            max_length=max_length)
        seqtext_printer_evaluator(
            input=beam_gen,
            id_input=data_layer(
                name="sent_id", size=1),
            dict_file=trg_dict_path,
            result_file=gen_trans_file)
        outputs(beam_gen)
...@@ -17,8 +17,7 @@ import gzip
import logging
logging.basicConfig(
    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
logger = logging.getLogger('paddle')
logger.setLevel(logging.INFO)
...@@ -32,59 +31,58 @@ num_original_columns = 3
# [[-1,0], [0,0]] means previous token at column 0 and current token at
# column 0 are combined as one feature.
patterns = [
    [[-2, 0]],
    [[-1, 0]],
    [[0, 0]],
    [[1, 0]],
    [[2, 0]],
    [[-1, 0], [0, 0]],
    [[0, 0], [1, 0]],
    [[-2, 1]],
    [[-1, 1]],
    [[0, 1]],
    [[1, 1]],
    [[2, 1]],
    [[-2, 1], [-1, 1]],
    [[-1, 1], [0, 1]],
    [[0, 1], [1, 1]],
    [[1, 1], [2, 1]],
    [[-2, 1], [-1, 1], [0, 1]],
    [[-1, 1], [0, 1], [1, 1]],
    [[0, 1], [1, 1], [2, 1]],
]
dict_label = {
    'B-ADJP': 0,
    'I-ADJP': 1,
    'B-ADVP': 2,
    'I-ADVP': 3,
    'B-CONJP': 4,
    'I-CONJP': 5,
    'B-INTJ': 6,
    'I-INTJ': 7,
    'B-LST': 8,
    'I-LST': 9,
    'B-NP': 10,
    'I-NP': 11,
    'B-PP': 12,
    'I-PP': 13,
    'B-PRT': 14,
    'I-PRT': 15,
    'B-SBAR': 16,
    'I-SBAR': 17,
    'B-UCP': 18,
    'I-UCP': 19,
    'B-VP': 20,
    'I-VP': 21,
    'O': 22
}
def make_features(sequence):
    length = len(sequence)
    num_features = len(sequence[0])
    def get_features(pos):
        if pos < 0:
            return ['#B%s' % -pos] * num_features
...@@ -94,9 +92,10 @@ def make_features(sequence):
    for i in xrange(length):
        for pattern in patterns:
            fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
            sequence[i].append(fname)
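To make the feature templates above concrete, here is a small illustration that is not part of the original file. It uses a toy two-column sequence (word, POS tag) and assumes that the elided part of `get_features` handles positions past the end of the sentence with placeholder strings, analogous to the `'#B%s'` case shown.

```python
# Toy example: column 0 is the word, column 1 is the POS tag.
sequence = [['He', 'PRP'], ['reckons', 'VBZ'], ['the', 'DT']]
make_features(sequence)
# The pattern [[-1, 0], [0, 0]] at position 1 joins the previous and current
# words of column 0, producing the combined feature 'He/reckons'.
print(sequence[1][2:])  # combined features appended after the 2 raw columns
```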
'''
Source file format:
Each line is for one timestep. The features are separated by space.
...@@ -109,6 +108,8 @@ i-th column.
return a list of dict for each column
'''
def create_dictionaries(filename, cutoff, oov_policy):
    def add_to_dict(sequence, dicts):
        num_features = len(dicts)
...@@ -140,7 +141,6 @@ def create_dictionaries(filename, cutoff, oov_policy):
            features = line.split(' ')
            sequence.append(features)
        for i in xrange(num_features):
            dct = dicts[i]
            n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
...@@ -151,7 +151,7 @@ def create_dictionaries(filename, cutoff, oov_policy):
                else:
                    dct[k] = n
                    n += 1
            if oov_policy[i] == OOV_POLICY_USE:
                # placeholder so that len(dct) will be the number of features
                # including OOV
...@@ -187,12 +187,15 @@ def initializer(settings, **xargs):
    logger.info("feature size=%s" % dim)
    settings.input_types = input_types
'''
if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not
existed in dicts[i] will be assigned to id 0.
if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist
in dicts[i].
'''
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
    input_file = filename
...@@ -231,7 +234,7 @@ def process(settings, filename):
                    logger.fatal("Unknown token: %s" % features[i])
                else:
                    vec.ids.append(dim + 0)
            dim += len(dicts[i])
            sample[-1].append(vec)
        return sample
...@@ -255,4 +258,3 @@ def process(settings, filename):
    f.close()
    logger.info("num_sequences=%s" % num_sequences)
...@@ -16,11 +16,11 @@ from paddle.trainer_config_helpers import *
import math
define_py_data_sources2(
    train_list="data/train.list",
    test_list="data/test.list",
    module="dataprovider",
    obj="process")
batch_size = 1
settings(
...@@ -30,14 +30,15 @@ settings(
    average_window=0.5,
    learning_rate=1e-1,
    learning_rate_decay_a=1e-5,
    learning_rate_decay_b=0.25, )
num_label_types = 23
def get_simd_size(size):
    return int(math.ceil(float(size) / 8)) * 8
# Currently, in order to use sparse_update=True,
# the size has to be aligned.
num_label_types = get_simd_size(num_label_types)
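As a quick sanity check of the alignment helper above (not part of the original config): 23 label types are padded up to the next multiple of 8 so that `sparse_update=True` can be used.

```python
# get_simd_size rounds up to the next multiple of 8.
assert get_simd_size(23) == 24
assert get_simd_size(24) == 24
assert get_simd_size(1) == 8
```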
...@@ -45,40 +46,37 @@ num_label_types = get_simd_size(num_label_types)
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
chunk = data_layer(name="chunk", size=num_label_types)
crf_input = fc_layer(
    input=features,
    size=num_label_types,
    act=LinearActivation(),
    bias_attr=False,
    param_attr=ParamAttr(
        initial_std=0, sparse_update=True))
crf = crf_layer(
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(
        name="crfw", initial_std=0), )
crf_decoding = crf_decoding_layer(
    size=num_label_types,
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw"), )
sum_evaluator(
    name="error",
    input=crf_decoding, )
chunk_evaluator(
    name="chunk_f1",
    input=[crf_decoding, chunk],
    chunk_scheme="IOB",
    num_chunk_types=11, )
inputs(word, pos, chunk, features)
outputs(crf)
...@@ -16,10 +16,11 @@ from paddle.trainer_config_helpers import *
import math
define_py_data_sources2(
    train_list="data/train.list",
    test_list="data/test.list",
    module="dataprovider",
    obj="process")
batch_size = 16
settings(
...@@ -27,29 +28,27 @@ settings(
    batch_size=batch_size,
    regularization=L2Regularization(batch_size * 1e-5),
    average_window=0.5,
    learning_rate=2e-3,
    learning_rate_decay_a=5e-7,
    learning_rate_decay_b=0.5, )
word_dim = 128
hidden_dim = 128
with_rnn = True
initial_std = 1 / math.sqrt(hidden_dim)
param_attr = ParamAttr(initial_std=initial_std)
cpu_layer_attr = ExtraLayerAttribute(device=-1)
default_device(0)
num_label_types = 23
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
chunk = data_layer(
    name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)
emb = embedding_layer(
    input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
...@@ -58,73 +57,64 @@ hidden1 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
    input=[
        full_matrix_projection(emb), table_projection(
            pos, param_attr=param_attr)
    ])
if with_rnn:
    rnn1 = recurrent_layer(
        act=ReluActivation(),
        bias_attr=True,
        input=hidden1,
        param_attr=ParamAttr(initial_std=0), )
hidden2 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
    input=[full_matrix_projection(hidden1)] +
    ([full_matrix_projection(
        rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
if with_rnn:
    rnn2 = recurrent_layer(
        reverse=True,
        act=ReluActivation(),
        bias_attr=True,
        input=hidden2,
        param_attr=ParamAttr(initial_std=0), )
crf_input = mixed_layer(
    size=num_label_types,
    bias_attr=False,
    input=[full_matrix_projection(hidden2), ] +
    ([full_matrix_projection(
        rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
crf = crf_layer(
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(
        name="crfw", initial_std=0),
    layer_attr=cpu_layer_attr, )
crf_decoding = crf_decoding_layer(
    size=num_label_types,
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw"),
    layer_attr=cpu_layer_attr, )
sum_evaluator(
    name="error",
    input=crf_decoding, )
chunk_evaluator(
    name="chunk_f1",
    input=[crf_decoding, chunk],
    chunk_scheme="IOB",
    num_chunk_types=11, )
inputs(word, pos, chunk, features)
outputs(crf)
Algorithm Tutorial
==================
.. toctree::
:maxdepth: 1
rnn/rnn.rst
RNN Configuration
=================
This tutorial will guide you through configuring recurrent neural networks in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configuration. In this tutorial, you will learn how to:
...@@ -17,7 +17,7 @@ PaddlePaddle does not need any preprocessing to sequence data, such as padding.
.. code-block:: python

    settings.input_types = [
        integer_value_sequence(len(settings.src_dict)),
        integer_value_sequence(len(settings.trg_dict)),
        integer_value_sequence(len(settings.trg_dict))]
......
...@@ -4,7 +4,6 @@ Installing from Sources
* [1. Download and Setup](#download)
* [2. Requirements](#requirements)
* [3. Build on Ubuntu](#ubuntu)
* [4. Build on Mac OS X](#mac)
## <span id="download">Download and Setup</span>
You can download PaddlePaddle from the [github source](https://github.com/gangliao/Paddle).
...@@ -191,121 +190,3 @@ sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
# or just run
sudo paddle version
```
## <span id="mac">Building on Mac OS X</span>
### Prerequisites
This guide is based on Mac OS X 10.11 (El Capitan). Note that if you are running an up to date version of OS X,
you will already have Python 2.7.10 and Numpy 1.8 installed.
The best option is to use the package manager homebrew to handle installations and upgrades for you.
To install [homebrew](http://brew.sh/), first open a terminal window (you can find Terminal in the Utilities folder in Applications), and issue the command:
```bash
# install brew
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
# install pip
easy_install pip
```
### Install Dependencies
- **CPU Dependencies**
```bash
# Install fundamental dependencies
brew install glog gflags cmake protobuf openblas
# Install google test on Mac OS X
# Download gtest 1.7.0 (wget saves the archive as release-1.7.0.tar.gz)
wget https://github.com/google/googletest/archive/release-1.7.0.tar.gz
tar -xzf release-1.7.0.tar.gz && cd googletest-release-1.7.0
# Build gtest
mkdir build && cd build && cmake .. && make
# Install gtest library
sudo cp -r ../include/gtest /usr/local/include/
sudo cp lib*.a /usr/local/lib
```
- **GPU Dependencies(optional)**
To build the GPU version, you will need the following installed:
1. a CUDA-capable GPU
2. Mac OS X 10.11 or later
3. the Clang compiler and toolchain installed using Xcode
4. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
5. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on
distribution versions that have been qualified for this CUDA Toolkit release.
1. After downloading cuDNN library, issue the following commands:
```bash
sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib/libcudnn*
```
2. Then you need to set DYLD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
```bash
export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
export PATH=/usr/local/cuda/bin:$PATH
```
### Build and Install
As usual, the best option is to create a build folder under the paddle project directory.
```bash
mkdir build && cd build
cmake ..
```
CMake first checks PaddlePaddle's dependencies in the system default paths. After some optional
libraries are installed, the corresponding build options will be set automatically (for instance, glog, gtest and gflags).
If a dependency is still not found, you can set it manually based on the CMake error messages on your screen.
As a simple example, consider the following:
- **Only CPU**
```bash
cmake .. -DWITH_GPU=OFF
```
- **GPU**
```bash
cmake .. -DWITH_GPU=ON
```
- **GPU with doc and swig**
```bash
cmake .. -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON
```
Finally, you can build PaddlePaddle:
```bash
# you can add build option here, such as:
cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=<installation path>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `sysctl -n hw.ncpu` && make install
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<installation path>/bin:$PATH
```
**Note:**
If you set `WITH_SWIG_PY=ON`, the related python dependencies also need to be installed.
Otherwise, PaddlePaddle will automatically install the python dependencies
the first time you run paddle commands such as `paddle version` or `paddle train`.
This may require sudo privileges:
```bash
# you can run
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
# or just run
sudo paddle version
```
# Contribute Code
We sincerely appreciate your contributions. You can use the fork and pull request
workflow to merge your code.
......
Install and Build
=================
Install PaddlePaddle
----------------------
...@@ -18,11 +18,7 @@ Build from Source
.. warning::
    Please use :code:`deb` package or :code:`docker` image to install paddle. The building guide is used for hacking on or contributing to the PaddlePaddle source code.
If you want to hack on and contribute to the PaddlePaddle source code, the following guides can help you:
.. toctree::
  :maxdepth: 1
...@@ -30,4 +26,3 @@ If you want to hack and contribute PaddlePaddle source code, following guides ca
  build_from_source.md
  contribute_to_paddle.md
# Distributed Training
In this article, we explain how to run distributed Paddle training jobs on clusters. We will create the distributed version of the single-process training example, [recommendation](https://github.com/baidu/Paddle/tree/develop/demo/recommendation).
[Scripts](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train) used in this article launch distributed jobs via SSH. They also work as a reference for users running more sophisticated cluster management systems like MPI and Kubernetes.
## Prerequisite
1. Aforementioned scripts use a Python library [fabric](http://www.fabfile.org/) to run SSH commands. We can use `pip` to install fabric:
```bash
pip install fabric
```
Secondly, go through the install scripts to install PaddlePaddle on all nodes so that the demo can run in local mode. For CUDA-enabled training, we assume that CUDA is installed in ```/usr/local/cuda```; otherwise errors about missing CUDA runtime libraries could be reported at cluster runtime. In short, the local training environment should be well prepared for the simple scripts.
1. We need to install PaddlePaddle on all nodes in the cluster. To enable GPUs, we need to install CUDA in `/usr/local/cuda`; otherwise Paddle would report errors at runtime.
1. Set the `ROOT_DIR` variable in [`cluster_train/conf.py`] on all nodes. For convenience, we often create a Unix user `paddle` on all nodes and set `ROOT_DIR=/home/paddle`. In this way, we can write public SSH keys into `/home/paddle/.ssh/authorized_keys` so that user `paddle` can SSH to all nodes without password.
## Prepare Job Workspace
We refer to the directory where we put dependent libraries, config files, etc., as *workspace*.
The ```train/test``` data should be prepared before launching the cluster job. Because the train/test data may be placed in a different directory from the workspace, PaddlePaddle locates them through index files named ```train.list``` and ```test.list```, which are referenced in the model config file. So the train/test data directories also contain these two list files. Every local training demo already provides scripts to help you create the two files (see the sketch below), and all nodes in a cluster job handle files with the same logic.
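As a small, hedged illustration of what these index files contain (the file names here are hypothetical, not taken from any demo): each line of ```train.list``` or ```test.list``` is simply the path of one data file, so they can be generated with a few lines of Python.

```python
# Hypothetical example of generating the two index files referenced above.
train_files = ['data/train/part-000', 'data/train/part-001']
test_files = ['data/test/part-000']

with open('train.list', 'w') as f:
    f.write('\n'.join(train_files) + '\n')
with open('test.list', 'w') as f:
    f.write('\n'.join(test_files) + '\n')
```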
......
# Quick Start
This tutorial will teach the basics of deep learning (DL), including how to implement many different models in PaddlePaddle. You will learn how to:
- Prepare data into the standardized format that PaddlePaddle accepts.
......
...@@ -30,8 +30,6 @@ Several new files appear in the `data `directory as follows.
conll05st-release:the test data set of CoNll-2005 shared task
test.wsj.words:the Wall Street Journal data sentences
test.wsj.props: the propositional arguments
src.dict:the dictionary of words in sentences
tgt.dict:the labels dictionary
feature: the extracted features from data set
```
...@@ -67,6 +65,8 @@ def hook(settings, word_dict, label_dict, **kwargs):
    settings.label_dict = label_dict
    #all inputs are integral and sequential type
    settings.slots = [
        integer_value_sequence(len(word_dict)),
        integer_value_sequence(len(predicate_dict)),
        integer_value_sequence(len(word_dict)),
        integer_value_sequence(len(word_dict)),
        integer_value_sequence(len(word_dict)),
...@@ -77,34 +77,39 @@ def hook(settings, word_dict, label_dict, **kwargs):
```
The corresponding data iterator is as follows:
```
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
          can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
    with open(file_name, 'r') as fdata:
        for line in fdata:
            sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
                line.strip().split('\t')
            words = sentence.split()
            sen_len = len(words)
            word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
            predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
            ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
            ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
            ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
            ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
            ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
            marks = mark.split()
            mark_slot = [int(w) for w in marks]
            label_list = label.split()
            label_slot = [settings.label_dict.get(w) for w in label_list]
            yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
                ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
```
The `process` function yields 9 lists: 8 features and the label.
### Neural Network Config
`db_lstm.py` is the neural network config file to load the dictionaries and define the data provider module and network architecture during the training procedure.
Nine `data_layer` instances load data from the data provider. Eight features are transformed into embeddings respectively, and mixed by `mixed_layer`. Deep bidirectional LSTM layers extract features for the softmax layer. The objective function is the cross entropy of labels.
### Run Training
The script for training is `train.sh`; the user just needs to execute:
...@@ -115,27 +120,36 @@ The content in `train.sh`:
```
paddle train \
--config=./db_lstm.py \
--use_gpu=0 \
--log_period=5000 \
--trainer_count=1 \
--show_parameter_stats_period=5000 \
--save_dir=./output \
--num_passes=10000 \
--average_test_period=10000000 \
--init_model_path=./data \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
```
- \--config=./db_lstm.py : network config file.
- \--use_gpu=false: use CPU to train; set it to true if you installed the GPU version of PaddlePaddle and want to use the GPU (for now, crf_layer does not support GPU).
- \--log_period=5000: print a log every 5000 batches.
- \--trainer_count=1: set thread number (or GPU count).
- \--show_parameter_stats_period=5000: show parameter statistics every 5000 batches.
- \--save_dir=./output: output path to save models.
- \--num_passes=10000: set the number of passes; one pass in PaddlePaddle means training on all samples in the dataset once.
- \--average_test_period=10000000: test on the averaged parameters every average_test_period batches.
- \--init_model_path=./data: parameter initialization path.
- \--load_missing_parameter_strategy=rand: randomly initialize parameters that are missing from the initial model.
- \--test_all_data_in_one_period=1: test all data in one period.
After training, the models will be saved in the directory `output`. Our training curve is as follows:
<center>
![pic](./curve.jpg)
</center>
### Run testing
The script for testing is `test.sh`; the user just needs to execute:
...@@ -155,6 +169,7 @@ paddle train \
- \--model_list=$model_list.list: model list file
- \--job=test: indicate the test job
- \--config_args=is_test=1: flag to indicate test
- \--test_all_data_in_one_period=1: test all data in 1 period
### Run prediction
...@@ -166,11 +181,13 @@ The script for prediction is `predict.sh`, user just need to execute:
In `predict.sh`, the user should provide the network config file, model path, label file, word dictionary file, and feature file.
```
python predict.py \
    -c $config_file \
    -w $best_model_path \
    -l $label_file \
    -p $predicate_dict_file \
    -d $dict_file \
    -i $input_file \
    -o $output_file
```
`predict.py` is the main executable python script, which includes the following functions: loading the model, loading the data, and prediction. The network model will output the probability distribution of labels. In the demo, we take the label with the maximum probability as the result. Users can also implement beam search or Viterbi decoding on top of the probability distribution matrix.
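For illustration only, taking the label with the maximum probability from such a probability matrix could look like the sketch below; `prob` (a sequence_length x num_labels array) and `id_to_label` (an id-to-label mapping) are hypothetical names, not objects defined by `predict.py`.

```python
import numpy as np

def decode_max_prob(prob, id_to_label):
    """Pick the most probable label for every position in the sequence."""
    label_ids = np.argmax(prob, axis=1)
    return [id_to_label[int(i)] for i in label_ids]
```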
......
...@@ -6,7 +6,7 @@ Sentiment analysis is also used to monitor social media based on large amount of
On the other hand, grabbing user comments about products and analyzing their sentiment is useful for understanding user preferences for companies, products, and even competing products.
This tutorial will guide you through the process of training a Long Short Term Memory (LSTM) Network to classify the sentiment of sentences from [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/), sometimes known as the Internet Movie Database (IMDB). This dataset contains movie reviews along with their associated binary sentiment polarity labels, namely positive and negative. So randomly guessing yields 50% accuracy.
## Data Preparation
...@@ -39,7 +39,7 @@ imdbEr.txt imdb.vocab README test train
* imdbEr.txt: expected rating for each token in imdb.vocab.
* README: data documentation.
The files in the train set directory are as follows. The test set contains the same files except `unsup` and `urls_unsup.txt`.
```
labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt
...@@ -151,6 +151,7 @@ settings(
    batch_size=128,
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    average_window=0.5,
    regularization=L2Regularization(8e-4),
    gradient_clipping_threshold=25
)
...@@ -163,17 +164,18 @@ stacked_lstm_net(dict_dim, class_dim=class_dim,
* **Data Definition**:
    * get\_config\_arg(): get arguments set by `--config_args=xx` on the command line.
    * Define the data provider, here using the Python interface to load data. For details, you can refer to the PyDataProvider2 documentation.
* **Algorithm Configuration**:
    * set batch size of 128.
    * set global learning rate.
    * use adam optimization.
    * set average sgd window.
    * set L2 regularization.
    * set gradient clipping threshold.
* **Network Configuration**:
    * dict_dim: dictionary dimension.
    * class_dim: category number; IMDB has two labels, namely positive and negative.
    * `stacked_lstm_net`: the predefined network shown in Figure 3; used by default.
    * `bidirectional_lstm_net`: the predefined network shown in Figure 2.
......
Development Guide
=================
.. toctree::
:maxdepth: 1
layer.md
new_layer/new_layer.rst
../source/index.md
# Layer Documents
* [Layer Source Code Document](../source/gserver/layers/index.rst)
* [Layer Python API Document](../ui/api/trainer_config_helpers/index.rst)
Writing New Layers
==================
.. toctree::
:maxdepth: 3
new_layer.rst
==================
Writing New Layers
==================
...@@ -59,7 +60,7 @@ Implement C++ Class
The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/gserver/layers/FullyConnectedLayer.cpp`. We list a simplified version of the code below.
It needs to derive the base class :code:`paddle::Layer`, and it needs to override the following functions:
- constructor and destructor.
- :code:`init` function. It is used to initialize the parameters and settings.
......
PaddlePaddle Documentation
==========================
User Guide
----------
* [Introduction](introduction/index.md)
* [Quick Start](demo/quick_start/index_en.md)
* [Build and Installation](build/index.rst)
* [Contribute Code](build/contribute_to_paddle.md)
* [User Interface](ui/index.md)
* [Model Config Interface](ui/api/trainer_config_helpers/index.md)
* [Example and Demo](demo/index.md)
* [Cluster Train](cluster/index.md)
Development Guide
-----------------
* [Layer Documents](layer.md)
* [Writing New Layers](dev/new_layer/index.rst)
* [Source Code Documents](source/index.md)
Algorithm Tutorial
------------------
* [RNN Configuration](algorithm/rnn/rnn.rst)
PaddlePaddle Documentation
==========================
.. toctree::
:maxdepth: 1
introduction/index.md
user_guide.rst
dev/index.rst
algorithm/index.rst
...@@ -98,4 +98,3 @@ There, you have recovered the underlying pattern between `X` and `Y` only from o
- <a href="../build/index.html"> Build and Installation </a>
- <a href="../demo/quick_start/index_en.html">Quick Start</a>
- <a href="../demo/index.html">Example and Demo</a>
# Layer Documents
* [Layer Source Code Document](source/gserver/layers/index.rst)
* [Layer Python API Document](ui/api/trainer_config_helpers/layers_index.rst)
API
===
.. doxygenfile:: paddle/api/PaddleAPI.h
.. doxygenfile:: paddle/api/Internal.h
CUDA
====================
.. toctree::
:maxdepth: 3
cuda.rst
CUDA
====
.. toctree::
  :maxdepth: 2
  matrix.rst
  nn.rst
  utils.rst
Matrix
======
Base
----
hl_matrix.h
```````````
.. doxygenfile:: paddle/cuda/include/hl_matrix.h
hl_matrix_base.h
````````````````
.. doxygenfile:: paddle/cuda/include/hl_matrix_base.cuh
hl_matrix_apply.cuh
```````````````````
.. doxygenfile:: paddle/cuda/include/hl_matrix_apply.cuh
hl_matrix_ops.cuh
`````````````````
.. doxygenfile:: paddle/cuda/include/hl_matrix_ops.cuh
hl_matrix_type.cuh
``````````````````
.. doxygenfile:: paddle/cuda/include/hl_matrix_type.cuh
hl_sse_matrix_kernel.cuh
````````````````````````
.. doxygenfile:: paddle/cuda/include/hl_sse_matrix_kernel.cuh
Matrix Function
---------------
hl_batch_transpose.h
````````````````````
.. doxygenfile:: paddle/cuda/include/hl_batch_transpose.h
Sparse Matrix
--------------
hl_sparse.h
``````````````````
.. doxygenfile:: paddle/cuda/include/hl_sparse.h
hl_sparse.ph
``````````````````````
.. doxygenfile:: paddle/cuda/include/hl_sparse.ph
Others
---------------
hl_aggregate.h
``````````````
.. doxygenfile:: paddle/cuda/include/hl_aggregate.h
hl_top_k.h
``````````
.. doxygenfile:: paddle/cuda/include/hl_top_k.h
hl_table_apply.h
````````````````
.. doxygenfile:: paddle/cuda/include/hl_table_apply.h
Sparse Matrix
-------------
hl_sparse.h
```````````
.. doxygenfile:: paddle/cuda/include/hl_sparse.h
hl_sparse.ph
````````````
.. doxygenfile:: paddle/cuda/include/hl_sparse.ph
RNN
====================
.. toctree::
:maxdepth: 3
rnn.rst