提交 d539e780 编写于 作者: Y Yu Yang

Merge commit '12b61934'

*.DS_Store
build/
*.user
.vscode
.idea
\ No newline at end of file
language: cpp
cache: ccache
sudo: required
dist: trusty
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
addons:
apt:
packages:
- gcc-4.8
- g++-4.8
- wget
- git
- build-essential
- libatlas-base-dev
- python
- python-pip
- python2.7-dev
- m4
- libprotobuf-dev
- doxygen
- protobuf-compiler
- python-protobuf
- python-numpy
- python-wheel
- libgoogle-glog-dev
- libgflags-dev
- libgtest-dev
before_install:
- pip install wheel protobuf sphinx breathe recommonmark
- sudo paddle/scripts/travis/before_install.sh
script:
- paddle/scripts/travis/main.sh
notifications:
email:
on_success: change
on_failure: always
......@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0)
set(PADDLE_MINOR_VERSION 8)
set(PADDLE_PATCH_VERSION 0b)
set(PADDLE_PATCH_VERSION 0b1)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
......@@ -14,8 +14,10 @@ find_package(CUDA QUIET)
find_package(Protobuf REQUIRED)
find_package(PythonLibs 2.7 REQUIRED)
find_package(PythonInterp 2.7 REQUIRED)
find_package(NumPy)
find_package(ZLIB REQUIRED)
find_package(NumPy REQUIRED)
find_package(Threads REQUIRED)
find_package(AVX QUIET)
find_package(Glog)
find_package(Gflags QUIET)
find_package(GTest)
......@@ -27,7 +29,7 @@ find_program(M4_EXECUTABLE m4)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked libraries" ON)
option(WITH_GPU "Compile PaddlePaddle with gpu" ${CUDA_FOUND})
option(WITH_DOUBLE "Compile PaddlePaddle with double precision, otherwise use single precision" OFF)
option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ON) # TODO(yuyang18): Check AVX is supported or not as default value
option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND})
option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
......@@ -37,6 +39,7 @@ option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
option(ON_TRAVIS "Running test on travis-ci or not." OFF)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
......@@ -99,8 +102,8 @@ if(NOT WITH_TIMER)
endif(NOT WITH_TIMER)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
......
# PaddlePaddle
[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)
Welcome to the PaddlePaddle GitHub.
The software will be released on Sept. 30 with full documentation and installation support.
A pre-release version is available now for those who are eager to take a look.
PaddlePaddle (PArallel Distributed Deep LEarning) is an easy-to-use,
efficient, flexible and scalable deep learning platform, which is originally
......
# This file is used to check every level of AVX support on your machine
# so that PaddlePaddle can unleash the vectorization power of multicore.
#
# Variables set by this file:
#   FIND_AVX_10 - result of compiling and running the AVX test program.
#   FIND_AVX_20 - result of compiling and running the AVX2 test program.
#   AVX_FLAGS   - compiler flags for every level whose test program passed.
#   AVX_FOUND   - TRUE when at least one level passed, otherwise unset.

INCLUDE(CheckCXXSourceRuns)

SET(FIND_AVX_10)
SET(FIND_AVX_20)
SET(AVX_FLAGS)
SET(AVX_FOUND)

# CMAKE_REQUIRED_FLAGS is shared with every other check_* call made by the
# including scope, so remember the caller's value and put it back at the end.
SET(__avx_saved_required_flags "${CMAKE_REQUIRED_FLAGS}")

# Check AVX 2
SET(CMAKE_REQUIRED_FLAGS)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    SET(CMAKE_REQUIRED_FLAGS "-mavx2")
ELSEIF(MSVC AND NOT CMAKE_CL_64)  # reserve for WINDOWS
    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
ENDIF()

CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}" FIND_AVX_20)

# Check AVX
SET(CMAKE_REQUIRED_FLAGS)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    SET(CMAKE_REQUIRED_FLAGS "-mavx")
ELSEIF(MSVC AND NOT CMAKE_CL_64)
    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
ENDIF()

CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}" FIND_AVX_10)

# Do not leak the probe flags into later configure checks.
SET(CMAKE_REQUIRED_FLAGS "${__avx_saved_required_flags}")
SET(__avx_saved_required_flags)

# Test the result variables by name: a failed check leaves the variable empty,
# and expanding it with ${} would hand IF() an empty argument list.
IF(FIND_AVX_20)
    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
        SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
    ELSEIF(MSVC)
        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
    ENDIF()
ENDIF()

IF(FIND_AVX_10)
    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
        SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
    ELSEIF(MSVC)
        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
    ENDIF()
ENDIF()

IF(FIND_AVX_10 OR FIND_AVX_20)
    SET(AVX_FOUND TRUE)
    MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
ENDIF()
......@@ -65,12 +65,14 @@ set(OPENBLAS_ROOT $ENV{OPENBLAS_ROOT} CACHE PATH "Folder contains Openblas")
set(OPENBLAS_INCLUDE_SEARCH_PATHS
${OPENBLAS_ROOT}/include
/usr/include
/usr/include/openblas)
/usr/include/openblas
/usr/local/opt/openblas/include)
set(OPENBLAS_LIB_SEARCH_PATHS
${OPENBLAS_ROOT}/lib
/usr/lib
/usr/lib/blas/openblas
/usr/lib/openblas)
/usr/lib/openblas
/usr/local/opt/openblas/lib)
find_path(OPENBLAS_INC_DIR NAMES cblas.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
......
......@@ -15,7 +15,7 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
$ENV{CUDNN_ROOT}/lib64
$ENV{CUDNN_ROOT}/lib
/usr/lib)
find_library(CUDNN_LIBRARY NAMES libcudnn.so # libcudnn_static.a
find_library(CUDNN_LIBRARY NAMES libcudnn.so libcudnn.dylib # libcudnn_static.a
PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist}
NO_DEFAULT_PATH
DOC "Path to cuDNN library.")
......
......@@ -74,13 +74,37 @@ endforeach()
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
# specify_cuda_arch
# Append one "-gencode" flag for a user-selected compute capability to the
# caller's __arch_flags list.
#
# First Argument:  CUDA toolkit version, e.g. "8.0".
# Second Argument: compute capability without the dot, e.g. "61".
#
# Capabilities 61 and 62 are accepted when the toolkit version is greater
# than 8.0, capability 53 when it is greater than 7.0. Any other combination
# leaves __arch_flags untouched.
# NOTE(review): VERSION_GREATER excludes the boundary release itself (8.0 and
# 7.0), matching the other version checks in this file - confirm that is
# intended.
function(specify_cuda_arch cuda_version cuda_arch)
  set(arch_is_supported OFF)

  if("${cuda_version}" VERSION_GREATER "8.0")
    foreach(capability 61 62)
      if("${cuda_arch}" STREQUAL "${capability}")
        set(arch_is_supported ON)
      endif()
    endforeach()
  endif()

  # if() keywords are case-sensitive, so the conjunction must be spelled AND.
  # This test is independent of the one above so that capability 53 is still
  # honoured on toolkits newer than 8.0.
  if("${cuda_version}" VERSION_GREATER "7.0" AND "${cuda_arch}" STREQUAL "53")
    set(arch_is_supported ON)
  endif()

  if(arch_is_supported)
    list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}")
    # A function body has its own variable scope; without PARENT_SCOPE the
    # appended flag would be discarded when the function returns.
    set(__arch_flags "${__arch_flags}" PARENT_SCOPE)
  endif()
endfunction()
# Common gpu architectures: Kepler, Maxwell
foreach(capability 30 35 50)
list(APPEND __arch_flags "-gencode arch=compute_${capability},code=sm_${capability}")
list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}")
endforeach()
if (CUDA_VERSION VERSION_GREATER "7.0")
list(APPEND __arch_flags "-gencode arch=compute_52,code=sm_52")
list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52")
endif()
set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS})
# Modern gpu architectures: Pascal
if (CUDA_VERSION VERSION_GREATER "8.0")
list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60")
endif()
# Custom gpu architecture
set(CUDA_ARCH)
if(CUDA_ARCH)
specify_cuda_arch(${CUDA_VERSION} ${CUDA_ARCH})
endif()
set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS})
# Some common routine for paddle compile.
# target_circle_link_libraries
# Link libraries to target which has circle dependencies.
#
# First Argument: target name want to be linked with libraries
# Rest Arguments: libraries which link together.
function(target_circle_link_libraries TARGET_NAME)
# The extra arguments (ARGN) are library names. The marker tokens
# ARCHIVE_START / ARCHIVE_END are not libraries: they bracket the libraries
# that get the platform's whole-archive treatment below.
if(APPLE)
# LIBS accumulates the link line; libsInArgn records only the library names.
set(LIBS)
set(inArchive OFF)
set(libsInArgn)
foreach(arg ${ARGN})
if(${arg} STREQUAL "ARCHIVE_START")
set(inArchive ON)
elseif(${arg} STREQUAL "ARCHIVE_END")
set(inArchive OFF)
else()
if(inArchive)
# Between the markers every library is preceded by -Wl,-force_load.
list(APPEND LIBS "-Wl,-force_load")
endif()
list(APPEND LIBS ${arg})
list(APPEND libsInArgn ${arg})
endif()
endforeach()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
list(APPEND LIBS "-undefined dynamic_lookup")
endif()
# The library names are passed a second time, in reverse order, after LIBS.
list(REVERSE libsInArgn)
target_link_libraries(${TARGET_NAME}
${LIBS}
${libsInArgn})
else() # LINUX
# Translate the markers into -Wl,--whole-archive / -Wl,--no-whole-archive and
# copy every other argument through unchanged.
set(LIBS)
foreach(arg ${ARGN})
if(${arg} STREQUAL "ARCHIVE_START")
list(APPEND LIBS "-Wl,--whole-archive")
elseif(${arg} STREQUAL "ARCHIVE_END")
list(APPEND LIBS "-Wl,--no-whole-archive")
else()
list(APPEND LIBS ${arg})
endif()
endforeach()
# NOTE(review): the call below appears twice - first with ${ARGN} (markers
# passed through untranslated), then with ${LIBS}. This looks like the old
# and new side of a diff shown together; confirm that only the ${LIBS}
# variant is meant to be live.
target_link_libraries(${TARGET_NAME}
-Wl,--start-group
${ARGN}
-Wl,--end-group)
"-Wl,--start-group"
${LIBS}
"-Wl,--end-group")
endif()
endfunction()
# compile_cu_as_cpp
......@@ -41,20 +80,20 @@ function(link_paddle_exe TARGET_NAME)
if(PADDLE_WITH_INTERNAL)
set(INTERAL_LIBS paddle_internal_gserver paddle_internal_parameter)
target_circle_link_libraries(${TARGET_NAME}
-Wl,--whole-archive
ARCHIVE_START
paddle_internal_gserver
paddle_internal_owlqn
-Wl,--no-whole-archive
ARCHIVE_END
paddle_internal_parameter)
else()
set(INTERAL_LIBS "")
endif()
target_circle_link_libraries(${TARGET_NAME}
-Wl,--whole-archive
ARCHIVE_START
paddle_gserver
${METRIC_LIBS}
-Wl,--no-whole-archive
ARCHIVE_END
paddle_pserver
paddle_trainer_lib
paddle_network
......@@ -67,9 +106,9 @@ function(link_paddle_exe TARGET_NAME)
${PROTOBUF_LIBRARY}
${CMAKE_THREAD_LIBS_INIT}
${CBLAS_LIBS}
${CMAKE_DL_LIBS}
${ZLIB_LIBRARIES}
${INTERAL_LIBS}
-lz)
${CMAKE_DL_LIBS})
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
......
......@@ -20,9 +20,8 @@ from optparse import OptionParser
import paddle.utils.image_util as image_util
from py_paddle import swig_paddle, util
from py_paddle import DataProviderWrapperConverter
from paddle.trainer.PyDataProviderWrapper import DenseSlot
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
......@@ -75,8 +74,8 @@ class ImageClassifier():
self.network.loadParameters(self.model_dir)
data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
slots = [DenseSlot(data_size)]
self.converter = util.DataProviderWrapperConverter(False, slots)
slots = [dense_vector(data_size)]
self.converter = DataProviderConverter(slots)
def get_data(self, img_path):
"""
......
......@@ -14,8 +14,6 @@
# limitations under the License.
set -e
export PYTHONPATH=$PYTHONPATH:../../
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
......@@ -22,9 +22,8 @@ from optparse import OptionParser
import paddle.utils.image_util as image_util
from py_paddle import swig_paddle, util
from py_paddle import DataProviderWrapperConverter
from paddle.trainer.PyDataProviderWrapper import DenseSlot
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
......@@ -85,9 +84,8 @@ class ImageClassifier():
self.network.loadParameters(self.model_dir)
data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
slots = [DenseSlot(data_size)]
is_sequence = False
self.converter = util.DataProviderWrapperConverter(is_sequence, slots)
slots = [dense_vector(data_size)]
self.converter = DataProviderConverter(slots)
def get_data(self, img_path):
"""
......
#!/bin/sh
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -33,7 +33,7 @@ test_num=$((min_len/10))
if [ $test_num -gt 12500 ];then
test_num=12500
fi
train_num=((min_len-test_num))
train_num=$((min_len-test_num))
head -n$train_num pos.shuffed >train.pos
head -n$train_num neg.shuffed >train.neg
......
......@@ -12,15 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
import cPickle as pickle
except ImportError:
import pickle
from paddle.trainer.PyDataProvider2 import *
import common_utils # parse
def hook(settings, meta, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store
......@@ -47,7 +41,6 @@ def hook(settings, meta, **kwargs):
settings.input_types = headers
settings.meta = meta
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
with open(filename, 'r') as f:
......
......@@ -15,8 +15,8 @@
import os
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, util, DataProviderWrapperConverter
from paddle.trainer.PyDataProviderWrapper import IndexSlot
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
"""
Usage: run following command to show help message.
......@@ -50,9 +50,15 @@ class Prediction():
conf.model_config)
self.network.loadParameters(model_dir)
slots = [IndexSlot(len_dict), IndexSlot(len_dict), IndexSlot(len_dict),
IndexSlot(len_dict), IndexSlot(len_dict), IndexSlot(2)]
self.converter = util.DataProviderWrapperConverter(True, slots)
slots = [
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file):
"""
......
......@@ -15,8 +15,8 @@
import os
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, util, DataProviderWrapperConverter
from paddle.trainer.PyDataProviderWrapper import IndexSlot
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
"""
......@@ -46,8 +46,8 @@ class SentimentPrediction():
conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
self.network.loadParameters(self.model_dir)
slots = [IndexSlot(self.dict_dim)]
self.converter = util.DataProviderWrapperConverter(True, slots)
slots = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(slots)
def load_dict(self):
"""
......
......@@ -65,7 +65,7 @@ def bidirectional_lstm_net(input_dim,
bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
output = fc_layer(input=dropout, size=class_dim,
act_type=SoftmaxActivation())
act=SoftmaxActivation())
if not is_predict:
lbl = data_layer("label", 1)
......
......@@ -128,12 +128,16 @@ def gru_encoder_decoder(data_conf,
return out
decoder_group_name = "decoder_group"
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
StaticInput(input=encoded_proj,is_seq=True)]
if not is_generating:
trg_embedding = embedding_layer(
input=data_layer(name='target_language_word',
size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embedding (the ground truth) is the data input,
......@@ -142,22 +146,13 @@ def gru_encoder_decoder(data_conf,
# for the recurrent_group.
decoder = recurrent_group(name=decoder_group_name,
step=gru_decoder_with_attention,
input=[
StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True), trg_embedding
])
input=group_inputs)
lbl = data_layer(name='target_language_next_word',
size=target_dict_dim)
cost = classification_cost(input=decoder, label=lbl, )
cost = classification_cost(input=decoder, label=lbl)
outputs(cost)
else:
gen_inputs = [StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True), ]
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
......@@ -171,16 +166,18 @@ def gru_encoder_decoder(data_conf,
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
gen_inputs.append(trg_embedding)
group_inputs.append(trg_embedding)
beam_gen = beam_search(name=decoder_group_name,
step=gru_decoder_with_attention,
input=gen_inputs,
id_input=data_layer(name="sent_id",
size=1),
dict_file=trg_dict_path,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length,
max_length=max_length)
seqtext_printer_evaluator(input=beam_gen,
id_input=data_layer(name="sent_id", size=1),
dict_file=trg_dict_path,
result_file=gen_trans_file)
outputs(beam_gen)
if(NOT DEFINED SPHINX_THEME)
set(SPHINX_THEME default)
endif()
......
......@@ -30,7 +30,7 @@ Then at the :code:`process` function, each :code:`yield` function will return th
yield src_ids, trg_ids, trg_ids_next
For more details description of how to write a data provider, please refer to :doc:`Python Data Provider <../py_data_provider_wrapper>`. The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
For a more detailed description of how to write a data provider, please refer to `PyDataProvider2 <../../ui/data_provider/index.html>`_. The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
===============================================
Configure Recurrent Neural Network Architecture
......@@ -106,7 +106,7 @@ We will use the sequence to sequence model with attention as an example to demon
In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural network. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` are called the *encoder vector*. The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`.
The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to :doc:`Layers <../trainer_config_helpers/layers>` for more details.
The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to `Layers <../../ui/api/trainer_config_helpers/layers_index.html>`_ for more details.
We also project the encoder vector to :code:`decoder_size` dimensional space, get the first instance of the backward recurrent network, and project it to :code:`decoder_size` dimensional space:
......@@ -143,11 +143,15 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network.
.. code-block:: python
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
StaticInput(input=encoded_proj,is_seq=True)]
trg_embedding = embedding_layer(
input=data_layer(name='target_language_word',
size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embedding (the ground truth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
......@@ -156,13 +160,7 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network.
# All sequence inputs should have the same length.
decoder = recurrent_group(name=decoder_group_name,
step=gru_decoder_with_attention,
input=[
StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True),
trg_embedding
])
input=group_inputs)
The implementation of the step function is listed as below. First, it defines the **memory** of the decoder network. Then it defines attention, gated recurrent unit step function, and the output function:
......@@ -205,22 +203,23 @@ After training the model, we can use it to generate sequences. A common practice
* use :code:`GeneratedInput` for trg_embedding. :code:`GeneratedInput` computes the embedding of the generated token at the last time step for the input at the current time step.
* use :code:`beam_search` function. This function needs to set:
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
- :code:`dict_file`: the dictionary file for converting word id to word.
- :code:`bos_id`: the start token. Every sentence starts with the start token.
- :code:`eos_id`: the end token. Every sentence ends with the end token.
- :code:`beam_size`: the beam size used in beam search.
- :code:`max_length`: the maximum length of the generated sentences.
* use :code:`seqtext_printer_evaluator` to print text according to index matrix and dictionary. This function needs to set:
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
- :code:`dict_file`: the dictionary file for converting word id to word.
- :code:`result_file`: the path of the generation result file.
The code is listed below:
.. code-block:: python
gen_inputs = [StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True), ]
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
StaticInput(input=encoded_proj,is_seq=True)]
# In generation, decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
......@@ -231,21 +230,22 @@ The code is listed below:
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
gen_inputs.append(trg_embedding)
group_inputs.append(trg_embedding)
beam_gen = beam_search(name=decoder_group_name,
step=gru_decoder_with_attention,
input=gen_inputs,
id_input=data_layer(name="sent_id",
size=1),
dict_file=trg_dict_path,
input=group_inputs,
bos_id=0, # Beginning token.
eos_id=1, # End of sentence token.
beam_size=beam_size,
max_length=max_length,
max_length=max_length)
seqtext_printer_evaluator(input=beam_gen,
id_input=data_layer(name="sent_id", size=1),
dict_file=trg_dict_path,
result_file=gen_trans_file)
outputs(beam_gen)
Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to :doc:`Semantic Role Labeling Demo <../../../demo/semantic_role_labeling>` for more details.
Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to `Semantic Role Labeling Demo <../../demo/semantic_role_labeling/index.html>`_ for more details.
The full configuration file is located at :code:`demo/seqToseq/seqToseq_net.py`.
Build and Install
Installing from Sources
=================
## Requirement
* [1. Download and Setup](#download)
* [2. Requirements](#requirements)
* [3. Build on Ubuntu](#ubuntu)
* [4. Build on Mac OS X](#mac)
### Dependents
## <span id="download">Download and Setup</span>
You can download PaddlePaddle from the [github source](https://github.com/baidu/Paddle).
- **CMake**: required for 2.8+ version
- **g++**: a recent c++ compiler supporting c++11, >= 4.6, < 5
- **BLAS library**: such as openBLAS, MKL, ATLAS
- **protobuf**: required for 2.4+ version, 3.x is not supported
- **python**: currently only 2.7 version is supported
### Optional
```bash
git clone https://github.com/baidu/Paddle paddle
cd paddle
```
PaddlePaddle also support some build options, you have to install related libraries.
## <span id="requirements">Requirements</span>
To compile the source code, your computer must be equipped with GCC >=4.6 or Clang compiler.
### Dependencies
- **CMake**: version >= 2.8
- **BLAS**: MKL, OpenBlas or ATLAS
- **protobuf**: version >= 2.4, **Note: 3.x is not supported**
- **python**: only python 2.7 is supported currently
### Options
PaddlePaddle supports some build options. To enable it, first you need to install the related libraries.
<style type="text/css">
.tg {border-collapse:collapse;border-spacing:0;border-color:#ccc;}
.tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border-style:solid;border-width:0px;overflow:hidden;word-break:normal;border-color:#ccc;color:#333;background-color:#fff;border-top-width:1px;border-bottom-width:1px;}
.tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border-style:solid;border-width:0px;overflow:hidden;word-break:normal;border-color:#ccc;color:#333;background-color:#f0f0f0;border-top-width:1px;border-bottom-width:1px;}
.tg .tg-yw4l{vertical-align:top}
.tg .tg-9hbo{font-weight:bold;vertical-align:top}
</style>
<table class="tg">
<tr>
<th class="tg-yw4l">Optional</th>
<th class="tg-yw4l">Description</th>
</tr>
<tr>
<td class="tg-9hbo">WITH_GPU</td>
<td class="tg-yw4l">Compile with GPU mode.</td>
</tr>
<tr>
<td class="tg-9hbo">WITH_DOUBLE</td>
<td class="tg-yw4l">Compile with double precision floating-point, default: single precision.</td>
</tr>
<tr>
<td class="tg-9hbo">WITH_GLOG</td>
<td class="tg-yw4l">Compile with glog. If not found, default: an internal log implementation.</td>
</tr>
<tr>
<td class="tg-9hbo">WITH_GFLAGS</td>
<td class="tg-yw4l">Compile with gflags. If not found, default: an internal flag implementation.</td>
</tr>
<tr>
<td class="tg-9hbo">WITH_TESTING</td>
<td class="tg-yw4l">Compile with gtest for PaddlePaddle's unit testing.</td>
</tr>
<tr>
<td class="tg-9hbo">WITH_DOC</td>
<td class="tg-yw4l">Compile to generate PaddlePaddle's docs, default: disabled (OFF)</td>
</tr>
<tr>
<td class="tg-9hbo">WITH_SWIG_PY</td>
<td class="tg-yw4l">Compile with python predict API, default: disabled (OFF).</td>
</tr>
<tr>
<td class="tg-9hbo">WITH_STYLE_CHECK</td>
<td class="tg-yw4l">Compile with code style check, default: enabled (ON).</td>
</tr>
</table>
**Note:**
- The GPU version works best with Cuda Toolkit 7.5 and cuDNN v5.
- Other versions like Cuda Toolkit 6.5, 7.0, 8.0 and cuDNN v2, v3, v4 are also supported.
- **To utilize cuDNN v5, Cuda Toolkit 7.5 is prerequisite and vice versa.**
As a simple example, consider the following:
1. **Python Dependencies(optional)**
To compile PaddlePaddle with python predict API, make sure swig installed and set `-DWITH_SWIG_PY=ON` as follows:
```bash
# install swig on ubuntu
sudo apt-get install swig
# install swig on Mac OS X
brew install swig
# activate swig in cmake
cmake .. -DWITH_SWIG_PY=ON
```
2. **Doc Dependencies(optional)**
To generate PaddlePaddle's documentation, install dependencies and set `-DWITH_DOC=ON` as follows:
```bash
pip install 'sphinx>=1.4.0'
pip install sphinx_rtd_theme breathe recommonmark
# install doxygen on Ubuntu
sudo apt-get install doxygen
# install doxygen on Mac OS X
brew install doxygen
# activate docs in cmake
cmake .. -DWITH_DOC=ON
```
## <span id="ubuntu">Build on Ubuntu 14.04</span>
- **WITH_GPU**: Compile with gpu mode
- The GPU version works best with Cuda Toolkit 7.5 and cuDNN v5
- Other versions Cuda Toolkit 6.5, 7.0 and cuDNN v2, v3, v4 are also supported
- Note: to utilize cuDNN v5, Cuda Toolkit 7.5 is prerequisite and vice versa
- **WITH_DOUBLE**: Compile with double precision, otherwise use single precision
- **WITH_GLOG**: Compile with glog, otherwise use a log implement internally
- **WITH_GFLAGS**: Compile with gflags, otherwise use a flag implement internally
- **WITH_TESTING**: Compile with gtest and run unittest for PaddlePaddle
- **WITH_DOC**: Compile with documentation
- **WITH_SWIG_PY**: Compile with python predict api
- **WITH_STYLE_CHECK**: Style check for source code
### Install Dependencies
- **CPU Dependencies**
## Building on Ubuntu14.04
```bash
# necessary
sudo apt-get update
sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev m4 libprotobuf-dev protobuf-compiler python-protobuf python-numpy git
# optional
sudo apt-get install libgoogle-glog-dev
sudo apt-get install libgflags-dev
sudo apt-get install libgtest-dev
sudo pip install wheel
pushd /usr/src/gtest
cmake .
make
sudo cp *.a /usr/lib
popd
```
- **GPU Dependencies (optional)**
To build GPU version, you will need the following installed:
1. a CUDA-capable GPU
2. A supported version of Linux with a gcc compiler and toolchain
3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on
distribution versions that have been qualified for this CUDA Toolkit release.
After downloading cuDNN library, issue the following commands:
```bash
sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
```
Then you need to set LD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
```bash
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export PATH=/usr/local/cuda/bin:$PATH
```
### Install Dependencies
### Build and Install
- **CPU Dependencies**
As usual, the best option is to create build folder under paddle project directory.
```bash
# necessary
sudo apt-get update
sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev m4 libprotobuf-dev protobuf-compiler python-protobuf python-numpy git
# optional
sudo apt-get install libgoogle-glog-dev
sudo apt-get install libgflags-dev
sudo apt-get install libgtest-dev
pushd /usr/src/gtest
cmake .
make
sudo cp *.a /usr/lib
popd
mkdir build && cd build
cmake ..
```
CMake first checks PaddlePaddle's dependencies in the system default paths. After installing some optional
libraries, corresponding build option will be set automatically (for instance, glog, gtest and gflags).
If still not found, you can manually set it based on CMake error information from your screen.
- **GPU Dependencies(optional)**
As a simple example, consider the following:
If you need to build GPU version, the first thing you need is a machine that has GPU and CUDA installed.
And you also need to install cuDNN.
- **Only CPU**
You can download CUDA toolkit and cuDNN from nvidia website:
```bash
cmake .. -DWITH_GPU=OFF -DWITH_DOC=OFF
```
- **GPU**
```bash
https://developer.nvidia.com/cuda-downloads
https://developer.nvidia.com/cudnn
```
You can copy cuDNN files into the CUDA toolkit directory, such as:
```bash
cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF
```
```bash
sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
```
Then you need to set LD\_LIBRARY\_PATH, CUDA\_HOME and PATH environment variables in ~/.bashrc.
- **GPU with doc and swig**
```bash
cmake .. -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON
```
Finally, you can build PaddlePaddle:
```bash
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export CUDA_HOME=/usr/local/cuda
export PATH=/usr/local/cuda/bin:$PATH
# you can add build option here, such as:
cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=<path to install>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<path to install>/bin:$PATH
```
- **Python Dependencies(optional)**
If you want to compile PaddlePaddle with python predict api, you need to add -DWITH_SWIG_PY=ON in cmake command and install these first:
**Note:**
If you set `WITH_SWIG_PY=ON`, related python dependencies also need to be installed.
Otherwise, PaddlePaddle will automatically install python dependencies
the first time the user runs paddle commands, such as `paddle version`, `paddle train`.
It may require sudo privileges:
```bash
sudo apt-get install swig
# you can run
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
# or just run
sudo paddle version
```
- **Doc Dependencies(optional)**
## <span id="mac">Building on Mac OS X</span>
If you want to compile PaddlePaddle with doc, you need to add -DWITH_DOC=ON in cmake command and install these first:
### Prerequisites
This guide is based on Mac OS X 10.11 (El Capitan). Note that if you are running an up to date version of OS X,
you will already have Python 2.7.10 and Numpy 1.8 installed.
The best option is to use the package manager homebrew to handle installations and upgrades for you.
To install [homebrew](http://brew.sh/), first open a terminal window (you can find Terminal in the Utilities folder in Applications), and issue the command:
```bash
pip install sphinx
pip install sphinx_rtd_theme breathe recommonmark
sudo apt-get install python-sphinx doxygen
# install brew
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
# install pip
easy_install pip
```
### Build and Install
### Install Dependencies
CMake will find dependent libraries in system default paths first. After installing some optional libraries, corresponding build option will automatically be on(such as glog, gtest and gflags). And if libraries are not found, you have to set following variables manually in cmake command(CUDNN_ROOT, ATLAS_ROOT, MKL_ROOT, OPENBLAS_ROOT).
- **CPU Dependencies**
Here are some examples of cmake command with different options:
```bash
# Install fundamental dependents
brew install glog gflags cmake protobuf openblas
# Install google test on Mac OS X
# Download gtest 1.7.0
wget https://github.com/google/googletest/archive/release-1.7.0.tar.gz
tar -xvf googletest-release-1.7.0.tar.gz && cd googletest-release-1.7.0
# Build gtest
mkdir build && cmake ..
make
# Install gtest library
sudo cp -r ../include/gtest /usr/local/include/
sudo cp lib*.a /usr/local/lib
```
**only cpu**
- **GPU Dependencies(optional)**
```bash
cmake -DWITH_GPU=OFF -DWITH_DOC=OFF
```
To build GPU version, you will need the following installed:
**gpu**
1. a CUDA-capable GPU
2. Mac OS X 10.11 or later
2. the Clang compiler and toolchain installed using Xcode
3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
```bash
cmake -DWITH_GPU=ON -DWITH_DOC=OFF
```
The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on
distribution versions that have been qualified for this CUDA Toolkit release.
1. After downloading cuDNN library, issue the following commands:
```bash
sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
```
2. Then you need to set DYLD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
```bash
export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
export PATH=/usr/local/cuda/bin:$PATH
```
### Build and Install
**gpu with doc and swig**
As usual, the best option is to create build folder under paddle project directory.
```bash
cmake -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON
mkdir build && cd build
cmake ..
```
Finally, you can download source code and build:
CMake first checks for PaddlePaddle's dependencies in the system default paths. After installing some optional
libraries, the corresponding build options will be set automatically (for instance, glog, gtest and gflags).
If a library is still not found, you can set its path manually based on the CMake error information on your screen.
As a simple example, consider the following:
- **Only CPU**
```bash
cmake .. -DWITH_GPU=OFF -DWITH_DOC=OFF
```
- **GPU**
```bash
cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF
```
- **GPU with doc and swig**
```bash
cmake .. -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON
```
Finally, you can build PaddlePaddle:
```bash
git clone https://github.com/baidu/Paddle paddle
cd paddle
mkdir build
cd build
# you can add build option here, such as:
cmake -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=<path to install> ..
cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=<installation path>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# PaddlePaddle installation path
export PATH=<path to install>/bin:$PATH
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<installation path>/bin:$PATH
```
**Note**
**Note:**
And if you set WITH_SWIG_PY=ON, you have to install related python predict api at the same time:
If you set `WITH_SWIG_PY=ON`, related python dependencies also need to be installed.
Otherwise, PaddlePaddle will automatically install python dependencies
the first time the user runs paddle commands, such as `paddle version`, `paddle train`.
It may require sudo privileges:
```bash
pip install <path to install>/opt/paddle/share/wheels/*.whl
# you can run
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
# or just run
sudo paddle version
```
\ No newline at end of file
......@@ -25,9 +25,12 @@ repo or just head straight to the command line:
```shell
# Clone your fork to your local machine
git clone git@github.com:USERNAME/paddle.git
git clone https://github.com/USERNAME/Paddle.git
```
Then you can start to develop by making a local development branch
```shell
git checkout -b MY_COOL_STUFF_BRANCH origin/master
```
Then you can start to develop.
## Commit
......@@ -45,14 +48,14 @@ are the details if any.
## Keeping Fork Up to Date
Before pull your request, you shold sync you code from the latest PaddlePaddle.
Before pull your request, you should sync your code from the latest PaddlePaddle.
To do this, you'll need to add a remote at first:
```shell
# see the current configured remote repository
git remote -v
# add upstream repository
git remote add upstream https://github.com/paddle/paddle.git
git remote add upstream https://github.com/baidu/Paddle.git
# verify the new upstream
git remote -v
```
......@@ -60,8 +63,7 @@ git remote -v
Update your fork with the latest upstream changes:
```shell
git fetch upstream
git pull upstream master
git pull --rebase upstream HEAD
```
If there are no unique commits locally, git will simply perform a fast-forward.
......@@ -74,10 +76,26 @@ Now, your local master branch is up-to-date with everything modified upstream.
```shell
# push to your repository in Github
git push origin master
git push origin HEAD
```
## Pull Request
Go to the page for your fork on GitHub, select your development branch,
and click the **pull request button**.
## Update your pull request with the latest version
During the code review, your pull request may become stale because of new commits in
baidu/Paddle. GitHub allows automatic update if there is no conflict. You can do this
by clicking the "Update Branch" button in your pull request page. However, in the case
of conflict, you need to do the update manually. You need to do the following on
your local repository:
```shell
git checkout MY_COOL_STUFF_BRANCH
git pull --rebase upstream HEAD
# You may need to resolve the conflict according to the git prompt.
# Make and test your code.
git push -f origin HEAD
```
Now your Pull Request is updated with the latest version.
Docker installation guide
====================
PaddlePaddle provides some pre-compiled binaries, including Docker images and Ubuntu deb packages. Contributions of installation packages for other Linux distributions (such as Ubuntu, CentOS, Debian, Gentoo and so on) are welcome. We recommend using Docker images to deploy PaddlePaddle.
## Docker installation
Docker is a tool designed to make it easier to create, deploy, and run applications by using containers.
### PaddlePaddle Docker images
There are six Docker images:
- paddledev/paddle:cpu-latest: PaddlePaddle CPU binary image.
- paddledev/paddle:gpu-latest: PaddlePaddle GPU binary image.
- paddledev/paddle:cpu-devel-latest: PaddlePaddle CPU binary image plus source code.
- paddledev/paddle:gpu-devel-latest: PaddlePaddle GPU binary image plus source code.
- paddledev/paddle:cpu-demo-latest: PaddlePaddle CPU binary image plus source code and demo
- paddledev/paddle:gpu-demo-latest: PaddlePaddle GPU binary image plus source code and demo
Tags with latest will be replaced by a released version.
### Download and Run Docker images
You have to install Docker in your machine which has linux kernel version 3.10+ first. You can refer to the official guide https://docs.docker.com/engine/installation/ for further information.
You can use ```docker pull ```to download images first, or just launch a container with ```docker run```:
```bash
docker run -it paddledev/paddle:cpu-latest
```
If you want to launch container with GPU support, you need to set some environment variables at the same time:
```bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
```
### Notice
#### Performance
Since Docker is based on lightweight virtual containers, CPU computing performance is well maintained. The GPU driver and devices are all mapped into the container, so GPU computing performance is not seriously affected either.
If you use a high-performance NIC, such as RDMA (RoCE 40GbE or IB 56GbE) or Ethernet (10GbE), it is recommended to use the option `--net=host`.
#### Remote access
If you want to enable ssh access background, you need to build an image by yourself. Please refer to official guide https://docs.docker.com/engine/reference/builder/ for further information.
Following is a simple Dockerfile with ssh:
```bash
FROM paddledev/paddle
MAINTAINER PaddlePaddle dev team <paddle-dev@baidu.com>
RUN apt-get update
RUN apt-get install -y openssh-server
RUN mkdir /var/run/sshd
RUN echo 'root:root' | chpasswd
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
```
Then you can build an image with Dockerfile and launch a container:
```bash
# cd into Dockerfile directory
docker build . -t paddle_ssh
# run container, and map host machine port 8022 to container port 22
docker run -d -p 8022:22 --name paddle_ssh_machine paddle_ssh
```
Now, you can ssh on port 8022 to access the container, username is root, password is also root:
```bash
ssh -p 8022 root@YOUR_HOST_MACHINE
```
You can stop and delete the container as following:
```bash
# stop
docker stop paddle_ssh_machine
# delete
docker rm paddle_ssh_machine
```
......@@ -5,9 +5,11 @@ Install PaddlePaddle
----------------------
.. toctree::
:maxdepth: 1
:glob:
install_*
internal/install_from_jumbo.md
Build from Source
-----------------
......@@ -15,20 +17,24 @@ Build from Source
If you want to hack and contribute PaddlePaddle source code, following guides can help you\:
.. toctree::
:maxdepth: 1
:glob:
build_from_source.md
contribute_to_paddle.md
Build Docker Images
-------------------
Docker and Debian Package installation
--------------------------------------
Note: The intallation packages are still in pre-release
Note: The installation packages are still in pre-release
state and your experience of installation may not be smooth.
If you want to pack docker image, the following guide can help you\:
.. toctree::
:maxdepth: 1
:glob:
docker/*
docker_install.md
ubuntu_install.md
Debian Package installation guide
=================================
## Debian Package installation
Currently, PaddlePaddle only provides Debian packages for Ubuntu 14.04.
There are two versions of the package, CPU and GPU. The download address is:
https://github.com/baidu/Paddle/releases/tag/V0.8.0b0
After downloading PaddlePaddle deb packages, you can run:
```bash
dpkg -i paddle-0.8.0b-cpu.deb
apt-get install -f
```
And if you use the GPU version of the deb package, you need to install the CUDA toolkit and cuDNN, and set the related environment variables (such as LD_LIBRARY_PATH) first. It is normal for `dpkg -i` to report errors. `apt-get install -f` will continue installing paddle and install its dependencies.
**Note**
PaddlePaddle package only supports x86 CPU with AVX instructions. If not, you have to download and build from source code.
......@@ -5,3 +5,4 @@ Cluster Train
:glob:
opensource/cluster_train.md
internal/index.md
......@@ -23,6 +23,8 @@ AutoStructify = transform.AutoStructify
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc/templates"]
# -- Doxygen Settings
breathe_projects = {
'paddle': '@PADDLE_DOXYGEN_OUTPUT@/xml'
......@@ -66,8 +68,6 @@ extensions = [
autodoc_member_order = 'bysource'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
......
......@@ -93,7 +93,7 @@ where `train.sh` is almost the same as `demo/seqToseq/translation/train.sh`, the
- `--init_model_path`: path of the initialization model, here is `data/paraphrase_model`
- `--load_missing_parameter_strategy`: operations when model file is missing, here use a normal distribution to initialize the other parameters except for the embedding layer
For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](text_generation.md).
For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/text_generation.md).
## Optional Function ##
### Embedding Parameters Observation
......
#Image Classification Tutorial
Image Classification Tutorial
==============================
This tutorial will guide you through training a convolutional neural network to classify objects using the CIFAR-10 image classification dataset.
As shown in the following figure, the convolutional neural network can recognize the main object in images, and output the classification result.
......@@ -172,7 +173,7 @@ python -m paddle.utils.plotcurve -i $log > plot.png
- The script `plotcurve.py` requires the python module of `matplotlib`, so if it fails, maybe you need to install `matplotlib`.
After training finishes, the training and testing error curve will be saved to `plot.png` using `plotcurve.py` script. An example of the plot is shown below:
After training finishes, the training and testing error curves will be saved to `plot.png` using `plotcurve.py` script. An example of the plot is shown below:
<center>![Training and testing curves.](./plot.png)</center>
......
# Model Zoo - ImageNet #
[ImageNet](http://www.image-net.org/) is a popular dataset for generic object classification. This tutorial provided convolutional neural network(CNN) models for ImageNet.
[ImageNet](http://www.image-net.org/) is a popular dataset for generic object classification. This tutorial provides convolutional neural network(CNN) models for ImageNet.
## ResNet Introduction
......@@ -48,11 +48,11 @@ We present three ResNet models, which are converted from the models provided by
## ResNet Model
See ```demo/model_zoo/resnet/resnet.py```. This confgiure contains network of 50, 101 and 152 layers. You can specify layer number by adding argument like this ```--config_args=layer_num=50``` in command line arguments.
See ```demo/model_zoo/resnet/resnet.py```. This config contains network of 50, 101 and 152 layers. You can specify layer number by adding argument like ```--config_args=layer_num=50``` in command line arguments.
### Network Visualization
You can get a diagram of ResNet network by running the following command. The script generates dot file and then converts dot file to PNG file, which uses installed draw_dot tool in our server. If you can not access the server, just install graphviz to convert dot file.
You can get a diagram of ResNet network by running the following commands. The script generates dot file and then converts dot file to PNG file, which uses installed draw_dot tool in our server. If you can not access the server, just install graphviz to convert dot file.
```
cd demo/model_zoo/resnet
......@@ -165,7 +165,7 @@ We provide both C++ and Python interfaces to extract features. The following exa
### C++ Interface
First, specify image data list in `define_py_data_sources` in the config, see example `demo/model_zoo/resnet/resnet.py`.
First, specify image data list in `define_py_data_sources2` in the config, see example `demo/model_zoo/resnet/resnet.py`.
```
train_list = 'train.list' if not is_test else None
......@@ -190,8 +190,7 @@ Second, specify layers to extract features in `Outputs()` of `resnet.py`. For ex
Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
```
Third, specify model path and output directory in `extract_fea_c++.sh
`, and then run following commands
Third, specify model path and output directory in `extract_fea_c++.sh`, and then run the following commands.
```
cd demo/model_zoo/resnet
......
......@@ -9,7 +9,7 @@ There are serveral examples and demos here.
* [Sentiment Analysis](sentiment_analysis/index.rst)
* [Text Generation](text_generation/index.rst)
* [Semantic Role Labeling](semantic_role_labeling/index.md)
* [Semantic Role Labeling](semantic_role_labeling/index.rst)
## Recommendation
......@@ -19,6 +19,3 @@ There are serveral examples and demos here.
## Model Zoo
* [ImageNet: ResNet](imagenet_model/resnet_model.md)
* [Embedding: Chinese Word](embedding_model/index.md)
## Customization
* [Writing New Layers](new_layer/index.rst)
......@@ -59,7 +59,7 @@ To build your text classification system, your code will need to perform five st
## Preprocess data into standardized format
In this example, you are going to use [Amazon electronic product review dataset](http://jmcauley.ucsd.edu/data/amazon/) to build a bunch of deep neural network models for text classification. Each text in this dataset is a product review. This dataset has two categories: “positive” and “negative”. Positive means the reviewer likes the product, while negative means the reviewer does not like the product.
`demo/quick_start` provides scripts for downloading data and preprocessing data as shown below. The data process takes several minutes (about 3 minutes in our machine).
`demo/quick_start` in the [source code](https://github.com/baidu/Paddle) provides scripts for downloading data and preprocessing data as shown below. The data process takes several minutes (about 3 minutes in our machine).
```bash
cd demo/quick_start
......@@ -157,9 +157,7 @@ define_py_data_sources2(train_list='data/train.list',
obj="process",
args={"dictionary": word_dict})
```
You can refer to the following link for more detailed examples
: <a href = "../../ui/data_provider/python_case.html">Python Use Case</a>,The detailed documentation on data format is: <a href = "../../ui/api/py_data_provider_wrapper.html"> PyDataProviderWrapper</a>
You can refer to the following link for more detailed examples and data formats: <a href = "../../ui/data_provider/pydataprovider2.html">PyDataProvider2</a>.
## Network Architecture
You will describe four kinds of network architectures in this section.
......@@ -425,7 +423,7 @@ paddle train \
mv rank-00000 result.txt
```
There are several differences between training and inference network configurations.
Users can choose the best model based on the training log instead of model `output/pass-00003`. There are several differences between training and inference network configurations.
- You do not need labels during inference.
- Outputs need to be specified to the classification probability layer (the output of softmax layer), or the id of maximum probability (`max_id` layer). An example to output the id and probability is given in the code snippet.
- batch_size = 1.
......
......@@ -219,9 +219,9 @@ The network structure shows below.
The demo's neural network config file "trainer_config.py" is shown below.
.. include:: ../../../demo/recommendation/trainer_config.py
:code: python
:literal:
.. literalinclude:: ../../../demo/recommendation/trainer_config.py
:language: python
:lines: 15-
In this :code:`trainer_config.py`, we just map each feature type to
a feature vector. The following shows how each feature is mapped to a vector.
......@@ -257,15 +257,15 @@ In these network, we use several api in `trainer_config_helpers
* Text Convolution Pooling Layer, `text_conv_pool
<../../ui/api/trainer_config_helpers/networks.html
#trainer_config_helpers.networks.text_conv_pool>`_
* Declare Python Data Sources, `define_py_data_sources
* Declare Python Data Sources, `define_py_data_sources2
<../../ui/api/trainer_config_helpers/data_sources.html>`_
Data Provider
'''''''''''''
.. include:: ../../../demo/recommendation/dataprovider.py
:code: python
:literal:
.. literalinclude:: ../../../demo/recommendation/dataprovider.py
:language: python
:lines: 15-
The data provider just reads the meta.bin and rating file, and yields each sample for training.
In this :code:`dataprovider.py`, we should set\:
......@@ -274,7 +274,7 @@ In this :code:`dataprovider.py`, we should set\:
* use_seq\: Whether this :code:`dataprovider.py` in sequence mode or not.
* process\: Return each sample of data to :code:`paddle`.
The data provider details document see `there <../../ui/DataProvider.html>`_.
The data provider details document see `there <../../ui/data_provider/pydataprovider2.html>`_.
Train
`````
......@@ -283,15 +283,15 @@ After prepare data, config network, writting data provider, now we can run paddl
The run.sh is shown as follow:
.. include:: ../../../demo/recommendation/run.sh
:code: bash
:literal:
.. literalinclude:: ../../../demo/recommendation/run.sh
:language: bash
:lines: 16-
It just starts a paddle training process, writes the log to `log.txt`,
then prints it on screen.
Each command line argument in :code:`run.sh`, please refer to the `command line
arguments <TBD>`_ page. The short description of these arguments is shown as follow.
arguments <../../ui/index.html#command-line-argument>`_ page. The short description of these arguments is shown as follow.
* config\: Tell paddle which file is neural network configuration.
* save_dir\: Tell paddle save model into './output'
......@@ -303,8 +303,6 @@ arguments <TBD>`_ page. The short description of these arguments is shown as fol
* dot_period\: Print a :code:`.` after train :code:`dot_period` batches.
* num_passes\: Train at most :code:`num_passes`.
If the training process starts successfully, the output looks like the following:
.. code-block:: text
......
Semantic Role Labeling Tutorial
===============================
.. toctree::
:maxdepth: 3
semantic_role_labeling.md
# Semantic Role Labelling Tutorial
Semantic role labeling (SRL) is a form of shallow semantic parsing whose goal is to discover the predicate-argument structure of each predicate in a given input sentence. SRL is useful as an intermediate step in a wide range of natural language processing tasks, such as information extraction, automatic document categorization and question answering. An instance is as follows [1]:
# Semantic Role labeling Tutorial #
Semantic role labeling (SRL) is a form of shallow semantic parsing whose goal is to discover the predicate-argument structure of each predicate in a given input sentence. SRL is useful as an intermediate step in a wide range of natural language processing tasks, such as information extraction, automatic document categorization and question answering. An instance is as follows [1]:
[ <sub>A0</sub> He ] [ <sub>AM-MOD</sub> would ][ <sub>AM-NEG</sub> n’t ] [ <sub>V</sub> accept] [ <sub>A1</sub> anything of value ] from [<sub>A2</sub> those he was writing about ].
......@@ -12,12 +12,10 @@ Semantic role labeling (SRL) is a form of shallow semantic parsing whose goal is
- AM-MOD: modal
- AM-NEG: negation
Given the verb "accept", the chunks in sentence would play certain semantic roles. Here, the label scheme is from Penn Proposition Bank.
To this date, most of the successful SRL systems are built on top of some form of parsing results where pre-defined feature templates over the syntactic structure are used. This tutorial will present an end-to-end system using deep bidirectional long short-term memory (DB-LSTM)[2] for solving the SRL task, which largely outperforms the previous state-of-the-art systems. The system regards SRL task as the sequence labelling problem.
## Data Description
The relevant paper[2] takes the data set in CoNLL-2005&2012 Shared Task for training and testing. According to the data license, the demo adopts the test data set of CoNLL-2005, which can be reached on website.
......@@ -37,7 +35,6 @@ tgt.dict:the labels dictionary
feature: the extracted features from data set
```
## Training
### DB-LSTM
Please refer to the Sentiment Analysis demo to learn more about the long short-term memory unit.
......@@ -49,8 +46,6 @@ The following figure shows a temporal expanded 2-layer DB-LSTM network.
![pic](./network_arch.png)
</center>
### Features
Two input features play an essential role in this pipeline: predicate (pred) and argument (argu). Two other features: predicate context (ctx-p) and region mark (mr) are also adopted, because a single predicate word cannot exactly describe the predicate information, especially when the same word appears more than once in a sentence. With the predicate context, the ambiguity can be largely eliminated. Similarly, we use region mark m<sub>r</sub> = 1 to denote the argument position if it is located in the predicate context region, or m<sub>r</sub> = 0 if it is not. These four simple features are all we need for our SRL system. Features of one sample with context size set to 1 are shown as follows [2]:
<center>
......@@ -130,7 +125,6 @@ paddle train \
2>&1 | tee 'train.log'
```
- \--config=./db_lstm.py : network config file.
- \--save_dir=./output: output path to save models.
- \--trainer_count=4 : set thread number (or GPU count).
......@@ -183,12 +177,7 @@ python predict.py
After prediction, the result is saved in `predict.res`.
## Reference
[1] Martha Palmer, Dan Gildea, and Paul Kingsbury. The Proposition Bank: An Annotated Corpus of Semantic Roles , Computational Linguistics, 31(1), 2005.
[2] Zhou, Jie, and Wei Xu. "End-to-end learning of semantic role labeling using recurrent neural networks." Proceedings of the Annual Meeting of the Association for Computational Linguistics. 2015.
Utils
=======
Bits
-------
.. doxygenfile:: paddle/math/Bits.h
Memory Handle
--------------
.. doxygenfile:: paddle/math/MemoryHandle.h
......
{# layout.html #}
{# Import the theme's layout. #}
{% extends "!layout.html" %}
{%- block extrahead %}
<script>
var _hmt = _hmt || [];
(function() {
var hm = document.createElement("script");
hm.src = "//hm.baidu.com/hm.js?b9a314ab40d04d805655aab1deee08ba";
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(hm, s);
})();
</script>
{% endblock %}
......@@ -12,6 +12,13 @@ AbsActivation
:members: AbsActivation
:noindex:
ExpActivation
===============
.. automodule:: paddle.trainer_config_helpers.activations
:members: ExpActivation
:noindex:
IdentityActivation
==================
......
......@@ -82,12 +82,6 @@ img_cmrnorm_layer
:members: img_cmrnorm_layer
:noindex:
img_rnorm_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: img_rnorm_layer
:noindex:
batch_norm_layer
---------------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -175,6 +169,12 @@ dotmul_projection
:members: dotmul_projection
:noindex:
dotmul_operator
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: dotmul_operator
:noindex:
full_matrix_projection
----------------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -251,10 +251,10 @@ addto_layer
:members: addto_layer
:noindex:
convex_comb_layer
linear_comb_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: convex_comb_layer
:members: linear_comb_layer
:noindex:
interpolation_layer
......@@ -287,6 +287,12 @@ tensor_layer
:members: tensor_layer
:noindex:
cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:
trans_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -347,12 +353,6 @@ rank_cost
:members: rank_cost
:noindex:
cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:
crf_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
......
......@@ -4,6 +4,12 @@ BaseSGDOptimizer
:members: BaseSGDOptimizer
:noindex:
MomentumOptimizer
=================
.. automodule:: paddle.trainer_config_helpers.optimizers
:members: MomentumOptimizer
:noindex:
AdamOptimizer
=============
.. automodule:: paddle.trainer_config_helpers.optimizers
......
PaddlePaddle DataProvider Introduction
================================
DataProvider Introduction
=========================
DataProvider is a module that loads training or testing data into cpu or gpu
memory for the following training or testing process.
......@@ -10,7 +10,7 @@ customized, with sacrificing the efficiency only a little. This is extremly
useful when you have to dynamically generate certain kinds of data according to,
for example, the training performance.
Besides, users also can also customize a C++ :code:`DataProvider` for a more
Besides, users also can customize a C++ :code:`DataProvider` for a more
complex usage, or for a higher efficiency.
The following parameters are required to define in the PaddlePaddle network
......
......@@ -17,24 +17,23 @@ how to write a simple PyDataProvider.
MNIST is a handwriting classification data set. It contains 70,000 digital
grayscale images. Labels of the training sample range from 0 to 9. All the
images have been size-normalized and centered into images with a same size
images have been size-normalized and centered into images with the same size
of 28 x 28 pixels.
A small part of the original data as an example can be found in the path below:
A small part of the original data as an example is shown as below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt
Each line of the data contains two parts, separated by ';'. The first part is
Each line of the data contains two parts, separated by :code:`;`. The first part is
label of an image. The second part contains 28x28 pixel float values.
Just write path of the above data into train.list. It looks like this:
.. literalinclude:: ../../../doc_cn/ui/data_provider/train.list
The corresponding dataprovider can be found in the path below:
The corresponding dataprovider is shown as below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.py
:linenos:
The first line imports PyDataProvider2 package.
The main function is the process function, that has two parameters.
......@@ -45,8 +44,8 @@ This parameter is passed to the process function by PaddlePaddle.
:code:`@provider` is a Python
`Decorator <http://www.learnpython.org/en/Decorators>`_ .
It sets some properties to DataProvider, and constructs a real PaddlePaddle
DataProvider from a very sample user implemented python function. It does not
matter if you are not familiar with `Decorator`_. You can keep it sample by
DataProvider from a very simple user implemented python function. It does not
matter if you are not familiar with `Decorator`_. You can keep it simple by
just taking :code:`@provider` as a fixed mark above the provider function you
implemented.
......@@ -59,9 +58,9 @@ document of `input_types`_ for more details.
The process method is the core part to construct a real DataProvider in
PaddlePaddle. It implements how to open the text file, how to read one sample
from the original text file, converted them into `input_types`_, and give them
from the original text file, convert them into `input_types`_, and give them
back to PaddlePaddle process at line 23.
Note that data yields by the process function must follow a same order that
Note that data yielded by the process function must follow the same order that
`input_types`_ are defined.
......@@ -75,7 +74,20 @@ you can take this as an example.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py
Here we specify training data by 'train.list', and no testing data is specified.
Here we specify training data by :code:`train.list`, and no testing data is specified.
The method which actually provides data is :code:`process`.
Users can also provide data in another style, which specifies each
:code:`data_layer`'s name explicitly when yielding a sample. For example,
the :code:`dataprovider` is shown below.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.dict.py
:linenos:
If the user doesn't give the :code:`data_layer`'s name, PaddlePaddle will roughly use
the order of :code:`data_layer` definitions to determine which feature goes to
which :code:`data_layer`. This order may not be correct, so DEFINING THE
:code:`data_layer`'s NAMES EXPLICITLY IS THE RECOMMENDED WAY TO PROVIDE DATA.
Now, this simple example of using PyDataProvider is finished.
The only thing that the user should know is how to generate **one sample** from
......@@ -94,7 +106,7 @@ DataProvider for the sequential model
-------------------------------------
A sequence model takes sequences as its input. A sequence is made up of several
timesteps. A so-called timestep does not necessarily have anything to do
with 'time'. It can also be explained to that the order of data are taken into
with time. It can also be understood as the order of the data being taken into
consideration in model design and training.
For example, the sentence can be interpreted as a kind of sequence data in NLP
tasks.
......@@ -111,7 +123,7 @@ The corresponding data provider can be found in the path below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_provider.py
This data provider for sequential model is a little bit complex than that
This data provider for sequential model is a little more complex than that
for the MNIST dataset.
A new initialization method is introduced here.
The method :code:`on_init` is configured to DataProvider by :code:`@provider`'s
......@@ -153,49 +165,29 @@ Please refer to the following section reference for details.
Reference
---------
.. _@provider::
@provider
+++++++++
'@provider' is a Python `Decorator`_, it can construct a PyDataProvider in
PaddlePaddle from a user defined function. Its parameters are:
* `input_types`_ defines format of the data input.
* should_shuffle defines whether to shuffle data or not. By default, it is set
true during training, and false during testing.
* pool_size is the memory pool size (in sample number) in DataProvider.
-1 means no limit.
* can_over_batch_size defines whether PaddlePaddle can store little more
samples than pool_size. It is better to set True to avoid some deadlocks.
* calc_batch_size is a function define how to calculate batch size. This is
usefull in sequential model, that defines batch size is counted upon sequence
or token. By default, each sample or sequence counts to 1 when calculating
batch size.
* cache is a data cache strategy, see `cache`_
* Init_hook function is invoked once the data provider is initialized,
see `init_hook`_
.. _input_types::
.. autofunction:: paddle.trainer.PyDataProvider2.provider
input_types
+++++++++++
PaddlePaddle has four data types, and three sequence types.
The four data types are:
* dense_vector represents dense float vector.
* sparse_binary_vector sparse binary vector, most of the value is 0, and
* :code:`dense_vector`: dense float vector.
* :code:`sparse_binary_vector`: sparse binary vector, most of the value is 0, and
the non zero elements are fixed to 1.
* sparse_float_vector sparse float vector, most of the value is 0, and some
non zero elements that can be any float value. They are given by the user.
* integer represents an integer scalar, that is especially used for label or
word index.
* :code:`sparse_float_vector`: sparse float vector, most of the value is 0, and some
non zero elements can be any float value. They are given by the user.
* :code:`integer`: an integer scalar, that is especially used for label or word index.
The three sequence types are
The three sequence types are:
* SequenceType.NO_SEQUENCE means the sample is not a sequence
* SequenceType.SEQUENCE means the sample is a sequence
* SequenceType.SUB_SEQUENCE means it is a nested sequence, that each timestep of
* :code:`SequenceType.NO_SEQUENCE` means the sample is not a sequence.
* :code:`SequenceType.SEQUENCE` means the sample is a sequence.
* :code:`SequenceType.SUB_SEQUENCE` means it is a nested sequence, that each timestep of
the input sequence is also a sequence.
Different input types have different input formats. Their formats are shown
......@@ -215,36 +207,39 @@ in the above table.
where f represents a float value, i represents an integer value.
.. _init_hook::
.. _settings::
init_hook
+++++++++
init_hook is a function that is invoked once the data provider is initialized.
Its parameters are listed as follows:
* The first parameter is a settings object, which is the same to :code:'settings'
* The first parameter is a settings object, which is the same to :code:`settings`
in :code:`process` method. The object contains several attributes, including:
* settings.input_types the input types. Reference `input_types`_
* settings.logger a logging object
* :code:`settings.input_types`: the input types. Reference `input_types`_.
* :code:`settings.logger`: a logging object.
* The remaining parameters are keyword arguments. They are made up of PaddlePaddle
pre-defined parameters and user-defined parameters.
* PaddlePaddle defines parameters including:
* is_train is a bool parameter that indicates the DataProvider is used in
training or testing
* file_list is the list of all files.
* PaddlePaddle-defined parameters including:
* :code:`is_train` is a bool parameter that indicates the DataProvider is used in
training or testing.
* :code:`file_list` is the list of all files.
* User-defined parameters args can be set in training configuration.
Note, PaddlePaddle reserves the right to add pre-defined parameter, so please
use :code:`**kwargs` in init_hook to ensure compatibility by accepting the
parameters which your init_hook does not use.
.. _cache ::
cache
+++++
DataProvider provides two simple cache strategy. They are
* CacheType.NO_CACHE means do not cache any data, then data is read runtime by
DataProvider provides two simple cache strategies. They are:
* :code:`CacheType.NO_CACHE` means do not cache any data, then data is read at runtime by
the user implemented python module every pass.
* CacheType.CACHE_PASS_IN_MEM means the first pass reads data by the user
* :code:`CacheType.CACHE_PASS_IN_MEM` means the first pass reads data by the user
implemented python module, and the rest passes will directly read data from
memory.
......@@ -7,7 +7,7 @@
## API Reference
* [Trainer Config Helpers](api/trainer_config_helpers/index.md)
* [Model Config Interface](api/trainer_config_helpers/index.md)
## Command Line Argument
......
......@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from py_paddle import swig_paddle, DataProviderWrapperConverter
from paddle.trainer.PyDataProviderWrapper import DenseSlot
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
TEST_DATA = [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
......@@ -89,12 +89,12 @@ TEST_DATA = [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
def main():
conf = parse_config("./mnist_model/trainer_config.conf.norm", "")
conf = parse_config("./mnist_model/trainer_config.py", "")
print conf.data_config.load_data_args
network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine) # For code hint.
network.loadParameters("./mnist_model/")
converter = DataProviderWrapperConverter(False, [DenseSlot(784)])
converter = DataProviderConverter([dense_vector(784)])
inArg = converter(TEST_DATA)
print network.forwardTest(inArg)
......
......@@ -10,27 +10,35 @@ SWIG. The main steps of predict values in python are:
* Predict
Here is a sample python script that shows the typical prediction process for the
MNIST classification problem.
MNIST classification problem. The complete sample code can be found at
:code:`src_root/doc/ui/predict/predict_sample.py`.
.. literalinclude:: ./predict_sample.py
:language: python
:linenos:
:lines: 15-18,90-100,101-104
The module that does the most of the job is py_paddle.swig_paddle, it's
generated by SWIG and has complete documents, for more details you can use
python's :code:`help()` function. Let's walk through the above python script:
* At the beginning, initialize PaddlePaddle with command line arguments(line 90).
* Parse the configuration file that is used in training(line 93).
* Create a neural network at line 95 according the parsed configuration, then
load the trained parameters from model at line 97.
* A utility class for data transformation is created at line 98.
* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize
PaddlePaddle with command line arguments, for more about command line arguments
see `Command Line Arguments <../cmd_argument/detail_introduction.html>`_.
* Parse the configuration file that is used in training with :code:`parse_config()`.
Because the data to predict usually has no label, and the output of prediction
is normally the output layer rather than the cost layer, you should modify
the configuration file accordingly before using it for prediction.
* Create a neural network with
:code:`swig_paddle.GradientMachine.createFromConfigProto()`, which takes the
parsed configuration :code:`conf.model_config` as argument. Then load the
trained parameters from the model with :code:`network.loadParameters()`.
* Create a data converter object of utility class :code:`DataProviderConverter`.
- Note: As swig_paddle can only accept C++ matrices, we offer a utility
class DataProviderWraaperConverter that can accept the same input data with
PyDataProviderWrapper, for more information please refer to document
of `PyDataProviderWrapper <../py_data_provider_wrapper_api.html>`_.
* Do the prediction and output the result at line 100, forwardTest is another
utility class that directly takes the activations of the output layer.
class DataProviderConverter that can accept the same input data with
PyDataProvider2, for more information please refer to document
of `PyDataProvider2 <../data_provider/pydataprovider2.html>`_.
* Do the prediction with :code:`forwardTest()`, which takes the converted
input data and outputs the activations of the output layer.
Here is a typical output:
......
RNN 配置
========
.. toctree::
:maxdepth: 3
* `RNN配置 <../../../doc/algorithm/rnn/rnn.html>`_
编译与安装
========================
PaddlePaddle提供数个预编译的二进制来进行安装,包括Docker镜像,ubuntu的deb安装包等。我们推荐使用Docker镜像来部署环境,同时欢迎贡献更多的安装包。
Note: The installation packages are still in pre-release state and your experience of installation may not be smooth.
注意:目前PaddlePaddle的安装包还处在pre-release的状态,使用起来或许会不是很顺畅。
.. toctree::
:maxdepth: 1
:glob:
install/index.rst
源码下载(对内) <../build/internal/download_paddle_source_zh_cn.rst>
使用Jumbo安装(对内) <../build/internal/install_from_jumbo.rst>
从源码编译安装(对内) <../build/internal/build_from_source_zh_cn.rst>
install/docker_install.rst
install/ubuntu_install.rst
cmake/index.rst
安装PaddlePaddle
================
PaddlePaddle提供数个预编译的二进制来进行安装。他们包括Docker镜像,ubuntu的deb安装包等
。欢迎贡献更多的安装包。我们更推荐使用Docker镜像来部署PaddlePaddle环境。
Note: The installation packages are still in pre-release
state and your experience of installation may not be smooth.
注意!目前PaddlePaddle的安装包还处在pre-release的状态,
使用起来或许会不是很顺畅。
.. toctree::
docker_install.rst
ubuntu_install.rst
......@@ -4,10 +4,8 @@
PaddlePaddle目前支持ubuntu 14.04版本使用deb包安装。更多的安装包PaddlePaddle会在近期提供。
欢迎大家贡献各个发行版的安装包(例如,ubuntu,centos,debian,gentoo)。
PaddlePaddle的ubuntu安装包分为两个版本,即CPU版本,和GPU版本,他们的下载地址是:
* CPU版本的PaddlePaddle安装包: TBD
* GPU版本的PaddlePaddle安装包: TBD
PaddlePaddle的ubuntu安装包分为两个版本,即CPU版本,和GPU版本,他们的下载地址是\:
https://github.com/baidu/Paddle/releases/tag/V0.8.0b0
需要注意的是,目前PaddlePaddle的安装包只支持
`AVX <https://en.wikipedia.org/wiki/Advanced_Vector_Extensions>`_
......@@ -21,8 +19,10 @@ PaddlePaddle的ubuntu安装包分为两个版本,即CPU版本,和GPU版本
dpkg -i paddle-0.8.0b-cpu.deb
apt-get install -f
需要注意的是,如果使用GPU版本的PaddlePaddle,请安装CUDA 7.5 和CUDNN 5到本地环境中,并
设置好对应的环境变量(LD_LIBRARY_PATH等等)。
在 :code:`dpkg -i` 的时候如果报一些依赖未找到的错误是正常的,
在 :code:`apt-get install -f` 里会继续安装 PaddlePaddle。
需要注意的是,如果使用GPU版本的PaddlePaddle,请安装CUDA 7.5 和CUDNN 5到本地环境中,
并设置好对应的环境变量(LD_LIBRARY_PATH等等)。
可能遇到的问题
--------------
......
集群训练
========
参见 `集群训练 <../../doc/cluster/index.html>`_
* `集群训练 <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
集群训练(对内) <internal/index.md>
......@@ -22,6 +22,7 @@ AutoStructify = transform.AutoStructify
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc/templates"]
# -- General configuration ------------------------------------------------
......@@ -51,9 +52,6 @@ table_styling_embed_css = True
autodoc_member_order = 'bysource'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
......
......@@ -21,5 +21,6 @@
常用模型
''''''''
* `ImageNet: ResNet <../../doc/demo/imagenet_model/resnet_model.html>`_
* `Embedding: Chinese Word <../../doc/demo/embedding_model/index.html>`_
......@@ -4,7 +4,7 @@
## 安装(Install)
首先请参考<a href = "../../build/index.html">安装教程</a>安装PaddlePaddle。
首先请参考<a href = "../../build_and_install/index.html">安装教程</a>安装PaddlePaddle。
## 使用概述(Overview)
......@@ -32,7 +32,7 @@
## 数据格式准备(Data Preparation)
在本问题中,我们使用[Amazon电子产品评论数据](http://jmcauley.ucsd.edu/data/amazon/)
将评论分为好评(正样本)和差评(负样本)两类。`demo/quick_start`里提供了数据下载脚本
将评论分为好评(正样本)和差评(负样本)两类。[源码](https://github.com/baidu/Paddle)`demo/quick_start`里提供了数据下载脚本
和预处理脚本。
```bash
......@@ -134,8 +134,8 @@ define_py_data_sources2(train_list='data/train.list',
* obj="process": 指定生成数据的函数
* args={"dictionary": word_dict}: 额外的参数,这里指定词典
更详细用例请参考文档<a href = "../../ui/data_provider/python_case.html">Python Use Case</a>
数据格式和详细文档请参考<a href = "../../ui/py_data_provider_wrapper_api.html">
更详细用例请参考文档<a href = "../../../doc/ui/data_provider/python_case.html">Python Use Case</a>
数据格式和详细文档请参考<a href = "../../../doc/ui/data_provider/pydataprovider2.html">
PyDataProvider2</a>。
## 网络结构(Network Architecture)
......@@ -143,8 +143,8 @@ PyDataProviderWrapper</a>。
<center> ![](./PipelineNetwork.jpg) </center>
我们将以基本的逻辑回归网络作为起点,并逐渐展示更加深入的功能。更详细的网络配置
连接请参考<a href = "../../ui/trainer_config_helpers_api.html#module-paddle.trainer_config_helpers.layers">Layer文档</a>
所有配置在`demo/quick_start`目录,首先列举逻辑回归网络。
连接请参考<a href = "../../../doc/layer.html">Layer文档</a>
所有配置在[源码](https://github.com/baidu/Paddle)`demo/quick_start`目录,首先列举逻辑回归网络。
### 逻辑回归模型(Logistic Regression)
......@@ -350,7 +350,7 @@ lstm = simple_lstm(input=emb, size=lstm_size)
<br>
## 优化算法(Optimization Algorithm)
<a href = "../../ui/trainer_config_helpers_api.html#module-paddle.trainer_config_helpers.optimizers">优化算法</a>包括
<a href = "../../../doc/ui/trainer_config_helpers_api.html#module-paddle.trainer_config_helpers.optimizers">优化算法</a>包括
Momentum, RMSProp,AdaDelta,AdaGrad,ADAM,Adamax等,这里采用Adam优化方法,加了L2正则和梯度截断。
```python
......@@ -375,7 +375,7 @@ paddle train \
--num_passes=15 \
--use_gpu=false
```
这里没有介绍多机分布式训练,可以参考<a href = "../../platform/index.html">分布式训练</a>的demo学习如何进行多机训练。
这里没有介绍多机分布式训练,可以参考<a href = "../../cluster/index.html">分布式训练</a>的demo学习如何进行多机训练。
## 预测(Prediction)
可以使用训练好的模型评估带有label的验证集,也可以预测没有label的测试集。
......@@ -407,7 +407,7 @@ paddle train \
mv rank-00000 result.txt
```
与训练网络配置不同的是:无需label相关的层,指定outputs输出概率层(softmax输出),
这里以`output/pass-00003`为例进行预测,用户可以根据训练log选择test结果最好的模型来预测。与训练网络配置不同的是:无需label相关的层,指定outputs输出概率层(softmax输出),
指定batch_size=1,数据传输无需label数据,预测数据指定test_list的位置。
预测结果以文本的形式保存在`result.txt`中,一行为一个样本,格式如下:
......
新写Layer
=========
* `新写Layer <../../../doc/dev/new_layer/index.html>`_
PaddlePaddle文档
================
使用指南
--------
* [快速入门](demo/quick_start/index.md)
* [编译与安装](build_and_install/index.rst)
* [用户接口](ui/index.rst)
* [使用示例](demo/index.rst)
* [模型配置](ui/model.rst)
* [集群训练](cluster/index.rst)
开发指南
--------
* [新写Layer](dev/new_layer/index.rst)
算法教程
--------
* [RNN配置](algorithm/rnn/rnn.rst)
PaddlePaddle文档
================
使用指南
--------
* `快速入门 <demo/quick_start/index.html>`_
* `编译与安装 <build_and_install/index.html>`_
* `用户接口 <ui/index.html>`_
* `使用示例 <demo/index.html>`_
* `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_
* `集群训练 <cluster/index.html>`_
开发指南
--------
* `新写Layer <../doc/dev/new_layer/index.html>`_
算法教程
--------
* `RNN配置 <../doc/algorithm/rnn/rnn.html>`_
PaddlePaddle的数据提供(DataProvider)介绍
==================================
========================================
数据提供(DataProvider,后用DataProvider代替)是PaddlePaddle负责提供数据的模块。其作用是将训练数据
传入内存或者显存,让神经网络可以进行训练。简单的使用,用户可以使用Python的
:code:`PyDataProvider` 来自定义传数据的过程。如果有更复杂的使用,或者需要更高的效率,
用户也可以在C++端自定义一个 :code:`DataProvider` 。
数据提供(DataProvider)是PaddlePaddle负责提供数据的模块。其作用是将训练数据传入内存或者显存,让神经网络可以进行训练。简单的使用,用户可以使用Python的 :code:`PyDataProvider` 来自定义传数据的过程。如果有更复杂的使用,或者需要更高的效率,用户也可以在C++端自定义一个 :code:`DataProvider` 。
PaddlePaddle需要用户在网络配置(trainer_config.py)中定义使用什么DataProvider,和DataProvider
的一些参数,训练文件列表(train.list)和测试文件列表(test.list)。
PaddlePaddle需要用户在网络配置(trainer_config.py)中定义使用哪种DataProvider及其参数,训练文件列表(train.list)和测试文件列表(test.list)。
其中,train.list和test.list均为本地的两个文件(推荐直接放置到训练目录,以相对路径引用)。如果
test.list不设置,或者设置为None的话,那么在训练过程中,不会执行测试操作。否则,则会根据命令行
参数指定的测试方式,在训练过程中进行测试,从而防止过拟合。
其中,train.list和test.list均为本地的两个文件(推荐直接放置到训练目录,以相对路径引用)。如果test.list不设置,或者设置为None,那么在训练过程中,不会执行测试操作。否则,会根据命令行参数指定的测试方式,在训练过程中进行测试,从而防止过拟合。
一般情况下,train.list和test.list为纯文本文件,其每一行对应这每一个数据文件。数据文件存放在
本地磁盘中,将文件的绝对路径或相对路径(相对于PaddlePaddle程序运行时的路径)的方式写在train.list和
test.list中。当然,train.list和test.list也可以放置hdfs文件路径,或者数据库连接地址等等。
用户在DataProvider中需要实现如何访问其中每一个文件。
一般情况下,train.list和test.list为纯文本文件,一行对应一个数据文件,数据文件存放在本地磁盘中。将文件的绝对路径或相对路径(相对于PaddlePaddle程序运行时的路径)写在train.list和test.list中。当然,train.list和test.list也可以放置hdfs文件路径,或者数据库连接地址等等。
DataProvider的具体用法和如何实现一个新的DataProvider,请参考下述文章:
用户在DataProvider中需要实现如何访问其中每一个文件。DataProvider的具体用法和如何实现一个新的DataProvider,请参考下述文章:
.. toctree::
......
......@@ -4,3 +4,5 @@ define_py_data_sources2(train_list='train.list',
test_list=None,
module='mnist_provider',
obj='process')
img = data_layer(name='pixel', size=784)
label = data_layer(name='label', size=10)
from paddle.trainer.PyDataProvider2 import *
# Define a py data provider
@provider(input_types=[
    dense_vector(28 * 28),
    integer_value(10)
])
def process(settings, filename):  # settings is not used currently.
    """Yield one MNIST sample per line of ``filename``.

    Each line is expected to look like ``<label>;<pixel> <pixel> ...``:
    the part before ';' is the integer label, the part after it is a
    space-separated list of float pixel values.

    Every sample is yielded as a dict whose keys ('pixel', 'label') name
    the feature explicitly instead of relying on positional order.

    :param settings: settings object passed in by the caller (unused here).
    :param filename: path of one data file to read.
    """
    # Use a context manager so the file is closed even when the consumer
    # stops iterating early or a malformed line raises while parsing;
    # a trailing f.close() would be skipped in both cases.
    with open(filename, 'r') as f:  # open one of training file
        for line in f:  # read each line
            label, pixel = line.split(';')

            # get features and label
            pixels_float = [
                float(each_pixel_str) for each_pixel_str in pixel.split(' ')
            ]

            # give data to paddle.
            yield {"pixel": pixels_float, 'label': int(label)}
......@@ -56,6 +56,14 @@ process函数调用多次 :code:`yield` 即可。 :code:`yield` 是Python的一
这里说明了训练数据是 'train.list',而没有测试数据。引用的DataProvider是 'mnist_provider'
这个模块中的 'process' 函数。
同时,根据模型配置文件中 :code:`data_layer` 的名字,用户也可以显式指定返回的数据对应关系。例如:
.. literalinclude:: mnist_provider.dict.py
:linenos:
如果用户不指定返回数据的对应关系,那么PaddlePaddle会粗略的根据layer的声明顺序,
来确定对应关系。这个对应关系可能不正确。所以推荐使用显式指定返回值和数据对应关系。
至此,简单的PyDataProvider样例就说明完毕了。对于用户来说,将数据发送给PaddlePaddle,仅仅需要
知道如何从 **一个文件** 里面读取 **一条** 样本。而PaddlePaddle进程帮助用户做了
......@@ -116,16 +124,16 @@ DataProvider创建的时候执行。这个初始化函数具有如下参数:
参考(Reference)
---------------
.. _@provider::
@provider
+++++++++
'@provider'是一个Python的 `Decorator`_ ,他可以将某一个函数标记成一个PyDataProvider。它包含的参数有:
:code:`@provider` 是一个Python的 `Decorator`_ ,他可以将某一个函数标记成一个PyDataProvider。它包含的参数有:
* `input_types`_ 是数据输入格式。具体有哪些格式,参考 `input_types`_ 。
* should_shuffle 指定DataProvider是不是要做shuffle,如果不设置的话,训练的时候默认shuffle,
测试的时候默认不shuffle
测试的时候默认不shuffle。
* min_pool_size 是设置DataProvider在内存中最小暂存的数据条数。这个也是PaddlePaddle所能够保证的shuffle粒度。
设置成-1的话,会预先读取全部数据到内存中。
* pool_size 是设置DataProvider在内存中暂存的数据条数。设置成-1的话,即不在乎内存暂存多少条数据。
* can_over_batch_size 表示是否允许Paddle暂存略微多于pool_size的数据。这样做可以避免很多死锁问题。
一般推荐设置成True
......@@ -133,9 +141,11 @@ DataProvider创建的时候执行。这个初始化函数具有如下参数:
是一个batch size,但是有时为了计算均衡性,可以将一条数据设置成多个batch size
* cache 是数据缓存的策略,参考 `cache`_
* init_hook 是初始化时调用的函数,参考 `init_hook`_
.. _input_types::
* use_dynamic_order 如果是true的话,可以返回一个dict,key是data_layer的名字,value是特征值。同时,也可以
返回一个list或者tuple。如果是false的话,只能够返回list或者tuple
* check 设置成true的话,会根据input_types检查数据的合法性。
* check_fail_continue 如果设置成true的话,即使在check中数据不合法,也会扔掉这条数据,继续训练。 如果
check是false的话,没有作用。
input_types
+++++++++++
......@@ -169,16 +179,11 @@ PaddlePaddle的数据包括四种主要类型,和三种序列模式。其中
其中,f代表一个浮点数,i代表一个整数。
.. _init_hook::
.. _settings::
init_hook
+++++++++
init_hook可以传入一个函数。这个函数在初始化的时候会被调用。这个函数的参数是:
* 第一个参数是 settings 对象。这个对象和process的第一个参数一致。具有的属性有
* settings.input_types 设置输入类型。参考 `input_types`_
* settings.logger 一个logging对象
......@@ -192,8 +197,6 @@ init_hook可以传入一个函数。这个函数在初始化的时候会被调
注意,PaddlePaddle保留添加参数的权利,所以init_hook尽量使用 :code:`**kwargs` , 来接受不使用的
参数来保证兼容性。
.. _cache::
cache
+++++
......@@ -202,3 +205,55 @@ DataProvider提供了两种简单的Cache策略。他们是
* CacheType.NO_CACHE 不缓存任何数据,每次都会从python端读取数据
* CacheType.CACHE_PASS_IN_MEM 第一个pass会从python端读取数据,剩下的pass会直接从内存里
读取数据。
注意事项
--------
可能的内存泄露问题
++++++++++++++++++
PaddlePaddle将train.list中的每一行,都传递给process函数,从而生成多个generator。
即如果train.list中,有100个训练文件,即会生成100个generator。这个本身不是一个很
严重的问题。
但是,如果在训练时,每一条训练数据都是一个文件,并且,训练数据非常多的情况下,就
会生成多个generator。每个generator在没有调用的时候,是几乎不占内存的。但是,当调
用过一次的时候,generator便会存下当前的上下文(Context)。而这个Context可能会非常
大。并且,generator至少调用两次才会知道是否停止。所以,即使在process里面只会有一
个yield,也需要两次随机选择到同样的generator的时候,才会释放该段内存。
.. code-block:: python
def func():
yield 0
f = func() # 创建generator
tmp = next(f) # 调用一次,返回0
tmp = next(f) # 调用第二次的时候,才会Stop Iteration
而如果按顺序调用这些generator就不会出现这个问题。
所以最佳实践推荐不要将每一个样本都放入train.list。而是将样本的地址放入另一个文本
文件,train.list写入那个文本文件的地址。 或者在python generator的上下文中尽量留
下非常少的变量引用。例如
.. code-block:: python
def real_process(fn):
# ... read from fn
return result # 当函数返回的时候,python可以解除掉内部变量的引用。
def process(fn):
yield real_process(fn)
这个问题是PyDataProvider读数据时候的逻辑问题,基本上不能整体修正。
内存不够用的情况
++++++++++++++++
PyDataProvider2会尽量使用内存。所以如果对于内存比较小的机器,推荐设置
:code:`pool_size` 变量,而这个变量推荐大于训练的batch size,并且在内存足够
的情况下越大越好。
......@@ -5,6 +5,7 @@
''''''''
.. toctree::
:maxdepth: 1
data_provider/index.rst
......
模型配置
========
* `Model Config Interface <../../doc/ui/api/trainer_config_helpers/index.html>`_
......@@ -9,22 +9,30 @@ PaddlePaddle目前使用Swig对其常用的预测接口进行了封装,使在P
* 准备数据
* 预测
典型的预测代码如下,使用mnist手写识别作为样例。
典型的预测代码如下,使用mnist手写识别作为样例, 完整代码见
:code:`src_root/doc/ui/predict/predict_sample.py` 。
.. literalinclude:: ../../../doc/ui/predict/predict_sample.py
:language: python
:linenos:
主要的软件包为py_paddle.swig_paddle,这个软件包文档相对完善。可以使用python的 :code:`help()` 函数查询文档。主要步骤为:
* 在程序开始阶段,使用命令行参数初始化PaddlePaddle
* 在98行载入PaddlePaddle的训练文件。读取config
* 在100行创建神经网络,并在83行载入参数。
* 103行创建一个从工具类,用来转换数据。
:lines: 15-18,90-100,101-104
主要的软件包为py_paddle.swig_paddle,这个软件包文档相对完善。可以使用python的
:code:`help()` 函数查询文档。主要步骤为:
* 在程序开始阶段,使用 :code:`swig_paddle.initPaddle()` 传入命令行参数初始化
PaddlePaddle。详细的命令行参数请参考
`命令行参数 <../cmd_argument/detail_introduction.html>`_ 。
* 接下来使用 :code:`parse_config()` 解析训练时的配置文件。这里要注意预测数据通常
不包含label, 而且预测网络通常直接输出最后一层的结果而不是像训练时一样以cost
layer作为输出,所以用于预测的配置文件要做相应的修改。
* 使用 :code:`swig_paddle.GradientMachine.createFromConfigProto()` 根据上一步解
析好的配置创建神经网络。
* 创建一个 :code:`DataProviderConverter` 对象converter。
- swig_paddle接受的原始数据是C++的Matrix,也就是直接写内存的float数组。
- 这个接口并不用户友好。所以,我们提供了一个工具类DataProviderWrapperConverter.
- 这个工具类接收和PyDataProviderWrapper一样的输入数据,请参考PyDataProviderWrapper的文档。
* 在第105行执行预测。forwardTest是一个工具类,直接提取出神经网络Output层的输出结果。典型的输出结果为\:
这个接口并不用户友好。所以,我们提供了一个工具类DataProviderConverter。
这个工具类接收和PyDataProvider2一样的输入数据,详情请参考
`PyDataProvider2文档 <../../../doc/ui/data_provider/pydataprovider2.html>`_ 。
* 最后使用 :code:`forwardTest()` 直接提取出神经网络Output层的输出结果。典型的输出结果为\:
.. code-block:: text
......@@ -37,4 +45,4 @@ PaddlePaddle目前使用Swig对其常用的预测接口进行了封装,使在P
2.70634608e-08, 3.48565123e-08, 5.25639710e-09,
4.48684503e-08]], dtype=float32)}]
其中,value即为softmax层的输出。由于数据是两个,所以输出的value
其中,value即为softmax层的输出。由于数据是两条,所以输出的value包含两个向量 
......@@ -40,3 +40,4 @@ HPPL_ERROR_LOG
unittest.list
proto
dist
setup.py
......@@ -22,15 +22,21 @@
# It same as PYTHONPATH=${YOUR_PYTHON_PATH}:$PYTHONPATH {exec...}
#
PYPATH=""
set -x
while getopts "d:" opt; do
if ! python -c "import paddle" >/dev/null 2>/dev/null; then
PYPATH=""
set -x
while getopts "d:" opt; do
case $opt in
d)
PYPATH=$OPTARG
;;
esac
done
shift $(($OPTIND - 1))
export PYTHONPATH=$PYPATH
$@
done
shift $(($OPTIND - 1))
export PYTHONPATH=$PYPATH
$@
else
echo "paddle package is already in your PYTHONPATH. But unittest need a clean environment."
echo "Please uninstall paddle package before start unittest. Try to 'pip uninstall paddle'"
exit 1
fi
......@@ -7,6 +7,9 @@ add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(scripts)
# Generate setup.py from the setup.py.in template at configure time,
# substituting CMake variables referenced by the template.
# NOTE(review): the output is written next to the template in the source
# tree rather than the build tree, so the generated setup.py shows up as an
# untracked file -- presumably why setup.py is also added to an ignore list
# in this change; confirm before relocating the output.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_SOURCE_DIR}/setup.py)
if(WITH_PREDICT_SDK)
add_subdirectory(predict)
endif()
......
......@@ -110,8 +110,8 @@ IVector* Arguments::getSlotSequenceStartPositions(size_t idx) const
}
}
IVector*Arguments::getSlotSubSequenceStartPositions(size_t idx) const
throw (RangeError){
IVector* Arguments::getSlotSubSequenceStartPositions(size_t idx) const
throw(RangeError) {
auto& a = m->getArg(idx);
if (a.subSequenceStartPositions) {
return IVector::createByPaddleVectorPtr(
......@@ -129,7 +129,7 @@ void Arguments::setSlotSequenceStartPositions(size_t idx,
}
void Arguments::setSlotSubSequenceStartPositions(
size_t idx, IVector *vec) throw (RangeError) {
size_t idx, IVector *vec) throw(RangeError) {
auto& a = m->getArg(idx);
auto& v = m->cast<paddle::IVector>(vec->getSharedPtr());
a.subSequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v);
......
......@@ -20,6 +20,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/utils/GlobalConstants.h"
#include "paddle/utils/TypeDefs.h"
/// Import PaddlePaddle's enumeration into global namespace.
using namespace paddle::enumeration_wrapper; // NOLINT
......@@ -392,7 +393,7 @@ public:
void setSlotSequenceStartPositions(size_t idx,
IVector* vec) throw(RangeError);
void setSlotSubSequenceStartPositions(size_t idx,
IVector* vec) throw (RangeError);
IVector* vec) throw(RangeError);
void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError);
private:
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/utils/Util.h"
#include "paddle/utils/PythonUtil.h"
#include "paddle/utils/Flags.h"
#include "paddle/utils/Excepts.h"
#include "paddle/parameter/Parameter.h"
#include <fenv.h>
......
......@@ -15,6 +15,19 @@
try:
from paddle_api_config import *
import os.path
import platform
# Detect the host platform once; the flags below depend on the linker used.
system = platform.system().lower()
is_osx = (system == 'darwin')
is_win = (system == 'windows')
is_lin = (system == 'linux')

# Linker flags that wrap the libraries which must be linked in full.
# macOS is special-cased with empty flags; every other platform keeps the
# GNU ld spelling. Previously only Linux and macOS assigned these names, so
# any other platform failed later with a NameError when the flags were read.
# NOTE(review): the GNU flags for Windows/other systems simply preserve the
# behaviour from before platform detection was introduced -- confirm if
# those platforms need something else.
if is_osx:
    whole_start = ""
    whole_end = ""
else:
    whole_start = "-Wl,--whole-archive"
    whole_end = "-Wl,--no-whole-archive"
LIB_DIRS = ["math", 'utils', 'parameter', "gserver", "api", "cuda", "pserver", "trainer"]
PARENT_LIB_DIRS = ['proto']
......@@ -56,9 +69,9 @@ try:
def libs_str(self):
libs = [
"-Wl,--whole-archive",
whole_start,
"-lpaddle_gserver",
"-Wl,--no-whole-archive",
whole_end,
"-lpaddle_pserver",
"-lpaddle_trainer_lib",
"-lpaddle_network",
......
set(AVX_SOURCES
src/hl_math.cc
src/hl_avx_functions.cc
)
set(CUDA_SOURCES
src/hl_time.cc
src/hl_math.cc
src/hl_cpu_functions.cc
src/hl_avx_functions.cc)
${AVX_SOURCES})
set(CUDA_CXX_WITH_GPU_SOURCES
src/hl_cuda_cublas.cc
......@@ -12,7 +15,7 @@ set(CUDA_CXX_WITH_GPU_SOURCES
set_source_files_properties(${CUDA_CXX_WITH_GPU_SOURCES}
PROPERTIES COMPILE_FLAGS "-D__NVCC__")
set_source_files_properties(${CUDA_SOURCES}
set_source_files_properties(${AVX_SOURCES}
PROPERTIES COMPILE_FLAGS "-mavx")
set(CUDA_DSO_SOURCES
......@@ -73,4 +76,3 @@ endif()
add_style_check_target(paddle_cuda ${CUDA_SOURCES})
add_style_check_target(paddle_cuda ${CUDA_HEADERS})
# add_style_check_target(hppl ${HPPL_CU_SOURCES}) # TODO(yuyang18): Format hppl style
......@@ -321,13 +321,14 @@ extern const char* hl_get_device_error_string(size_t err);
extern int hl_get_device_last_error();
/**
* @brief hppl query event.
* @brief check cuda event is ready
*
* @param[in] event cuda event to query.
* @param[out] isNotReady this work under device has not yet been
* completed, vice versa.
*
* @return true cuda event is ready.
* false cuda event is not ready.
*/
extern void hl_cuda_event_query(hl_event_t event, bool& isNotReady);
extern bool hl_cuda_event_is_ready(hl_event_t event);
/**
* @brief hppl device synchronization.
......
......@@ -16,9 +16,21 @@ limitations under the License. */
#ifndef HL_DEVICE_FUNCTIONS_CUH_
#define HL_DEVICE_FUNCTIONS_CUH_
namespace hppl {
namespace paddle {
static __inline__ __device__ double atomicAdd(double* address, double val) {
/**
 * @brief Atomic add on device memory, selected by element type.
 *
 * Only declared here; the behaviour comes from the explicit
 * specializations that follow (float and double).
 *
 * @param[in,out] address  location to add to.
 * @param[in]     val      value to add.
 *
 * @return whatever the underlying atomicAdd returns for that type.
 */
template <class T>
inline __device__ T paddleAtomicAdd(T* address, T val);
// float specialization: forwards directly to atomicAdd.
template <>
inline __device__ float paddleAtomicAdd(float* address, float val) {
return atomicAdd(address, val);
}
template <>
inline __device__ double paddleAtomicAdd(double* address, double val) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
return atomicAdd(address, val);
#else
// NOLINTNEXTLINE
unsigned long long int* address_as_ull = (unsigned long long int*)address;
unsigned long long int old = *address_as_ull, assumed; // NOLINT
......@@ -32,10 +44,9 @@ static __inline__ __device__ double atomicAdd(double* address, double val) {
} while (assumed != old);
return __longlong_as_double(old);
#endif
}
} // namespace paddle
} // namespace hppl
using hppl::atomicAdd;
#endif /* HL_DEVICE_FUNCTIONS_CUH_ */
......@@ -192,10 +192,10 @@ __global__ void KeLstmBackward(Op op,
if (isBatch) {
if (value.prevStateValue) {
if (grad.checkIgGrad) atomicAdd(grad.checkIgGrad+frameIdx, rCheckIGrad);
if (grad.checkFgGrad) atomicAdd(grad.checkFgGrad+frameIdx, rCheckFGrad);
if (grad.checkIgGrad) paddle::paddleAtomicAdd(grad.checkIgGrad+frameIdx, rCheckIGrad);
if (grad.checkFgGrad) paddle::paddleAtomicAdd(grad.checkFgGrad+frameIdx, rCheckFGrad);
}
if (grad.checkOgGrad) atomicAdd(grad.checkOgGrad+frameIdx, rCheckOGrad);
if (grad.checkOgGrad) paddle::paddleAtomicAdd(grad.checkOgGrad+frameIdx, rCheckOGrad);
} else {
if (value.prevStateValue) {
if (grad.checkIgGrad) grad.checkIgGrad[frameIdx] += rCheckIGrad;
......
......@@ -27,6 +27,8 @@ typedef float4 vecType;
typedef double2 vecType;
#endif
#else
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#ifndef HPPL_TYPE_DOUBLE
typedef __m128 vecType;
......
......@@ -25,6 +25,9 @@ limitations under the License. */
#define VECTOR_LEN 4
#define VECTOR_SET _mm_set_ps1
#else
#if defined(__APPLE__) || defined(__OSX__)
#define _mm_set_pd1 _mm_set1_pd
#endif
/* number of double in vector */
#define VECTOR_LEN 2
#define VECTOR_SET _mm_set_pd1
......
......@@ -89,7 +89,7 @@ inline const char* hl_get_device_error_string() { return NULL; }
inline const char* hl_get_device_error_string(size_t err) { return NULL; }
inline void hl_cuda_event_query(hl_event_t event, bool& isNotReady) {}
// Stub: ignores the event and always reports it as ready.
// NOTE(review): presumably this is the no-GPU stub header, like the
// neighbouring empty inline functions -- confirm.
inline bool hl_cuda_event_is_ready(hl_event_t event) { return true; }
inline void hl_device_synchronize() {}
......
......@@ -261,11 +261,7 @@ void hl_vector_sum(real *A_d, real *C_h, int dimM) {
struct _hl_event_st hl_event_st = {.cu_event = t_resource.event};
hl_event_t hl_event = &hl_event_st;
bool isNotReady = false;
do {
hl_cuda_event_query(hl_event, isNotReady);
} while (isNotReady == cudaErrorNotReady);
while (!hl_cuda_event_is_ready(hl_event)) {}
KeVectorSum<128><<< grid, threads, 0, STREAM_DEFAULT >>>
(A_d, t_resource.gpu_mem, dimM);
......@@ -275,7 +271,10 @@ void hl_vector_sum(real *A_d, real *C_h, int dimM) {
hl_memcpy_async(C_h, t_resource.cpu_mem, sizeof(real), HPPL_STREAM_DEFAULT);
hl_stream_record_event(HPPL_STREAM_DEFAULT, hl_event);
CHECK_SYNC("hl_vector_sum failed");
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
cudaError_t err = (cudaError_t)hl_get_device_last_error();
CHECK_EQ(cudaSuccess, err)
<< "CUDA error: " << hl_get_device_error_string((size_t)err);
}
template <int blockSize>
......@@ -317,11 +316,7 @@ void hl_vector_abs_sum(real *A_d, real *C_h, int dimM) {
struct _hl_event_st hl_event_st = {.cu_event = t_resource.event};
hl_event_t hl_event = &hl_event_st;
bool isNotReady = false;
do {
hl_cuda_event_query(hl_event, isNotReady);
} while (isNotReady == cudaErrorNotReady);
while (!hl_cuda_event_is_ready(hl_event)) {}
KeVectorAbsSum<128><<< grid, threads, 0, STREAM_DEFAULT >>>
(A_d, t_resource.gpu_mem, dimM);
......@@ -331,5 +326,8 @@ void hl_vector_abs_sum(real *A_d, real *C_h, int dimM) {
hl_memcpy_async(C_h, t_resource.cpu_mem, sizeof(real), HPPL_STREAM_DEFAULT);
hl_stream_record_event(HPPL_STREAM_DEFAULT, hl_event);
CHECK_SYNC("hl_vector_abs_sum failed");
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
cudaError_t err = (cudaError_t)hl_get_device_last_error();
CHECK_EQ(cudaSuccess, err)
<< "CUDA error: " << hl_get_device_error_string((size_t)err);
}
......@@ -217,7 +217,7 @@ void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
} else {
LOG(FATAL) << "parameter transa error!";
}
CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS);
CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat);
CHECK_SYNC("hl_matrix_mul failed");
}
......@@ -266,7 +266,7 @@ void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
LOG(FATAL) << "parameter transa error!";
}
CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS);
CHECK_EQ(stat, CUBLAS_STATUS_SUCCESS) << hl_cublas_get_error_string(stat);
CHECK_SYNC("hl_matrix_mul_vector");
}
......
......@@ -150,7 +150,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP)
// APIs available after R4:
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(cudnnBatchNormalizationForwardTraining) \
__macro(cudnnBatchNormalizationForwardInference) \
......@@ -999,7 +999,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
double epsilon,
real *savedMean,
real *savedVar) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
if ((NULL != runningMean && NULL == runningInvVar) ||
(NULL == runningMean && NULL != runningInvVar)) {
LOG(FATAL) << "runningMean and runningInvVar can be NULL "
......@@ -1024,7 +1024,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
CHECK_SYNC("hl_batch_norm_forward_training failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
......@@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real *estimatedMean,
real *estimatedInvVar,
double epsilon) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc);
cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc);
cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc);
......@@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
CHECK_SYNC("hl_batch_norm_forward_inference failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
......@@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
double epsilon,
real *savedMean,
real *savedInvVar) {
#if CUDNN_VERSION >= 4000
#if CUDNN_VERSION >= 4007
if ((NULL != savedMean && NULL == savedInvVar) ||
(NULL == savedMean && NULL != savedInvVar)) {
LOG(FATAL) << "savedMean and savedVar can be NULL "
......@@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward(
t_resource.cudnn_handle, mode, &alpha, &beta,
#if CUDNN_VERSION >= 5000
&alpha, &beta,
#endif
xDesc, input, dyDesc, outGrad, dxDesc, inGrad,
bnDesc, scale, scaleGrad, biasGrad, epsilon,
savedMean, savedInvVar));
CHECK_SYNC("hl_batch_norm_backward failed");
#else
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
......@@ -209,7 +209,18 @@ __thread cudaStream_t default_stream = 0;
__thread bool g_sync_flag = true;
bool hl_start_flag = false;
#define gettid() syscall(SYS_gettid)
/**
 * @brief Return an id for the calling thread, obtained through a raw syscall.
 *
 * On Apple platforms the id comes from syscall(SYS_thread_selfid); on every
 * other platform it comes from syscall(__NR_gettid). The value is checked
 * with CHECK_NE against -1 before it is returned.
 *
 * @return the value produced by the platform-specific syscall, as a pid_t.
 */
inline pid_t gettid() {
#if defined(__APPLE__) || defined(__OSX__)
pid_t tid = syscall(SYS_thread_selfid);
#else
// Fallback used only when the system headers do not provide __NR_gettid.
// NOTE(review): 224 is the gettid number on i386/ARM; x86_64 uses 186.
// Confirm this fallback can only be reached on targets where 224 is correct.
#ifndef __NR_gettid
#define __NR_gettid 224
#endif
pid_t tid = syscall(__NR_gettid);
#endif
CHECK_NE(tid, -1);
return tid;
}
void hl_init(int device) {
CHECK(hl_start_flag)
......@@ -751,11 +762,12 @@ void hl_set_device_flags_block() {
cudaDeviceScheduleBlockingSync));
}
void hl_cuda_event_query(hl_event_t event, bool& isNotReady) {
bool hl_cuda_event_is_ready(hl_event_t event) {
cudaError_t err = dynload::cudaEventQuery(event->cu_event);
CHECK(cudaSuccess == err || cudaErrorNotReady == err);
if (cudaErrorNotReady == err) {
isNotReady = true;
return false;
}
return true;
}
......@@ -564,11 +564,11 @@ __global__ void KeLstmBackward(real *gateValue,
/* TODO: Temporary save & merger in another kernel */
if (frameIdy == 1) {
if (checkIgGrad) atomicAdd(checkIgGrad+frameIdx, rCheckGrad);
if (checkIgGrad) paddle::paddleAtomicAdd(checkIgGrad+frameIdx, rCheckGrad);
} else if (frameIdy == 2) {
if (checkFgGrad) atomicAdd(checkFgGrad+frameIdx, rCheckGrad);
if (checkFgGrad) paddle::paddleAtomicAdd(checkFgGrad+frameIdx, rCheckGrad);
} else if (frameIdy == 3) {
if (checkOgGrad) atomicAdd(checkOgGrad+frameIdx, rCheckGrad);
if (checkOgGrad) paddle::paddleAtomicAdd(checkOgGrad+frameIdx, rCheckGrad);
}
}
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include "hl_matrix_apply.cuh"
#include "hl_sequence.h"
#include "paddle/utils/Logging.h"
#include "hl_device_functions.cuh"
DEFINE_MATRIX_UNARY_OP(Zero, a = 0);
DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b);
......@@ -266,25 +267,21 @@ template<int blockSize>
__global__ void KeMatrixClassificationError(real* in_A,
int* in_B,
real* out_C,
int dimM,
int dimN) {
__shared__ real max_s[blockSize];
__shared__ int max_l[blockSize];
int cnt = (dimN + blockSize -1) / blockSize;
int tid = threadIdx.x;
int lmt = tid;
int index = 0;
real t;
const int tid = threadIdx.x;
const int rowId = blockIdx.x;
max_s[tid] = -1e30f;
for (int ii = 0; ii < cnt && lmt < dimN; ii++) {
index = blockIdx.y*dimN + lmt;
t = in_A[index];
if (max_s[tid] < t) {
max_s[tid] = t;
max_l[tid] = lmt;
in_A += rowId * dimN;
real tmp;
for (int colId = tid; colId < dimN; colId += blockSize) {
tmp = in_A[colId];
if (max_s[tid] < tmp) {
max_s[tid] = tmp;
max_l[tid] = colId;
}
lmt += blockSize;
}
__syncthreads();
......@@ -300,7 +297,7 @@ __global__ void KeMatrixClassificationError(real* in_A,
__syncthreads();
if (tid == 0) {
out_C[blockIdx.y] = (max_l[0] == in_B[blockIdx.y] ? 0 : 1.0f);
out_C[rowId] = (max_l[0] == in_B[rowId] ? 0 : 1.0f);
}
}
......@@ -313,12 +310,9 @@ void hl_matrix_classification_error(real* A_d,
CHECK_NOTNULL(B_d);
CHECK_NOTNULL(C_d);
int blocksX = 1;
int blocksY = dimM;
dim3 threads(1024, 1);
dim3 grid(blocksX, blocksY);
KeMatrixClassificationError<1024><<< grid, threads, 0, STREAM_DEFAULT >>>
(A_d, B_d, C_d, dimM, dimN);
// each sample is calculated by one block
KeMatrixClassificationError<1024><<< dimM, 1024, 0, STREAM_DEFAULT >>>
(A_d, B_d, C_d, dimN);
CHECK_SYNC("hl_matrix_classification_error");
}
......@@ -629,7 +623,7 @@ __global__ void KeCosSimDerivative(real* grad,
prevGradY[index] +=
scale * grad[ty] * prevOutX[index] * reciprocal;
} else {
atomicAdd(prevGradY + index,
paddle::paddleAtomicAdd(prevGradY + index,
scale * grad[ty] * prevOutX[index] * reciprocal);
}
}
......@@ -646,7 +640,7 @@ __global__ void KeCosSimDerivative(real* grad,
(prevOutX[index] * reciprocalXY -
prevOutY[index] * reciprocalSquareSumY);
} else {
atomicAdd(prevGradY + index, output[ty] * grad[ty] *
paddle::paddleAtomicAdd(prevGradY + index, output[ty] * grad[ty] *
(prevOutX[index] * reciprocalXY -
prevOutY[index] * reciprocalSquareSumY));
}
......
......@@ -362,7 +362,7 @@ __global__ void KeMatrixAddRows(real* output,
if (AddRow == 0) {
outputData[i] += tableData[i];
} else {
atomicAdd(&tableData[i], outputData[i]);
paddle::paddleAtomicAdd(&tableData[i], outputData[i]);
}
}
}
......
......@@ -280,7 +280,7 @@ __global__ void KeSMatrixCscMulDense(real *C_d,
if (index_n_t < dimN) {
real tmp;
tmp = alpha*a_r*b_r[n];
atomicAdd(C_d_r, tmp);
paddle::paddleAtomicAdd(C_d_r, tmp);
C_d_r += CU_CSC_MUL_DENSE_THREAD_X;
index_n_t += CU_CSC_MUL_DENSE_THREAD_X;
}
......@@ -328,7 +328,7 @@ __global__ void KeSMatrixCscMulDense(real *C_d,
if (index_n_t < dimN) {
real tmp;
tmp = alpha*a_r*b_r[n];
atomicAdd(C_d_r, tmp);
paddle::paddleAtomicAdd(C_d_r, tmp);
C_d_r += CU_CSC_MUL_DENSE_THREAD_X;
index_n_t += CU_CSC_MUL_DENSE_THREAD_X;
}
......@@ -629,7 +629,7 @@ __global__ void KeSMatrixDenseMulCsr(real *C_d,
for (int n=0; n < CU_DM_CSR_N; n++) {
if (index_m_t++ < dimM) {
tmp = alpha * b_r * a_r[n];
atomicAdd(C_d_r, tmp);
paddle::paddleAtomicAdd(C_d_r, tmp);
C_d_r += dimN;
}
}
......@@ -660,7 +660,7 @@ __global__ void KeSMatrixDenseMulCsr(real *C_d,
for (int n=0; n < CU_DM_CSR_N; n++) {
if (index_m_t++ < dimM) {
tmp = alpha * b_r * a_r[n];
atomicAdd(C_d_r, tmp);
paddle::paddleAtomicAdd(C_d_r, tmp);
C_d_r += dimN;
}
}
......@@ -912,7 +912,7 @@ __global__ void KeSMatrixCsrColumnSum(real* a_val, real* csr_val,
for (int idx = gid; idx < dimNNZ; idx += gridDim.x * blockDim.x) {
int colIdx = csr_col[idx];
real val = csr_val[idx];
atomicAdd(a_val + colIdx, val);
paddle::paddleAtomicAdd(a_val + colIdx, val);
}
}
......
......@@ -69,23 +69,40 @@ static inline void GetDsoHandleWithSearchPath(
CHECK(nullptr != *dso_handle)
<< "For Gpu version of PaddlePaddle, it couldn't find CUDA library: "
<< dlPath.c_str() << " Please make sure you already specify its path."
<< "Note: for training data on Cpu using Gpu version of PaddlePaddle,"
<< "you must specify libcudart.so via LD_LIBRARY_PATH.";
<< dlPath.c_str() << ". Please make sure you already specify its path. "
<< "Note: for training data on Cpu using Gpu version of PaddlePaddle, "
<< "you must specify libcudart via export LD_LIBRARY_PATH for Linux or "
<< "export DYLD_LIBRARY_PATH for MAC OS.";
}
/**
 * @brief Obtain the handle of the cuBLAS shared library.
 *
 * Delegates to GetDsoHandleWithSearchPath with FLAGS_cuda_dir as the search
 * root. The requested file name is "libcublas.dylib" on Apple platforms and
 * "libcublas.so" everywhere else.
 *
 * @param[out] dso_handle  forwarded unchanged to GetDsoHandleWithSearchPath.
 */
void GetCublasDsoHandle(void** dso_handle) {
  // Pick the platform-specific file name first, then make a single call.
#if defined(__APPLE__) || defined(__OSX__)
  const char* const libName = "libcublas.dylib";
#else
  const char* const libName = "libcublas.so";
#endif
  GetDsoHandleWithSearchPath(FLAGS_cuda_dir, libName, dso_handle);
}
/**
 * @brief Obtain the handle of the cuDNN shared library.
 *
 * Delegates to GetDsoHandleWithSearchPath with FLAGS_cudnn_dir as the search
 * root. The requested file name is "libcudnn.dylib" on Apple platforms and
 * "libcudnn.so" everywhere else.
 *
 * @param[out] dso_handle  forwarded unchanged to GetDsoHandleWithSearchPath.
 */
void GetCudnnDsoHandle(void** dso_handle) {
  // Pick the platform-specific file name first, then make a single call.
#if defined(__APPLE__) || defined(__OSX__)
  const char* const libName = "libcudnn.dylib";
#else
  const char* const libName = "libcudnn.so";
#endif
  GetDsoHandleWithSearchPath(FLAGS_cudnn_dir, libName, dso_handle);
}
/**
 * @brief Obtain the handle of the CUDA runtime shared library.
 *
 * Delegates to GetDsoHandleWithSearchPath with an empty search root (no flag
 * is consulted here). The requested file name is "libcudart.dylib" on Apple
 * platforms and "libcudart.so" everywhere else.
 *
 * @param[out] dso_handle  forwarded unchanged to GetDsoHandleWithSearchPath.
 */
void GetCudartDsoHandle(void** dso_handle) {
  // Pick the platform-specific file name first, then make a single call.
#if defined(__APPLE__) || defined(__OSX__)
  const char* const libName = "libcudart.dylib";
#else
  const char* const libName = "libcudart.so";
#endif
  GetDsoHandleWithSearchPath("", libName, dso_handle);
}
/**
 * @brief Obtain the handle of the cuRAND shared library.
 *
 * Delegates to GetDsoHandleWithSearchPath with FLAGS_cuda_dir as the search
 * root. The requested file name is "libcurand.dylib" on Apple platforms and
 * "libcurand.so" everywhere else.
 *
 * @param[out] dso_handle  forwarded unchanged to GetDsoHandleWithSearchPath.
 */
void GetCurandDsoHandle(void** dso_handle) {
  // Pick the platform-specific file name first, then make a single call.
#if defined(__APPLE__) || defined(__OSX__)
  const char* const libName = "libcurand.dylib";
#else
  const char* const libName = "libcurand.so";
#endif
  GetDsoHandleWithSearchPath(FLAGS_cuda_dir, libName, dso_handle);
}
......@@ -35,7 +35,7 @@ __global__ void KeMatrixAddRows(real* output, int ldo,
real *tab = table + tableId * ldt;
for (int i = idx; i < dim; i += blockDimX) {
if (AddRow) {
atomicAdd(&tab[i], out[i]);
paddle::paddleAtomicAdd(&tab[i], out[i]);
} else {
out[i] += tab[i];
}
......
......@@ -149,9 +149,13 @@ void DoubleBuffer::startAsyncLoad() {
taskReadySem_.post();
}
ClassRegistrar<DataProvider, DataConfig, bool> DataProvider::registrar_;
DataProvider* DataProvider::create(const DataConfig& config, bool useGpu) {
return registrar_.createByType(config.type(), config, useGpu);
ClassRegistrar<DataProvider, DataConfig, ModelConfig, bool>
DataProvider::registrar_;
DataProvider* DataProvider::create(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu) {
return registrar_.createByType(config.type(), config, modelConfig, useGpu);
}
REGISTER_DATA_PROVIDER(simple, SimpleDataProvider);
......
......@@ -39,15 +39,30 @@ limitations under the License. */
#include "paddle/parameter/Argument.h"
namespace paddle {
/**
* @def REGISTER_DATA_PROVIDER
* @brief Macro for registering a data provider
* @brief Macro for registering a data provider. The class type should contain
* a constructor with parameters (DataConfig, bool).
*/
#define REGISTER_DATA_PROVIDER(__type_name, __class_name) \
static InitFunction __reg_type_##__type_name([]() { \
#define REGISTER_DATA_PROVIDER(__type_name, __class_name)\
static InitFunction __reg_type_##__type_name([]() {\
DataProvider::registrar_.registerClass(\
#__type_name, \
[](DataConfig conf, ModelConfig, bool useGpu) -> DataProvider* { \
DataProvider* dp = new __class_name (conf, useGpu);\
return dp;\
});\
})
/**
* @def REGISTER_DATA_PROVIDER_EX
* @brief Macro for registering a data provider whose class provides a
* constructor with parameters (DataConfig, ModelConfig, bool).
*/
#define REGISTER_DATA_PROVIDER_EX(__type_name, __class_name) \
static InitFunction __reg_type_##__type_name([] { \
DataProvider::registrar_.registerClass<__class_name>(#__type_name); \
})
})
class DataBatch;
class BufferBatch;
......@@ -285,10 +300,18 @@ protected:
*/
class DataProvider {
public:
static ClassRegistrar<DataProvider, DataConfig, bool> registrar_;
static ClassRegistrar<DataProvider, DataConfig, ModelConfig, bool> registrar_;
static DataProvider* create(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu = FLAGS_use_gpu);
/**
 * @brief Convenience overload of create() intended only for unit tests.
 *
 * Forwards to the three-argument create(config, modelConfig, useGpu),
 * supplying a default-constructed ModelConfig as the model configuration.
 *
 * @param[in] config  data configuration, passed through unchanged
 * @param[in] useGpu  passed through unchanged
 * @return whatever the three-argument create() returns
 */
inline static DataProvider* create(const DataConfig &config, bool useGpu) {
return create(config, ModelConfig(), useGpu);
}
DataProvider(const DataConfig& config, bool useGpu)
: config_(config),
skipShuffle_(false),
......@@ -336,13 +359,13 @@ public:
* @note return -1 to indicate unlimited number of samples.
*/
virtual int64_t getSize() = 0;
/**
* @brief Get next batch training samples internally
* @param[in] size size of training samples to get
* @param[out] batch a batch of training samples
* @return actual size of obtained training samples
*/
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0;
protected:
......
......@@ -65,7 +65,8 @@ void DataProviderGroup<T>::reset() {
provider_ = nullptr;
// shuffle file list
std::random_shuffle(fileList_.begin(), fileList_.end());
std::shuffle(fileList_.begin(), fileList_.end(),
ThreadLocalRandomEngine::get());
startLoader();
DataProvider::reset();
......
......@@ -22,7 +22,9 @@ namespace paddle {
using namespace std;
MultiDataProvider::MultiDataProvider(const DataConfig& config, bool useGpu)
MultiDataProvider::MultiDataProvider(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu)
: DataProvider(config, useGpu) {
bool atLeastOneMainDataFlag = false;
totalDataRatio_ = 0;
......@@ -58,7 +60,9 @@ MultiDataProvider::MultiDataProvider(const DataConfig& config, bool useGpu)
subConfig.set_async_load_data(false);
}
subDataProviders_[i] =
std::unique_ptr<DataProvider>(DataProvider::create(subConfig, useGpu_));
std::unique_ptr<DataProvider>(DataProvider::create(subConfig,
modelConfig,
useGpu_));
}
}
......@@ -116,6 +120,6 @@ int64_t MultiDataProvider::getNextBatchInternal(int64_t size,
return batch->getSize();
}
REGISTER_DATA_PROVIDER(multi, MultiDataProvider);
REGISTER_DATA_PROVIDER_EX(multi, MultiDataProvider);
} // namespace paddle
......@@ -24,7 +24,9 @@ protected:
std::vector<std::unique_ptr<DataProvider>> subDataProviders_;
public:
MultiDataProvider(const DataConfig& config, bool useGpu);
MultiDataProvider(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu);
~MultiDataProvider() {}
virtual void reset();
virtual void shuffle();
......
......@@ -374,7 +374,8 @@ void ProtoDataProvider::reset() {
}
void ProtoDataProvider::shuffle() {
std::random_shuffle(shuffledSequenceIds_.begin(), shuffledSequenceIds_.end());
std::shuffle(shuffledSequenceIds_.begin(), shuffledSequenceIds_.end(),
ThreadLocalRandomEngine::get());
}
/*
......
......@@ -17,6 +17,8 @@ limitations under the License. */
#include "paddle/utils/PythonUtil.h"
#include <fenv.h>
#include "paddle/utils/Util.h"
#include "paddle/utils/Excepts.h"
namespace paddle {
......@@ -44,7 +46,6 @@ PyDataProvider::PyDataProvider(const DataConfig& config, bool useGpu,
}
void PyDataProvider::loadData(const std::vector<std::string>& fileList) {
int feFlag = fegetexcept();
VLOG(1) << "module:" << pyModuleName_ << " class:" << pyClassName_;
classInstance_ =
createPythonClass(pyModuleName_, pyClassName_, fileList, pyUserArgs_);
......@@ -55,7 +56,7 @@ void PyDataProvider::loadData(const std::vector<std::string>& fileList) {
std::string headerInfo =
std::string(PyString_AsString(obj.get()), PyString_Size(obj.get()));
parseHeaderData(headerInfo);
feenableexcept(feFlag);
feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
}
void PyDataProvider::parseHeaderData(const std::string& headerData) {
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册