Commit 3304de7a authored by Yu Yang, committed by GitHub

Merge pull request #48 from reyoung/master

Merge Baidu Changes into github
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8)
 project(paddle CXX C)
 set(PADDLE_MAJOR_VERSION 0)
 set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b)
+set(PADDLE_PATCH_VERSION 0b0)
 set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
......
@@ -8,12 +8,12 @@ Docker is a tool designed to make it easier to create, deploy, and run applicati
 ### PaddlePaddle Docker images
 There are six Docker images:
-- paddledev/paddle:latest-cpu: PaddlePaddle CPU binary image.
-- paddledev/paddle:latest-gpu: PaddlePaddle GPU binary image.
-- paddledev/paddle:latest-cpu-devel: PaddlePaddle CPU binary image plus source code.
-- paddledev/paddle:latest-gpu-devel: PaddlePaddle GPU binary image plus source code.
-- paddledev/paddle:latest-cpu-demo: PaddlePaddle CPU binary image plus source code and demo
-- paddledev/paddle:latest-gpu-demo: PaddlePaddle GPU binary image plus source code and demo
+- paddledev/paddle:cpu-latest: PaddlePaddle CPU binary image.
+- paddledev/paddle:gpu-latest: PaddlePaddle GPU binary image.
+- paddledev/paddle:cpu-devel-latest: PaddlePaddle CPU binary image plus source code.
+- paddledev/paddle:gpu-devel-latest: PaddlePaddle GPU binary image plus source code.
+- paddledev/paddle:cpu-demo-latest: PaddlePaddle CPU binary image plus source code and demo
+- paddledev/paddle:gpu-demo-latest: PaddlePaddle GPU binary image plus source code and demo
 Tags with latest will be replaced by a released version.
@@ -23,7 +23,7 @@ You have to install Docker in your machine which has linux kernel version 3.10+
 You can use ```docker pull``` to download images first, or just launch a container with ```docker run```:
 ```bash
-docker run -it paddledev/paddle:lastest-cpu
+docker run -it paddledev/paddle:cpu-latest
 ```
 If you want to launch container with GPU support, you need to set some environment variables at the same time:
@@ -31,7 +31,7 @@ If you want to launch container with GPU support, you need to set some environme
 ```bash
 export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
 export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
-docker run -it paddledev/paddle:latest-gpu
+docker run -it paddledev/paddle:gpu-latest
 ```
 ### Notice
......
@@ -165,7 +165,7 @@ We provide both C++ and Python interfaces to extract features. The following exa
 ### C++ Interface
-First, specify image data list in `define_py_data_sources` in the config, see example `demo/model_zoo/resnet/resnet.py`.
+First, specify image data list in `define_py_data_sources2` in the config, see example `demo/model_zoo/resnet/resnet.py`.
 ```
 train_list = 'train.list' if not is_test else None
......
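This commit renames `define_py_data_sources` to `define_py_data_sources2` across docs and configs. For orientation, here is a minimal sketch of a call to it; the list and module names are illustrative placeholders, while the real calls appear in the sequenceGen test configs further down this diff:

```python
# Illustrative only: 'train.list' and the 'dataprovider' module are placeholders.
from paddle.trainer_config_helpers import *  # exports define_py_data_sources2

define_py_data_sources2(train_list='train.list',   # file listing training data paths
                        test_list=None,            # no test set in this sketch
                        module='dataprovider',     # Python module with the provider
                        obj='process',             # @provider-decorated function
                        args={'dict_file': {}})    # kwargs forwarded to init_hook
```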
@@ -257,7 +257,7 @@ In these network, we use several api in `trainer_config_helpers
 * Text Convolution Pooling Layer, `text_conv_pool
   <../../ui/api/trainer_config_helpers/networks.html
   #trainer_config_helpers.networks.text_conv_pool>`_
-* Declare Python Data Sources, `define_py_data_sources
+* Declare Python Data Sources, `define_py_data_sources2
   <../../ui/api/trainer_config_helpers/data_sources.html>`_

 Data Provider
......
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from py_paddle import swig_paddle, DataProviderWrapperConverter
-from paddle.trainer.PyDataProviderWrapper import DenseSlot
+from py_paddle import swig_paddle, DataProviderConverter
+from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config

 TEST_DATA = [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -89,12 +89,12 @@ TEST_DATA = [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 def main():
-    conf = parse_config("./mnist_model/trainer_config.conf.norm", "")
+    conf = parse_config("./mnist_model/trainer_config.py", "")
     print conf.data_config.load_data_args
     network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
     assert isinstance(network, swig_paddle.GradientMachine)  # For code hint.
     network.loadParameters("./mnist_model/")
-    converter = DataProviderWrapperConverter(False, [DenseSlot(784)])
+    converter = DataProviderConverter([dense_vector(784)])
     inArg = converter(TEST_DATA)
     print network.forwardTest(inArg)
......
@@ -10,27 +10,35 @@ SWIG. The main steps of predict values in python are:
 * Predict

 Here is a sample python script that shows the typical prediction process for the
-MNIST classification problem.
+MNIST classification problem. A complete sample code could be found at
+:code:`src_root/doc/ui/predict/predict_sample.py`.

 .. literalinclude:: ./predict_sample.py
    :language: python
-   :linenos:
+   :lines: 15-18,90-100,101-104

 The module that does the most of the job is py_paddle.swig_paddle, it's
 generated by SWIG and has complete documents, for more details you can use
 python's :code:`help()` function. Let's walk through the above python script:

-* At the beginning, initialize PaddlePaddle with command line arguments(line 90).
-* Parse the configuration file that is used in training(line 93).
-* Create a neural network at line 95 according the parsed configuration, then
-  load the trained parameters from model at line 97.
-* A utility class for data transformation is created at line 98.
+* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize
+  PaddlePaddle with command line arguments, for more about command line arguments
+  see `Command Line Arguments <../cmd_argument/detail_introduction.html>`_.
+* Parse the configuration file that is used in training with :code:`parse_config()`.
+  Because data to predict with always have no label, and output of prediction work
+  normally is the output layer rather than the cost layer, so you should modify
+  the configuration file accordingly before using it in the prediction work.
+* Create a neural network with
+  :code:`swig_paddle.GradientMachine.createFromConfigproto()`, which takes the
+  parsed configuration :code:`conf.model_config` as argument. Then load the
+  trained parameters from the model with :code:`network.loadParameters()`.
+* Create a data converter object of utility class :code:`DataProviderConverter`.
   - Note: As swig_paddle can only accept C++ matrices, we offer a utility
-    class DataProviderWraaperConverter that can accept the same input data with
-    PyDataProviderWrapper, for more information please refer to document
+    class DataProviderConverter that can accept the same input data with
+    PyDataProvider2, for more information please refer to document
     of `PyDataProvider2 <../data_provider/pydataprovider2.html>`_.
-* Do the prediction and output the result at line 100, forwardTest is another
-  utility class that directly takes the activations of the output layer.
+* Do the prediction with :code:`forwardTest()`, which takes the converted
+  input data and outputs the activations of the output layer.

 Here is a typical output:
......
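Condensed, the new-API flow in the walkthrough above comes down to a handful of calls. The sketch below is assembled from the predict_sample.py hunk earlier in this commit; the all-zero vector stands in for a real MNIST image:

```python
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config

swig_paddle.initPaddle("--use_gpu=0")  # initialize with command line arguments
conf = parse_config("./mnist_model/trainer_config.py", "")
network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
network.loadParameters("./mnist_model/")

# One sample, one input slot: a 784-dim dense feature vector.
converter = DataProviderConverter([dense_vector(784)])
inArg = converter([[[0.0] * 784]])
print network.forwardTest(inArg)  # activations of the output layer
```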
@@ -7,6 +7,9 @@ add_subdirectory(pserver)
 add_subdirectory(trainer)
 add_subdirectory(scripts)

+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
+               ${CMAKE_CURRENT_SOURCE_DIR}/setup.py)
+
 if(WITH_PREDICT_SDK)
   add_subdirectory(predict)
 endif()
......
@@ -266,25 +266,21 @@ template<int blockSize>
 __global__ void KeMatrixClassificationError(real* in_A,
                                             int* in_B,
                                             real* out_C,
-                                            int dimM,
                                             int dimN) {
   __shared__ real max_s[blockSize];
   __shared__ int max_l[blockSize];
-  int cnt = (dimN + blockSize -1) / blockSize;
-  int tid = threadIdx.x;
-  int lmt = tid;
-  int index = 0;
-  real t;
+  const int tid = threadIdx.x;
+  const int rowId = blockIdx.x;

   max_s[tid] = -1e30f;
-  for (int ii = 0; ii < cnt && lmt < dimN; ii++) {
-    index = blockIdx.y*dimN + lmt;
-    t = in_A[index];
-    if (max_s[tid] < t) {
-      max_s[tid] = t;
-      max_l[tid] = lmt;
-    }
-    lmt += blockSize;
+  in_A += rowId * dimN;
+  real tmp;
+  for (int colId = tid; colId < dimN; colId += blockSize) {
+    tmp = in_A[colId];
+    if (max_s[tid] < tmp) {
+      max_s[tid] = tmp;
+      max_l[tid] = colId;
+    }
   }
   __syncthreads();
@@ -300,7 +296,7 @@ __global__ void KeMatrixClassificationError(real* in_A,
   __syncthreads();

   if (tid == 0) {
-    out_C[blockIdx.y] = (max_l[0] == in_B[blockIdx.y] ? 0 : 1.0f);
+    out_C[rowId] = (max_l[0] == in_B[rowId] ? 0 : 1.0f);
   }
 }
@@ -313,12 +309,9 @@ void hl_matrix_classification_error(real* A_d,
   CHECK_NOTNULL(B_d);
   CHECK_NOTNULL(C_d);

-  int blocksX = 1;
-  int blocksY = dimM;
-  dim3 threads(1024, 1);
-  dim3 grid(blocksX, blocksY);
-  KeMatrixClassificationError<1024><<< grid, threads, 0, STREAM_DEFAULT >>>
-    (A_d, B_d, C_d, dimM, dimN);
+  // each sample is calculated by one block
+  KeMatrixClassificationError<1024><<< dimM, 1024, 0, STREAM_DEFAULT >>>
+    (A_d, B_d, C_d, dimN);

   CHECK_SYNC("hl_matrix_classification_error");
 }
......
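Behaviorally, the rewritten kernel still computes one row-wise argmax per block and compares it against the label. A NumPy sketch of the quantity out_C ends up holding (illustrative, not a Paddle API):

```python
import numpy as np

def classification_error(output, label):
    """Mirror of the kernel: error[i] = 0 if argmax(output[i]) == label[i], else 1."""
    pred = np.argmax(output, axis=1)           # what the block-wide max reduction finds
    return (pred != label).astype(np.float32)  # matches out_C[rowId] = 0 or 1.0f

print classification_error(np.array([[0.1, 0.9], [0.8, 0.2]]), np.array([1, 1]))
# -> [ 0.  1.]
```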
@@ -47,81 +47,40 @@ bool CRFLayer::init(const LayerMap& layerMap,
   // We don't need sequenceStartPositions because each sample of output_ is
   // for the cost of one sequence.
   setNeedSequenceInfo(false);
-  if (useGpu_) {
-    tmpCpuInput_.reserve(inputLayers_.size());
-    for (size_t i = 0; i < inputLayers_.size(); i++) {
-      tmpCpuInput_.push_back(Argument());
-    }
-  }
   return true;
 }
 void CRFLayer::forward(PassType passType) {
   Layer::forward(passType);
-  if (useGpu_) {
-    for (size_t i = 0; i < inputLayers_.size(); i++) {
-      tmpCpuInput_[i].resizeAndCopyFrom(getInput(i), false, HPPL_STREAM_1);
-    }
-    VectorPtr cpuParameterValue;
-    VectorPtr cpuParameterGradient;
-    cpuParameterValue =
-        Vector::create(parameter_->getBuf(PARAMETER_VALUE)->getSize(), false);
-    cpuParameterValue->
-        copyFrom(*parameter_->getBuf(PARAMETER_VALUE), HPPL_STREAM_1);
-    if (parameter_->getBuf(PARAMETER_GRADIENT)) {
-      cpuParameterGradient =
-          Vector::create(parameter_->getBuf(PARAMETER_GRADIENT)->getSize(),
-                         false);
-      cpuParameterGradient->
-          copyFrom(*parameter_->getBuf(PARAMETER_GRADIENT), HPPL_STREAM_1);
-    } else {
-      cpuParameterGradient = nullptr;
-    }
-    forwardImp(tmpCpuInput_[0], tmpCpuInput_[1], cpuParameterValue,
-               cpuParameterGradient);
-    parameter_->getBuf(PARAMETER_VALUE)->copyFrom(*cpuParameterValue,
-                                                  HPPL_STREAM_1);
-    if (parameter_->getBuf(PARAMETER_GRADIENT)) {
-      parameter_->getBuf(PARAMETER_GRADIENT)->copyFrom(*cpuParameterGradient,
-                                                       HPPL_STREAM_1);
-    }
-  } else {
-    forwardImp(getInput(0), getInput(1), parameter_->getBuf(PARAMETER_VALUE),
-               parameter_->getBuf(PARAMETER_GRADIENT));
-  }
-}
-
-void CRFLayer::forwardImp(const Argument&output,
-                          const Argument& label,
-                          VectorPtr parameterValue,
-                          VectorPtr parameterGradient) {
+  CHECK(!useGpu_) << "GPU is not supported";
+
+  const Argument& output = getInput(0);
+  const Argument& label = getInput(1);
   CHECK(label.sequenceStartPositions);
   CHECK(label.ids);
   int batchSize = output.getBatchSize();
   size_t numSequences = label.sequenceStartPositions->getSize() - 1;
   resizeOutput(numSequences, 1);
-  std::vector<real> out(numSequences);
   const int* starts = label.sequenceStartPositions->getData(false);
   CHECK_EQ(starts[numSequences], batchSize);
-  VectorPtr cpuParameterValue;
-  VectorPtr cpuParameterGradient;

   for (size_t i = 0; i < numSequences; ++i) {
     if (i >= crfs_.size()) {
       crfs_.emplace_back(numClasses_,
-                         parameterValue->getData(),
-                         parameterGradient
-                             ? parameterGradient->getData()
+                         parameter_->getBuf(PARAMETER_VALUE)->getData(),
+                         parameter_->getBuf(PARAMETER_GRADIENT)
+                             ? parameter_->getBuf(PARAMETER_GRADIENT)->getData()
                              : nullptr);
     }
-    out[i] = crfs_[i].forward(
+    output_.value->getData()[i] = crfs_[i].forward(
         output.value->getData() + numClasses_ * starts[i],
         label.ids->getData() + starts[i], starts[i + 1] - starts[i]);
   }
-  output_.value->copyFrom(out.data(), numSequences);

   if (weightLayer_) {
     const MatrixPtr& weight = getInputValue(*weightLayer_);
     getOutputValue()->dotMul(*getOutputValue(), *weight);
@@ -129,22 +88,8 @@ void CRFLayer::forwardImp(const Argument&output,
 }

 void CRFLayer::backward(const UpdateCallback &callback) {
-  (void)callback;
-  if (useGpu_) {
-    backwardImp(callback, tmpCpuInput_[0], tmpCpuInput_[1]);
-    const_cast<Argument&>(getInput(0)).
-        resizeAndCopyFrom(tmpCpuInput_[0], true, HPPL_STREAM_1);
-    const_cast<Argument&>(getInput(1)).
-        resizeAndCopyFrom(tmpCpuInput_[1], true, HPPL_STREAM_1);
-  } else {
-    backwardImp(callback, getInput(0), getInput(1));
-  }
-}
-
-void CRFLayer::backwardImp(const UpdateCallback& callback,
-                           const Argument&output,
-                           const Argument& label) {
+  const Argument& output = getInput(0);
+  const Argument& label = getInput(1);
   const int* starts = label.sequenceStartPositions->getData(false);
   int numSequences = label.sequenceStartPositions->getSize() - 1;
@@ -159,9 +104,11 @@ void CRFLayer::backwardImp(const UpdateCallback& callback,
       grad->mulScalar(weight);
     }
   }

   if (coeff_ != real(1.0f)) {
     output.grad->mulScalar(coeff_);
   }

   parameter_->incUpdate(callback);
 }
......
@@ -32,11 +32,7 @@ public:
   explicit CRFLayer(const LayerConfig& config) : Layer(config) {}
   virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
   virtual void forward(PassType passType);
-  void forwardImp(const Argument&output, const Argument& label,
-                  VectorPtr parameterValue, VectorPtr parameterGradient);
   virtual void backward(const UpdateCallback& callback);
-  void backwardImp(const UpdateCallback& callback, const Argument&output,
-                   const Argument& label);

 protected:
   size_t numClasses_;
@@ -44,7 +40,6 @@ protected:
   std::vector<LinearChainCRF> crfs_;
   LayerPtr weightLayer_;  // weight for each sequence
   real coeff_;  // weight for the layer
-  std::vector<Argument> tmpCpuInput_;
 };

 }  // namespace paddle
@@ -18,27 +18,33 @@
 import os
 import sys

-from paddle.trainer.PyDataProviderWrapper import *
+from paddle.trainer.PyDataProvider2 import *

-@init_hook_wrapper
-def hook(obj, dict_file, **kwargs):
-    obj.word_dict = dict_file
-    obj.slots = [IndexSlot(len(obj.word_dict)), IndexSlot(3)]
-    obj.logger.info('dict len : %d' % (len(obj.word_dict)))
+def hook(settings, dict_file, **kwargs):
+    settings.word_dict = dict_file
+    settings.input_types = [integer_value_sequence(len(settings.word_dict)),
+                            integer_value_sequence(3)]
+    settings.logger.info('dict len : %d' % (len(settings.word_dict)))

-@provider(use_seq=True, init_hook=hook)
-def process(obj, file_name):
+@provider(init_hook=hook)
+def process(settings, file_name):
     with open(file_name, 'r') as fdata:
         for line in fdata:
             label, comment = line.strip().split('\t')
             label = int(''.join(label.split()))
             words = comment.split()
-            word_slot = [obj.word_dict[w] for w in words if w in obj.word_dict]
+            word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict]
             yield word_slot, [label]

 ## for hierarchical sequence network
-@provider(use_seq=True, init_hook=hook)
-def process2(obj, file_name):
+def hook2(settings, dict_file, **kwargs):
+    settings.word_dict = dict_file
+    settings.input_types = [integer_value_sub_sequence(len(settings.word_dict)),
+                            integer_value_sub_sequence(3)]
+    settings.logger.info('dict len : %d' % (len(settings.word_dict)))
+
+@provider(init_hook=hook2)
+def process2(settings, file_name):
     with open(file_name) as fdata:
         label_list = []
         word_slot_list = []
@@ -47,7 +53,7 @@ def process2(obj, file_name):
             label,comment = line.strip().split('\t')
             label = int(''.join(label.split()))
             words = comment.split()
-            word_slot = [obj.word_dict[w] for w in words if w in obj.word_dict]
+            word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict]
             label_list.append([label])
             word_slot_list.append(word_slot)
         else:
......
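For reference, process() above consumes tab-separated lines of the form label<TAB>comment. A standalone sketch of how one such line becomes a (sequence, label) sample, with illustrative data and no Paddle imports:

```python
line = "1\tthe cat sat on the mat"                    # illustrative input line
word_dict = {'the': 0, 'cat': 1, 'sat': 2, 'mat': 3}  # illustrative dictionary

label, comment = line.strip().split('\t')
label = int(''.join(label.split()))
words = comment.split()
word_slot = [word_dict[w] for w in words if w in word_dict]  # 'on' is dropped
print word_slot, [label]  # [0, 1, 2, 0, 3] [1]
```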
@@ -21,11 +21,11 @@ dict_file = dict()
 for line_count, line in enumerate(open(dict_path, "r")):
     dict_file[line.strip()] = line_count

-define_py_data_sources(train_list='gserver/tests/Sequence/train.list',
-                       test_list=None,
-                       module='sequenceGen',
-                       obj='process',
-                       args={"dict_file":dict_file})
+define_py_data_sources2(train_list='gserver/tests/Sequence/train.list',
+                        test_list=None,
+                        module='sequenceGen',
+                        obj='process',
+                        args={"dict_file":dict_file})

 settings(batch_size=5)
 ######################## network configure ################################
......
@@ -21,11 +21,11 @@ dict_file = dict()
 for line_count, line in enumerate(open(dict_path, "r")):
     dict_file[line.strip()] = line_count

-define_py_data_sources(train_list='gserver/tests/Sequence/train.list.nest',
-                       test_list=None,
-                       module='sequenceGen',
-                       obj='process2',
-                       args={"dict_file":dict_file})
+define_py_data_sources2(train_list='gserver/tests/Sequence/train.list.nest',
+                        test_list=None,
+                        module='sequenceGen',
+                        obj='process2',
+                        args={"dict_file":dict_file})

 settings(batch_size=2)
 ######################## network configure ################################
......
@@ -179,10 +179,9 @@ TEST(Layer, CRFLayer) {
   config.layerConfig.add_inputs();
   config.layerConfig.add_inputs();

-  for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "crf", 100, /* trans */ false, /* useGpu */ useGpu,
-                  false /*useWeight*/, 0.03 /*epsilon*/);
-  }
+  // Not support GPU now
+  testLayerGrad(config, "crf", 100, /* trans */ false, /* useGpu */ false,
+                false /*useWeight*/, 0.03 /*epsilon*/);
 }

 TEST(Layer, CTCLayer) {
......
@@ -1697,7 +1697,6 @@ TEST(Matrix, cosSimDerivate) {
   }
 }

-
 void testParamReluForward(int height, int width, int w_height,
                           int w_width) {
   MatrixPtr output = CpuMatrix::create(height, width, false, false);
@@ -1736,7 +1735,6 @@ TEST(Matrix, paramReluForward) {
   }
 }

-
 void testParamReluBackwardW(int height, int width, int w_height,
                             int w_width) {
   MatrixPtr oGrad = CpuMatrix::create(height, width, false, false);
@@ -1775,7 +1773,6 @@ TEST(Matrix, paramReluBackwardW) {
   }
 }

-
 void testParamReluBackwardDiff(int height, int width, int w_height,
                                int w_width) {
   MatrixPtr oGrad = CpuMatrix::create(height, width, false, false);
@@ -1819,6 +1816,36 @@ TEST(Matrix, paramReluBackwardDiff) {
   }
 }

+void testClassificationError(int numSamples, int dim) {
+  MatrixPtr cpuError = std::make_shared<CpuMatrix>(numSamples, 1);
+  MatrixPtr gpuError = std::make_shared<GpuMatrix>(numSamples, 1);
+  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(numSamples, dim);
+  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(numSamples, dim);
+  IVectorPtr cpuLabel = std::make_shared<CpuIVector>(numSamples);
+  IVectorPtr gpuLabel = std::make_shared<GpuIVector>(numSamples);
+
+  cpuOutput->randomizeUniform();
+  cpuLabel->rand(dim);
+  gpuOutput->copyFrom(*cpuOutput);
+  gpuLabel->copyFrom(*cpuLabel);
+
+  cpuError->classificationError(cpuOutput, cpuLabel);
+  gpuError->classificationError(gpuOutput, gpuLabel);
+
+  MatrixPtr check = std::make_shared<CpuMatrix>(numSamples, 1);
+  check->copyFrom(*gpuError);
+  MatrixCheckEqual(*cpuError, *check);
+}
+
+TEST(Matrix, classificationError) {
+  for (auto numSamples : {1, 10, 100, 1000, 70000}) {
+    for (auto dim : {1, 10, 100, 1000}) {
+      VLOG(3) << " numSamples=" << numSamples << " dim=" << dim;
+      testClassificationError(numSamples, dim);
+    }
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);
......
@@ -269,6 +269,9 @@ void Argument::concat(const std::vector<Argument>& args,
                       const std::vector<int>& selectRows,
                       const std::vector<int>& seqStartPos, bool useGpu,
                       hl_stream_t stream, PassType passType) {
+  CHECK(!subSequenceStartPositions)
+      << "undefined behavior for subsequence positions";
   size_t batchSize = selectRows.size();
   auto copyArg = [batchSize, stream](MatrixPtr& dst, MatrixPtr src,
                                      int startRow, int pos, int size,
@@ -347,9 +350,11 @@ void Argument::concat(const std::vector<Argument>& args, bool useGpu,
                       hl_stream_t stream, PassType passType) {
   int32_t batchSize = 0;
   int64_t numSequences = 0;
+  int64_t numSubSequences = 0;
   for (auto& arg : args) {
     batchSize += arg.getBatchSize();
     numSequences += arg.getNumSequences();
+    numSubSequences += arg.getNumSubSequences();
   }

   auto copyArg = [batchSize, stream](MatrixPtr& dst, MatrixPtr src,
@@ -393,8 +398,26 @@ void Argument::concat(const std::vector<Argument>& args, bool useGpu,
     std::copy(src->begin(), src->end(), dst->begin() + startRow);
   };

+  auto copySequencePos = []
+      (ICpuGpuVectorPtr& dstSeq, const ICpuGpuVectorPtr& srcSeq,
+       int dstNumSequences, int srcNumSequences,
+       int& startSequences, int startRow) {
+    if (srcSeq) {
+      ICpuGpuVector::resizeOrCreate(dstSeq, dstNumSequences + 1, false);
+      const int* src = srcSeq->getData(false);
+      int* dest = dstSeq->getMutableData(false);
+      for (int i = 0; i < srcNumSequences + 1; ++i) {
+        dest[i + startSequences] = src[i] + startRow;
+      }
+      startSequences += srcNumSequences;
+    } else {
+      dstSeq.reset();
+    }
+  };
+
   int startRow = 0;
   int startSequences = 0;
+  int startSubSequences = 0;
   dataId = args[0].dataId;
   for (auto& arg : args) {
     CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have"
@@ -403,17 +426,18 @@ void Argument::concat(const std::vector<Argument>& args, bool useGpu,
     copyArg(value, arg.value, startRow, useGpu);
     if (passType != PASS_TEST) copyArg(grad, arg.grad, startRow, useGpu);
     copyIds(ids, arg.ids, startRow, useGpu);
-    if (arg.sequenceStartPositions) {
-      ICpuGpuVector::resizeOrCreate(sequenceStartPositions,
-                                    numSequences + 1,
-                                    false);
-      const int* src = arg.sequenceStartPositions->getData(false);
-      int* dest = sequenceStartPositions->getMutableData(false);
-      for (int i = 0; i < arg.getNumSequences() + 1; ++i) {
-        dest[i + startSequences] = src[i] + startRow;
-      }
-      startSequences += arg.getNumSequences();
-    }
+    copySequencePos(sequenceStartPositions,
+                    arg.sequenceStartPositions,
+                    numSequences,
+                    arg.getNumSequences(),
+                    startSequences,
+                    startRow);
+    copySequencePos(subSequenceStartPositions,
+                    arg.subSequenceStartPositions,
+                    numSubSequences,
+                    arg.getNumSubSequences(),
+                    startSubSequences,
+                    startRow);
     copyStrs(strs, arg.strs, startRow, useGpu);
     startRow += arg.getBatchSize();
   }
......
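The new copySequencePos lambda shifts each argument's start offsets by the rows already consumed and appends them to the destination vector. A small Python sketch of that bookkeeping, with illustrative values rather than the Paddle API:

```python
def concat_seq_starts(args_starts):
    """Mirror of copySequencePos: each source start position is shifted by the
    batch rows already consumed (startRow) and appended to the destination."""
    dest, start_row = [0], 0
    for starts in args_starts:            # e.g. [0, 2, 5] holds two sequences
        dest.extend(s + start_row for s in starts[1:])
        start_row += starts[-1]           # rows consumed by this argument
    return dest

print concat_seq_starts([[0, 2, 5], [0, 3]])  # -> [0, 2, 5, 8]
```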
@@ -278,7 +278,7 @@ void ParameterClient2::prepareSendData(
         if (sendingPara) {
           sendJob->parallelInputIovs[serverId].push_back(
-              {sendMat->getLocalRow(row), sizeof(real) * blockSize});
+              {sendMat->getLocalRow(row), sizeof(real) * (size_t) blockSize});
           /// detect sparse parameter distribution
           sparseDistribution_->probeDistribution(serverId,
                                                  sizeof(real) * blockSize);
@@ -302,8 +302,8 @@ void ParameterClient2::prepareSendData(
       block->set_begin_pos(beginDim);
       block->set_block_size(endDim - beginDim);
       if (buf) {
-        sendJob->parallelInputIovs[serverId].push_back(
-            {buf + beginDim, sizeof(real) * (endDim - beginDim)});
+        sendJob->parallelInputIovs[serverId].push_back({buf + beginDim,
+            sizeof(real) * ((size_t) (endDim - beginDim))});
       }
     }
   }
......
@@ -724,7 +724,7 @@ void ParameterServer2::sendBackParameter(const ParameterBlock& block,
           << " id=" << block.para_id() << " block id=" << block.block_id();
   real* valueBuffer = vectors_[parameterType]->getPoint(offset);
-  outputBuffers->push_back({valueBuffer, block.block_size()});
+  outputBuffers->push_back({valueBuffer, (size_t) block.block_size()});
 }

 void ParameterServer2::sendBackParameter(const ParameterBlock& block,
......
@@ -148,7 +148,8 @@ void SocketChannel::writeMessage(const std::vector<struct iovec>& userIovs) {
   std::vector<iovec> iovs;
   iovs.reserve(userIovs.size() + 2);
   iovs.push_back({&header, sizeof(header)});
-  iovs.push_back({&iovLengths[0], sizeof(iovLengths[0]) * header.numIovs});
+  iovs.push_back({&iovLengths[0],
+                  sizeof(iovLengths[0]) * (size_t) header.numIovs});
   iovs.insert(iovs.end(), userIovs.begin(), userIovs.end());
   header.totalLength = 0;
......
@@ -35,11 +35,11 @@ except:
   pass

 setup(name="py_paddle",
-      version="0.8.0b",  # TODO(yuyang18): Make this version same as CMake
+      version="@PADDLE_VERSION@",
       ext_modules=[
         Extension('py_paddle._swig_paddle',  # Build SWIG Extension.
                   ['Paddle_wrap.cxx'],
                   extra_link_args=["-Xlinker", '-start-group'] +
                                   extra_links + ["-Xlinker", "-end-group"]
                   )
       ],
......
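The `@PADDLE_VERSION@` placeholder pairs with the configure_file() call added to paddle/CMakeLists.txt earlier in this commit: at configure time, CMake rewrites setup.py.in into setup.py, substituting each `@VAR@` token with the corresponding CMake variable. A rough stand-in for that substitution in plain Python (illustrative, not how CMake is implemented):

```python
# Illustrative stand-in for CMake's configure_file() token substitution.
template = 'version="@PADDLE_VERSION@",'
variables = {'PADDLE_VERSION': '0.8.0b0'}  # from the CMakeLists.txt hunk above

for name, value in variables.items():
    template = template.replace('@%s@' % name, value)
print template  # version="0.8.0b0",
```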
@@ -79,7 +79,7 @@ protected:
   // The update function for after update operations, such as averager.
   void threadTraverse(const ParameterOptimizer::TraverseCallback& callback,
                       int tid, size_t numThreads, Parameter* para);
-  typedef std::function<const ParameterOptimizer::TraverseCallback&(Parameter*)>
+  typedef std::function<const ParameterOptimizer::TraverseCallback(Parameter*)>
       GetTraverseCallback;
   void traverse(GetTraverseCallback getTraverseCallback);
 };
......
@@ -262,8 +262,8 @@ def SubModelEnd(name = None):
 def MakeLayerNameInParentSubmodel(name):
     suffix = ""
-    for submodel in g_submodel_stack[1:]:
-        suffix = "@" + submodel.name + suffix
+    if len(g_submodel_stack) > 1:
+        suffix = "@" + g_submodel_stack[-1].name
     return name + suffix

 def GetLayerBaseName(name):