提交 b41894d1 编写于 作者: S sweetsky0901

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into detection_output

...@@ -43,4 +43,11 @@ paddle_error paddle_init(int argc, char** argv) { ...@@ -43,4 +43,11 @@ paddle_error paddle_init(int argc, char** argv) {
isInit = true; isInit = true;
return kPD_NO_ERROR; return kPD_NO_ERROR;
} }
/**
 * Initialize the Paddle environment for the calling thread.
 *
 * When the use_gpu flag is set, hl_init is called with the configured
 * gpu_id; otherwise nothing is done.
 *
 * @return kPD_NO_ERROR in every case.
 */
paddle_error paddle_init_thread() {
  // CPU runs need no per-thread setup.
  if (!FLAGS_use_gpu) {
    return kPD_NO_ERROR;
  }

  hl_init(FLAGS_gpu_id);
  return kPD_NO_ERROR;
}
} }
...@@ -40,7 +40,7 @@ paddle_error paddle_matrix_destroy(paddle_matrix mat) { ...@@ -40,7 +40,7 @@ paddle_error paddle_matrix_destroy(paddle_matrix mat) {
paddle_error paddle_matrix_set_row(paddle_matrix mat, paddle_error paddle_matrix_set_row(paddle_matrix mat,
uint64_t rowID, uint64_t rowID,
paddle_real* rowArray) { paddle_real* rowArray) {
if (mat == nullptr) return kPD_NULLPTR; if (mat == nullptr || rowArray == nullptr) return kPD_NULLPTR;
auto ptr = cast(mat); auto ptr = cast(mat);
if (ptr->mat == nullptr) return kPD_NULLPTR; if (ptr->mat == nullptr) return kPD_NULLPTR;
if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE; if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "error.h"
/**
 * Map a paddle_error code to a human-readable, statically allocated string.
 *
 * @param err The error code to describe.
 * @return A string literal describing the error. Codes without a dedicated
 *         message yield the empty string.
 */
const char* paddle_error_string(paddle_error err) {
  // Start from the fallback so unlisted codes fall through unchanged.
  const char* message = "";

  switch (err) {
    case kPD_NULLPTR:
      message = "nullptr error";
      break;
    case kPD_OUT_OF_RANGE:
      message = "out of range error";
      break;
    case kPD_PROTOBUF_ERROR:
      message = "protobuf error";
      break;
    case kPD_NOT_SUPPORTED:
      message = "not supported error";
      break;
    case kPD_UNDEFINED_ERROR:
      message = "undefined error";
      break;
    default:
      break;
  }

  return message;
}
...@@ -15,6 +15,8 @@ limitations under the License. */ ...@@ -15,6 +15,8 @@ limitations under the License. */
#ifndef __PADDLE_CAPI_ERROR_H__ #ifndef __PADDLE_CAPI_ERROR_H__
#define __PADDLE_CAPI_ERROR_H__ #define __PADDLE_CAPI_ERROR_H__
#include "config.h"
/** /**
* Error Type for Paddle API. * Error Type for Paddle API.
*/ */
...@@ -27,4 +29,9 @@ typedef enum { ...@@ -27,4 +29,9 @@ typedef enum {
kPD_UNDEFINED_ERROR = -1, kPD_UNDEFINED_ERROR = -1,
} paddle_error; } paddle_error;
/**
* Error string for Paddle API.
*/
PD_API const char* paddle_error_string(paddle_error err);
#endif #endif
project(multi_thread) project(multi_thread)
cmake_minimum_required(VERSION 2.8) cmake_minimum_required(VERSION 2.8)
aux_source_directory(. SRC_LIST)
add_executable(${PROJECT_NAME} ${SRC_LIST})
find_package (Threads) find_package (Threads)
if(NOT PADDLE_ROOT)
set(PADDLE_ROOT $ENV{PADDLE_ROOT} CACHE PATH "Paddle Path")
endif()
if(PADDLE_ROOT)
include_directories(${PADDLE_ROOT}/include)
link_directories(${PADDLE_ROOT}/lib)
endif()
set(CPU_SRCS main.c)
add_executable(${PROJECT_NAME} ${CPU_SRCS})
set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99) set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99)
target_link_libraries(${PROJECT_NAME} -lpaddle_capi_shared target_link_libraries(${PROJECT_NAME}
${CMAKE_THREAD_LIBS_INIT}) -lpaddle_capi_shared
${CMAKE_THREAD_LIBS_INIT})
find_package(CUDA QUIET)
if(CUDA_FOUND)
set(GPU_SRCS main_gpu.c)
cuda_add_executable(${PROJECT_NAME}_gpu ${GPU_SRCS})
set_property(TARGET ${PROJECT_NAME}_gpu PROPERTY C_STANDARD 99)
target_link_libraries(${PROJECT_NAME}_gpu
-lpaddle_capi_shared
${CMAKE_THREAD_LIBS_INIT})
endif(CUDA_FOUND)
#include <paddle/capi.h>
#include <pthread.h>
#include <time.h>
#include "../common/common.h"
#define CONFIG_BIN "./trainer_config.bin"
#define NUM_THREAD 4
#define NUM_ITER 1000
pthread_mutex_t mutex;
/*
 * @brief A simple inference example that runs multiple threads on a GPU.
 *        Each thread holds its own local gradient_machine but shares the same
 *        parameters.
 *        If you want to run on different GPUs, you need to launch
 *        multiple processes or set trainer_count > 1.
 *
 * @param gm_ptr The thread-local paddle_gradient_machine, carried through the
 *               void* argument of pthread_create. It is destroyed by this
 *               function before it returns.
 * @return Always NULL.
 */
void* thread_main(void* gm_ptr) {
  /* Element counts of the input row and of the output read back each pass. */
  enum { INPUT_SIZE = 784, OUTPUT_SIZE = 10 };

  // Initialize the thread environment of Paddle.
  CHECK(paddle_init_thread());

  paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr);

  // Create input arguments.
  paddle_arguments in_args = paddle_arguments_create_none();
  // Create input matrix.
  paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1,
                                           /* size */ INPUT_SIZE,
                                           /* useGPU */ true);
  // Create output arguments.
  paddle_arguments out_args = paddle_arguments_create_none();
  // Create output matrix.
  paddle_matrix prob = paddle_matrix_create_none();

  // CPU buffers to cache the input and output.
  paddle_real* cpu_input = malloc(INPUT_SIZE * sizeof *cpu_input);
  paddle_real* cpu_output = malloc(OUTPUT_SIZE * sizeof *cpu_output);

  if (cpu_input != NULL && cpu_output != NULL) {
    for (int iter = 0; iter < NUM_ITER; ++iter) {
      // There is only one input layer of this network.
      CHECK(paddle_arguments_resize(in_args, 1));
      CHECK(paddle_arguments_set_value(in_args, 0, mat));

      // Fill the input with random values in [0, 1].
      for (int i = 0; i < INPUT_SIZE; ++i) {
        cpu_input[i] = rand() / ((float)RAND_MAX);
      }
      CHECK(paddle_matrix_set_value(mat, cpu_input));

      CHECK(paddle_gradient_machine_forward(machine,
                                            in_args,
                                            out_args,
                                            /* isTrain */ false));

      CHECK(paddle_arguments_get_value(out_args, 0, prob));
      CHECK(paddle_matrix_get_value(prob, cpu_output));

      // Serialize printing so lines from different threads do not interleave.
      pthread_mutex_lock(&mutex);
      printf("Prob: ");
      for (int i = 0; i < OUTPUT_SIZE; ++i) {
        printf("%.2f ", cpu_output[i]);
      }
      printf("\n");
      pthread_mutex_unlock(&mutex);
    }
  } else {
    // Skip inference instead of writing through a null buffer; the Paddle
    // objects below are still released.
    fprintf(stderr, "thread_main: failed to allocate CPU buffers\n");
  }

  CHECK(paddle_matrix_destroy(prob));
  CHECK(paddle_arguments_destroy(out_args));
  CHECK(paddle_matrix_destroy(mat));
  CHECK(paddle_arguments_destroy(in_args));
  CHECK(paddle_gradient_machine_destroy(machine));

  // free(NULL) is a no-op, so this is safe on the allocation-failure path.
  free(cpu_input);
  free(cpu_output);
  return NULL;
}
/*
 * Entry point of the multi-thread GPU inference example.
 *
 * Initializes Paddle with --use_gpu=True, builds a gradient machine from
 * CONFIG_BIN, then starts NUM_THREAD worker threads running thread_main.
 * Each worker receives its own gradient machine that shares parameters with
 * the one created here.
 *
 * Returns 0 on success, 1 if a worker thread could not be created.
 */
int main() {
  // Initialize Paddle.
  char* argv[] = {"--use_gpu=True"};
  CHECK(paddle_init(1, (char**)argv));

  // Reading config binary file. It is generated by `convert_protobin.sh`
  long size;
  void* buf = read_config(CONFIG_BIN, &size);

  // Create a gradient machine for inference.
  paddle_gradient_machine machine;
  CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size));
  CHECK(paddle_gradient_machine_randomize_param(machine));

  // Loading parameter. Uncomment the following line and change the directory.
  // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine,
  //                                                "./some_where_to_params"));
  srand(time(0));
  pthread_mutex_init(&mutex, NULL);

  pthread_t threads[NUM_THREAD];
  int num_started = 0;
  int status = 0;

  for (int i = 0; i < NUM_THREAD; ++i) {
    paddle_gradient_machine thread_local_machine;
    CHECK(paddle_gradient_machine_create_shared_param(
        machine, buf, size, &thread_local_machine));

    int rc = pthread_create(&threads[i], NULL, thread_main, thread_local_machine);
    if (rc != 0) {
      // threads[i] is not a valid thread ID after a failed pthread_create, so
      // it must not be joined. The worker would have destroyed its machine;
      // since it never ran, release the machine here.
      fprintf(stderr, "pthread_create failed with error %d\n", rc);
      CHECK(paddle_gradient_machine_destroy(thread_local_machine));
      status = 1;
      break;
    }
    ++num_started;
  }

  // Join only the threads that were actually created.
  for (int i = 0; i < num_started; ++i) {
    pthread_join(threads[i], NULL);
  }

  pthread_mutex_destroy(&mutex);

  return status;
}
...@@ -26,6 +26,13 @@ extern "C" { ...@@ -26,6 +26,13 @@ extern "C" {
*/ */
PD_API paddle_error paddle_init(int argc, char** argv); PD_API paddle_error paddle_init(int argc, char** argv);
/**
 * Initialize the thread environment of Paddle.
 * @note It is required for GPU runs but optional for CPU runs.
 *       For GPU runs, all threads will run on the same GPU device.
 */
PD_API paddle_error paddle_init_thread();
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
...@@ -59,7 +59,7 @@ class CompileTimeInferShapeContext : public InferShapeContext { ...@@ -59,7 +59,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
auto *in_var = block_.FindVarRecursive(Inputs(in)[i]); auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]); auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
if (in_var->GetType() != VarDesc::LOD_TENSOR) { if (in_var->GetType() != VarDesc::LOD_TENSOR) {
VLOG(3) << "input " << in << "is not LodTensor"; VLOG(3) << "input " << in << " is not LodTensor";
return; return;
} }
PADDLE_ENFORCE_EQ(in_var->GetType(), VarDesc::LOD_TENSOR, PADDLE_ENFORCE_EQ(in_var->GetType(), VarDesc::LOD_TENSOR,
......
...@@ -41,14 +41,18 @@ class ConcatOp : public framework::OperatorWithKernel { ...@@ -41,14 +41,18 @@ class ConcatOp : public framework::OperatorWithKernel {
for (size_t j = 0; j < in_zero_dims_size; j++) { for (size_t j = 0; j < in_zero_dims_size; j++) {
if (j == axis) { if (j == axis) {
out_dims[axis] += ins[i][j]; out_dims[axis] += ins[i][j];
continue; } else {
PADDLE_ENFORCE_EQ(out_dims[j], ins[i][j],
"Input tensors should have the same "
"elements except the specify axis.");
} }
PADDLE_ENFORCE_EQ(out_dims[j], ins[i][j],
"Input tensors should have the same "
"elements except the specify axis.");
} }
} }
if (out_dims[axis] < 0) {
out_dims[axis] = -1;
}
ctx->SetOutputDim("Out", out_dims); ctx->SetOutputDim("Out", out_dims);
ctx->ShareLoD("X", /*->*/ "Out");
} }
}; };
......
...@@ -95,6 +95,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { ...@@ -95,6 +95,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
"Input(Label) should be 1."); "Input(Label) should be 1.");
} }
ctx->SetOutputDim(framework::GradVarName("X"), x_dims); ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
ctx->ShareLoD("X", framework::GradVarName("X"));
} }
protected: protected:
......
...@@ -122,10 +122,6 @@ Place CUDADeviceContext::GetPlace() const { return place_; } ...@@ -122,10 +122,6 @@ Place CUDADeviceContext::GetPlace() const { return place_; }
void CUDADeviceContext::Wait() const { void CUDADeviceContext::Wait() const {
PADDLE_ENFORCE(cudaStreamSynchronize(stream_)); PADDLE_ENFORCE(cudaStreamSynchronize(stream_));
}
void CUDADeviceContext::Finish() const {
Wait();
PADDLE_ENFORCE(cudaGetLastError()); PADDLE_ENFORCE(cudaGetLastError());
} }
......
...@@ -46,8 +46,6 @@ class DeviceContext { ...@@ -46,8 +46,6 @@ class DeviceContext {
DeviceType* GetEigenDevice() const; DeviceType* GetEigenDevice() const;
virtual void Wait() const {} virtual void Wait() const {}
virtual void Finish() const {}
}; };
class CPUDeviceContext : public DeviceContext { class CPUDeviceContext : public DeviceContext {
...@@ -79,9 +77,6 @@ class CUDADeviceContext : public DeviceContext { ...@@ -79,9 +77,6 @@ class CUDADeviceContext : public DeviceContext {
/*! \brief Wait for all operations completion in the stream. */ /*! \brief Wait for all operations completion in the stream. */
void Wait() const override; void Wait() const override;
/*! \brief Check potential errors for the cuda kernel calls. */
void Finish() const override;
/*! \brief Return place in the device context. */ /*! \brief Return place in the device context. */
Place GetPlace() const override; Place GetPlace() const override;
......
...@@ -113,7 +113,10 @@ EOF ...@@ -113,7 +113,10 @@ EOF
-DWITH_SWIG_PY=ON \ -DWITH_SWIG_PY=ON \
-DWITH_STYLE_CHECK=OFF -DWITH_STYLE_CHECK=OFF
make -j `nproc` gen_proto_py make -j `nproc` gen_proto_py
make -j `nproc` paddle_python
make -j `nproc` paddle_docs paddle_docs_cn make -j `nproc` paddle_docs paddle_docs_cn
make -j `nproc` print_operators_doc
paddle/pybind/print_operators_doc > doc/en/html/operators.json
popd popd
fi fi
...@@ -185,14 +188,6 @@ EOF ...@@ -185,14 +188,6 @@ EOF
${DOCKERFILE_GPU_ENV} ${DOCKERFILE_GPU_ENV}
ADD go/cmd/pserver/pserver /usr/bin/ ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/ ADD go/cmd/master/master /usr/bin/
EOF
if [[ ${WITH_DOC:-OFF} == 'ON' ]]; then
cat >> /paddle/build/Dockerfile <<EOF
ADD paddle/pybind/print_operators_doc /usr/bin/
EOF
fi
cat >> /paddle/build/Dockerfile <<EOF
# default command shows the paddle version and exit # default command shows the paddle version and exit
CMD ["paddle", "version"] CMD ["paddle", "version"]
EOF EOF
......
...@@ -33,6 +33,12 @@ if(WITH_MKLDNN) ...@@ -33,6 +33,12 @@ if(WITH_MKLDNN)
list(APPEND MKL_DEPENDS mkldnn) list(APPEND MKL_DEPENDS mkldnn)
endif() endif()
if(WITH_GPU)
SET(PACKAGE_NAME "paddlepaddle-gpu")
else()
SET(PACKAGE_NAME "paddlepaddle")
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
......
...@@ -2722,15 +2722,15 @@ def img_pool_layer(input, ...@@ -2722,15 +2722,15 @@ def img_pool_layer(input,
.. math:: .. math::
w = 1 + int(ceil(input\_width + 2 * padding - pool\_size) / float(stride)) w = 1 + \frac{ceil(input\_width + 2 * padding - pool\_size)}{stride} \\\\
h = 1 + int(ceil(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) h = 1 + \frac{ceil(input\_height + 2 * padding\_y - pool\_size\_y)}{stride\_y}
- ceil_mode=False: - ceil_mode=False:
.. math:: .. math::
w = 1 + int(floor(input\_width + 2 * padding - pool\_size) / float(stride)) w = 1 + \frac{floor(input\_width + 2 * padding - pool\_size)}{stride} \\\\
h = 1 + int(floor(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) h = 1 + \frac{floor(input\_height + 2 * padding\_y - pool\_size\_y)}{stride\_y}
The example usage is: The example usage is:
...@@ -2863,17 +2863,17 @@ def img_pool3d_layer(input, ...@@ -2863,17 +2863,17 @@ def img_pool3d_layer(input,
.. math:: .. math::
w = 1 + int(ceil(input\_width + 2 * padding - pool\_size) / float(stride)) w = 1 + \frac{ceil(input\_width + 2 * padding - pool\_size)}{stride} \\\\
h = 1 + int(ceil(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) h = 1 + \frac{ceil(input\_height + 2 * padding\_y - pool\_size\_y)}{stride\_y} \\\\
d = 1 + int(ceil(input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)) d = 1 + \frac{ceil(input\_depth + 2 * padding\_z - pool\_size\_z)}{stride\_z}
- ceil_mode=False: - ceil_mode=False:
.. math:: .. math::
w = 1 + int(floor(input\_width + 2 * padding - pool\_size) / float(stride)) w = 1 + \frac{floor(input\_width + 2 * padding - pool\_size)}{stride} \\\\
h = 1 + int(floor(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) h = 1 + \frac{floor(input\_height + 2 * padding\_y - pool\_size\_y)}{stride\_y} \\\\
d = 1 + int(floor(input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)) d = 1 + \frac{floor(input\_depth + 2 * padding\_z - pool\_size\_z)}{stride\_z} \\\\
The example usage is: The example usage is:
...@@ -2996,7 +2996,7 @@ def spp_layer(input, ...@@ -2996,7 +2996,7 @@ def spp_layer(input,
Reference: Reference:
`Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition `Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition
https://arxiv.org/abs/1406.4729`_ <https://arxiv.org/abs/1406.4729>`_
The example usage is: The example usage is:
...@@ -3098,7 +3098,7 @@ def img_cmrnorm_layer(input, ...@@ -3098,7 +3098,7 @@ def img_cmrnorm_layer(input,
Reference: Reference:
`ImageNet Classification with Deep Convolutional Neural Networks `ImageNet Classification with Deep Convolutional Neural Networks
http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf`_ <http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf>`_
The example usage is: The example usage is:
...@@ -3166,7 +3166,7 @@ def batch_norm_layer(input, ...@@ -3166,7 +3166,7 @@ def batch_norm_layer(input,
Reference: Reference:
`Batch Normalization: Accelerating Deep Network Training by Reducing `Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift Internal Covariate Shift
http://arxiv.org/abs/1502.03167`_ <http://arxiv.org/abs/1502.03167>`_
The example usage is: The example usage is:
...@@ -5424,17 +5424,19 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -5424,17 +5424,19 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
Reference: Reference:
`Maxout Networks `Maxout Networks
http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf`_ <http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf>`_
`Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks `Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks
https://arxiv.org/pdf/1312.6082v4.pdf`_ <https://arxiv.org/pdf/1312.6082v4.pdf>`_
.. math:: .. math::
y_{si+j} = \max_k x_{gsi + sk + j} out = \max_k (in[n, k, o_c , s]) \\\\
g = groups out_{i * s + j} = \max_k in_{ k * o_{c} * s + i * s + j} \\\\
s = input.size / num_channels s = \frac{input.size}{ num\_channels} \\\\
0 \le i < num_channels / groups o_{c} =\frac{num\_channels}{groups} \\\\
0 \le j < s 0 \le i < o_{c} \\\\
0 \le k < groups 0 \le j < s \\\\
0 \le k < groups \\\\
The simple usage is: The simple usage is:
...@@ -5493,7 +5495,7 @@ def ctc_layer(input, ...@@ -5493,7 +5495,7 @@ def ctc_layer(input,
Reference: Reference:
`Connectionist Temporal Classification: Labelling Unsegmented Sequence Data `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
with Recurrent Neural Networks with Recurrent Neural Networks
http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf`_ <http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf>`_
Note: Note:
Considering the 'blank' label needed by CTC, you need to use (num_classes + 1) Considering the 'blank' label needed by CTC, you need to use (num_classes + 1)
...@@ -5567,7 +5569,7 @@ def warp_ctc_layer(input, ...@@ -5567,7 +5569,7 @@ def warp_ctc_layer(input,
Reference: Reference:
`Connectionist Temporal Classification: Labelling Unsegmented Sequence Data `Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
with Recurrent Neural Networks with Recurrent Neural Networks
http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf`_ <http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf>`_
Note: Note:
- Let num_classes represents the category number. Considering the 'blank' - Let num_classes represents the category number. Considering the 'blank'
...@@ -5788,7 +5790,7 @@ def nce_layer(input, ...@@ -5788,7 +5790,7 @@ def nce_layer(input,
Reference: Reference:
`A fast and simple algorithm for training neural probabilistic language `A fast and simple algorithm for training neural probabilistic language
models. https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf`_ models. <https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf>`_
The example usage is: The example usage is:
...@@ -5904,7 +5906,7 @@ def rank_cost(left, ...@@ -5904,7 +5906,7 @@ def rank_cost(left,
Reference: Reference:
`Learning to Rank using Gradient Descent `Learning to Rank using Gradient Descent
http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf`_ <http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf>`_
.. math:: .. math::
...@@ -6440,7 +6442,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -6440,7 +6442,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
Reference: Reference:
`Fast R-CNN `Fast R-CNN
https://arxiv.org/pdf/1504.08083v2.pdf`_ <https://arxiv.org/pdf/1504.08083v2.pdf>`_
The example usage is: The example usage is:
...@@ -6647,7 +6649,7 @@ def prelu_layer(input, ...@@ -6647,7 +6649,7 @@ def prelu_layer(input,
Reference: Reference:
`Delving Deep into Rectifiers: Surpassing Human-Level Performance on `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf`_ ImageNet Classification <http://arxiv.org/pdf/1502.01852v1.pdf>`_
.. math:: .. math::
z_i &\\quad if \\quad z_i > 0 \\\\ z_i &\\quad if \\quad z_i > 0 \\\\
...@@ -6744,7 +6746,7 @@ def gated_unit_layer(input, ...@@ -6744,7 +6746,7 @@ def gated_unit_layer(input,
Reference: Reference:
`Language Modeling with Gated Convolutional Networks `Language Modeling with Gated Convolutional Networks
https://arxiv.org/abs/1612.08083`_ <https://arxiv.org/abs/1612.08083>`_
.. math:: .. math::
y=\\text{act}(X \cdot W + b)\otimes \sigma(X \cdot V + c) y=\\text{act}(X \cdot W + b)\otimes \sigma(X \cdot V + c)
......
...@@ -430,7 +430,8 @@ def _create_op_func_(op_type): ...@@ -430,7 +430,8 @@ def _create_op_func_(op_type):
dtype = each.dtype dtype = each.dtype
elif dtype != each.dtype: elif dtype != each.dtype:
raise ValueError( raise ValueError(
"operator {0} must input same dtype".format(op_type)) "operator {0} must input same dtype. {1} vs {2}".format(
op_type, dtype, each.dtype))
return dtype return dtype
......
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05 import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor, g_scope from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.optimizer import SGDOptimizer
import paddle.v2.fluid as fluid
import paddle.v2.fluid.layers as pd
dict_size = 30000 dict_size = 30000
source_dict_dim = target_dict_dim = dict_size source_dict_dim = target_dict_dim = dict_size
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size) src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
hidden_dim = 512 hidden_dim = 32
word_dim = 512 word_dim = 16
IS_SPARSE = True IS_SPARSE = True
batch_size = 50 batch_size = 10
max_length = 50 max_length = 50
topk_size = 50 topk_size = 50
trg_dic_size = 10000 trg_dic_size = 10000
src_word_id = layers.data(name="src_word_id", shape=[1], dtype='int64') decoder_size = hidden_dim
src_embedding = layers.embedding(
input=src_word_id,
size=[dict_size, word_dim], def encoder_decoder():
dtype='float32', # encoder
is_sparse=IS_SPARSE, src_word_id = layers.data(
param_attr=fluid.ParamAttr(name='vemb')) name="src_word_id", shape=[1], dtype='int64', lod_level=1)
src_embedding = layers.embedding(
input=src_word_id,
def encoder(): size=[dict_size, word_dim],
dtype='float32',
lstm_hidden0, lstm_0 = layers.dynamic_lstm( is_sparse=IS_SPARSE,
input=src_embedding, param_attr=fluid.ParamAttr(name='vemb'))
size=hidden_dim,
candidate_activation='sigmoid', fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
cell_activation='sigmoid') lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
encoder_out = layers.sequence_pool(input=lstm_hidden0, pool_type="last")
lstm_hidden1, lstm_1 = layers.dynamic_lstm(
input=src_embedding, # decoder
size=hidden_dim, trg_language_word = layers.data(
candidate_activation='sigmoid', name="target_language_word", shape=[1], dtype='int64', lod_level=1)
cell_activation='sigmoid', trg_embedding = layers.embedding(
is_reverse=True) input=trg_language_word,
size=[dict_size, word_dim],
bidirect_lstm_out = layers.concat([lstm_hidden0, lstm_hidden1], axis=0) dtype='float32',
is_sparse=IS_SPARSE,
return bidirect_lstm_out param_attr=fluid.ParamAttr(name='vemb'))
rnn = fluid.layers.DynamicRNN()
def decoder_trainer(context): with rnn.block():
''' current_word = rnn.step_input(trg_embedding)
decoder with trainer mem = rnn.memory(init=encoder_out)
''' fc1 = fluid.layers.fc(input=[current_word, mem],
pass size=decoder_size,
act='tanh')
out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax')
rnn.update_memory(mem, fc1)
rnn.output(out)
return rnn()
def to_lodtensor(data, place): def to_lodtensor(data, place):
...@@ -72,13 +75,18 @@ def to_lodtensor(data, place): ...@@ -72,13 +75,18 @@ def to_lodtensor(data, place):
def main(): def main():
encoder_out = encoder() rnn_out = encoder_decoder()
# TODO(jacquesqiao) call here label = layers.data(
decoder_trainer(encoder_out) name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
cost = layers.cross_entropy(input=rnn_out, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
optimizer.minimize(avg_cost)
train_data = paddle.batch( train_data = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
paddle.dataset.wmt14.train(8000), buf_size=1000), paddle.dataset.wmt14.train(dict_size), buf_size=1000),
batch_size=batch_size) batch_size=batch_size)
place = core.CPUPlace() place = core.CPUPlace()
...@@ -88,15 +96,23 @@ def main(): ...@@ -88,15 +96,23 @@ def main():
batch_id = 0 batch_id = 0
for pass_id in xrange(2): for pass_id in xrange(2):
print 'pass_id', pass_id
for data in train_data(): for data in train_data():
print 'batch', batch_id
batch_id += 1
if batch_id > 10: break
word_data = to_lodtensor(map(lambda x: x[0], data), place) word_data = to_lodtensor(map(lambda x: x[0], data), place)
trg_word = to_lodtensor(map(lambda x: x[1], data), place)
trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
outs = exe.run(framework.default_main_program(), outs = exe.run(framework.default_main_program(),
feed={'src_word_id': word_data, }, feed={
fetch_list=[encoder_out]) 'src_word_id': word_data,
'target_language_word': trg_word,
'target_language_next_word': trg_word_next
},
fetch_list=[avg_cost])
avg_cost_val = np.array(outs[0])
print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
" avg_cost=" + str(avg_cost_val))
if batch_id > 3:
exit(0)
batch_id += 1
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -5,7 +5,7 @@ class BinaryDistribution(Distribution): ...@@ -5,7 +5,7 @@ class BinaryDistribution(Distribution):
return True return True
MAJOR = 0 MAJOR = 0
MINOR = 10 MINOR = 11
PATCH = 0 PATCH = 0
RC = 0 RC = 0
ISTAGED = False ISTAGED = False
...@@ -89,7 +89,7 @@ paddle_rt_libs = ['${WARPCTC_LIBRARIES}'] ...@@ -89,7 +89,7 @@ paddle_rt_libs = ['${WARPCTC_LIBRARIES}']
if '${MKL_SHARED_LIBS}'!= '': if '${MKL_SHARED_LIBS}'!= '':
paddle_rt_libs += '${MKL_SHARED_LIBS}'.split(';') paddle_rt_libs += '${MKL_SHARED_LIBS}'.split(';')
setup(name='paddlepaddle', setup(name='${PACKAGE_NAME}',
version='${PADDLE_VERSION}', version='${PADDLE_VERSION}',
description='Parallel Distributed Deep Learning', description='Parallel Distributed Deep Learning',
install_requires=setup_requires, install_requires=setup_requires,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册