提交 283bee28 编写于 作者: H hedaoyuan

Merge branch 'develop' of https://github.com/baidu/Paddle into FunctionTest

...@@ -25,9 +25,9 @@ addons: ...@@ -25,9 +25,9 @@ addons:
packages: packages:
- gcc-4.8 - gcc-4.8
- g++-4.8 - g++-4.8
- gfortran-4.8
- git - git
- build-essential - build-essential
- libatlas-base-dev
- python - python
- python-pip - python-pip
- python2.7-dev - python2.7-dev
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
set(CBLAS_FOUND OFF) set(CBLAS_FOUND OFF)
## Find MKL First. ## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL") set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include) ${MKL_ROOT}/include)
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
INCLUDE(cblas) INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND}) IF(NOT ${CBLAS_FOUND})
MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas) SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
...@@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE) SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32) ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ExternalProject_Add( ExternalProject_Add(
openblas openblas
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
URL "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz" GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG v0.2.19
PREFIX ${CBLAS_SOURCES_DIR} PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
CONFIGURE_COMMAND "" BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER} INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
)
ExternalProject_Add_Step(
openblas lapacke_install
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
DEPENDEES install
) )
LIST(APPEND external_project_dependencies openblas) LIST(APPEND external_project_dependencies openblas)
ENDIF() ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
...@@ -54,6 +54,7 @@ ExternalProject_Add( ...@@ -54,6 +54,7 @@ ExternalProject_Add(
CONFIGURE_COMMAND CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
......
...@@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) ...@@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
"please use pip to upgrade protobuf.") "please use pip to upgrade protobuf.")
ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
##################################### PYTHON ######################################## ##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python) SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python) SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)
......
...@@ -96,6 +96,7 @@ set(COMMON_FLAGS ...@@ -96,6 +96,7 @@ set(COMMON_FLAGS
-Wno-unused-parameter -Wno-unused-parameter
-Wno-unused-function -Wno-unused-function
-Wno-error=literal-suffix -Wno-error=literal-suffix
-Wno-error=sign-compare
-Wno-error=unused-local-typedefs) -Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS set(GPU_COMMON_FLAGS
...@@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS ...@@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS
-Wdelete-non-virtual-dtor -Wdelete-non-virtual-dtor
-Wno-unused-parameter -Wno-unused-parameter
-Wno-unused-function -Wno-unused-function
-Wno-error=sign-compare
-Wno-error=literal-suffix -Wno-error=literal-suffix
-Wno-error=unused-local-typedefs -Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header. -Wno-error=unused-function # Warnings in Numpy Header.
......
...@@ -47,7 +47,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS ...@@ -47,7 +47,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output LOG_DOWNLOAD 0 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1 # Wrap configure in script to log output LOG_CONFIGURE 1 # Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output LOG_BUILD 0 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 1 # Wrap install in script to log output LOG_INSTALL 0 # Wrap install in script to log output
) )
...@@ -64,7 +64,8 @@ As a simple example, consider the following: ...@@ -64,7 +64,8 @@ As a simple example, consider the following:
1. **BLAS Dependencies(optional)** 1. **BLAS Dependencies(optional)**
Paddle will find BLAS from system's default path. But you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`. CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically.
To utilize preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
```bash ```bash
# specify MKL # specify MKL
...@@ -99,7 +100,7 @@ As a simple example, consider the following: ...@@ -99,7 +100,7 @@ As a simple example, consider the following:
```bash ```bash
# necessary # necessary
sudo apt-get update sudo apt-get update
sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git sudo apt-get install -y g++ make cmake build-essential python python-pip libpython-dev git
sudo pip install wheel numpy sudo pip install wheel numpy
sudo pip install 'protobuf>=3.0.0' sudo pip install 'protobuf>=3.0.0'
``` ```
......
...@@ -2,15 +2,9 @@ ...@@ -2,15 +2,9 @@
## Create AWS Account and IAM Account ## Create AWS Account and IAM Account
To use AWS, we need to sign up an AWS account on Amazon's Web site. An AWS account allows us to manage AWS from the Web Console. Amazon IAM enables us to manage AWS from the command line interface.
An AWS account allows us to login to the AWS Console Web interface to
create IAM users and user groups. Usually, we create a user group with We need to create an IAM user with sufficient privileges to create a Kubernetes cluster on AWS.
privileges required to run PaddlePaddle, and we create users for
those who are going to run PaddlePaddle and add these users into the
group. IAM users can identify themselves using password and tokens,
where passwords allows users to log in to the AWS Console, and tokens
make it easy for users to submit and inspect jobs from the command
line.
To sign up an AWS account, please To sign up an AWS account, please
follow follow
...@@ -19,8 +13,7 @@ To create users and user groups under an AWS account, please ...@@ -19,8 +13,7 @@ To create users and user groups under an AWS account, please
follow follow
[this guide](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html). [this guide](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html).
Please be aware that this tutorial needs the following privileges in Please be aware that this tutorial needs the following privileges for the user in IAM:
the user group:
- AmazonEC2FullAccess - AmazonEC2FullAccess
- AmazonS3FullAccess - AmazonS3FullAccess
...@@ -31,6 +24,7 @@ the user group: ...@@ -31,6 +24,7 @@ the user group:
- IAMUserSSHKeys - IAMUserSSHKeys
- IAMFullAccess - IAMFullAccess
- NetworkAdministrator - NetworkAdministrator
- AWSKeyManagementServicePowerUser
By the time we write this tutorial, we noticed that Chinese AWS users By the time we write this tutorial, we noticed that Chinese AWS users
...@@ -46,9 +40,11 @@ it. ...@@ -46,9 +40,11 @@ it.
Here we will show you step by step on how to run PaddlePaddle training on AWS cluster. Here we will show you step by step on how to run PaddlePaddle training on AWS cluster.
###Download kube-aws and kubectl ### Download kube-aws and kubectl
#### kube-aws
####kube-aws [kube-aws](https://github.com/coreos/kube-aws) is a CLI tool to automate cluster deployment to AWS.
Import the CoreOS Application Signing Public Key: Import the CoreOS Application Signing Public Key:
...@@ -88,24 +84,22 @@ mv ${PLATFORM}/kube-aws /usr/local/bin ...@@ -88,24 +84,22 @@ mv ${PLATFORM}/kube-aws /usr/local/bin
``` ```
####kubectl #### kubectl
[kubectl](https://kubernetes.io/docs/user-guide/kubectl-overview/) is a command line interface for running commands against Kubernetes clusters.
Go to the [releases](https://github.com/kubernetes/kubernetes/releases) and download the latest release tarball. Go to the [releases](https://github.com/kubernetes/kubernetes/releases) and download the latest release tarball.
Extract the tarball and then add the kubernetes binaries directory to PATH: Extract the tarball and then add the kubernetes binaries directory to PATH:
``` ```
export PATH=<path/to/kubernetes-directory>/platforms/linux/amd64:$PATH export PATH=<path/to/kubernetes-directory>/platforms/linux/amd64:$PATH # The exact path depends on your platform
``` ```
User credentials and security tokens will be generated later in user directory, not in `~/.kube/config`, they will be necessary to use the CLI or the HTTP Basic Auth.
###Configure AWS Credentials
First check out [this](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) for installing the AWS command line interface, if you use ec2 instance with default amazon AMI, the cli tool has already been installed on your machine. ### Configure AWS Credentials
First check out [this](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) for installing the AWS command line interface.
And then configure your AWS account information: And then configure your AWS account information:
...@@ -126,33 +120,35 @@ Default output format: json ...@@ -126,33 +120,35 @@ Default output format: json
``` ```
Test that your credentials work by describing any instances you may already have running on your account: Verify that your credentials work by describing any instances you may already have running on your account:
``` ```
aws ec2 describe-instances aws ec2 describe-instances
``` ```
###Define Cluster Parameters ### Define Cluster Parameters
####EC2 key pair #### EC2 key pair
The keypair that will authenticate SSH access to your EC2 instances. The public half of this key pair will be configured on each CoreOS node. The keypair that will authenticate SSH access to your EC2 instances. The public half of this key pair will be configured on each CoreOS node.
After creating a key pair, you will use the name you gave the keys to configure the cluster. Key pairs are only available to EC2 instances in the same region. More info in the [EC2 Keypair docs](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html). Follow [EC2 Keypair docs](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) to create an EC2 key pair.
####KMS key After creating a key pair, you will use the name you gave the keys to configure the cluster. Key pairs are only available to EC2 instances in the same region.
#### KMS key
Amazon KMS keys are used to encrypt and decrypt cluster TLS assets. If you already have a KMS Key that you would like to use, you can skip creating a new key and provide the Arn string for your existing key. Amazon KMS keys are used to encrypt and decrypt cluster TLS assets. If you already have a KMS Key that you would like to use, you can skip creating a new key and provide the Arn string for your existing key.
You can create a KMS key in the AWS console, or with the aws command line tool: You can create a KMS key in the AWS console, or with the aws command line tool:
``` ```
$ aws kms --region=us-west-2 create-key --description="kube-aws assets" $ aws kms --region=us-west-1 create-key --description="kube-aws assets"
{ {
"KeyMetadata": { "KeyMetadata": {
"CreationDate": 1458235139.724, "CreationDate": 1458235139.724,
"KeyState": "Enabled", "KeyState": "Enabled",
"Arn": "arn:aws:kms:us-west-2:xxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx", "Arn": "arn:aws:kms:us-west-1:xxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx",
"AWSAccountId": "xxxxxxxxxxxxx", "AWSAccountId": "xxxxxxxxxxxxx",
"Enabled": true, "Enabled": true,
"KeyUsage": "ENCRYPT_DECRYPT", "KeyUsage": "ENCRYPT_DECRYPT",
...@@ -166,7 +162,9 @@ You will use the `KeyMetadata.Arn` string to identify your KMS key in the init s ...@@ -166,7 +162,9 @@ You will use the `KeyMetadata.Arn` string to identify your KMS key in the init s
And then you need to add several inline policies in your user permission. And then you need to add several inline policies in your user permission.
kms inline policy: Go to the IAM user page, click on the `Add inline policy` button, and then select `Custom Policy`.
Then paste in the following inline policies:
``` ```
{ {
...@@ -182,16 +180,8 @@ kms inline policy: ...@@ -182,16 +180,8 @@ kms inline policy:
"Resource": [ "Resource": [
"arn:aws:kms:*:xxxxxxxxx:key/*" "arn:aws:kms:*:xxxxxxxxx:key/*"
] ]
} },
] {
}
```
cloudformation inline policy:
```
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Stmt1482205746000", "Sid": "Stmt1482205746000",
"Effect": "Allow", "Effect": "Allow",
"Action": [ "Action": [
...@@ -200,10 +190,11 @@ cloudformation inline policy: ...@@ -200,10 +190,11 @@ cloudformation inline policy:
"cloudformation:DeleteStack", "cloudformation:DeleteStack",
"cloudformation:DescribeStacks", "cloudformation:DescribeStacks",
"cloudformation:DescribeStackResource", "cloudformation:DescribeStackResource",
"cloudformation:GetTemplate" "cloudformation:GetTemplate",
"cloudformation:DescribeStackEvents"
], ],
"Resource": [ "Resource": [
"arn:aws:cloudformation:us-west-2:xxxxxxxxx:stack/YOUR_CLUSTER_NAME/*" "arn:aws:cloudformation:us-west-1:xxxxxxxxx:stack/YOUR_CLUSTER_NAME/*"
] ]
} }
] ]
...@@ -211,15 +202,23 @@ cloudformation inline policy: ...@@ -211,15 +202,23 @@ cloudformation inline policy:
``` ```
####External DNS name #### External DNS name
When the cluster is created, the controller will expose the TLS-secured API on a public IP address. You will need to create an A record for the external DNS hostname you want to point to this IP address. You can find the API external IP address after the cluster is created by invoking kube-aws status. When the cluster is created, the controller will expose the TLS-secured API on a public IP address. You will need to create an A record for the external DNS hostname you want to point to this IP address. You can find the API external IP address after the cluster is created by invoking kube-aws status.
####S3 bucket #### S3 bucket
You need to create an S3 bucket before startup the Kubernetes cluster. You need to create an S3 bucket before startup the Kubernetes cluster.
####Initialize an asset directory command (need to have a global unique name):
```
paddle aws s3api --region=us-west-1 create-bucket --bucket bucket-name
```
If you get an error message, try a different bucket name. The bucket name needs to be globally unique.
#### Initialize an asset directory
Create a directory on your local machine to hold the generated assets: Create a directory on your local machine to hold the generated assets:
...@@ -237,12 +236,16 @@ $ kube-aws init \ ...@@ -237,12 +236,16 @@ $ kube-aws init \
--region=us-west-1 \ --region=us-west-1 \
--availability-zone=us-west-1c \ --availability-zone=us-west-1c \
--key-name=key-pair-name \ --key-name=key-pair-name \
--kms-key-arn="arn:aws:kms:us-west-2:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx" --kms-key-arn="arn:aws:kms:us-west-1:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx"
``` ```
Here `us-west-1c` is used for parameter `--availability-zone`, but supported availability zone varies among AWS accounts.
Please check if `us-west-1c` is supported by `aws ec2 --region us-west-1 describe-availability-zones`, if not switch to other supported availability zone. (e.g., `us-west-1a`, or `us-west-1b`)
There will now be a cluster.yaml file in the asset directory. This is the main configuration file for your cluster. There will now be a cluster.yaml file in the asset directory. This is the main configuration file for your cluster.
####Render contents of the asset directory #### Render contents of the asset directory
In the simplest case, you can have kube-aws generate both your TLS identities and certificate authority for you. In the simplest case, you can have kube-aws generate both your TLS identities and certificate authority for you.
...@@ -285,21 +288,21 @@ $ tree ...@@ -285,21 +288,21 @@ $ tree
These assets (templates and credentials) are used to create, update and interact with your Kubernetes cluster. These assets (templates and credentials) are used to create, update and interact with your Kubernetes cluster.
###Kubernetes Cluster Start Up ### Kubernetes Cluster Start Up
####Create the instances defined in the CloudFormation template #### Create the instances defined in the CloudFormation template
Now for the exciting part, creating your cluster: Now for the exciting part, creating your cluster (choose any `<prefix>`):
``` ```
$ kube-aws up --s3-uri s3://<your-bucket-name>/<prefix> $ kube-aws up --s3-uri s3://<your-bucket-name>/<prefix>
``` ```
####Configure DNS #### Configure DNS
You can invoke `kube-aws status` to get the cluster API endpoint after cluster creation, if necessary. This command can take a while. And then dig the load balancer hostname to get the ip address, use this ip to setup an A record for your external dns name. You can invoke `kube-aws status` to get the cluster API endpoint after cluster creation, if necessary. This command can take a while. And use command `dig` to check the load balancer hostname to get the ip address, use this ip to setup an A record for your external dns name.
####Access the cluster #### Access the cluster
Once the API server is running, you should see: Once the API server is running, you should see:
...@@ -312,7 +315,7 @@ ip-10-0-0-xx.us-west-1.compute.internal Ready,SchedulingDisabled 5m ...@@ -312,7 +315,7 @@ ip-10-0-0-xx.us-west-1.compute.internal Ready,SchedulingDisabled 5m
``` ```
###Setup PaddlePaddle Environment on AWS ### Setup PaddlePaddle Environment on AWS
Now, we've created a cluster with following network capability: Now, we've created a cluster with following network capability:
......
...@@ -20,23 +20,27 @@ limitations under the License. */ ...@@ -20,23 +20,27 @@ limitations under the License. */
namespace paddle { namespace paddle {
const SequenceArg& BufferArg::sequence() const { const SequenceArg& BufferArg::sequence() const {
// CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA); CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
return dynamic_cast<const SequenceArg&>(*this); return dynamic_cast<const SequenceArg&>(*this);
} }
const SparseMatrixArg& BufferArg::sparse() const { const SparseMatrixArg& BufferArg::sparse() const {
// CHECK_EQ(bufferType_, TENSOR_SPARSE); CHECK_EQ(bufferType_, TENSOR_SPARSE);
return dynamic_cast<const SparseMatrixArg&>(*this); return dynamic_cast<const SparseMatrixArg&>(*this);
} }
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType) SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType), : BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32), row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {} col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType) SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType), : BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32), row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {} col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
} // namespace paddle } // namespace paddle
...@@ -23,10 +23,11 @@ limitations under the License. */ ...@@ -23,10 +23,11 @@ limitations under the License. */
namespace paddle { namespace paddle {
enum BufferType { enum BufferType {
TENSOR_NORMAL = 0, TENSOR_UNKNOWN = 0,
TENSOR_SEQUENCE_ID = 1, TENSOR_NORMAL = 1,
TENSOR_SEQUENCE_DATA = 2, TENSOR_SEQUENCE_ID = 2,
TENSOR_SPARSE = 3 TENSOR_SEQUENCE_DATA = 3,
TENSOR_SPARSE = 4
}; };
enum SparseDataType { enum SparseDataType {
...@@ -98,6 +99,7 @@ public: ...@@ -98,6 +99,7 @@ public:
valueType_(DataType<real>::value), valueType_(DataType<real>::value),
shape_(2), shape_(2),
argType_(argType) { argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, matrix.getHeight()); shape_.setDim(0, matrix.getHeight());
shape_.setDim(1, matrix.getWidth()); shape_.setDim(1, matrix.getWidth());
} }
...@@ -110,6 +112,7 @@ public: ...@@ -110,6 +112,7 @@ public:
valueType_(DataType<real>::value), valueType_(DataType<real>::value),
shape_(shape), shape_(shape),
argType_(argType) { argType_(argType) {
bufferType_ = TENSOR_NORMAL;
CHECK_EQ(matrix.getElementCnt(), shape.getElements()); CHECK_EQ(matrix.getElementCnt(), shape.getElements());
} }
...@@ -119,6 +122,7 @@ public: ...@@ -119,6 +122,7 @@ public:
valueType_(DataType<real>::value), valueType_(DataType<real>::value),
shape_(1), shape_(1),
argType_(argType) { argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize()); shape_.setDim(0, vector.getSize());
} }
...@@ -128,6 +132,7 @@ public: ...@@ -128,6 +132,7 @@ public:
valueType_(VALUE_TYPE_INT32), valueType_(VALUE_TYPE_INT32),
shape_(1), shape_(1),
argType_(argType) { argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize()); shape_.setDim(0, vector.getSize());
} }
...@@ -162,6 +167,8 @@ public: ...@@ -162,6 +167,8 @@ public:
ValueType valueType() const { return valueType_; } ValueType valueType() const { return valueType_; }
BufferType bufferType() const { return bufferType_; } BufferType bufferType() const { return bufferType_; }
const TensorShape& shape() const { return shape_; } const TensorShape& shape() const { return shape_; }
bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
const SequenceArg& sequence() const; const SequenceArg& sequence() const;
const SparseMatrixArg& sparse() const; const SparseMatrixArg& sparse() const;
...@@ -170,8 +177,8 @@ protected: ...@@ -170,8 +177,8 @@ protected:
void* buf_; void* buf_;
ValueType valueType_; ValueType valueType_;
TensorShape shape_; TensorShape shape_;
BufferType bufferType_; BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_ = UNSPECIFIED; ArgType argType_{UNSPECIFIED};
// leading dimensions. The size is dims_.size() // leading dimensions. The size is dims_.size()
// Dims lds_; // Dims lds_;
}; };
...@@ -192,11 +199,13 @@ public: ...@@ -192,11 +199,13 @@ public:
const TensorShape& shape, const TensorShape& shape,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED)
: BufferArg(buf, VALUE_TYPE_INT32, shape, argType) { : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
bufferType_ = TENSOR_SEQUENCE_ID;
CHECK_EQ(shape_.ndims(), (size_t)1); CHECK_EQ(shape_.ndims(), (size_t)1);
numSeqs_ = shape_[0] - 1; numSeqs_ = shape_[0] - 1;
} }
SequenceIdArg(const IVector& vector) : BufferArg(vector) { SequenceIdArg(const IVector& vector) : BufferArg(vector) {
bufferType_ = TENSOR_SEQUENCE_ID;
numSeqs_ = shape_[0] - 1; numSeqs_ = shape_[0] - 1;
} }
...@@ -226,12 +235,16 @@ public: ...@@ -226,12 +235,16 @@ public:
const SequenceIdArg& startPositions, const SequenceIdArg& startPositions,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType), : BufferArg(buf, valueType, shape, argType),
startPositions_(startPositions) {} startPositions_(startPositions) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
SequenceArg(const Matrix& matrix, SequenceArg(const Matrix& matrix,
const IVector& vector, const IVector& vector,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED)
: BufferArg(matrix, argType), startPositions_(vector) {} : BufferArg(matrix, argType), startPositions_(vector) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
~SequenceArg() {} ~SequenceArg() {}
...@@ -264,6 +277,7 @@ public: ...@@ -264,6 +277,7 @@ public:
nnz_(nnz), nnz_(nnz),
format_(format), format_(format),
type_(type) { type_(type) {
bufferType_ = TENSOR_SPARSE;
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2); CHECK_EQ(shape_.ndims(), (size_t)2);
CHECK_EQ(row_.shape().ndims(), (size_t)1); CHECK_EQ(row_.shape().ndims(), (size_t)1);
......
...@@ -24,7 +24,7 @@ if(WITH_TESTING) ...@@ -24,7 +24,7 @@ if(WITH_TESTING)
add_simple_unittest(TensorTypeTest) add_simple_unittest(TensorTypeTest)
add_simple_unittest(BufferArgTest) add_simple_unittest(BufferArgTest)
add_simple_unittest(FunctionTest) add_simple_unittest(FunctionTest)
# add_simple_unittest(ContextProjectionOpTest) add_simple_unittest(ContextProjectionOpTest)
endif() endif()
endif() endif()
......
...@@ -17,7 +17,10 @@ limitations under the License. */ ...@@ -17,7 +17,10 @@ limitations under the License. */
#include "paddle/math/Vector.h" #include "paddle/math/Vector.h"
namespace paddle { namespace paddle {
/**
* Context Projection Forward with CPU Matrix Device.
*
*/
template <> template <>
void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat, void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
const CpuMatrix& input_mat, const CpuMatrix& input_mat,
...@@ -70,10 +73,30 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat, ...@@ -70,10 +73,30 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
} }
/** /**
* \param inputs[0] input value. * Paddle Function for Context Projection Forward.
* \param inputs[1] input weight. * Calculate the output layer value sequence after context projection.
* \param inputs[2] input sequence. *
* \param outputs[0] output value. * What is Context Projection for a sequence?
* For example, assume the input (x) has 4 words and the dimension of each word
* representation is 2. If we pad with zeros instead of learned weights,
* and the context_length is 3, the output (y) is:
*
* @code
* x = [a1, a2;
* b1, b2;
* c1, c2;
* d1, d2]
* y = [0, 0, a1, a2, b1, b2;
* a1, a2, b1, b2, c1, c2;
* b1, b2, c1, c2, d1, d2;
* c1, c2, d1, d2, 0, 0]
* @endcode
*
* \param outputs[0].matrix output layer value, n * (d * l)
* \param outputs[0].vector start position sequence, n * 1
* \param inputs[0].matrix input layer value, n * d
* \param inputs[0].vector start position sequence, n * 1
* \param inputs[1].matrix input layer weight, pad * d
*/ */
template <DeviceType Device> template <DeviceType Device>
class ContextProjectionForwardFunc : public FunctionBase { class ContextProjectionForwardFunc : public FunctionBase {
...@@ -85,28 +108,37 @@ public: ...@@ -85,28 +108,37 @@ public:
} }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ((size_t)3, inputs.size()); CHECK(1 == inputs.size() || 2 == inputs.size());
CHECK_EQ((size_t)1, outputs.size()); CHECK_EQ((size_t)1, outputs.size());
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here";
const auto val_seqs = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data()); CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data());
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); CHECK_EQ(val_seqs.shape().ndims(), (size_t)2);
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); CHECK_EQ(val_seqs.getSequenceId().shape().ndims(), (size_t)1);
CHECK_EQ(inputs[2].shape().ndims(), (size_t)1); if (2 == inputs.size()) {
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
}
/// dim of output = dim of input * context_length /// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_);
/// dim of input == dim of weight
CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]);
/// input and output has the same batch_size /// input and output has the same batch_size
CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]);
/// dim of input == dim of weight
if (2 == inputs.size()) {
CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]);
}
CHECK_EQ(outputs[0].getArgType(), ADD_TO); CHECK_EQ(out_seq.getArgType(), ADD_TO);
auto out_mat = outputs[0].matrix<Device>(); auto out_mat = out_seq.matrix<Device>();
auto in_mat = inputs[0].matrix<Device>(); const auto in_mat = val_seqs.matrix<Device>();
auto w_mat = !inputs[1].data() const auto w_mat =
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0) (2 == inputs.size())
: inputs[1].matrix<Device>(); ? inputs[1].matrix<Device>()
auto seq_vec = inputs[2].vector<int, Device>(); : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
const auto seq_vec = val_seqs.getSequenceId().vector<int, Device>();
ContextProjectionForward<Device>(out_mat, ContextProjectionForward<Device>(out_mat,
in_mat, in_mat,
w_mat, w_mat,
...@@ -122,8 +154,12 @@ private: ...@@ -122,8 +154,12 @@ private:
size_t begin_pad_; size_t begin_pad_;
}; };
/**
* Context Projection Backward with CPU Matrix Device.
*
*/
template <> template <>
void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat, void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
CpuMatrix& in_grad_mat, CpuMatrix& in_grad_mat,
CpuMatrix& w_grad_mat, CpuMatrix& w_grad_mat,
const CpuIVector& seq_vec, const CpuIVector& seq_vec,
...@@ -146,7 +182,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat, ...@@ -146,7 +182,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
int64_t pad_size = int64_t pad_size =
std::min(starts[i] - begin, starts[i + 1] - starts[i]); std::min(starts[i] - begin, starts[i + 1] - starts[i]);
if (is_padding && w_grad_mat) { if (is_padding && w_grad_mat) {
MatrixPtr mat = out_grad_mat.subMatrix(starts[i], pad_size); MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(starts[i], pad_size);
MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size); MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size);
sub->addAtOffset(*mat, j * input_dim); sub->addAtOffset(*mat, j * input_dim);
} }
...@@ -157,8 +194,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat, ...@@ -157,8 +194,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
int64_t pad_size = int64_t pad_size =
std::min(end - starts[i + 1], starts[i + 1] - starts[i]); std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
if (is_padding && w_grad_mat) { if (is_padding && w_grad_mat) {
MatrixPtr mat = MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
out_grad_mat.subMatrix(starts[i + 1] - pad_size, pad_size); .subMatrix(starts[i + 1] - pad_size, pad_size);
MatrixPtr sub = w_grad_mat.subMatrix( MatrixPtr sub = w_grad_mat.subMatrix(
begin_pad + context_start + j - pad_size, pad_size); begin_pad + context_start + j - pad_size, pad_size);
sub->addAtOffset(*mat, j * input_dim); sub->addAtOffset(*mat, j * input_dim);
...@@ -169,17 +206,22 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat, ...@@ -169,17 +206,22 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
if (end <= begin) continue; if (end <= begin) continue;
if (!in_grad_mat) continue; if (!in_grad_mat) continue;
MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin); MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin);
MatrixPtr dst = out_grad_mat.subMatrix(dst_begin, dst_end - dst_begin); MatrixPtr dst = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(dst_begin, dst_end - dst_begin);
src->addAtOffset(*dst, j * input_dim); src->addAtOffset(*dst, j * input_dim);
} }
} }
} }
/** /**
* \param inputs[0] input grad. * Context Projection Backward Function.
* \param inputs[1] weight grad. * Update the weight gradient and input layer gradient with backprop
* \param inputs[2] input sequence. *
* \param outputs[0] output value. * \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vector start position sequence, n * 1
* \param outputs[0].matrix input layer grad, n * d
* \param outputs[0].vector start position sequence, n * 1
* \param outputs[1] weight grad, pad * d
*/ */
template <DeviceType Device> template <DeviceType Device>
class ContextProjectionBackwardFunc : public FunctionBase { class ContextProjectionBackwardFunc : public FunctionBase {
...@@ -193,32 +235,36 @@ public: ...@@ -193,32 +235,36 @@ public:
} }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ((size_t)3, inputs.size()); CHECK_EQ((size_t)1, inputs.size());
CHECK_EQ((size_t)1, outputs.size()); CHECK_EQ((size_t)2, outputs.size());
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here";
const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK(in_seq.data() && in_seq.getSequenceId().data());
CHECK_EQ(in_seq.shape().ndims(), (size_t)2);
CHECK_EQ(in_seq.getSequenceId().shape().ndims(), (size_t)1);
CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
CHECK_EQ(out_seq.getSequenceId().shape().ndims(), (size_t)1);
CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
CHECK(outputs[0].data() && inputs[2].data()); /// dim of input grad == dim of weight
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); /// input and output grad has the same batch_size
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]);
CHECK_EQ(inputs[2].shape().ndims(), (size_t)1); /// dim of output grad = dim of input grad * context_length
CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_);
CHECK_EQ(out_seq.getArgType(), ADD_TO);
CHECK_EQ(outputs[1].getArgType(), ADD_TO);
/// dim of input == dim of weight const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); const auto out_grad_mat = in_seq.matrix<Device>();
/// input and output has the same batch_size
CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
/// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto out_grad_mat = outputs[0].matrix<Device>();
auto in_grad_mat = auto in_grad_mat =
!inputs[0].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0) !out_seq.data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[0].matrix<Device>(); : out_seq.matrix<Device>();
auto w_grad_mat = !inputs[1].data() auto w_grad_mat = !outputs[1].data()
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0) ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[1].matrix<Device>(); : outputs[1].matrix<Device>();
auto seq_vec = inputs[2].vector<int, Device>();
ContextProjectionBackward<Device>(out_grad_mat, ContextProjectionBackward<Device>(out_grad_mat,
in_grad_mat, in_grad_mat,
w_grad_mat, w_grad_mat,
...@@ -238,11 +284,16 @@ private: ...@@ -238,11 +284,16 @@ private:
size_t total_pad_; size_t total_pad_;
}; };
#if 0
/** /**
* \param inputs[0] input grad. * Context Projection Backward Data Function
* \param inputs[1] input sequence. * Update input layer grad
* \param outputs[0] output grad. * input: sequence of output layer grad
* output: sequence of input layer grad
*
* \param outputs[0].matrix input layer grad, n * d
* \param outputs[0].vector start position sequence, n * 1
* \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vector start position sequence, n * 1
*/ */
template <DeviceType Device> template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase { class ContextProjectionBackwardDataFunc : public FunctionBase {
...@@ -252,32 +303,30 @@ public: ...@@ -252,32 +303,30 @@ public:
context_start_ = config.get<int>("context_start"); context_start_ = config.get<int>("context_start");
} }
void calc(const Arguments& inputs, void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
const Arguments& outputs, CHECK_EQ(1, static_cast<int>(inputs.size()));
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size())); CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size())); CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData()); << "SequenceArg required here";
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2); const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2); const auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data());
CHECK_EQ(static_cast<int>(out_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
/// output layer grad dim == input layer grad dim * context_length_
CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_);
/// input and output has the same batch_size /// input and output has the same batch_size
CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); CHECK_EQ(in_seq.shape()[0], out_seq.shape()[0]);
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>( const auto out_grad_mat = in_seq.matrix<Device>();
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>( auto in_grad_mat = out_seq.matrix<Device>();
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
ContextProjectionBackwardData<Device>(out_grad_mat.get(), ContextProjectionBackwardData<Device>(
in_grad_mat.get(), out_grad_mat, in_grad_mat, seq_vec, context_length_, context_start_);
seq_vec,
context_length_,
context_start_);
} }
private: private:
...@@ -286,9 +335,14 @@ private: ...@@ -286,9 +335,14 @@ private:
}; };
/** /**
* \param inputs[0] weight grad. * Context Projection Backward Weight Function
* \param inputs[1] input sequence. * Update weight grad by backprop
* \param outputs[0] output grad. * input: sequence of output layer grad
* output: weight grad
*
* \param outputs[0] weight grad, pad * d
* \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vector start position sequence, n * 1
*/ */
template <DeviceType Device> template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase { class ContextProjectionBackwardWeightFunc : public FunctionBase {
...@@ -300,28 +354,25 @@ public: ...@@ -300,28 +354,25 @@ public:
total_pad_ = config.get<size_t>("total_pad"); total_pad_ = config.get<size_t>("total_pad");
} }
void calc(const Arguments& inputs, void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
const Arguments& outputs, CHECK_EQ(1, static_cast<int>(inputs.size()));
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size())); CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size())); CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here";
const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData()); CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data());
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2); CHECK_EQ(static_cast<int>(outputs[0].shape().ndims()), 2);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2); CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1); CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]);
/// output layer grad dim == weight dim * context_length_
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>( CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_);
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(), const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
w_grad_mat.get(), const auto out_grad_mat = in_seq.matrix<Device>();
auto w_grad_mat = outputs[0].matrix<Device>();
ContextProjectionBackwardWeight<Device>(out_grad_mat,
w_grad_mat,
seq_vec, seq_vec,
context_length_, context_length_,
context_start_, context_start_,
...@@ -335,7 +386,6 @@ private: ...@@ -335,7 +386,6 @@ private:
size_t begin_pad_; size_t begin_pad_;
size_t total_pad_; size_t total_pad_;
}; };
#endif
REGISTER_TYPED_FUNC(ContextProjectionForward, REGISTER_TYPED_FUNC(ContextProjectionForward,
CPU, CPU,
...@@ -350,7 +400,6 @@ REGISTER_TYPED_FUNC(ContextProjectionForward, ...@@ -350,7 +400,6 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC(ContextProjectionBackward, REGISTER_TYPED_FUNC(ContextProjectionBackward,
GPU, GPU,
ContextProjectionBackwardFunc); ContextProjectionBackwardFunc);
#if 0
REGISTER_TYPED_FUNC(ContextProjectionBackwardData, REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
GPU, GPU,
ContextProjectionBackwardDataFunc); ContextProjectionBackwardDataFunc);
...@@ -358,5 +407,4 @@ REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight, ...@@ -358,5 +407,4 @@ REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
GPU, GPU,
ContextProjectionBackwardWeightFunc); ContextProjectionBackwardWeightFunc);
#endif #endif
#endif
} // namespace paddle } // namespace paddle
...@@ -21,14 +21,14 @@ namespace paddle { ...@@ -21,14 +21,14 @@ namespace paddle {
/** /**
* \brief Context Projection Forward. * \brief Context Projection Forward.
* *
* \param[out] outputs output data. * \param[in/out] outputs output data.
* \param[in] input input data. * \param[in] input input data.
* \param[in] weight input weight. * \param[in] weight input weight.
* \param[in] sequence input data. * \param[in] sequence input data.
* \param[in] context_length consecutive rows for concatenation. * \param[in] context_length consecutive rows for concatenation.
* \param[in] context_start context start position. * \param[in] context_start context start position.
* \param[in] begin_pad beginning pad position. * \param[in] begin_pad beginning pad position.
* \param[in] is_padding whether padding 0 or not. * \param[in] is_padding whether padding 0 or not.
* *
*/ */
template <DeviceType DType> template <DeviceType DType>
...@@ -56,7 +56,7 @@ void ContextProjectionForward( ...@@ -56,7 +56,7 @@ void ContextProjectionForward(
*/ */
template <DeviceType DType> template <DeviceType DType>
void ContextProjectionBackward( void ContextProjectionBackward(
typename Tensor<real, DType>::Matrix& out_grad, const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad, typename Tensor<real, DType>::Matrix& in_grad,
typename Tensor<real, DType>::Matrix& w_grad, typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec, const typename Tensor<int, DType>::Vector& seq_vec,
...@@ -68,7 +68,7 @@ void ContextProjectionBackward( ...@@ -68,7 +68,7 @@ void ContextProjectionBackward(
template <DeviceType DType> template <DeviceType DType>
void ContextProjectionBackwardData( void ContextProjectionBackwardData(
typename Tensor<real, DType>::Matrix& out_grad, const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad, typename Tensor<real, DType>::Matrix& in_grad,
const typename Tensor<int, DType>::Vector& sequence, const typename Tensor<int, DType>::Vector& sequence,
size_t context_length, size_t context_length,
...@@ -76,7 +76,7 @@ void ContextProjectionBackwardData( ...@@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
template <DeviceType DType> template <DeviceType DType>
void ContextProjectionBackwardWeight( void ContextProjectionBackwardWeight(
typename Tensor<real, DType>::Matrix& out_grad, const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& w_grad, typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec, const typename Tensor<int, DType>::Vector& seq_vec,
size_t context_length, size_t context_length,
......
...@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output, ...@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
begin_pad); begin_pad);
} }
__global__ void KeContextProjectionBackwardData(real* out_grad, __global__ void KeContextProjectionBackwardData(const real* out_grad,
const int* sequence, const int* sequence,
real* in_grad, real* in_grad,
int input_dim, size_t input_dim,
int context_length, int context_length,
int context_start) { int context_start) {
int idx = threadIdx.x; int idx = threadIdx.x;
...@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, ...@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
real value = 0; real value = 0;
int instances = seq_end - seq_start + context_length - 1; int instances = seq_end - seq_start + context_length - 1;
out_grad += seq_start * input_dim * context_length; auto out = const_cast<real*>(out_grad);
out += seq_start * input_dim * context_length;
in_grad += seq_start * input_dim; in_grad += seq_start * input_dim;
for (int k = 0; k <= input_dim / block_size; k++) { for (int k = 0; k <= input_dim / block_size; k++) {
if (idx < input_dim) { if (idx < input_dim) {
...@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, ...@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
int outx = (i - context_length) < 0 ? i : (context_length - 1); int outx = (i - context_length) < 0 ? i : (context_length - 1);
int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1)); int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
real* output_r = real* output_r =
out_grad + outy * input_dim * context_length + outx * input_dim; out + outy * input_dim * context_length + outx * input_dim;
for (int j = outy; j < seq_end - seq_start; j++) { for (int j = outy; j < seq_end - seq_start; j++) {
value += output_r[idx]; value += output_r[idx];
if (j - outy == outx) break; if (j - outy == outx) break;
...@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, ...@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
* @param[in] context_start context start. * @param[in] context_start context start.
* *
*/ */
void hl_context_projection_backward_data(real* out_grad, void hl_context_projection_backward_data(const real* out_grad,
const int* sequence, const int* sequence,
real* input_grad, real* input_grad,
size_t num_sequences, size_t num_sequences,
...@@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad, ...@@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad,
} }
template <> template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad, void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad, GpuMatrix& in_grad,
const GpuIVector& sequence, const GpuIVector& sequence,
size_t context_length, size_t context_length,
...@@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad, ...@@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
} }
template<int THREADS_X, int THREADS_Y> template<int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* out_grad, __global__ void KeContextProjectionBackwardWeight(const real* out_grad,
const int* sequence, const int* sequence,
real* w_grad, real* w_grad,
int num_sequences, int num_sequences,
...@@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad, ...@@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) { for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
int seq_start = sequence[seqId]; int seq_start = sequence[seqId];
int seq_end = sequence[seqId+1]; int seq_end = sequence[seqId+1];
output_r = out_grad + seq_start * w_dim * context_length; output_r = const_cast<real*>(out_grad)
+ seq_start * w_dim * context_length;
if (context_start < 0) { if (context_start < 0) {
if (padId + context_start < 0) { if (padId + context_start < 0) {
...@@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad, ...@@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
* beginning. * beginning.
* *
*/ */
void hl_context_projection_backward_weight(real* out_grad, void hl_context_projection_backward_weight(const real* out_grad,
const int* sequence, const int* sequence,
real* w_grad, real* w_grad,
size_t num_sequences, size_t num_sequences,
...@@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad, ...@@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad,
template <> template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>( void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
GpuMatrix& out_grad, const GpuMatrix& out_grad,
GpuMatrix& w_grad, GpuMatrix& w_grad,
const GpuIVector& seq_vec, const GpuIVector& seq_vec,
size_t context_length, size_t context_length,
...@@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>( ...@@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
} }
template <> template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad, void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad, GpuMatrix& in_grad,
GpuMatrix& w_grad, GpuMatrix& w_grad,
const GpuIVector& sequence, const GpuIVector& sequence,
......
...@@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start, ...@@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start,
cpu_out.randomizeUniform(); cpu_out.randomizeUniform();
gpu_out.copyFrom(cpu_out); gpu_out.copyFrom(cpu_out);
compare.getCpuFunction()->calc( BufferArgs cpu_inputs;
{Tensor(cpu_in.getData(), Dims{batch_size, input_dim}), BufferArgs cpu_outputs;
Tensor(cpu_weight ? cpu_weight->getData() : nullptr, cpu_inputs.addArg(cpu_in, *cpu_seq);
Dims{pad, input_dim}), if (cpu_weight) {
Tensor(reinterpret_cast<real*>(cpu_seq->getData()), cpu_inputs.addArg(*cpu_weight, *cpu_seq);
Dims{cpu_seq->getSize()})}, }
{Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})}, cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
{});
compare.getGpuFunction()->calc( compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
{Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_weight ? gpu_weight->getData() : nullptr, BufferArgs gpu_inputs;
Dims{pad, input_dim}), BufferArgs gpu_outputs;
Tensor(reinterpret_cast<real*>(gpu_seq->getData()), gpu_inputs.addArg(gpu_in, *gpu_seq);
Dims{gpu_seq->getSize()})}, if (gpu_weight) {
{Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})}, gpu_inputs.addArg(*gpu_weight, *gpu_seq);
{}); }
gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckEqual(cpu_out, gpu_out); autotest::TensorCheckEqual(cpu_out, gpu_out);
} }
...@@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start, ...@@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start,
gpu_w_grad->copyFrom(*cpu_w_grad); gpu_w_grad->copyFrom(*cpu_w_grad);
} }
compare.getCpuFunction()->calc( BufferArgs cpu_inputs;
{Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}), BufferArgs cpu_outputs;
Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
Dims{pad, input_dim}), cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
Tensor(reinterpret_cast<real*>(cpu_seq->getData()), cpu_outputs.addArg(
Dims{cpu_seq->getSize()})}, cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
{Tensor(cpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})}, compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
{});
BufferArgs gpu_inputs;
compare.getGpuFunction()->calc( BufferArgs gpu_outputs;
{Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}), gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr, gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
Dims{pad, input_dim}), gpu_outputs.addArg(
Tensor(reinterpret_cast<real*>(gpu_seq->getData()), gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out_grad.getData(), compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
Dims{batch_size, input_dim * context_length})},
{});
autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad); autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
if (is_padding) { if (is_padding) {
......
...@@ -93,6 +93,12 @@ void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) { ...@@ -93,6 +93,12 @@ void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
addArg(*_args_.back()); addArg(*_args_.back());
} }
void BufferArgs::addArg(const Matrix& matrix,
const IVector& vector,
ArgType argType) {
args_.push_back(std::make_shared<SequenceArg>(matrix, vector, argType));
}
ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_; ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
} // namespace paddle } // namespace paddle
...@@ -102,6 +102,10 @@ public: ...@@ -102,6 +102,10 @@ public:
void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
void addArg(const Matrix& matrix,
const IVector& vector,
ArgType argType = UNSPECIFIED);
// get argument // get argument
const BufferArg& operator[](size_t num) const { const BufferArg& operator[](size_t num) const {
CHECK_LT(num, args_.size()); CHECK_LT(num, args_.size());
......
...@@ -118,16 +118,15 @@ void ContextProjection::forward() { ...@@ -118,16 +118,15 @@ void ContextProjection::forward() {
/// first use state_, otherwise use weight_(padding false === w nullptr) /// first use state_, otherwise use weight_(padding false === w nullptr)
auto w_ptr = auto w_ptr =
state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr; state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
auto start_pos = in_->sequenceStartPositions; const auto start_pos = in_->sequenceStartPositions->getVector(useGpu_);
BufferArgs inputs; BufferArgs inputs;
BufferArgs outputs; BufferArgs outputs;
inputs.addArg(*in_->value); inputs.addArg(*in_->value, *start_pos);
inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, if (w_ptr) {
w_ptr ? w_ptr->getHeight() : 0, inputs.addArg(CpuMatrix(w_ptr->getData(), w_ptr->getHeight(), input_dim),
input_dim)); *start_pos);
inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); }
outputs.addArg(*out_->value, ADD_TO); outputs.addArg(*out_->value, *start_pos, ADD_TO);
forward_[0]->calc(inputs, outputs); forward_[0]->calc(inputs, outputs);
if (state_ && config_.context_start() < 0) { if (state_ && config_.context_start() < 0) {
...@@ -166,13 +165,16 @@ void ContextProjection::backward(const UpdateCallback& callback) { ...@@ -166,13 +165,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
BufferArgs inputs; BufferArgs inputs;
BufferArgs outputs; BufferArgs outputs;
inputs.addArg(CpuMatrix( inputs.addArg(*out_->grad, *in_->sequenceStartPositions->getVector(useGpu_));
in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim)); outputs.addArg(
inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, CpuMatrix(
w_ptr ? w_ptr->getHeight() : 0, in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim),
input_dim)); *in_->sequenceStartPositions->getVector(useGpu_),
inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); ADD_TO);
outputs.addArg(*out_->grad, ADD_TO); outputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
w_ptr ? w_ptr->getHeight() : 0,
input_dim),
ADD_TO);
backward_[0]->calc(inputs, outputs); backward_[0]->calc(inputs, outputs);
if (config_.trainable_padding()) { if (config_.trainable_padding()) {
......
...@@ -34,6 +34,10 @@ class IScanner(object): ...@@ -34,6 +34,10 @@ class IScanner(object):
class DenseScanner(IScanner): class DenseScanner(IScanner):
"""
:type __mat__: numpy.ndarray
"""
def __init__(self, input_type, pos): def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos) IScanner.__init__(self, input_type, pos)
self.__mat__ = None self.__mat__ = None
...@@ -47,6 +51,8 @@ class DenseScanner(IScanner): ...@@ -47,6 +51,8 @@ class DenseScanner(IScanner):
def finish_scan(self, argument): def finish_scan(self, argument):
assert isinstance(argument, swig_paddle.Arguments) assert isinstance(argument, swig_paddle.Arguments)
assert isinstance(self.input_type, dp2.InputType) assert isinstance(self.input_type, dp2.InputType)
if self.__mat__.dtype != numpy.float32:
self.__mat__ = self.__mat__.astype(numpy.float32)
m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False) m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False)
argument.setSlotValue(self.pos, m) argument.setSlotValue(self.pos, m)
......
#!/bin/bash #!/bin/bash
brew update brew update
brew tap homebrew/science brew tap homebrew/science
brew install python brew install openblas swig md5sha1sum
sudo pip install --upgrade protobuf
brew install swig openblas md5sha1sum protobuf
...@@ -6,7 +6,7 @@ if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ...@@ -6,7 +6,7 @@ if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
export PYTHONHOME=/opt/python/2.7.12 export PYTHONHOME=/opt/python/2.7.12
export PATH=/opt/python/2.7.12/bin:${PATH} export PATH=/opt/python/2.7.12/bin:${PATH}
cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
NRPOC=`nproc` NRPOC=`nproc`
make -j $NPROC make -j $NPROC
make coveralls make coveralls
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
source ./common.sh source ./common.sh
# Compile Documentation only. # Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
make paddle_docs paddle_docs_cn make paddle_docs paddle_docs_cn
# check websites for broken links # check websites for broken links
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册