diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst index 5c02886efd7d11e9520910526fb90ec01e123bae..3790f09c84563fe541bd8d0bc08e23b19d4287ca 100644 --- a/doc/fluid/api/layers.rst +++ b/doc/fluid/api/layers.rst @@ -815,3 +815,8 @@ zeros .. autofunction:: paddle.fluid.layers.zeros :noindex: +topk +---- + +.. autofunction:: paddle.fluid.layers.topk + :noindex: diff --git a/doc/fluid/dev/contribute_to_paddle_cn.md b/doc/fluid/dev/contribute_to_paddle_cn.md new file mode 120000 index 0000000000000000000000000000000000000000..955216ca62e71b4d3666e1662aa86c9495d2e7d6 --- /dev/null +++ b/doc/fluid/dev/contribute_to_paddle_cn.md @@ -0,0 +1 @@ +../../v2/dev/contribute_to_paddle_cn.md \ No newline at end of file diff --git a/doc/fluid/dev/contribute_to_paddle_en.md b/doc/fluid/dev/contribute_to_paddle_en.md new file mode 120000 index 0000000000000000000000000000000000000000..f9fc68c37e17a8a365b0d7fae86c16b0d094631f --- /dev/null +++ b/doc/fluid/dev/contribute_to_paddle_en.md @@ -0,0 +1 @@ +../../v2/dev/contribute_to_paddle_en.md \ No newline at end of file diff --git a/doc/fluid/dev/index_cn.rst b/doc/fluid/dev/index_cn.rst index ad798003f560e7fb0e6db6083fdd152fd3417584..37e608160db0ad5a92297987937bbbfa8f842ea8 100644 --- a/doc/fluid/dev/index_cn.rst +++ b/doc/fluid/dev/index_cn.rst @@ -4,6 +4,8 @@ .. toctree:: :maxdepth: 1 + contribute_to_paddle_cn.md + write_docs_cn.md api_doc_std_cn.md new_op_cn.md new_op_kernel.md diff --git a/doc/fluid/dev/index_en.rst b/doc/fluid/dev/index_en.rst index 80c899a82fa452c5cd8f38dad89c15d3041b09e3..d7f83035010f13c30514673ecbee301f194dc175 100644 --- a/doc/fluid/dev/index_en.rst +++ b/doc/fluid/dev/index_en.rst @@ -4,6 +4,8 @@ Development .. toctree:: :maxdepth: 1 + contribute_to_paddle_en.md + write_docs_en.md api_doc_std_en.md new_op_en.md new_op_kernel.md diff --git a/doc/fluid/dev/write_docs_cn.rst b/doc/fluid/dev/write_docs_cn.rst new file mode 120000 index 0000000000000000000000000000000000000000..2c281eaaf43bbfad84c3be9ed1d1bd0dbc77fa9b --- /dev/null +++ b/doc/fluid/dev/write_docs_cn.rst @@ -0,0 +1 @@ +../../v2/dev/write_docs_cn.rst \ No newline at end of file diff --git a/doc/fluid/dev/write_docs_en.rst b/doc/fluid/dev/write_docs_en.rst new file mode 120000 index 0000000000000000000000000000000000000000..cb2b9b0ff1f1d9e0e5201d160f6b7d9d451374e2 --- /dev/null +++ b/doc/fluid/dev/write_docs_en.rst @@ -0,0 +1 @@ +../../v2/dev/write_docs_en.rst \ No newline at end of file diff --git a/doc/v2/api/data/data_reader.rst b/doc/v2/api/data/data_reader.rst index 2ccfec9c284877a7576e9751526b169a4ac78d8e..d7c896a6270b488ca4449e5211d0d0879eda6ac5 100644 --- a/doc/v2/api/data/data_reader.rst +++ b/doc/v2/api/data/data_reader.rst @@ -6,7 +6,43 @@ Data Reader Interface DataTypes ========= -.. automodule:: paddle.v2.data_type +.. autofunction:: paddle.v2.data_type.dense_array + :noindex: + +.. autofunction:: paddle.v2.data_type.integer_value + :noindex: + +.. autofunction:: paddle.v2.data_type.integer_value_sequence + :noindex: + +.. autofunction:: paddle.v2.data_type.integer_value_sub_sequence + :noindex: + +.. autofunction:: paddle.v2.data_type.sparse_binary_vector + :noindex: + +.. autofunction:: paddle.v2.data_type.sparse_binary_vector_sequence + :noindex: + +.. autofunction:: paddle.v2.data_type.sparse_binary_vector_sub_sequence + :noindex: + +.. autofunction:: paddle.v2.data_type.sparse_float_vector + :noindex: + +.. autofunction:: paddle.v2.data_type.sparse_float_vector_sequence + :noindex: + +.. 
autofunction:: paddle.v2.data_type.sparse_float_vector_sub_sequence + :noindex: + +.. autofunction:: paddle.v2.data_type.sparse_non_value_slot + :noindex: + +.. autofunction:: paddle.v2.data_type.sparse_value_slot + :noindex: + +.. autoclass:: paddle.v2.data_type.InputType :members: :noindex: diff --git a/doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md b/doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md deleted file mode 120000 index c44cd9a731bed7067cdf19aa2f714abdce6c736a..0000000000000000000000000000000000000000 --- a/doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md +++ /dev/null @@ -1 +0,0 @@ -k8s_aws_en.md \ No newline at end of file diff --git a/doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md b/doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..afc753aa42f19631c49a451a797f28365e65ed1d --- /dev/null +++ b/doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md @@ -0,0 +1,672 @@ +# Kubernetes on AWS + +我们将向你展示怎么样在AWS的Kubernetes集群上运行分布式PaddlePaddle训练,让我们从核心概念开始 + +## PaddlePaddle分布式训练的核心概念 + +### 分布式训练任务 + +一个分布式训练任务可以看做是一个Kubernetes任务 +每一个Kubernetes任务都有相应的配置文件,此配置文件指定了像任务的pod个数之类的环境变量信息 + +在分布式训练任务中,我们可以如下操作: + +1. 在分布式文件系统中,准备分块数据和配置文件(在此次教学中,我们会用到亚马逊分布式存储服务(EFS)) +2. 创建和提交一个kubernetes任务配置到集群中开始训练 + +### Parameter Server和Trainer + +在paddlepaddle集群中有两个角色:参数服务器(pserver)者和trainer, 每一个参数服务器过程都会保存一部分模型的参数。每一个trainer都保存一份完整的模型参数,并可以利用本地数据更新模型。在这个训练过程中,trainer发送模型更新到参数服务器中,参数服务器职责就是聚合这些更新,以便于trainer可以把全局模型同步到本地。 + +为了能够和pserver通信,trainer需要每一个pserver的IP地址。在Kubernetes中利用服务发现机制(比如:DNS、hostname)要比静态的IP地址要好一些,因为任何一个pod都会被杀掉然后新的pod被重启到另一个不同IP地址的node上。现在我们可以先用静态的IP地址方式,这种方式是可以更改的。 + +参数服务器和trainer一块被打包成一个docker镜像,这个镜像会运行在被Kubernetes集群调度的pod中。 + +### 训练者ID + +每一个训练过程都需要一个训练ID,以0作为基础值,作为命令行参数传递。训练过程因此用这个ID去读取数据分片。 + +### 训练 + +PaddlePaddle容器的入口是一个shell脚本,这个脚本可以读取Kubernetes内预置的环境变量。这里可以定义任务identity,在任务中identity可以用来远程访问包含所有pod的Kubernetes apiserver服务。 + +每一个pod通过ip来排序。每一个pod的序列作为“pod id”。因为我们会在每一个pod中运行训练和参数服务,可以用“pod id”作为训练ID。入口脚本详细工作流程如下: + +1. 查找apiserver得到pod信息,通过ip排序来分配一个trainer_id。 +2. 从EFS持久化卷中复制训练数据到容器中。 +3. 从环境变量中解析paddle pserver和 paddle trainer的启动参数,然后开始启动流程。 +4. 以trainer_id来训练将自动把结果写入到EFS卷中。 + + +## AWS的Kubernetes中的PaddlePaddle + +### 选择AWS服务区域 +这个教程需要多个AWS服务工作在一个区域中。在AWS创建任何东西之前,请检查链接https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services/ 选择一个可以提供如下服务的区域:EC2, EFS, VPS, CloudFormation, KMS, VPC, S3。在教程中我们使用“Oregon(us-west-2)”作为例子。 + +### 创建aws账户和IAM账户 + +在每一个aws账户下可以创建多个IAM用户。允许为每一个IAM用户赋予权限,作为IAM用户可以创建/操作aws集群 + +注册aws账户,请遵循用户指南。在AWS账户下创建IAM用户和用户组,请遵循用户指南 + +请注意此教程需要如下的IAM用户权限: + +- AmazonEC2FullAccess +- AmazonS3FullAccess +- AmazonRoute53FullAccess +- AmazonRoute53DomainsFullAccess +- AmazonElasticFileSystemFullAccess +- AmazonVPCFullAccess +- IAMUserSSHKeys +- IAMFullAccess +- NetworkAdministrator +- AWSKeyManagementServicePowerUser + + +### 下载kube-aws and kubectl + +#### kube-aws + +在AWS中[kube-aws](https://github.com/coreos/kube-aws)是一个自动部署集群的CLI工具 + +##### kube-aws完整性验证 +提示:如果你用的是非官方版本(e.g RC release)的kube-aws,可以跳过这一步骤。引入coreos的应用程序签名公钥: + +``` +gpg2 --keyserver pgp.mit.edu --recv-key FC8A365E +``` + +指纹验证: + +``` +gpg2 --fingerprint FC8A365E +``` +正确的指纹是: `18AD 5014 C99E F7E3 BA5F 6CE9 50BD D3E0 FC8A 365E` + +我们可以从发布页面中下载kube-aws,教程使用0.9.1版本 [release page](https://github.com/coreos/kube-aws/releases). 
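+
+下面给出一个下载示例(仅作示意:这里假设 release tag 为 `v0.9.1`、平台为 linux-amd64,实际的版本号、下载地址与文件名请以 release 页面为准):
+
+```
+PLATFORM=linux-amd64
+VERSION=v0.9.1
+curl -L -O https://github.com/coreos/kube-aws/releases/download/${VERSION}/kube-aws-${PLATFORM}.tar.gz
+curl -L -O https://github.com/coreos/kube-aws/releases/download/${VERSION}/kube-aws-${PLATFORM}.tar.gz.sig
+```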
+ +验证tar包的GPG签名: + +``` +PLATFORM=linux-amd64 + # Or +PLATFORM=darwin-amd64 + +gpg2 --verify kube-aws-${PLATFORM}.tar.gz.sig kube-aws-${PLATFORM}.tar.gz +``` +##### 安装kube-aws +解压: + +``` +tar zxvf kube-aws-${PLATFORM}.tar.gz +``` + +添加到环境变量: + +``` +mv ${PLATFORM}/kube-aws /usr/local/bin +``` + + +#### kubectl + +[kubectl](https://Kubernetes.io/docs/user-guide/kubectl-overview/) 是一个操作Kubernetes集群的命令行接口 + +利用`curl`工具从Kubernetes发布页面中下载`kubectl` + +``` +# OS X +curl -O https://storage.googleapis.com/kubernetes-release/release/"$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)"/bin/darwin/amd64/kubectl + +# Linux +curl -O https://storage.googleapis.com/kubernetes-release/release/"$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)"/bin/linux/amd64/kubectl +``` + +为了能是kubectl运行必须将之添加到环境变量中 (e.g. `/usr/local/bin`): + +``` +chmod +x ./kubectl +sudo mv ./kubectl /usr/local/bin/kubectl +``` + +### 配置AWS证书 + +首先检查这里 [this](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) 安装AWS命令行工具 + +然后配置aws账户信息: + +``` +aws configure +``` + + +添加如下信息: + + +``` +AWS Access Key ID: YOUR_ACCESS_KEY_ID +AWS Secrete Access Key: YOUR_SECRETE_ACCESS_KEY +Default region name: us-west-2 +Default output format: json +``` + +`YOUR_ACCESS_KEY_ID`, and `YOUR_SECRETE_ACCESS_KEY` 是创建aws账户和IAM账户的IAM的key和密码 [Create AWS Account and IAM Account](#create-aws-account-and-iam-account) + +描述任何运行在你账户中的实例来验证凭据是否工作: + +``` +aws ec2 describe-instances +``` + +### 定义集群参数 + +#### EC2秘钥对 + +秘钥对将认证ssh访问你的EC2实例。秘钥对的公钥部分将配置到每一个COREOS节点中。 + +遵循 [EC2 Keypair User Guide](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) Keypair用户指南来创建EC2秘钥对 + +你可以使用创建好的秘钥对名称来配置集群. + +在同一工作区中秘钥对为EC2实例唯一码。在教程中使用 us-west-2 ,所以请确认在这个区域(Oregon)中创建秘钥对。 + +在浏览器中下载一个`key-name.pem`文件用来访问EC2实例,我们待会会用到. + + +#### KMS秘钥 + +亚马逊的KMS秘钥在TLS秘钥管理服务中用来加密和解密集群。如果你已经有可用的KMS秘钥,你可以跳过创建新秘钥这一步,提供现存秘钥的ARN字符串。 + +利用aws命令行创建kms秘钥: + +``` +aws kms --region=us-west-2 create-key --description="kube-aws assets" +{ + "KeyMetadata": { + "CreationDate": 1458235139.724, + "KeyState": "Enabled", + "Arn": "arn:aws:kms:us-west-2:aaaaaaaaaaaaa:key/xxxxxxxxxxxxxxxxxxx", + "AWSAccountId": "xxxxxxxxxxxxx", + "Enabled": true, + "KeyUsage": "ENCRYPT_DECRYPT", + "KeyId": "xxxxxxxxx", + "Description": "kube-aws assets" + } +} +``` + +我们稍后用到`Arn` 的值. + +在IAM用户许可中添加多个内联策略. + +进入[IAM Console](https://console.aws.amazon.com/iam/home?region=us-west-2#/home)。点击`Users`按钮,点击刚才创建的用户,然后点击`Add inline policy`按钮,选择`Custom Policy` + +粘贴内联策略: + +``` + (Caution: node_0, node_1, node_2 directories represents PaddlePaddle node and train_id, not the Kubernetes node){ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "Stmt1482205552000", + "Effect": "Allow", + "Action": [ + "kms:Decrypt", + "kms:Encrypt" + ], + "Resource": [ + "arn:aws:kms:*:AWS_ACCOUNT_ID:key/*" + ] + }, + { + "Sid": "Stmt1482205746000", + "Effect": "Allow", + "Action": [ + "cloudformation:CreateStack", + "cloudformation:UpdateStack", + "cloudformation:DeleteStack", + "cloudformation:DescribeStacks", + "cloudformation:DescribeStackResource", + "cloudformation:GetTemplate", + "cloudformation:DescribeStackEvents" + ], + "Resource": [ + "arn:aws:cloudformation:us-west-2:AWS_ACCOUNT_ID:stack/MY_CLUSTER_NAME/*" + ] + } + ] +} +``` +`Version` : 值必须是"2012-10-17". 
+`AWS_ACCOUNT_ID`: 你可以从命令行中获取: + +``` +aws sts get-caller-identity --output text --query Account +``` + +`MY_CLUSTER_NAME`: 选择一个你喜欢的MY_CLUSTER_NAME,稍后会用到。 +请注意,堆栈名称必须是正则表达式:[a-zA-Z][-a-zA-Z0-9*]*, 在名称中不能有"_"或者"-",否则kube-aws在下面步骤中会抛出异常 + +#### 外部DNS名称 + +当集群被创建后,基于DNS名称控制器将会暴露安全的TLS API. + +DNS名称含有CNAME指向到集群DNS名称或者记录指向集群的IP地址。 + +我们稍后会用到DNS名称,如果没有DNS名称的话,你可以选择一个(比如:`paddle`)还可以修改`/etc/hosts`用本机的DNS名称和集群IP关联。还可以在AWS上增加一个名称服务来关联paddle集群IP,稍后步骤中会查找集群IP. + +#### S3 bucket + +在启动Kubernetes集群前需要创建一个S3 bucket + +在AWS上创建s3 bucket会有许多的bugs,所以使用[s3 console](https://console.aws.amazon.com/s3/home?region=us-west-2)。 + +链接到 `Create Bucket`,确保在us-west-2 (Oregon)上创建一个唯一的BUCKET_NAME。 + +#### 初始化assets + +在本机创建一个目录用来存放产生的assets: + +``` +$ mkdir my-cluster +$ cd my-cluster +``` + +利用KMS Arn、秘钥对名称和前一步产生的DNS名称来初始化集群的CloudFormation栈: + +``` +kube-aws init \ +--cluster-name=MY_CLUSTER_NAME \ +--external-dns-name=MY_EXTERNAL_DNS_NAME \ +--region=us-west-2 \ +--availability-zone=us-west-2a \ +--key-name=KEY_PAIR_NAME \ +--kms-key-arn="arn:aws:kms:us-west-2:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx" +``` + +`MY_CLUSTER_NAME`: the one you picked in [KMS key](#kms-key) + +`MY_EXTERNAL_DNS_NAME`: see [External DNS name](#external-dns-name) + +`KEY_PAIR_NAME`: see [EC2 key pair](#ec2-key-pair) + +`--kms-key-arn`: the "Arn" in [KMS key](#kms-key) + +这里的`us-west-2a`用于参数`--availability-zone`,但必须在AWS账户的有效可用区中 + +如果不能切换到其他的有效可用区(e.g., `us-west-2a`, or `us-west-2b`),请检查`us-west-2a`是支持`aws ec2 --region us-west-2 describe-availability-zones`。 + +现在在asset目录中就有了集群的主配置文件cluster.yaml。 + +默认情况下kube-aws会创建一个工作节点,修改`cluster.yaml`让`workerCount`从1个节点变成3个节点. + +#### 呈现asset目录内容 + +在这个简单的例子中,你可以使用kuber-aws生成TLS身份和证书 + +``` +kube-aws render credentials --generate-ca +``` + +下一步在asset目录中生成一组集群assets. + +``` +kube-aws render stack +``` +asserts(模板和凭证)用于创建、更新和当前目录被创建的Kubernetes集群相关联 + +### 启动Kubernetes集群 + +#### 创建一个在CloudFormation模板上定义好的实例 + +现在让我们创建集群(在命令行中选择任意的 `PREFIX`) + +``` +kube-aws up --s3-uri s3://BUCKET_NAME/PREFIX +``` + +`BUCKET_NAME`: t在[S3 bucket](#s3-bucket)上使用的bucket名称 + + +#### 配置DNS + +你可以执行命令 `kube-aws status`来查看创建后集群的API. + +``` +$ kube-aws status +Cluster Name: paddle-cluster +Controller DNS Name: paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com +``` +如果你用DNS名称,在ip上设置任何记录或是安装CNAME点到`Controller DNS Name` (`paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com`) + +##### 查询IP地址 + +用命令`dig`去检查负载均衡器的域名来获取ip地址. + +``` +$ dig paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com + +;; QUESTION SECTION: +;paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. IN A + +;; ANSWER SECTION: +paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. 59 IN A 54.241.164.52 +paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. 
59 IN A 54.67.102.112 +``` + +在上面的例子中,`54.241.164.52`, `54.67.102.112`这两个ip都将是工作状态 + +*如果你有DNS名称*,设置记录到ip上,然后你可以跳过“Access the cluster”这一步 + +*如果没有自己的DNS名称* + +编辑/etc/hosts文件用DNS关联IP + +##### 更新本地的DNS关联 +编辑`/etc/hosts`文件用DNS关联IP +##### 在VPC上添加route53私有名称服务 + - 打开[Route53 Console](https://console.aws.amazon.com/route53/home) + - 根据配置创建域名zone + - domain名称为: "paddle" + - Type: "Private hosted zone for amazon VPC" + - VPC ID: `` + + ![route53 zone setting](src/route53_create_zone.png) + - 添加记录 + - 点击zone中刚创建的“paddle” + - 点击按钮“Create record set” + - Name : leave blank + - type: "A" + - Value: `` + + ![route53 create recordset](src/route53_create_recordset.png) + - 检查名称服务 + - 连接通过kube-aws via ssh创建的任何实例 + - 运行命令"host paddle",看看是否ip为返回的kube-controller的私有IP + +#### 进入集群 + +集群运行后如下命令会看到: + +``` +$ kubectl --kubeconfig=kubeconfig get nodes +NAME STATUS AGE +ip-10-0-0-134.us-west-2.compute.internal Ready 6m +ip-10-0-0-238.us-west-2.compute.internal Ready 6m +ip-10-0-0-50.us-west-2.compute.internal Ready 6m +ip-10-0-0-55.us-west-2.compute.internal Ready 6m +``` + + +### 集群安装弹性文件系统 + +训练数据存放在AWS上的EFS分布式文件系统中. + +1. 在[security group console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#SecurityGroups:sort=groupId)为EFS创建一个安全组 + 1. 可以看到`paddle-cluster-sg-worker` (在sg-055ee37d镜像中)安全组id +
![](src/worker_security_group.png)
+ + 2. 增加安全组`paddle-efs` ,以`paddle-cluster-sg-worker`的group id作为用户源和`ALL TCP`入栈规则。增加vpc `paddle-cluster-vpc`, 确保可用区是在[Initialize Assets](#initialize-assets)的时候用到的那一个. +
![](src/add_security_group.png)
+ +2. 利用`paddle-cluster-vpc`私有网络在[EFS console](https://us-west-2.console.aws.amazon.com/efs/home?region=us-west-2#/wizard/1) 中创建弹性文件系统, 确定子网为`paddle-cluster-Subnet0`和安全区为`paddle-efs`. +
![](src/create_efs.png)
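+
+EFS 创建完成后,也可以用 AWS CLI 查询文件系统 ID(示例命令仅作参考,这里沿用教程中的 us-west-2 区域),下一节配置 `EFS_DNS_NAME` 时会用到:
+
+```
+aws efs describe-file-systems --region us-west-2 --query "FileSystems[].FileSystemId"
+```
+
+EFS 的 DNS 名称形如 `<FileSystemId>.efs.us-west-2.amazonaws.com`。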
+ + +### 开始在AWS上进行paddlepaddle的训练 + +#### 配置Kubernetes卷指向EFS + +首先需要创建一个持久卷[PersistentVolume](https://kubernetes.io/docs/user-guide/persistent-volumes/) 到EFS上 + +用 `pv.yaml`形式来保存 +``` +apiVersion: v1 +kind: PersistentVolume +metadata: + name: efsvol +spec: + capacity: + storage: 100Gi + accessModes: + - ReadWriteMany + nfs: + server: EFS_DNS_NAME + path: "/" +``` + +`EFS_DNS_NAME`: DNS名称最好能描述我们创建的`paddle-efs`,看起来像`fs-2cbf7385.efs.us-west-2.amazonaws.com` + +运行下面的命令来创建持久卷: +``` +kubectl --kubeconfig=kubeconfig create -f pv.yaml +``` +下一步创建 [PersistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/)来声明持久卷 + +用`pvc.yaml`来保存. +``` +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: efsvol +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 50Gi +``` + +行下面命令来创建持久卷声明: +``` +kubectl --kubeconfig=kubeconfig create -f pvc.yaml +``` + +#### 准备训练数据 + +启动Kubernetes job在我们创建的持久层上进行下载、保存并均匀拆分训练数据为3份. + +用`paddle-data-job.yaml`保存 +``` +apiVersion: batch/v1 +kind: Job +metadata: + name: paddle-data +spec: + template: + metadata: + name: pi + spec: + containers: + - name: paddle-data + image: paddlepaddle/paddle-tutorial:k8s_data + imagePullPolicy: Always + volumeMounts: + - mountPath: "/efs" + name: efs + env: + - name: OUT_DIR + value: /efs/paddle-cluster-job + - name: SPLIT_COUNT + value: "3" + volumes: + - name: efs + persistentVolumeClaim: + claimName: efsvol + restartPolicy: Never +``` + +运行下面的命令来启动任务: +``` +kubectl --kubeconfig=kubeconfig create -f paddle-data-job.yaml +``` +任务运行大概需要7分钟,可以使用下面命令查看任务状态,直到`paddle-data`任务的`SUCCESSFUL`状态为`1`时成功,这里here有怎样创建镜像的源码 +``` +$ kubectl --kubeconfig=kubeconfig get jobs +NAME DESIRED SUCCESSFUL AGE +paddle-data 1 1 6m +``` +数据准备完成后的结果是以镜像`paddlepaddle/paddle-tutorial:k8s_data`存放,可以点击这里[here](src/k8s_data/README.md)查看如何创建docker镜像源码 + +#### 开始训练 + +现在可以开始运行paddle的训练任务,用`paddle-cluster-job.yaml`进行保存 +``` +apiVersion: batch/v1 +kind: Job +metadata: + name: paddle-cluster-job +spec: + parallelism: 3 + completions: 3 + template: + metadata: + name: paddle-cluster-job + spec: + volumes: + - name: efs + persistentVolumeClaim: + claimName: efsvol + containers: + - name: trainer + image: paddlepaddle/paddle-tutorial:k8s_train + command: ["bin/bash", "-c", "/root/start.sh"] + env: + - name: JOB_NAME + value: paddle-cluster-job + - name: JOB_PATH + value: /home/jobpath + - name: JOB_NAMESPACE + value: default + - name: TRAIN_CONFIG_DIR + value: quick_start + - name: CONF_PADDLE_NIC + value: eth0 + - name: CONF_PADDLE_PORT + value: "7164" + - name: CONF_PADDLE_PORTS_NUM + value: "2" + - name: CONF_PADDLE_PORTS_NUM_SPARSE + value: "2" + - name: CONF_PADDLE_GRADIENT_NUM + value: "3" + - name: TRAINER_COUNT + value: "3" + volumeMounts: + - mountPath: "/home/jobpath" + name: efs + ports: + - name: jobport0 + hostPort: 7164 + containerPort: 7164 + - name: jobport1 + hostPort: 7165 + containerPort: 7165 + - name: jobport2 + hostPort: 7166 + containerPort: 7166 + - name: jobport3 + hostPort: 7167 + containerPort: 7167 + restartPolicy: Never +``` + +`parallelism: 3, completions: 3` 意思是这个任务会同时开启3个paddlepaddle的pod,当pod启动后3个任务将被完成。 + +`env` 参数代表容器的环境变量,在这里指定paddlepaddle的参数. + +`ports` 指定TCP端口7164 - 7167和`pserver`进行连接,port从`CONF_PADDLE_PORT`(7164)到`CONF_PADDLE_PORT + CONF_PADDLE_PORTS_NUM + CONF_PADDLE_PORTS_NUM_SPARSE - 1`(7167)。我们使用多个端口密集和稀疏参数的更新来提高延迟 + +运行下面命令来启动任务. 
+```
+kubectl --kubeconfig=kubeconfig create -f paddle-cluster-job.yaml
+```
+
+检查pods信息:
+
+```
+$ kubectl --kubeconfig=kubeconfig get pods
+NAME                       READY     STATUS    RESTARTS   AGE
+paddle-cluster-job-cm469   1/1       Running   0          9m
+paddle-cluster-job-fnt03   1/1       Running   0          9m
+paddle-cluster-job-jx4xr   1/1       Running   0          9m
+```
+
+检查指定pod的控制台输出:
+```
+kubectl --kubeconfig=kubeconfig log -f POD_NAME
+```
+
+`POD_NAME`: 任意一个pod的名称 (e.g., `paddle-cluster-job-cm469`)。
+
+运行`kubectl --kubeconfig=kubeconfig describe job paddle-cluster-job`来检查训练任务的状态,任务大约会在20分钟内完成。
+
+`pserver`和`trainer`的细节都隐藏在docker镜像`paddlepaddle/paddle-tutorial:k8s_train`中,创建该docker镜像的源码见[这里](src/k8s_train/README.md)。
+
+#### 检查训练输出
+
+训练输出(模型快照和日志)将被保存在EFS上。我们可以用ssh登录到EC2的工作节点上,查看mount过的EFS和训练输出。
+
+1. ssh登录EC2工作节点
+```
+chmod 400 key-name.pem
+ssh -i key-name.pem core@INSTANCE_IP
+```
+
+`INSTANCE_IP`: EC2上Kubernetes工作节点的公共IP地址,可在[EC2 console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#Instances:sort=instanceId)中查看任意`paddle-cluster-kube-aws-worker`实例的`public IP`。
+
+2. 挂载EFS
+```
+mkdir efs
+sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 EFS_DNS_NAME:/ efs
+```
+
+`EFS_DNS_NAME`: 我们创建的`paddle-efs`在其描述信息中显示的DNS名称,形如`fs-2cbf7385.efs.us-west-2.amazonaws.com`。
+
+文件夹`efs`下会有类似如下的目录结构:
+```
+-- paddle-cluster-job
+    |-- ...
+    |-- output
+    |   |-- node_0
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- node_1
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- node_2
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- pass-00000
+    |   |   |-- ___fc_layer_0__.w0
+    |   |   |-- ___fc_layer_0__.wbias
+    |   |   |-- done
+    |   |   |-- path.txt
+    |   |   `-- trainer_config.lr.py
+    |   |-- pass-00001...
+```
+`server.log`是`pserver`的日志,`train.log`是`trainer`的日志,模型快照和描述文件存放在`pass-0000*`中。
+
+### Kubernetes集群卸载或删除
+
+#### 删除EFS
+
+到[EFS Console](https://us-west-2.console.aws.amazon.com/efs/home?region=us-west-2)中删除创建的EFS卷。
+
+#### 删除安全组
+
+在[Security Group Console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#SecurityGroups:sort=groupId)中删除安全组`paddle-efs`。
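+
+也可以用 AWS CLI 删除该安全组(示例仅作参考,`sg-xxxxxxxx` 需替换为 `paddle-efs` 实际对应的安全组 ID):
+
+```
+# 先查出 paddle-efs 的安全组 ID
+aws ec2 describe-security-groups --region us-west-2 --filters Name=group-name,Values=paddle-efs --query "SecurityGroups[].GroupId"
+# 再按 ID 删除(sg-xxxxxxxx 为上一步查到的 ID)
+aws ec2 delete-security-group --region us-west-2 --group-id sg-xxxxxxxx
+```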
+ +#### 删除S3 bucket + +进入 [S3 Console](https://console.aws.amazon.com/s3/home?region=us-west-2#)删除S3 bucket + +#### 销毁集群 + +``` +kube-aws destroy +``` + +命令会立刻返回,但需要大约5分钟来销毁集群 + +可以进入 [CludFormation Console](https://us-west-2.console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks?filter=active)检查销毁的过程。 diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 1f3ca24df16cf080d325fbdc0d613a828e384b2a..340b891e41671df7e61a4a66ec538d4603bb9842 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -102,7 +102,7 @@ cc_test(init_test SRCS init_test.cc DEPS init) cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) -cc_test(channel_test SRCS channel_test.cc) +# cc_test(channel_test SRCS channel_test.cc) cc_test(tuple_test SRCS tuple_test.cc ) cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index e3e7c55d153aec8ce9c25c962821b266eaa84fe4..946ee91a667496e2427304df4228334bb1061890 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -51,23 +51,23 @@ void FetchOpHandle::RunImpl() { auto *var = static_cast(input); var->generated_op_->Wait(cpu_ctx); } - tensors_.resize(inputs_.size()); - auto *var = static_cast(inputs_[0]); - auto &var_name = var->name_; + auto *var_handle = static_cast(inputs_[0]); + auto &var_name = var_handle->name_; platform::CPUPlace cpu; auto &scopes = *local_scopes_; for (size_t i = 0; i < scopes.size(); ++i) { auto &scope = scopes[i]; - auto &t = scope->FindVar(kLocalExecScopeName) - ->Get() - ->FindVar(var_name) - ->Get(); - if (platform::is_gpu_place(var->place_)) { + auto *var = + scope->FindVar(kLocalExecScopeName)->Get()->FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope", + var_name); + auto &t = var->Get(); + if (platform::is_gpu_place(t.place())) { #ifdef PADDLE_WITH_CUDA TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]); - dev_ctxes_[t.place()]->Wait(); + dev_ctxes_.at(t.place())->Wait(); #endif } else { tensors_[i].ShareDataWith(t); diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index 4d76dbf7f6ffcf6c82ebf7defd9334bbe64a451c..d2b6a35a5d5c260b023c68ec4684da95a5b79e81 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -89,101 +89,25 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( bool is_forwarding = true; for (auto *op : program.Block(0).AllOps()) { - bool change_forward = false; - if (!is_forwarding) { - // FIXME(yy): Do not hard code like this - if (op->OutputArgumentNames().size() == 1 && - op->OutputArgumentNames()[0] == GradVarName(loss_var_name_)) { - continue; // Drop fill 1. for backward coeff; - } - } - - // append send op if program is distributed trainer main program. 
- // always use the first device - if (!is_forwarding && op->Type() == "send") { - auto &p = places_[0]; - auto *s = local_scopes_[0]; - // FIXME(wuyi): send op always copy from GPU 0 - result.ops_.emplace_back(new SendOpHandle(*op, s, p)); - // Create inputs for output on original place and no ssa output - // is created for send op. - CreateOpHandleIOs(&result, *op, p, 0); - continue; - } - - for (size_t i = 0; i < places_.size(); ++i) { - auto &p = places_[i]; - auto *s = local_scopes_[i]; - - result.ops_.emplace_back(new ComputationOpHandle(*op, s, p)); - auto *op_handle = result.ops_.back().get(); - CreateOpHandleIOs(&result, *op, p, i); - - auto var_names = op->OutputArgumentNames(); - - if (is_forwarding) { - if (var_names.size() == 1 && var_names[0] == loss_var_name_) { -// Insert ScaleCost OpHandle -#ifdef PADDLE_WITH_CUDA - auto *communication_dev_ctx = nccl_ctxs_->DevCtx(p); -#else - auto *communication_dev_ctx = - platform::DeviceContextPool::Instance().Get(platform::CPUPlace()); -#endif - - op_handle = new ScaleLossGradOpHandle(local_scopes_.size(), s, p, - communication_dev_ctx); - result.ops_.emplace_back(op_handle); - - // FIXME: Currently ScaleLossGradOp only use device_count as scale - // factor. So it does not depend on any other operators. - // VarHandle *loss = GetVarHandle(loss_var_name, place); - // loss->pending_ops_.emplace_back(op_handle); - // op_handle->inputs_.emplace_back(loss); - - CreateOpOutput(&result, op_handle, GradVarName(loss_var_name_), p, i); - change_forward = true; - } - } - } - - if (change_forward) { + if (op->Type() == "send") { + // append send op if program is distributed trainer main program. + // always use the first device + CreateSendOp(&result, *op); + } else if (IsScaleLossOp(*op)) { + CreateScaleLossGradOp(&result); is_forwarding = false; - } - - if (!is_forwarding) { - auto var_names = op->OutputArgumentNames(); - // Currently, we assume that once gradient is generated, it can be - // broadcast, and each gradient is only broadcast once. But there are no - // other cases, for example, we need to adjust the gradient according to - // the input when we get the gradient, which is not considered at present. - for (auto &og : var_names) { - if (grad_names_.count(og) != 0 && - og_has_been_broadcast.count(og) == 0) { // is param grad - // Insert NCCL AllReduce Op - og_has_been_broadcast.insert(og); -#ifdef PADDLE_WITH_CUDA - result.ops_.emplace_back( - new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_)); - auto *op_handle = result.ops_.back().get(); - - for (size_t i = 0; i < places_.size(); ++i) { - auto &p = places_[i]; - auto &vars = result.vars_[i][og]; - - if (vars.empty()) { // This device has no data. continue. - continue; - } - auto &prev_grad = vars[vars.size() - 1]; - op_handle->AddInput(prev_grad.get()); - - auto var = new VarHandle(vars.size() - 1, i, og, p); - vars.emplace_back(var); - op_handle->AddOutput(var); + } else { + CreateComputationalOps(&result, *op); + if (!is_forwarding) { + // Currently, we assume that once gradient is generated, it can be + // broadcast, and each gradient is only broadcast once. But there are no + // other cases, for example, we need to adjust the gradient according to + // the input when we get the gradient, which is not considered at + // present. 
+ for (auto &og : op->OutputArgumentNames()) { + if (IsParameterGradientOnce(og, &og_has_been_broadcast)) { + InsertNCCLAllReduceOp(&result, og); } -#else - PADDLE_ENFORCE("Not implemented"); -#endif } } } @@ -207,7 +131,95 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( } return std::unique_ptr(graph); -} // namespace details +} + +void MultiDevSSAGraphBuilder::InsertNCCLAllReduceOp( + SSAGraph *result, const std::string &og) const { +#ifdef PADDLE_WITH_CUDA + result->ops_.emplace_back( + new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_)); + auto *op_handle = result->ops_.back().get(); + + for (size_t i = 0; i < places_.size(); ++i) { + auto &p = places_[i]; + auto &vars = result->vars_[i][og]; + PADDLE_ENFORCE(!vars.empty()); + auto &prev_grad = vars.back(); + op_handle->AddInput(prev_grad.get()); + + auto var = new VarHandle(vars.size() - 1, i, og, p); + vars.emplace_back(var); + op_handle->AddOutput(var); + } +#else + PADDLE_ENFORCE("Not implemented"); +#endif +} + +bool MultiDevSSAGraphBuilder::IsParameterGradientOnce( + const std::string &og, + std::unordered_set *og_has_been_broadcast) const { + bool is_pg_once = + grad_names_.count(og) != 0 && og_has_been_broadcast->count(og) == 0; + if (is_pg_once) { + // Insert NCCL AllReduce Op + og_has_been_broadcast->insert(og); + } + return is_pg_once; +} + +void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(SSAGraph *result) const { + for (size_t i = 0; i < places_.size(); ++i) { +// Insert ScaleCost OpHandle +#ifdef PADDLE_WITH_CUDA + auto *communication_dev_ctx = nccl_ctxs_->DevCtx(places_[i]); +#else + auto *communication_dev_ctx = + platform::DeviceContextPool::Instance().Get(platform::CPUPlace()); +#endif + + auto *op_handle = + new ScaleLossGradOpHandle(local_scopes_.size(), local_scopes_[i], + places_[i], communication_dev_ctx); + result->ops_.emplace_back(op_handle); + + // FIXME: Currently ScaleLossGradOp only use device_count as scale + // factor. So it does not depend on any other operators. + // VarHandle *loss = GetVarHandle(loss_var_name, place); + // loss->pending_ops_.emplace_back(op_handle); + // op_handle->inputs_.emplace_back(loss); + + CreateOpOutput(result, op_handle, GradVarName(loss_var_name_), places_[i], + i); + } +} + +void MultiDevSSAGraphBuilder::CreateComputationalOps(SSAGraph *result, + const OpDesc &op) const { + for (size_t scope_idx = 0; scope_idx < places_.size(); ++scope_idx) { + auto p = places_[scope_idx]; + auto s = local_scopes_[scope_idx]; + result->ops_.emplace_back(new ComputationOpHandle(op, s, p)); + CreateOpHandleIOs(result, op, p, scope_idx); + } +} + +void MultiDevSSAGraphBuilder::CreateSendOp(SSAGraph *result, + const OpDesc &op) const { + auto &p = places_[0]; + auto *s = local_scopes_[0]; + // FIXME(wuyi): send op always copy from GPU 0 + result->ops_.emplace_back(new SendOpHandle(op, s, p)); + // Create inputs for output on original place and no ssa output + // is created for send op. 
+ CreateOpHandleIOs(result, op, p, 0); +} + +bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const { + // FIXME(yy): Do not hard code like this + return op.OutputArgumentNames().size() == 1 && + op.OutputArgumentNames()[0] == GradVarName(loss_var_name_); +} } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h index f1518d75b421006db6311c3b0f602e47000ab381..b5ba2dbd3c00f23fabd993d7908664db38a31941 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.h +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h @@ -57,6 +57,20 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { #ifdef PADDLE_WITH_CUDA platform::NCCLContextMap *nccl_ctxs_; #endif + + bool IsScaleLossOp(const OpDesc &op) const; + + void CreateSendOp(SSAGraph *result, const OpDesc &op) const; + + void CreateComputationalOps(SSAGraph *result, const OpDesc &op) const; + + void CreateScaleLossGradOp(SSAGraph *result) const; + + bool IsParameterGradientOnce( + const std::string &og, + std::unordered_set *og_has_been_broadcast) const; + + void InsertNCCLAllReduceOp(SSAGraph *result, const std::string &og) const; }; } // namespace details } // namespace framework diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc index 1e48f75958a3ada4d1cd5c8d0f920da4fed2157e..e587210b357ea6caa3272903d8aa6b3e4b2e8228 100644 --- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc @@ -73,8 +73,9 @@ void NCCLAllReduceOpHandle::RunImpl() { for (size_t i = 0; i < local_scopes_.size(); ++i) { auto *s = local_scopes_[i]; + auto &local_scope = *s->FindVar(kLocalExecScopeName)->Get(); - auto &lod_tensor = s->FindVar(var_name)->Get(); + auto &lod_tensor = local_scope.FindVar(var_name)->Get(); lod_tensors.emplace_back(lod_tensor); } @@ -110,17 +111,21 @@ void NCCLAllReduceOpHandle::RunImpl() { } }); } else { // Special handle CPU only Operator's gradient. 
Like CRF - auto &trg = - *this->local_scopes_[0]->Var()->GetMutable(); + auto &trg = *this->local_scopes_[0] + ->FindVar(kLocalExecScopeName) + ->Get() + ->Var() + ->GetMutable(); // Reduce All Tensor to trg in CPU ReduceLoDTensor func(lod_tensors, &trg); VisitDataType(ToDataType(lod_tensors[0].type()), func); for (size_t i = 0; i < local_scopes_.size(); ++i) { - auto &scope = local_scopes_[i]; + auto &scope = + *local_scopes_[i]->FindVar(kLocalExecScopeName)->Get(); auto &p = places_[i]; - auto *var = scope->FindVar(var_name); + auto *var = scope.FindVar(var_name); auto *dev_ctx = dev_ctxes_[p]; RunAndRecordEvent(p, [&trg, var, dev_ctx, p] { diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index 7fb9f99a8a1bc044e2f25f373265a5ec9f7d76d5..7a65ee62c9bfc0dad2ebee3be21de825fa405d73 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -30,10 +30,11 @@ ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {} void ScaleLossGradOpHandle::RunImpl() { std::string var_name = static_cast(this->outputs_[0])->name_; + auto &local_scope = *scope_->FindVar(kLocalExecScopeName)->Get(); - float *tmp = - scope_->FindVar(var_name)->GetMutable()->mutable_data( - make_ddim({1}), place_); + float *tmp = local_scope.FindVar(var_name) + ->GetMutable() + ->mutable_data(make_ddim({1}), place_); if (platform::is_cpu_place(place_)) { *tmp = coeff_; diff --git a/paddle/fluid/framework/grad_op_desc_maker.h b/paddle/fluid/framework/grad_op_desc_maker.h index cf697187d6225f3a1d2506120eebe14d4a41dff9..b4d3fa25c35fbf25b3d2fdd9fa1045dda0f773ec 100644 --- a/paddle/fluid/framework/grad_op_desc_maker.h +++ b/paddle/fluid/framework/grad_op_desc_maker.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include #include @@ -69,8 +70,7 @@ class GradOpDescMakerBase { " for input argument with a list of variables, " " drop_empty_grad is not allowed because it makes" " the correspondence bewteen a variable and its gradient" - " ambiguous. Use REGISTER_OP_EX to register the op" - " or call InputGrad(?,false) in GradOpDescMaker." + " ambiguous." " Op type %s", fwd_op_.Type()); diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index f1424f13b445155fe4f28732408a2445ab1aa9b7..748317438b44bc4af84f13b25f8e4f88386388fb 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -16,6 +16,8 @@ limitations under the License. */ #include #include +#include +#include #include #include #include @@ -141,36 +143,6 @@ class OpKernelRegistrar : public Registrar { return 0; \ } -/** - * Macro to register Operator. When the input is duplicable, you should - * use REGISTER_OP_EX with drop_empty_grad=false instead. - */ -#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ - grad_op_class) \ - REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \ - grad_op_class, true) - -// When an argument is duplicable, we need to use this version. -// Perhaps we can omit DropEmptyIG template parameter and -// only have one version of REGISTER_OP. 
-#define REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \ - grad_op_class, drop_empty_grad) \ - REGISTER_OPERATOR(grad_op_type, grad_op_class); \ - class _GradOpDescMaker_##grad_op_type##_ \ - : public ::paddle::framework::DefaultGradOpDescMaker { \ - using ::paddle::framework::DefaultGradOpDescMaker< \ - drop_empty_grad>::DefaultGradOpDescMaker; \ - \ - protected: \ - virtual std::string GradOpType() const { return #grad_op_type; } \ - }; \ - REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ - op_maker_class); - -#define REGISTER_OP_WITH_KERNEL(op_type, ...) \ - REGISTER_OPERATOR(op_type, ::paddle::framework::OperatorWithKernel, \ - ##__VA_ARGS__) - #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \ REGISTER_OPERATOR(op_type, op_class, op_maker_class) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 0962f40c4a64f18f7105626c54a83f1c5b299c50..67e02e2f119707bba376056510a8ca1034590b55 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -44,6 +44,7 @@ class ParallelExecutorPrivate { #endif std::vector> var_types_; + bool own_local_scope; }; std::vector &ParallelExecutor::GetLocalScopes() { @@ -63,13 +64,16 @@ ParallelExecutor::ParallelExecutor( // Step 1. Bcast the params to devs. // Create local scopes if (local_scopes.empty()) { - for (size_t i = 0; i < member_->places_.size(); ++i) { - member_->local_scopes_.push_back(&scope->NewScope()); + member_->own_local_scope = true; + member_->local_scopes_.emplace_back(member_->global_scope_); + for (size_t i = 1; i < member_->places_.size(); ++i) { + member_->local_scopes_.emplace_back(&scope->NewScope()); } } else { + member_->own_local_scope = false; PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size()); for (size_t i = 0; i < member_->places_.size(); ++i) { - member_->local_scopes_.push_back(local_scopes[i]); + member_->local_scopes_.emplace_back(local_scopes[i]); } } @@ -159,7 +163,9 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { platform::RecordBlock b(0); // Create local scopes. 
- for (auto &scope : member_->local_scopes_) { + for (auto it = member_->local_scopes_.rbegin(); + it != member_->local_scopes_.rend(); ++it) { + auto &scope = *it; Scope &local_scope = scope->NewScope(); *scope->Var(details::kLocalExecScopeName)->GetMutable() = &local_scope; @@ -173,7 +179,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, InitializeVariable(scope->Var(std::get<0>(name_type_pair)), std::get<1>(name_type_pair)); } else { - InitializeVariable(scope->Var(std::get<0>(name_type_pair)), + InitializeVariable(local_scope.Var(std::get<0>(name_type_pair)), std::get<1>(name_type_pair)); } } @@ -228,5 +234,13 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( } } +ParallelExecutor::~ParallelExecutor() { + if (member_->own_local_scope) { + for (size_t i = 1; i < member_->local_scopes_.size(); ++i) { + member_->global_scope_->DeleteScope(member_->local_scopes_[i]); + } + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 303ac3bc55cfed57a03765b27d8aba581eabd1c8..f4f283bb4b5eafc33619c98b5f30e1e8f453ece3 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -42,6 +42,8 @@ class ParallelExecutor { const std::vector& local_scopes, bool allow_op_delay); + ~ParallelExecutor(); + std::vector& GetLocalScopes(); /** diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 3ae4e0ea01c4af761d1df074735692ed5b51ac06..256aded8ca234a24229e11f27b9e3e25728ad293 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -110,12 +110,12 @@ function(op_library TARGET) # Note that it's enough to just adding one operator to pybind in a *_op.cc file. # And for detail pybind information, please see generated paddle/pybind/pybind.h. 
file(READ ${TARGET}.cc TARGET_CONTENT) - string(REGEX MATCH "REGISTER_OP\\(.*REGISTER_OP\\(" multi_register "${TARGET_CONTENT}") - string(REGEX MATCH "REGISTER_OP\\([a-z0-9_]*," one_register "${multi_register}") + string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}") + string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}") if (one_register STREQUAL "") string(REPLACE "_op" "" TARGET "${TARGET}") else () - string(REPLACE "REGISTER_OP(" "" TARGET "${one_register}") + string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}") string(REPLACE "," "" TARGET "${TARGET}") endif() diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index bb3034fd4389ca9457a1fd011c099156e0cc4d53..36c479b401837e49ecd7dc078fec725b5bc8b23b 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -469,8 +469,6 @@ REGISTER_ACTIVATION_OP_GRAD_MAKER(HardSigmoid, hard_sigmoid); namespace ops = paddle::operators; -void DummyFunctor() {} - #define FOR_EACH_INPLACE_OP_FUNCTOR(__macro) \ __macro(Sigmoid, sigmoid); \ __macro(Relu, relu); \ diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index eaeaca84ab8592bcb7e4ef1451b4aeb0b2d9cf59..912415192659dc004f54a76e9cd1a20581d512a6 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -648,7 +648,7 @@ struct SoftReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - bool Inplace() const { return IsInplace("softrelu"); } + bool Inplace() const { return IsInplace("soft_relu"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { diff --git a/paddle/fluid/operators/bilinear_tensor_product_op.cc b/paddle/fluid/operators/bilinear_tensor_product_op.cc index 2ec984d8e0f07b741f5e36f281134c0469079afd..44e2af8e2e066cfd58698b7112d8a08670b84c6d 100644 --- a/paddle/fluid/operators/bilinear_tensor_product_op.cc +++ b/paddle/fluid/operators/bilinear_tensor_product_op.cc @@ -153,9 +153,11 @@ class BilinearTensorProductOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp, - ops::BilinearTensorProductOpMaker, bilinear_tensor_product_grad, - ops::BilinearTensorProductOpGrad); +REGISTER_OPERATOR(bilinear_tensor_product, ops::BilinearTensorProductOp, + ops::BilinearTensorProductOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(bilinear_tensor_product_grad, + ops::BilinearTensorProductOpGrad) REGISTER_OP_CPU_KERNEL( bilinear_tensor_product, ops::BilinearTensorProductKernel, diff --git a/paddle/fluid/operators/clip_op.cc b/paddle/fluid/operators/clip_op.cc index a3b67964c79268e6ce07018501c46163847897ad..3c2d8e87072e13e17a01f4fa37a9217cd24f2a5f 100644 --- a/paddle/fluid/operators/clip_op.cc +++ b/paddle/fluid/operators/clip_op.cc @@ -81,8 +81,9 @@ class ClipOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(clip, ops::ClipOp, ops::ClipOpMaker, clip_grad, - ops::ClipOpGrad); +REGISTER_OPERATOR(clip, ops::ClipOp, ops::ClipOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(clip_grad, ops::ClipOpGrad) REGISTER_OP_CPU_KERNEL( clip, ops::ClipKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/concat_op.cc 
b/paddle/fluid/operators/concat_op.cc index 4a36b03cb63ac3ea61be1bbc56b8dd0adbe7d334..5fbbe4d0286e222cbfee63f61c6939160ba078de 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -103,8 +103,10 @@ class ConcatOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_EX(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad, - ops::ConcatOpGrad, false) +REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker, + paddle::framework::DefaultGradOpDescMaker< + false> /* set false to disable empty grad */) +REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad) REGISTER_OP_CPU_KERNEL( concat, ops::ConcatKernel) REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index 695db841a4ec666b2c8783dfc7df959711341d85..83e56f80ca217ffe22e3e03d616da4b31763841d 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -335,14 +335,17 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType( } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(conv2d, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad, - ops::ConvOpGrad); +REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad) // depthwise convolution op -REGISTER_OP(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker, - depthwise_conv2d_grad, ops::ConvOpGrad); -REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad, - ops::ConvOpGrad); +REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad) +REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad) // depthwise conv kernel // TODO(xingzhaolong): neon kernel for mobile diff --git a/paddle/fluid/operators/conv_shift_op.cc b/paddle/fluid/operators/conv_shift_op.cc index a1a0b00208fe77ad462062b5d0cb0c5f3065f584..46a675e936c04ae8aa570c8dba9fbdcc64829a66 100644 --- a/paddle/fluid/operators/conv_shift_op.cc +++ b/paddle/fluid/operators/conv_shift_op.cc @@ -193,8 +193,9 @@ class ConvShiftGradKernel } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(conv_shift, ops::ConvShiftOp, ops::ConvShiftOpMaker, - conv_shift_grad, ops::ConvShiftGradOp); +REGISTER_OPERATOR(conv_shift, ops::ConvShiftOp, ops::ConvShiftOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(conv_shift_grad, ops::ConvShiftGradOp) REGISTER_OP_CPU_KERNEL(conv_shift, ops::ConvShiftKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index 08f5939d42a41d235a94eff16cf2f558068d6aaa..c148237f85385598b7f793e7c15f3ad346328f97 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -298,8 +298,10 @@ framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType( namespace ops = paddle::operators; -REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker, - conv2d_transpose_grad, ops::ConvTransposeOpGrad); +REGISTER_OPERATOR(conv2d_transpose, ops::ConvTransposeOp, + ops::Conv2DTransposeOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(conv2d_transpose_grad, ops::ConvTransposeOpGrad) REGISTER_OP_CPU_KERNEL( conv2d_transpose, @@ 
-311,8 +313,10 @@ REGISTER_OP_CPU_KERNEL( ops::GemmConvTransposeGradKernel); -REGISTER_OP(conv3d_transpose, ops::ConvTransposeOp, ops::Conv3DTransposeOpMaker, - conv3d_transpose_grad, ops::ConvTransposeOpGrad); +REGISTER_OPERATOR(conv3d_transpose, ops::ConvTransposeOp, + ops::Conv3DTransposeOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(conv3d_transpose_grad, ops::ConvTransposeOpGrad) REGISTER_OP_CPU_KERNEL( conv3d_transpose, diff --git a/paddle/fluid/operators/cos_sim_op.cc b/paddle/fluid/operators/cos_sim_op.cc index 4c8af408f62453eaf22cc23d19844e8ca7625bfa..8cde2cb0770c472df1a29c08ce62293f8af28d9e 100644 --- a/paddle/fluid/operators/cos_sim_op.cc +++ b/paddle/fluid/operators/cos_sim_op.cc @@ -153,8 +153,9 @@ class CosSimOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(cos_sim, ops::CosSimOp, ops::CosSimOpMaker, cos_sim_grad, - ops::CosSimOpGrad); +REGISTER_OPERATOR(cos_sim, ops::CosSimOp, ops::CosSimOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(cos_sim_grad, ops::CosSimOpGrad) REGISTER_OP_CPU_KERNEL( cos_sim, ops::CosSimKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc index fd7ea70c64fafd0a7ea55ec1e3a29eb66d84a2c6..a8f1fbd529c71d1915c75fa90b7e4e8239d2fa3f 100644 --- a/paddle/fluid/operators/crop_op.cc +++ b/paddle/fluid/operators/crop_op.cc @@ -153,7 +153,9 @@ class CropOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(crop, ops::CropOp, ops::CropOpMaker, crop_grad, ops::CropOpGrad); +REGISTER_OPERATOR(crop, ops::CropOp, ops::CropOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(crop_grad, ops::CropOpGrad); REGISTER_OP_CPU_KERNEL(crop, ops::CropKernel); REGISTER_OP_CPU_KERNEL( crop_grad, ops::CropGradKernel); diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 55810371c8d354483138b0673721a1ea39fa6f35..0ad87e511eac6f2b91b335253f3a63cc9b6b09ca 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -164,8 +164,9 @@ or not. But the output only shares the LoD information with input X. 
} // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, - cross_entropy_grad, ops::CrossEntropyGradientOp); +REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp) REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, ops::CrossEntropyOpKernel); REGISTER_OP_CPU_KERNEL(cross_entropy_grad, diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc index 16c612c45a37dd2ffd17f8d5f5946df30e9b3fe6..69fcffe9bc34006aef2e5a39227cf6d947e4615f 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc @@ -82,7 +82,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, platform::CPUPlace cpu; auto& gpu_dev_ctx = static_cast(ctx); - auto copy_size = tensor.memory_size(); + auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type()); payload = memory::Alloc(cpu, copy_size); memory::Copy(cpu, payload, @@ -99,7 +99,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, } else { payload = tensor.data(); } - payload_size = tensor.memory_size(); + payload_size = tensor.numel() * framework::SizeOfType(tensor.type()); e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size); } break; case framework::proto::VarType_Type_SELECTED_ROWS: { @@ -118,7 +118,8 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, platform::CPUPlace cpu; auto& gpu_dev_ctx = static_cast(ctx); - auto copy_size = tensor->memory_size(); + auto copy_size = + tensor->numel() * framework::SizeOfType(tensor->type()); payload = memory::Alloc(cpu, copy_size); memory::Copy(cpu, payload, boost::get(tensor->place()), @@ -133,7 +134,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, } else { payload = slr->mutable_value()->data(); } - payload_size = tensor->memory_size(); + payload_size = tensor->numel() * framework::SizeOfType(tensor->type()); e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size); } break; default: diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc index e4436549f6185ba04a5f270893596a6dcb11e89b..3b9882ab94fb6220c506f413496427f1edc3e21d 100644 --- a/paddle/fluid/operators/dropout_op.cc +++ b/paddle/fluid/operators/dropout_op.cc @@ -101,8 +101,9 @@ class DropoutOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(dropout, ops::DropoutOp, ops::DropoutOpMaker, dropout_grad, - ops::DropoutOpGrad); +REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad) REGISTER_OP_CPU_KERNEL( dropout, ops::CPUDropoutKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/elementwise_div_op.cc b/paddle/fluid/operators/elementwise_div_op.cc index 6f9a090c8ea660d023acece096b48d29aa2f35f7..f3dabb91334af27ccd812faaa4eee2a3ac6500bf 100644 --- a/paddle/fluid/operators/elementwise_div_op.cc +++ b/paddle/fluid/operators/elementwise_div_op.cc @@ -30,8 +30,10 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker, - 
elementwise_div_grad, ops::ElementwiseOpGrad); +REGISTER_OPERATOR(elementwise_div, ops::ElementwiseOp, + ops::ElementwiseDivOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(elementwise_div_grad, ops::ElementwiseOpGrad) REGISTER_OP_CPU_KERNEL( elementwise_div, ops::ElementwiseDivKernel, diff --git a/paddle/fluid/operators/elementwise_max_op.cc b/paddle/fluid/operators/elementwise_max_op.cc index 61da7c59441df22d71316b13f131399d3cd55f3a..385159e8ec13f12a6ceca3a0ca17a5534d78c679 100644 --- a/paddle/fluid/operators/elementwise_max_op.cc +++ b/paddle/fluid/operators/elementwise_max_op.cc @@ -29,8 +29,10 @@ class ElementwiseMaxOpMaker : public ElementwiseOpMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(elementwise_max, ops::ElementwiseOp, ops::ElementwiseMaxOpMaker, - elementwise_max_grad, ops::ElementwiseOpGrad); +REGISTER_OPERATOR(elementwise_max, ops::ElementwiseOp, + ops::ElementwiseMaxOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(elementwise_max_grad, ops::ElementwiseOpGrad) REGISTER_OP_CPU_KERNEL( elementwise_max, ops::ElementwiseMaxKernel, diff --git a/paddle/fluid/operators/elementwise_min_op.cc b/paddle/fluid/operators/elementwise_min_op.cc index c74ff36db17579182e3c7e93a5adc5fe79fbcadd..0b7ea4b1bf260ecea09c667dbb35c121e600e352 100644 --- a/paddle/fluid/operators/elementwise_min_op.cc +++ b/paddle/fluid/operators/elementwise_min_op.cc @@ -29,8 +29,10 @@ class ElementwiseMinOpMaker : public ElementwiseOpMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(elementwise_min, ops::ElementwiseOp, ops::ElementwiseMinOpMaker, - elementwise_min_grad, ops::ElementwiseOpGrad); +REGISTER_OPERATOR(elementwise_min, ops::ElementwiseOp, + ops::ElementwiseMinOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(elementwise_min_grad, ops::ElementwiseOpGrad) REGISTER_OP_CPU_KERNEL( elementwise_min, ops::ElementwiseMinKernel, diff --git a/paddle/fluid/operators/elementwise_mul_op.cc b/paddle/fluid/operators/elementwise_mul_op.cc index 5d7f2cdffd11dfef8df22175dd0570b277c0e13a..0e092924d7a79a85c306567db2a44c5b96b3972c 100644 --- a/paddle/fluid/operators/elementwise_mul_op.cc +++ b/paddle/fluid/operators/elementwise_mul_op.cc @@ -31,8 +31,10 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker, - elementwise_mul_grad, ops::ElementwiseOpGrad); +REGISTER_OPERATOR(elementwise_mul, ops::ElementwiseOp, + ops::ElementwiseMulOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(elementwise_mul_grad, ops::ElementwiseOpGrad) REGISTER_OP_CPU_KERNEL( elementwise_mul, ops::ElementwiseMulKernel, diff --git a/paddle/fluid/operators/elementwise_sub_op.cc b/paddle/fluid/operators/elementwise_sub_op.cc index 6f770820c80310a183018b586cb7545ca1e9de51..675ff8860b38e1bfe6c49843e3c3f0acfa803e2b 100644 --- a/paddle/fluid/operators/elementwise_sub_op.cc +++ b/paddle/fluid/operators/elementwise_sub_op.cc @@ -29,8 +29,10 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker, - elementwise_sub_grad, ops::ElementwiseOpGrad); +REGISTER_OPERATOR(elementwise_sub, ops::ElementwiseOp, + ops::ElementwiseSubOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(elementwise_sub_grad, ops::ElementwiseOpGrad) 
REGISTER_OP_CPU_KERNEL( elementwise_sub, ops::ElementwiseSubKernel, diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 51a66bd832fbdface953d9b7b509b32ce26d33ca..d69b769651855d8d86491967df19b5a920b78a18 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/operators/expand_op.h" +#include + namespace paddle { namespace operators { @@ -128,8 +130,9 @@ class ExpandGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(expand, ops::ExpandOp, ops::ExpandOpMaker, expand_grad, - ops::ExpandGradOp); +REGISTER_OPERATOR(expand, ops::ExpandOp, ops::ExpandOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(expand_grad, ops::ExpandGradOp) REGISTER_OP_CPU_KERNEL( expand, ops::ExpandKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index 381771f157d78fb04e54f0a07c40e4df2c91441a..5070a4b78d625c799f049296f0e3d0464cfee995 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -98,5 +98,6 @@ FCOpMaker::FCOpMaker(OpProto* proto, OpAttrChecker* op_checker) } // namespace operators } // namespace paddle -REGISTER_OP(fc, paddle::operators::FCOp, paddle::operators::FCOpMaker, fc_grad, - paddle::operators::FCOpGrad); +REGISTER_OPERATOR(fc, paddle::operators::FCOp, paddle::operators::FCOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(fc_grad, paddle::operators::FCOpGrad) diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc index 6be06b8816ce65641b49d7b7b3861cdd8460feaa..60075d9777e33d1b71bfefdd0b05e69252739b33 100644 --- a/paddle/fluid/operators/gather_op.cc +++ b/paddle/fluid/operators/gather_op.cc @@ -100,7 +100,8 @@ Out = [[3, 4], } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker, gather_grad, - ops::GatherGradOp); +REGISTER_OPERATOR(gather, ops::GatherOp, ops::GatherOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(gather_grad, ops::GatherGradOp) REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel); REGISTER_OP_CPU_KERNEL(gather_grad, ops::GatherGradientOpKernel); diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index 2490b83b8c50ce4a68095be10d78a380174c1a3f..b717c5909189ca585fdc0f098cca19000ee95322 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -216,7 +216,9 @@ class GRUGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(gru, ops::GRUOp, ops::GRUOpMaker, gru_grad, ops::GRUGradOp); +REGISTER_OPERATOR(gru, ops::GRUOp, ops::GRUOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(gru_grad, ops::GRUGradOp) REGISTER_OP_CPU_KERNEL( gru, ops::GRUKernel, ops::GRUKernel); diff --git a/paddle/fluid/operators/gru_unit_op.cc b/paddle/fluid/operators/gru_unit_op.cc index f4c766db0a12b9d2167b0ee3b1d7666c4f1813f1..8f75a67bc78a5829a5ef5fbe5ed2887368b55e57 100644 --- a/paddle/fluid/operators/gru_unit_op.cc +++ b/paddle/fluid/operators/gru_unit_op.cc @@ -198,8 +198,9 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker, gru_unit_grad, - ops::GRUUnitGradOp); 
+REGISTER_OPERATOR(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(gru_unit_grad, ops::GRUUnitGradOp) REGISTER_OP_CPU_KERNEL( gru_unit, ops::GRUUnitKernel, ops::GRUUnitKernel); diff --git a/paddle/fluid/operators/hinge_loss_op.cc b/paddle/fluid/operators/hinge_loss_op.cc index efe84f14098028675cb332efd9545c9709528cb3..d14935e771280564601fdab00a1a7cfaa7031dd3 100644 --- a/paddle/fluid/operators/hinge_loss_op.cc +++ b/paddle/fluid/operators/hinge_loss_op.cc @@ -103,8 +103,9 @@ class HingeLossGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(hinge_loss, ops::HingeLossOp, ops::HingeLossOpMaker, - hinge_loss_grad, ops::HingeLossGradOp); +REGISTER_OPERATOR(hinge_loss, ops::HingeLossOp, ops::HingeLossOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(hinge_loss_grad, ops::HingeLossGradOp) REGISTER_OP_CPU_KERNEL( hinge_loss, ops::HingeLossKernel); diff --git a/paddle/fluid/operators/huber_loss_op.cc b/paddle/fluid/operators/huber_loss_op.cc index 134b23b4612b478f9aeb06454c9fd9a6c25fffb4..0789c89bd13a7a07ac22d739cfbee2ef439aa966 100644 --- a/paddle/fluid/operators/huber_loss_op.cc +++ b/paddle/fluid/operators/huber_loss_op.cc @@ -121,8 +121,9 @@ class HuberLossGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(huber_loss, ops::HuberLossOp, ops::HuberLossOpMaker, - huber_loss_grad, ops::HuberLossGradOp); +REGISTER_OPERATOR(huber_loss, ops::HuberLossOp, ops::HuberLossOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(huber_loss_grad, ops::HuberLossGradOp) REGISTER_OP_CPU_KERNEL( huber_loss, ops::HuberLossKernel); diff --git a/paddle/fluid/operators/im2sequence_op.cc b/paddle/fluid/operators/im2sequence_op.cc index 5b387d8d344dfc3475a537827acd9e125fe6693c..593cf60c11fed9e77fa0328ea416790bd8a5437d 100644 --- a/paddle/fluid/operators/im2sequence_op.cc +++ b/paddle/fluid/operators/im2sequence_op.cc @@ -148,8 +148,9 @@ class Im2SequenceGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker, - im2sequence_grad, ops::Im2SequenceGradOp); +REGISTER_OPERATOR(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp) REGISTER_OP_CPU_KERNEL( im2sequence, ops::Im2SequenceKernel); diff --git a/paddle/fluid/operators/l1_norm_op.cc b/paddle/fluid/operators/l1_norm_op.cc index 963b0587c386c72c05f8cc5d0b63074e9e726579..ba7577c510619a7eb26cdd125d2e5f282050820e 100644 --- a/paddle/fluid/operators/l1_norm_op.cc +++ b/paddle/fluid/operators/l1_norm_op.cc @@ -67,8 +67,9 @@ $$Out = \sum{|X|}$$ } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(l1_norm, ops::L1NormOp, ops::L1NormOpMaker, l1_norm_grad, - ops::L1NormGradOp); +REGISTER_OPERATOR(l1_norm, ops::L1NormOp, ops::L1NormOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(l1_norm_grad, ops::L1NormGradOp) REGISTER_OP_CPU_KERNEL( l1_norm, ops::L1NormKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/label_smooth_op.cc b/paddle/fluid/operators/label_smooth_op.cc index c2a8c7f867a4483a7fda2f4336a64ab109ce86e8..663adc570010506dbe25b10339be9d639a525e57 100644 --- a/paddle/fluid/operators/label_smooth_op.cc +++ b/paddle/fluid/operators/label_smooth_op.cc @@ 
-117,8 +117,9 @@ class LabelSmoothGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(label_smooth, ops::LabelSmoothOp, ops::LabelSmoothOpMaker, - label_smooth_grad, ops::LabelSmoothGradOp); +REGISTER_OPERATOR(label_smooth, ops::LabelSmoothOp, ops::LabelSmoothOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(label_smooth_grad, ops::LabelSmoothGradOp) REGISTER_OP_CPU_KERNEL( label_smooth, ops::LabelSmoothKernel, diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc index 88b3b08af57eaf2d1086d778e3313c3dea6300fb..e033da857b21333c83de7a417f872d9f9ee1ce62 100644 --- a/paddle/fluid/operators/layer_norm_op.cc +++ b/paddle/fluid/operators/layer_norm_op.cc @@ -162,8 +162,9 @@ class LayerNormGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(layer_norm, ops::LayerNormOp, ops::LayerNormOpMaker, - layer_norm_grad, ops::LayerNormGradOp); +REGISTER_OPERATOR(layer_norm, ops::LayerNormOp, ops::LayerNormOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(layer_norm_grad, ops::LayerNormGradOp) REGISTER_OP_CPU_KERNEL( layer_norm, ops::LayerNormKernel, ops::LayerNormKernel); diff --git a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc index ef568a578b0b97ea402a2a521f0fe1431013d1b7..24b845528d75c7b24e2d60109ab2ebf7c474a51b 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ b/paddle/fluid/operators/linear_chain_crf_op.cc @@ -256,8 +256,10 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(linear_chain_crf, ops::LinearChainCRFOp, ops::LinearChainCRFOpMaker, - linear_chain_crf_grad, ops::LinearChainCRFGradOp); +REGISTER_OPERATOR(linear_chain_crf, ops::LinearChainCRFOp, + ops::LinearChainCRFOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(linear_chain_crf_grad, ops::LinearChainCRFGradOp) REGISTER_OP_CPU_KERNEL( linear_chain_crf, ops::LinearChainCRFOpKernel, diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc index 7d5687f2d0666d393d7bb1c1a2fdde6c95e6d615..fd1e1ffd469ed85c871a4debb7508717aa58c211 100644 --- a/paddle/fluid/operators/lod_reset_op.cc +++ b/paddle/fluid/operators/lod_reset_op.cc @@ -155,8 +155,9 @@ class LoDResetGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker, lod_reset_grad, - ops::LoDResetGradOp); +REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp) REGISTER_OP_CPU_KERNEL( lod_reset, ops::LoDResetKernel, ops::LoDResetKernel, diff --git a/paddle/fluid/operators/log_loss_op.cc b/paddle/fluid/operators/log_loss_op.cc index f44996d8ac746a33750a979eff2cbbc84e10214b..b1a68d28876eb5e3b2d464598da0b27632c52a6c 100644 --- a/paddle/fluid/operators/log_loss_op.cc +++ b/paddle/fluid/operators/log_loss_op.cc @@ -106,8 +106,9 @@ class LogLossGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(log_loss, ops::LogLossOp, ops::LogLossOpMaker, log_loss_grad, - ops::LogLossGradOp); +REGISTER_OPERATOR(log_loss, ops::LogLossOp, ops::LogLossOpMaker, + paddle::framework::DefaultGradOpDescMaker) 
+REGISTER_OPERATOR(log_loss_grad, ops::LogLossGradOp) REGISTER_OP_CPU_KERNEL( log_loss, ops::LogLossKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index 553a06c3dcdbb9de43afcace75ebec7c5e819d4a..6ff9a68ba488875b7b77a752d43f1e1d82bae49f 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -276,7 +276,9 @@ class LRNOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(lrn, ops::LRNOp, ops::LRNOpMaker, lrn_grad, ops::LRNOpGrad); +REGISTER_OPERATOR(lrn, ops::LRNOp, ops::LRNOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(lrn_grad, ops::LRNOpGrad) REGISTER_OP_CPU_KERNEL( lrn, ops::LRNKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/lstm_op.cc b/paddle/fluid/operators/lstm_op.cc index e062d62c66c25e386c7643e310034bc1481ec43d..75b9c65f1822df6d52838ef63b4261265485acf5 100644 --- a/paddle/fluid/operators/lstm_op.cc +++ b/paddle/fluid/operators/lstm_op.cc @@ -273,7 +273,9 @@ class LSTMGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(lstm, ops::LSTMOp, ops::LSTMOpMaker, lstm_grad, ops::LSTMGradOp); +REGISTER_OPERATOR(lstm, ops::LSTMOp, ops::LSTMOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(lstm_grad, ops::LSTMGradOp) REGISTER_OP_CPU_KERNEL( lstm, ops::LSTMKernel, ops::LSTMKernel); diff --git a/paddle/fluid/operators/lstm_unit_op.cc b/paddle/fluid/operators/lstm_unit_op.cc index b3c9d7c34d1ac54fb3e15a60bcc470f392bf5027..16d2dabd1d6d4da5c4b60ffb2ff6ee6d25507ca4 100644 --- a/paddle/fluid/operators/lstm_unit_op.cc +++ b/paddle/fluid/operators/lstm_unit_op.cc @@ -97,8 +97,9 @@ class LstmUnitGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(lstm_unit, ops::LstmUnitOp, ops::LstmUnitOpMaker, lstm_unit_grad, - ops::LstmUnitGradOp); +REGISTER_OPERATOR(lstm_unit, ops::LstmUnitOp, ops::LstmUnitOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(lstm_unit_grad, ops::LstmUnitGradOp) REGISTER_OP_CPU_KERNEL(lstm_unit, ops::LstmUnitKernel, ops::LstmUnitKernel); diff --git a/paddle/fluid/operators/lstmp_op.cc b/paddle/fluid/operators/lstmp_op.cc index 82541517e122d5da2674b55561ba72af970a2567..a575ade4723a615f464a91a93959c7eb99fae029 100644 --- a/paddle/fluid/operators/lstmp_op.cc +++ b/paddle/fluid/operators/lstmp_op.cc @@ -322,8 +322,9 @@ class LSTMPGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker, lstmp_grad, - ops::LSTMPGradOp); +REGISTER_OPERATOR(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(lstmp_grad, ops::LSTMPGradOp) REGISTER_OP_CPU_KERNEL( lstmp, ops::LSTMPKernel, ops::LSTMPKernel); diff --git a/paddle/fluid/operators/margin_rank_loss_op.cc b/paddle/fluid/operators/margin_rank_loss_op.cc index b146b5088321efcee5a4511b3fedd047a0d54f00..b3f64312337c74d61ebf7514806fdcb28cddd6ba 100644 --- a/paddle/fluid/operators/margin_rank_loss_op.cc +++ b/paddle/fluid/operators/margin_rank_loss_op.cc @@ -111,9 +111,10 @@ class MarginRankLossGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(margin_rank_loss, ops::MarginRankLossOp, - ops::MarginRankLossOpMaker, margin_rank_loss_grad, - 
ops::MarginRankLossGradOp); +REGISTER_OPERATOR(margin_rank_loss, ops::MarginRankLossOp, + ops::MarginRankLossOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(margin_rank_loss_grad, ops::MarginRankLossGradOp) REGISTER_OP_CPU_KERNEL( margin_rank_loss, ops::MarginRankLossKernel); diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 1f5255887391218b766aa23842e443c8b2ad080f..6a3507fbfc1619f4e745d1a1b4191053d69efdbf 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -237,8 +237,9 @@ class MatMulOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(matmul, ops::MatMulOp, ops::MatMulOpMaker, matmul_grad, - ops::MatMulOpGrad); +REGISTER_OPERATOR(matmul, ops::MatMulOp, ops::MatMulOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(matmul_grad, ops::MatMulOpGrad) REGISTER_OP_CPU_KERNEL( matmul, ops::MatMulKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/maxout_op.cc b/paddle/fluid/operators/maxout_op.cc index 4e28d98834d27351be99106d6760eae46baf8938..9144d1fab9b2bd937d6bc66d2ee6eed4427c7df3 100644 --- a/paddle/fluid/operators/maxout_op.cc +++ b/paddle/fluid/operators/maxout_op.cc @@ -101,8 +101,9 @@ class MaxOutOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(maxout, ops::MaxOutOp, ops::MaxOutOpMaker, maxout_grad, - ops::MaxOutOpGrad); +REGISTER_OPERATOR(maxout, ops::MaxOutOp, ops::MaxOutOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(maxout_grad, ops::MaxOutOpGrad) REGISTER_OP_CPU_KERNEL( maxout, ops::MaxOutKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/modified_huber_loss_op.cc b/paddle/fluid/operators/modified_huber_loss_op.cc index a8fbd48c4da5b2d0585688e3100f9fe62ac5aa1f..042a977d2e71cec6e710f10de15326f28ecf5dc4 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cc +++ b/paddle/fluid/operators/modified_huber_loss_op.cc @@ -108,9 +108,10 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(modified_huber_loss, ops::ModifiedHuberLossOp, - ops::ModifiedHuberLossOpMaker, modified_huber_loss_grad, - ops::ModifiedHuberLossGradOp); +REGISTER_OPERATOR(modified_huber_loss, ops::ModifiedHuberLossOp, + ops::ModifiedHuberLossOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(modified_huber_loss_grad, ops::ModifiedHuberLossGradOp) REGISTER_OP_CPU_KERNEL( modified_huber_loss, diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc index 5038287527c70d376d8c8a1cc8e4cca0b563126a..9a99e3878a963ce7346c1bc0135936568dbf85fe 100644 --- a/paddle/fluid/operators/mul_op.cc +++ b/paddle/fluid/operators/mul_op.cc @@ -160,7 +160,9 @@ class MulGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulGradOp); +REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(mul_grad, ops::MulGradOp) REGISTER_OP_CPU_KERNEL( mul, ops::MulKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index 99f38529bbb5a36cd944a01940b5579195f2d601..b471a7e59493c9e25cfe81822f319c0c098a97ef 100644 --- a/paddle/fluid/operators/nce_op.cc +++ 
b/paddle/fluid/operators/nce_op.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/operators/nce_op.h" +#include + namespace paddle { namespace operators { @@ -179,7 +181,9 @@ class NCEOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(nce, ops::NCEOp, ops::NCEOpMaker, nce_grad, ops::NCEOpGrad); +REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad) REGISTER_OP_CPU_KERNEL(nce, ops::NCEKernel, ops::NCEKernel); REGISTER_OP_CPU_KERNEL(nce_grad, diff --git a/paddle/fluid/operators/norm_op.cc b/paddle/fluid/operators/norm_op.cc index 5345c5bdb0f1e2d96233595f89028993606d2399..ff4d6ec69fadd910ac4e07c6397273e607296696 100644 --- a/paddle/fluid/operators/norm_op.cc +++ b/paddle/fluid/operators/norm_op.cc @@ -85,8 +85,9 @@ class NormOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(norm, ops::NormOp, ops::NormOpMaker, norm_grad, - ops::NormOpGrad); +REGISTER_OPERATOR(norm, ops::NormOp, ops::NormOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(norm_grad, ops::NormOpGrad) REGISTER_OP_CPU_KERNEL( norm, ops::NormKernel, ops::NormKernel); diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index b144ec5f7d315cb340dcd94b4a519bfcfd2a0e66..371100fd747df0270d20cb38d00a0ae8068dfc63 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -333,8 +333,9 @@ Example: namespace ops = paddle::operators; -REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad, - ops::PoolOpGrad); +REGISTER_OPERATOR(pool2d, ops::PoolOp, ops::Pool2dOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(pool2d_grad, ops::PoolOpGrad) REGISTER_OP_CPU_KERNEL( pool2d, ops::PoolKernel, @@ -343,8 +344,9 @@ REGISTER_OP_CPU_KERNEL( pool2d_grad, ops::PoolGradKernel, ops::PoolGradKernel) -REGISTER_OP(pool3d, ops::PoolOp, ops::Pool3dOpMaker, pool3d_grad, - ops::PoolOpGrad); +REGISTER_OPERATOR(pool3d, ops::PoolOp, ops::Pool3dOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(pool3d_grad, ops::PoolOpGrad) REGISTER_OP_CPU_KERNEL( pool3d, ops::PoolKernel, diff --git a/paddle/fluid/operators/pool_with_index_op.cc b/paddle/fluid/operators/pool_with_index_op.cc index 4df0a14577ca13ddd79424fc324eb689913b20a0..a633beab3b4497c3b5ea02e1987e98eeb86fc367 100644 --- a/paddle/fluid/operators/pool_with_index_op.cc +++ b/paddle/fluid/operators/pool_with_index_op.cc @@ -258,9 +258,10 @@ Example: namespace ops = paddle::operators; -REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp, - ops::MaxPool2dWithIndexOpMaker, max_pool2d_with_index_grad, - ops::MaxPoolWithIndexOpGrad); +REGISTER_OPERATOR(max_pool2d_with_index, ops::MaxPoolWithIndexOp, + ops::MaxPool2dWithIndexOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(max_pool2d_with_index_grad, ops::MaxPoolWithIndexOpGrad) REGISTER_OP_CPU_KERNEL( max_pool2d_with_index, @@ -274,9 +275,10 @@ REGISTER_OP_CPU_KERNEL( ops::MaxPoolWithIndexGradKernel) -REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, - ops::MaxPool3dWithIndexOpMaker, max_pool3d_with_index_grad, - ops::MaxPoolWithIndexOpGrad); +REGISTER_OPERATOR(max_pool3d_with_index, ops::MaxPoolWithIndexOp, + ops::MaxPool3dWithIndexOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(max_pool3d_with_index_grad, 
ops::MaxPoolWithIndexOpGrad) REGISTER_OP_CPU_KERNEL( max_pool3d_with_index, diff --git a/paddle/fluid/operators/prelu_op.cc b/paddle/fluid/operators/prelu_op.cc index 8eaa12a4a6cfc09fd4e2c3642bc8825fe2af6d6b..ef28114ef75f93427b389af67a599cc788004379 100644 --- a/paddle/fluid/operators/prelu_op.cc +++ b/paddle/fluid/operators/prelu_op.cc @@ -83,8 +83,9 @@ class PReluGradOp : public framework::OperatorWithKernel { namespace ops = paddle::operators; -REGISTER_OP(prelu, ops::PReluOp, ops::PReluOpMaker, prelu_grad, - ops::PReluGradOp); +REGISTER_OPERATOR(prelu, ops::PReluOp, ops::PReluOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(prelu_grad, ops::PReluGradOp) REGISTER_OP_CPU_KERNEL( prelu, ops::PReluKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/rank_loss_op.cc b/paddle/fluid/operators/rank_loss_op.cc index a1127f11a75e54168ca9682a0189255d37ee8571..865f03ec90814384a1f15f1ab5d05580f3ff13b9 100644 --- a/paddle/fluid/operators/rank_loss_op.cc +++ b/paddle/fluid/operators/rank_loss_op.cc @@ -121,8 +121,9 @@ class RankLossGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(rank_loss, ops::RankLossOp, ops::RankLossOpMaker, rank_loss_grad, - ops::RankLossGradOp); +REGISTER_OPERATOR(rank_loss, ops::RankLossOp, ops::RankLossOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(rank_loss_grad, ops::RankLossGradOp) REGISTER_OP_CPU_KERNEL( rank_loss, ops::RankLossKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/reduce_op.cc b/paddle/fluid/operators/reduce_op.cc index 7879367830216cdd875f9f95f95e2a88f282ac64..97bbc1dba641eeb0f7a96610ae3af4ba115426a6 100644 --- a/paddle/fluid/operators/reduce_op.cc +++ b/paddle/fluid/operators/reduce_op.cc @@ -14,6 +14,9 @@ limitations under the License. */ #include "paddle/fluid/operators/reduce_op.h" +#include +#include + namespace paddle { namespace operators { @@ -122,18 +125,18 @@ If reduce_all is true, just reduce along all dimensions and output a scalar. 
protected: std::string comment_; - void Replace(std::string &src, std::string from, std::string to) { + void Replace(std::string *src, std::string from, std::string to) { std::size_t len_from = std::strlen(from.c_str()); std::size_t len_to = std::strlen(to.c_str()); - for (std::size_t pos = src.find(from); pos != std::string::npos; - pos = src.find(from, pos + len_to)) { - src.replace(pos, len_from, to); + for (std::size_t pos = src->find(from); pos != std::string::npos; + pos = src->find(from, pos + len_to)) { + src->replace(pos, len_from, to); } } void SetComment(std::string name, std::string op) { - Replace(comment_, "{ReduceOp}", name); - Replace(comment_, "{reduce}", op); + Replace(&comment_, "{ReduceOp}", name); + Replace(&comment_, "{reduce}", op); } }; @@ -187,20 +190,25 @@ class ReduceProdOpMaker : public ReduceOpMaker { namespace ops = paddle::operators; -REGISTER_OP(reduce_sum, ops::ReduceOp, ops::ReduceSumOpMaker, reduce_sum_grad, - ops::ReduceGradOp); +REGISTER_OPERATOR(reduce_sum, ops::ReduceOp, ops::ReduceSumOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(reduce_sum_grad, ops::ReduceGradOp) -REGISTER_OP(reduce_mean, ops::ReduceOp, ops::ReduceMeanOpMaker, - reduce_mean_grad, ops::ReduceGradOp); +REGISTER_OPERATOR(reduce_mean, ops::ReduceOp, ops::ReduceMeanOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(reduce_mean_grad, ops::ReduceGradOp) -REGISTER_OP(reduce_max, ops::ReduceOp, ops::ReduceMaxOpMaker, reduce_max_grad, - ops::ReduceGradOp); +REGISTER_OPERATOR(reduce_max, ops::ReduceOp, ops::ReduceMaxOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(reduce_max_grad, ops::ReduceGradOp) -REGISTER_OP(reduce_min, ops::ReduceOp, ops::ReduceMinOpMaker, reduce_min_grad, - ops::ReduceGradOp); +REGISTER_OPERATOR(reduce_min, ops::ReduceOp, ops::ReduceMinOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(reduce_min_grad, ops::ReduceGradOp) -REGISTER_OP(reduce_prod, ops::ReduceOp, ops::ReduceProdOpMaker, - reduce_prod_grad, ops::ReduceGradOp); +REGISTER_OPERATOR(reduce_prod, ops::ReduceOp, ops::ReduceProdOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(reduce_prod_grad, ops::ReduceGradOp) #define REGISTER_REDUCE_CPU_KERNEL(reduce_type, functor, grad_functor) \ REGISTER_OP_CPU_KERNEL(reduce_type, \ diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 93f9c74b809770136d3d3300e0e0700b1bc0459e..e8ade16bde4af7811b436a29dd581c640f0fafc9 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -113,8 +113,9 @@ class ReshapeGradOp : public framework::OperatorWithKernel { namespace ops = paddle::operators; using CPU = paddle::platform::CPUDeviceContext; -REGISTER_OP(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, reshape_grad, - ops::ReshapeGradOp); +REGISTER_OPERATOR(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(reshape_grad, ops::ReshapeGradOp) REGISTER_OP_CPU_KERNEL(reshape, ops::ReshapeKernel, ops::ReshapeKernel, ops::ReshapeKernel, diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc index 6d4861f0428834b1893c3a10a83920f0a62b5455..4b0ea68e0e712293823729fe269843738f2694d1 100644 --- a/paddle/fluid/operators/roi_pool_op.cc +++ b/paddle/fluid/operators/roi_pool_op.cc @@ -153,8 +153,9 @@ https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn } // namespace paddle namespace ops = 
paddle::operators; -REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker, roi_pool_grad, - ops::ROIPoolGradOp); +REGISTER_OPERATOR(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(roi_pool_grad, ops::ROIPoolGradOp) REGISTER_OP_CPU_KERNEL( roi_pool, ops::CPUROIPoolOpKernel, diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index d34beeb6508084f4d680fad9bac99ea474d274d3..7e3d8d7d2fac20e3940053428af57b2eaa4ab1c1 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -250,8 +250,9 @@ class RowConvGradKernel } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(row_conv, ops::RowConvOp, ops::RowConvOpMaker, row_conv_grad, - ops::RowConvGradOp); +REGISTER_OPERATOR(row_conv, ops::RowConvOp, ops::RowConvOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(row_conv_grad, ops::RowConvGradOp) REGISTER_OP_CPU_KERNEL( row_conv, ops::RowConvKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/scatter_op.cc b/paddle/fluid/operators/scatter_op.cc index d6fd6214711f4ee66b1daffa4db2e84aa7201e79..0ad9e2ca2ec02c9328cc2c7dd849643bc82ec1c4 100644 --- a/paddle/fluid/operators/scatter_op.cc +++ b/paddle/fluid/operators/scatter_op.cc @@ -102,7 +102,8 @@ $$ } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(scatter, ops::ScatterOp, ops::ScatterOpMaker, scatter_grad, - ops::ScatterGradOp); +REGISTER_OPERATOR(scatter, ops::ScatterOp, ops::ScatterOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(scatter_grad, ops::ScatterGradOp) REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel); REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel); diff --git a/paddle/fluid/operators/sequence_concat_op.cc b/paddle/fluid/operators/sequence_concat_op.cc index 126753edd09e8bd0f9d5a08936afbc6326b29ace..55631c2b91025c25f204e4ba220269d4df2334e8 100644 --- a/paddle/fluid/operators/sequence_concat_op.cc +++ b/paddle/fluid/operators/sequence_concat_op.cc @@ -124,9 +124,11 @@ class SequenceConcatGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_EX(sequence_concat, ops::SequenceConcatOp, - ops::SequenceConcatOpMaker, sequence_concat_grad, - ops::SequenceConcatGradOp, false); +REGISTER_OPERATOR(sequence_concat, ops::SequenceConcatOp, + ops::SequenceConcatOpMaker, + paddle::framework::DefaultGradOpDescMaker< + false> /* set false to disable empty grad */) +REGISTER_OPERATOR(sequence_concat_grad, ops::SequenceConcatGradOp); REGISTER_OP_CPU_KERNEL( sequence_concat, ops::SequenceConcatOpKernel); diff --git a/paddle/fluid/operators/sequence_conv_op.cc b/paddle/fluid/operators/sequence_conv_op.cc index ec1f3a5da8c1fc8933b3720802ea901695195dec..57a1febcc4dc357c01682c29f95f579e73481453 100644 --- a/paddle/fluid/operators/sequence_conv_op.cc +++ b/paddle/fluid/operators/sequence_conv_op.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/operators/sequence_conv_op.h" +#include + namespace paddle { namespace operators { @@ -174,8 +176,9 @@ context_length, context_stride and context_start. 
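One change above that is easy to miss among the registration rewrites is in `reduce_op.cc`: the `ReduceOpMaker::Replace` helper now takes its target string by pointer rather than by non-const reference, and `SetComment` passes `&comment_`. The behaviour is unchanged; only the call-site convention differs. A self-contained sketch of the same helper, with a hypothetical comment template, is:

```cpp
#include <cstring>
#include <iostream>
#include <string>

// Pointer-style Replace, mirroring the reduce_op.cc hunk above: every
// occurrence of `from` in *src is replaced with `to`.
void Replace(std::string* src, std::string from, std::string to) {
  std::size_t len_from = std::strlen(from.c_str());
  std::size_t len_to = std::strlen(to.c_str());
  for (std::size_t pos = src->find(from); pos != std::string::npos;
       pos = src->find(from, pos + len_to)) {
    src->replace(pos, len_from, to);
  }
}

int main() {
  // Hypothetical comment template in the style of ReduceOpMaker::SetComment.
  std::string comment = "{ReduceOp} Operator. Computes the {reduce} of the input.";
  Replace(&comment, "{ReduceOp}", "reduce_sum");
  Replace(&comment, "{reduce}", "sum");
  std::cout << comment << std::endl;  // reduce_sum Operator. Computes the sum of the input.
  return 0;
}
```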
} // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sequence_conv, ops::SequenceConvOp, ops::SequenceConvOpMaker, - sequence_conv_grad, ops::SequenceConvGradOp); +REGISTER_OPERATOR(sequence_conv, ops::SequenceConvOp, ops::SequenceConvOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(sequence_conv_grad, ops::SequenceConvGradOp) REGISTER_OP_CPU_KERNEL( sequence_conv, diff --git a/paddle/fluid/operators/sequence_expand_op.cc b/paddle/fluid/operators/sequence_expand_op.cc index ae52849162ae4d78cc69ddbb98f58059f55683cb..ae05f94577a89435160f911d0954ad32bd87f4d2 100644 --- a/paddle/fluid/operators/sequence_expand_op.cc +++ b/paddle/fluid/operators/sequence_expand_op.cc @@ -200,8 +200,10 @@ class SequenceExpandOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sequence_expand, ops::SequenceExpandOp, ops::SequenceExpandOpMaker, - sequence_expand_grad, ops::SequenceExpandOpGrad); +REGISTER_OPERATOR(sequence_expand, ops::SequenceExpandOp, + ops::SequenceExpandOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(sequence_expand_grad, ops::SequenceExpandOpGrad) REGISTER_OP_CPU_KERNEL( sequence_expand, ops::SequenceExpandKernel, diff --git a/paddle/fluid/operators/sequence_slice_op.cc b/paddle/fluid/operators/sequence_slice_op.cc index d09e5bca56b226100d2d0cf3a030c77703bfa76e..df88121e6f8a2ebcd89145f4a4a44fbdc541424d 100644 --- a/paddle/fluid/operators/sequence_slice_op.cc +++ b/paddle/fluid/operators/sequence_slice_op.cc @@ -120,8 +120,10 @@ NOTE: The first dimension size of input, the size of offset and Length, should b } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker, - sequence_slice_grad, ops::SequenceSliceGradOp); +REGISTER_OPERATOR(sequence_slice, ops::SequenceSliceOp, + ops::SequenceSliceOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(sequence_slice_grad, ops::SequenceSliceGradOp) REGISTER_OP_CPU_KERNEL( sequence_slice, ops::SequenceSliceOpKernel); diff --git a/paddle/fluid/operators/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_softmax_op.cc index d2c1317bef95deca36f7f4198407f5350a1be035..47ba9a7445ce6b7039fd8fdcfd383fe370d13f74 100644 --- a/paddle/fluid/operators/sequence_softmax_op.cc +++ b/paddle/fluid/operators/sequence_softmax_op.cc @@ -155,9 +155,10 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sequence_softmax, ops::SequenceSoftmaxOp, - ops::SequenceSoftmaxOpMaker, sequence_softmax_grad, - ops::SequenceSoftmaxGradOp); +REGISTER_OPERATOR(sequence_softmax, ops::SequenceSoftmaxOp, + ops::SequenceSoftmaxOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(sequence_softmax_grad, ops::SequenceSoftmaxGradOp) REGISTER_OP_CPU_KERNEL( sequence_softmax, ops::SequenceSoftmaxKernel, diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc index 7b93f19bb2f7102824852aa181e3728f79025121..442e1fef4c3c02dc5d5c392ca17f3dfa92cd5aea 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -135,11 +135,12 @@ However the output only shares the LoD with input `X`. 
} // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sigmoid_cross_entropy_with_logits, - ops::SigmoidCrossEntropyWithLogitsOp, - ops::SigmoidCrossEntropyWithLogitsOpMaker, - sigmoid_cross_entropy_with_logits_grad, - ops::SigmoidCrossEntropyWithLogitsGradOp); +REGISTER_OPERATOR(sigmoid_cross_entropy_with_logits, + ops::SigmoidCrossEntropyWithLogitsOp, + ops::SigmoidCrossEntropyWithLogitsOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(sigmoid_cross_entropy_with_logits_grad, + ops::SigmoidCrossEntropyWithLogitsGradOp) REGISTER_OP_CPU_KERNEL(sigmoid_cross_entropy_with_logits, ops::SigmoidCrossEntropyWithLogitsKernel< paddle::platform::CPUDeviceContext, float>); diff --git a/paddle/fluid/operators/smooth_l1_loss_op.cc b/paddle/fluid/operators/smooth_l1_loss_op.cc index 658eb0195212cc3038fce6aab0ec3804efc59edf..3c15f0542b3f97c1ff17084c43850d9a6e264cda 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op.cc +++ b/paddle/fluid/operators/smooth_l1_loss_op.cc @@ -132,8 +132,9 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(smooth_l1_loss, ops::SmoothL1LossOp, ops::SmoothL1LossOpMaker, - smooth_l1_loss_grad, ops::SmoothL1LossGradOp); +REGISTER_OPERATOR(smooth_l1_loss, ops::SmoothL1LossOp, ops::SmoothL1LossOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(smooth_l1_loss_grad, ops::SmoothL1LossGradOp) REGISTER_OP_CPU_KERNEL( smooth_l1_loss, ops::SmoothL1LossKernel); diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index e1f286f9ba42ff22fffbfc012832dd751a37c1d0..7c75a45fee8e455fafd43657fcb7a9db461957e8 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -160,8 +160,9 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { namespace ops = paddle::operators; -REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker, softmax_grad, - ops::SoftmaxOpGrad); +REGISTER_OPERATOR(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(softmax_grad, ops::SoftmaxOpGrad) REGISTER_OP_CPU_KERNEL( softmax, ops::SoftmaxKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/split_byref_op.cc b/paddle/fluid/operators/split_byref_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..7413ce3e9ce60ed733bb4d27e9ec205e5f0a7e1b --- /dev/null +++ b/paddle/fluid/operators/split_byref_op.cc @@ -0,0 +1,101 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/split_byref_op.h" +#include "paddle/fluid/operators/split_op.h" + +namespace paddle { +namespace operators { +using framework::Tensor; + +class SplitByrefOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of SplitOp should not be null."); + PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, + "Outputs(Out) of SplitOp should not be empty."); + auto in_dims = ctx->GetInputDim("X"); + auto outs_names = ctx->Outputs("Out"); + size_t num = static_cast(ctx->Attrs().Get("num")); + std::vector sections = static_cast>( + ctx->Attrs().Get>("sections")); + const size_t outs_number = outs_names.size(); + std::vector outs_dims; + outs_dims.reserve(outs_number); + + if (num > 0) { + int64_t in_axis_dim = in_dims[0]; + PADDLE_ENFORCE_EQ(in_axis_dim % num, 0, + "tensor split does not result" + " in an equal division"); + size_t out_axis_dim = in_axis_dim / num; + for (size_t i = 0; i < outs_number; ++i) { + auto dim = in_dims; + dim[0] = out_axis_dim; + outs_dims.push_back(dim); + } + } else if (sections.size() > 0) { + PADDLE_ENFORCE_EQ(sections.size(), outs_number, + "tensor split sections size" + "should be equal to output size."); + for (size_t i = 0; i < outs_number; ++i) { + auto dim = in_dims; + dim[0] = sections[i]; + outs_dims.push_back(dim); + } + } + ctx->SetOutputsDim("Out", outs_dims); + } +}; + +class SplitByrefOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SplitByrefOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(Tensor) Input tensor of the split operator."); + AddOutput("Out", "(Tensor) Output tensors of the split operator.") + .AsDuplicable(); + AddComment(R"DOC( +SplitByref operator + +Split source tensor to sevaral tensors by axis 0. No copy in this operator +is performed, output tensor shares the same blocks of memory. +)DOC"); + AddAttr>("sections", + "(vector) " + "the length of each output along the " + "specified axis.") + .SetDefault(std::vector{}); + AddAttr("num", + "(int, default 0)" + "Number of sub-tensors. This must evenly divide " + "Input.dims()[axis]") + .SetDefault(0); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +// NOTE: concat op default axis must be 0! +USE_CPU_ONLY_OP(concat); + +REGISTER_OPERATOR(split_byref, ops::SplitByrefOp, ops::SplitByrefOpMaker, + ops::SplitGradMaker); +REGISTER_OP_CPU_KERNEL( + split_byref, ops::SplitByrefOpKernel); diff --git a/paddle/fluid/operators/split_byref_op.cu.cc b/paddle/fluid/operators/split_byref_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..5ee6186f3541b7dcb845ce0c6d28081685925da0 --- /dev/null +++ b/paddle/fluid/operators/split_byref_op.cu.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/split_byref_op.h" +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + split_byref, + ops::SplitByrefOpKernel); diff --git a/paddle/fluid/operators/split_byref_op.h b/paddle/fluid/operators/split_byref_op.h new file mode 100644 index 0000000000000000000000000000000000000000..a3aad68ea736e223d3917607cca17f5cccfef630 --- /dev/null +++ b/paddle/fluid/operators/split_byref_op.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class SplitByrefOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto outs = ctx.MultiOutput("Out"); + auto place = ctx.GetPlace(); + + size_t row_offset = 0; + for (size_t i = 0; i < outs.size(); ++i) { + // NOTE: no need to call mutable_data here to allocate memory. + auto* out = outs[i]; + VLOG(3) << "spliting by ref: " << row_offset << " " << out->dims()[0]; + *out = std::move(in->Slice(row_offset, row_offset + out->dims()[0])); + row_offset += out->dims()[0]; + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index e745509ec8c1f2ec305d7d4aabfdd43d847124b5..a4398df36bcc2d3b8bbe8949f27f5d6508861d95 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -108,21 +108,6 @@ Example: } }; -class SplitGradMaker : public framework::SingleGradOpDescMaker { - public: - using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; - - protected: - std::unique_ptr Apply() const override { - auto op = new framework::OpDesc(); - op->SetType("concat"); - op->SetInput("X", OutputGrad("Out")); - op->SetOutput("Out", InputGrad("X")); - op->SetAttrMap(Attrs()); - return std::unique_ptr(op); - } -}; - } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h index e2c41f44ab3ea3c42837974dae749278c9356ba5..f0c417c70521b1bb3816f884d6ab7393473999e4 100644 --- a/paddle/fluid/operators/split_op.h +++ b/paddle/fluid/operators/split_op.h @@ -44,5 +44,20 @@ class SplitOpKernel : public framework::OpKernel { } }; +class SplitGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto op = new framework::OpDesc(); + op->SetType("concat"); + op->SetInput("X", OutputGrad("Out")); + op->SetOutput("Out", InputGrad("X")); + op->SetAttrMap(Attrs()); + return std::unique_ptr(op); + } +}; + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/spp_op.cc b/paddle/fluid/operators/spp_op.cc index 
8c55b4ebbc88f696e99b1194055bed3b0d0b3f0b..f286807159dc3a6ec61b0110c43278e9aa8be548 100644 --- a/paddle/fluid/operators/spp_op.cc +++ b/paddle/fluid/operators/spp_op.cc @@ -92,7 +92,9 @@ class SppOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(spp, ops::SppOp, ops::SppOpMaker, spp_grad, ops::SppOpGrad); +REGISTER_OPERATOR(spp, ops::SppOp, ops::SppOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(spp_grad, ops::SppOpGrad) REGISTER_OP_CPU_KERNEL( spp, ops::SppKernel, ops::SppKernel); diff --git a/paddle/fluid/operators/squared_l2_distance_op.cc b/paddle/fluid/operators/squared_l2_distance_op.cc index 1c5e87040a8dd74b98d8e31bfe351ea256e01f15..11e5faac398712b3d3c9fb54b5e0a51d0100ab92 100644 --- a/paddle/fluid/operators/squared_l2_distance_op.cc +++ b/paddle/fluid/operators/squared_l2_distance_op.cc @@ -109,9 +109,10 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(squared_l2_distance, ops::SquaredL2DistanceOp, - ops::SquaredL2DistanceOpMaker, squared_l2_distance_grad, - ops::SquaredL2DistanceGradOp); +REGISTER_OPERATOR(squared_l2_distance, ops::SquaredL2DistanceOp, + ops::SquaredL2DistanceOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(squared_l2_distance_grad, ops::SquaredL2DistanceGradOp) REGISTER_OP_CPU_KERNEL( squared_l2_distance, ops::SquaredL2DistanceKernel); diff --git a/paddle/fluid/operators/squared_l2_norm_op.cc b/paddle/fluid/operators/squared_l2_norm_op.cc index b64df2a218860be3adb3954e07b036c05bf05c8e..a60c1009487381c5553b0e4892658221ff67b247 100644 --- a/paddle/fluid/operators/squared_l2_norm_op.cc +++ b/paddle/fluid/operators/squared_l2_norm_op.cc @@ -67,8 +67,10 @@ $$Out = \sum_{i} X_{i}^2$$ } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(squared_l2_norm, ops::SquaredL2NormOp, ops::SquaredL2NormOpMaker, - squared_l2_norm_grad, ops::SquaredL2NormGradOp); +REGISTER_OPERATOR(squared_l2_norm, ops::SquaredL2NormOp, + ops::SquaredL2NormOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(squared_l2_norm_grad, ops::SquaredL2NormGradOp) REGISTER_OP_CPU_KERNEL( squared_l2_norm, ops::SquaredL2NormKernel); diff --git a/paddle/fluid/operators/top_k_op.h b/paddle/fluid/operators/top_k_op.h index 9f8482adedb4c29e32d4109941a2752d942ae49f..d44eeae8e6ff9ac87ab093d04e3f5427743f0c08 100644 --- a/paddle/fluid/operators/top_k_op.h +++ b/paddle/fluid/operators/top_k_op.h @@ -24,7 +24,6 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; -using LoDTensor = framework::LoDTensor; template @@ -36,9 +35,9 @@ class TopkKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { // Get the top k elements of each row of input tensor // FIXME: only deal with matrix(2d tensor). 
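Two related pieces earlier in this patch are worth connecting: the new `split_byref` operator slices its input along axis 0 so that each output shares the input's memory rather than copying it, and `SplitGradMaker` moves from `split_op.cc` into `split_op.h`, where the `split_byref` registration picks it up. The maker expresses the gradient of a split as a `concat` of the output gradients. The sketch below spells out the `framework::OpDesc` smart-pointer types that the hunks leave implicit, so treat the exact signatures as an assumption:

```cpp
// Sketch of the grad maker shared via split_op.h, assuming the
// framework::SingleGradOpDescMaker interface named in the hunks above.
// The gradient of "split" is a "concat" of the gradients of its outputs.
class SplitGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto op = new framework::OpDesc();
    op->SetType("concat");                 // grad of split = concat
    op->SetInput("X", OutputGrad("Out"));  // each dOut_i becomes a concat input
    op->SetOutput("Out", InputGrad("X"));  // the concatenated result is dX
    op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(op);
  }
};
```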
- auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + auto* indices = ctx.Output("Indices"); // k is determined by Attr const size_t k = static_cast(ctx.Attr("k")); diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index 4aea9cd65bed615c84c95d891a0a4092678e1444..0f60dbf289555e3806f922ec43c80f079c774169 100644 --- a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -118,8 +118,9 @@ class TransposeOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(transpose, ops::TransposeOp, ops::TransposeOpMaker, transpose_grad, - ops::TransposeOpGrad); +REGISTER_OPERATOR(transpose, ops::TransposeOp, ops::TransposeOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(transpose_grad, ops::TransposeOpGrad) REGISTER_OP_CPU_KERNEL( transpose, ops::TransposeKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/unpool_op.cc b/paddle/fluid/operators/unpool_op.cc index 31859fd1d70dc6e6387258cd5f7412e78a302567..92a79269c2d1f0a56935389a80b9665c73334b31 100644 --- a/paddle/fluid/operators/unpool_op.cc +++ b/paddle/fluid/operators/unpool_op.cc @@ -132,8 +132,9 @@ class UnpoolOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(unpool, ops::UnpoolOp, ops::Unpool2dOpMaker, unpool_grad, - ops::UnpoolOpGrad); +REGISTER_OPERATOR(unpool, ops::UnpoolOp, ops::Unpool2dOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(unpool_grad, ops::UnpoolOpGrad) REGISTER_OP_CPU_KERNEL( unpool, ops::UnpoolKernel, ops::UnpoolKernel); diff --git a/paddle/fluid/operators/warpctc_op.cc b/paddle/fluid/operators/warpctc_op.cc index 940bf4fe7baa6a01a2143374b502c61d0b55fd77..ed81b5d266d678e88dabd32e74c4f111cd34b0c1 100644 --- a/paddle/fluid/operators/warpctc_op.cc +++ b/paddle/fluid/operators/warpctc_op.cc @@ -132,8 +132,9 @@ class WarpCTCGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(warpctc, ops::WarpCTCOp, ops::WarpCTCOpMaker, warpctc_grad, - ops::WarpCTCGradOp); +REGISTER_OPERATOR(warpctc, ops::WarpCTCOp, ops::WarpCTCOpMaker, + paddle::framework::DefaultGradOpDescMaker) +REGISTER_OPERATOR(warpctc_grad, ops::WarpCTCGradOp) REGISTER_OP_CPU_KERNEL( warpctc, ops::WarpCTCKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index ca9ab2c7aecff47924f0198802d710b7661f5576..0013597fd516d15c7d502370eec77e1a6a5dca88 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -39,20 +39,19 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) { class NCCLGroupGuard { public: + static std::mutex &NCCLMutex() { + static std::mutex mtx; + return mtx; + } + inline NCCLGroupGuard() { - mutex().lock(); + NCCLMutex().lock(); PADDLE_ENFORCE(dynload::ncclGroupStart()); } inline ~NCCLGroupGuard() { PADDLE_ENFORCE(dynload::ncclGroupEnd()); - mutex().unlock(); - } - - private: - static std::mutex &mutex() { - static std::mutex mtx; - return mtx; + NCCLMutex().unlock(); } }; @@ -68,26 +67,6 @@ struct NCCLContext { int device_id() const { return boost::get(ctx_->GetPlace()).device; } - - static void InitNCCLContext(std::unordered_map *contexts, - const std::vector &places) { - std::vector comms; - 
std::vector devs; - comms.resize(contexts->size()); - devs.reserve(contexts->size()); - - for (auto &p : places) { - devs.push_back(boost::get(p).device); - } - - PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( - &comms[0], static_cast(contexts->size()), &devs[0])); - - int i = 0; - for (auto &dev_id : devs) { - contexts->at(dev_id).comm_ = comms[i++]; - } - } }; struct NCCLContextMap { @@ -107,12 +86,12 @@ struct NCCLContextMap { "NCCL Context Map does not support contain two or more same device"); if (places.size() > 1) { - std::vector comms; - comms.resize(order_.size()); - - PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( - &comms[0], static_cast(order_.size()), &order_[0])); - + std::unique_ptr comms(new ncclComm_t[order_.size()]); + { + std::lock_guard guard(NCCLGroupGuard::NCCLMutex()); + PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( + comms.get(), static_cast(order_.size()), order_.data())); + } int i = 0; for (auto &dev_id : order_) { contexts_.at(dev_id).comm_ = comms[i++]; @@ -120,6 +99,9 @@ struct NCCLContextMap { } } + NCCLContextMap(const NCCLContextMap &other) = delete; + NCCLContextMap &operator=(const NCCLContextMap &other) = delete; + CUDADeviceContext *DevCtx(int dev_id) const { return at(dev_id).ctx_.get(); } CUDADeviceContext *DevCtx(platform::Place p) const { diff --git a/python/paddle/fluid/distribute_transpiler.py b/python/paddle/fluid/distribute_transpiler.py index aa15392d7e4901e8ee23ad5b4370542232adc2a5..50be386c51f89cb5fb50f4e5e3cb8bd84ccf267b 100644 --- a/python/paddle/fluid/distribute_transpiler.py +++ b/python/paddle/fluid/distribute_transpiler.py @@ -420,13 +420,14 @@ class DistributeTranspiler: # append op to the current block per_opt_block = append_block - for _, opt_op in enumerate(opt_op_on_pserver): + for idx, opt_op in enumerate(opt_op_on_pserver): for _, op in enumerate(self.optimize_ops): # optimizer is connected to itself if ufind.is_connected(op, opt_op) and \ op not in global_ops: __append_optimize_op__(op, per_opt_block) - per_opt_block = pserver_program.create_block(append_block.idx) + if idx == len(opt_op_on_pserver) - 1 and global_ops: + per_opt_block = pserver_program.create_block(append_block.idx) # append global ops for glb_op in global_ops: @@ -824,7 +825,7 @@ class DistributeTranspiler: for v in splited_vars: sections.append(v.shape[0]) program.global_block().append_op( - type="split", + type="split_byref", inputs={"X": orig_var}, outputs={"Out": splited_vars}, attrs={"sections": sections} # assume split evenly diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index b9a53eda9144e9e56cf9bc626db40cf4225bd87f..4b707973e27391a6bdcba138934f62a255e04bb2 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -32,7 +32,6 @@ __all__ = [ 'Switch', 'lod_rank_table', 'max_sequence_len', - 'topk', 'lod_tensor_to_array', 'array_to_lod_tensor', 'increment', @@ -751,43 +750,6 @@ def max_sequence_len(rank_table): return res -def topk(input, k): - """ - **topk** - - This function performs the operation that selects the k entries in the input - vector and outputs their values and indices as vectors. Thus topk_out[j] is - the j-th largest entry in input, and its index is topk_indices[j] - - Args: - input (Variable|list): The input tensor that has all the data. - k (int): The number of top elements that the function will pick. - - Returns: - Variable: The variable of type array that contains the k largest entries - from input. 
- Variable: The variable of type array that contains the indices of k - largest entries from input. - - Examples: - .. code-block:: python - - x = fluid.layers.data(name='x', shape=[10]) - k = 5 - array = fluid.layers.topk(x, k) - """ - helper = LayerHelper('topk', **locals()) - topk_out = helper.create_tmp_variable(dtype=input.dtype) - topk_indices = helper.create_tmp_variable(dtype='int64') - helper.append_op( - type='top_k', - inputs={'X': [input]}, - outputs={'Out': [topk_out], - 'Indices': [topk_indices]}, - attrs={'k': k}) - return topk_out, topk_indices - - def lod_tensor_to_array(x, table): """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY. diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 65b95a58d6546ed6d6b264443a7c802e16eef23f..d13c54daa5a985e2e1bf9357630fe29d24a17bb4 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -20,7 +20,7 @@ from ..initializer import init_on_cpu __all__ = [ 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', - 'polynomial_decay', 'piecewise_decay' + 'polynomial_decay', 'piecewise_decay', 'noam_decay' ] """ When training a model, it's often useful to decay the @@ -32,14 +32,41 @@ strategy according to this module. """ -def _decay_step_counter(): +def _decay_step_counter(begin=0): # the first global step is zero in learning rate decay global_step = nn.autoincreased_step_counter( - counter_name='@LR_DECAY_COUNTER@', begin=0, step=1) + counter_name='@LR_DECAY_COUNTER@', begin=begin, step=1) global_step = tensor.cast(global_step, 'float32') return global_step +def noam_decay(d_model, warmup_steps): + """Apply decay to learning rate. + ```python + lr_value = np.power(d_model, -0.5) * np.min([ + np.power(current_steps, -0.5), + np.power(warmup_steps, -1.5) * current_steps + ]) + ``` + + Args: + d_model(Variable): The dimensionality of input and output of model. + Reference: attention is all you need + https://arxiv.org/pdf/1706.03762.pdf + warmup_steps(Variable): A super parameter. + + Returns: + The decayed learning rate. + """ + global_step = _decay_step_counter(1) + with init_on_cpu(): + a = global_step**-0.5 + b = (warmup_steps**-1.5) * global_step + lr_value = (d_model**-0.5) * ops.elementwise_min(a, b) + + return lr_value + + def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): """Applies exponential decay to the learning rate. diff --git a/python/paddle/fluid/layers/metric.py b/python/paddle/fluid/layers/metric.py index f66dccfa2d040ea0a9d29daeaa1d2da640525959..cab2eb55510542bdd4dd7eca7667601697759181 100644 --- a/python/paddle/fluid/layers/metric.py +++ b/python/paddle/fluid/layers/metric.py @@ -20,6 +20,7 @@ from ..layer_helper import LayerHelper from ..initializer import Normal, Constant from ..framework import Variable from ..param_attr import ParamAttr +import nn __all__ = ['accuracy', 'auc'] @@ -27,17 +28,10 @@ __all__ = ['accuracy', 'auc'] def accuracy(input, label, k=1, correct=None, total=None): """ This function computes the accuracy using the input and label. - The output is the top_k inputs and their indices. + The output is the top k inputs and their indices. 
""" helper = LayerHelper("accuracy", **locals()) - topk_out = helper.create_tmp_variable(dtype=input.dtype) - topk_indices = helper.create_tmp_variable(dtype="int64") - helper.append_op( - type="top_k", - inputs={"X": [input]}, - outputs={"Out": [topk_out], - "Indices": [topk_indices]}, - attrs={"k": k}) + topk_out, topk_indices = nn.topk(input, k=k) acc_out = helper.create_tmp_variable(dtype="float32") if correct is None: correct = helper.create_tmp_variable(dtype="int64") @@ -68,12 +62,7 @@ def auc(input, label, curve='ROC', num_thresholds=200): helper = LayerHelper("auc", **locals()) topk_out = helper.create_tmp_variable(dtype=input.dtype) topk_indices = helper.create_tmp_variable(dtype="int64") - helper.append_op( - type="top_k", - inputs={"X": [input]}, - outputs={"Out": [topk_out], - "Indices": [topk_indices]}, - attrs={"k": k}) + topk_out, topk_indices = nn.topk(input, k=k) auc_out = helper.create_tmp_variable(dtype="float32") if correct is None: correct = helper.create_tmp_variable(dtype="int64") diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 2993cb973456836ab124cdb267dbb92c45fcecbc..752f4689befd791da5c5c9626ffec3331f448f41 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -60,6 +60,7 @@ __all__ = [ 'edit_distance', 'l2_normalize', 'matmul', + 'topk', 'warpctc', 'sequence_reshape', 'transpose', @@ -2576,6 +2577,53 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None): return out +def topk(input, k): + """ + This operator is used to find values and indices of the k largest entries + for the last dimension. + + If the input is a vector (rank=1), finds the k largest entries in the vector + and outputs their values and indices as vectors. Thus values[j] is the j-th + largest entry in input, and its index is indices[j]. + + If the input is a Tensor with higher rank, this operator computes the top k + entries along the last dimension. + + Args: + input(Variable): The input variable which can be a vector or Tensor with + higher rank. + k(int): An integer value to specify the top k largest elements. + + Returns: + values(Variable): The k largest elements along each last dimensional + slice. + indices(Variable): The indices of values within the last dimension of + input. + + Examples: + .. code-block:: python + + top5_values, top5_indices = layers.topk(input, k=5) + """ + shape = input.shape + if k < 1 and k >= shape[-1]: + raise ValueError("k must be greater than 0 and less than %d." 
% + (shape[-1])) + + helper = LayerHelper("top_k", **locals()) + values = helper.create_tmp_variable(dtype=input.dtype) + indices = helper.create_tmp_variable(dtype="int64") + helper.append_op( + type="top_k", + inputs={"X": [input]}, + outputs={"Out": [values], + "Indices": [indices]}, + attrs={"k": k}) + values.stop_gradient = True + indices.stop_gradient = True + return values, indices + + def edit_distance(input, label, normalized=True, ignored_tokens=None, name=None): """ @@ -2717,15 +2765,7 @@ def ctc_greedy_decoder(input, blank, name=None): cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0) """ helper = LayerHelper("ctc_greedy_decoder", **locals()) - # top 1 op - topk_out = helper.create_tmp_variable(dtype=input.dtype) - topk_indices = helper.create_tmp_variable(dtype="int64") - helper.append_op( - type="top_k", - inputs={"X": [input]}, - outputs={"Out": [topk_out], - "Indices": [topk_indices]}, - attrs={"k": 1}) + _, topk_indices = topk(input, k=1) # ctc align op ctc_out = helper.create_tmp_variable(dtype="int64") diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 07cc1e29341bd497e88097a9ee5653631b79d734..fbdd6fd449625a21f91758dc12490b02070aea1a 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -16,6 +16,7 @@ import core import multiprocessing import framework import executor +import warnings import sys __all__ = ['ParallelExecutor'] @@ -62,8 +63,8 @@ class ParallelExecutor(object): main_program=test_program, share_vars_from=train_exe) - train_loss, = train_exe.run([loss.name], feed_dict=feed_dict) - test_loss, = test_exe.run([loss.name], feed_dict=feed_dict) + train_loss, = train_exe.run([loss.name], feed=feed_dict) + test_loss, = test_exe.run([loss.name], feed=feed_dict) """ self._places = [] @@ -103,8 +104,8 @@ class ParallelExecutor(object): self.persistable_vars = [ v.name - for v in filter(lambda var: \ - var.persistable and var.type != core.VarDesc.VarType.RAW, + for v in filter( + lambda var: var.persistable and var.type != core.VarDesc.VarType.RAW, main.list_vars()) ] @@ -163,7 +164,7 @@ class ParallelExecutor(object): Returns: fetched result list. """ - if feed is None: + if feed is None and feed_dict is not None: feed = feed_dict print >> sys.stderr, "`feed_dict` is deprecated. 
diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py
index d5dd63e8737cbdd9b91d083fbd0b38f8baf570b3..7703dfe0135b402f830bcdeaf47c26e5e3f8ca58 100644
--- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py
@@ -97,15 +97,18 @@ class TestConv3dOp(OpTest):
         }
         self.outputs = {'Output': output}
 
+    def testcudnn(self):
+        return core.is_compiled_with_cuda() and self.use_cudnn
+
     def test_check_output(self):
-        if self.use_cudnn:
+        if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_output_with_place(place, atol=1e-5)
         else:
             self.check_output()
 
     def test_check_grad(self):
-        if self.use_cudnn:
+        if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
                 place,
@@ -117,7 +120,7 @@ class TestConv3dOp(OpTest):
             set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
 
     def test_check_grad_no_filter(self):
-        if self.use_cudnn:
+        if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
                 place, ['Input'],
@@ -132,7 +135,7 @@ class TestConv3dOp(OpTest):
             no_grad_set=set(['Filter']))
 
     def test_check_grad_no_input(self):
-        if self.use_cudnn:
+        if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
                 place, ['Filter'],
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index a1be2d671ddc5c689b16319fcf5bf12dca5dde7e..17d6afdee161426e5da398ffa2ec148a027c905e 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -350,6 +350,15 @@ class TestBook(unittest.TestCase):
             self.assertIsNotNone(smooth_label)
         print(str(program))
 
+    def test_topk(self):
+        program = Program()
+        with program_guard(program):
+            data = layers.data(name="label", shape=[200], dtype="float32")
+            values, indices = layers.topk(data, k=5)
+            self.assertIsNotNone(values)
+            self.assertIsNotNone(indices)
+        print(str(program))
+
 
 if __name__ == '__main__':
     unittest.main()
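The `testcudnn()` helper introduced in the conv3d test above (and echoed in the pooling tests below) gates CUDA-only checks on both the build configuration and the per-case flag. A distilled sketch of the pattern with a made-up test class (real op tests inherit `OpTest`):

```python
import unittest
import paddle.fluid.core as core

class ExampleCudnnGuard(unittest.TestCase):
    # Hypothetical case illustrating the guard used by the op tests above.
    use_cudnn = True

    def testcudnn(self):
        # Take the cuDNN path only when PaddlePaddle was built with CUDA
        # *and* this particular case asked for cuDNN.
        return core.is_compiled_with_cuda() and self.use_cudnn

    def test_check_output(self):
        if self.testcudnn():
            place = core.CUDAPlace(0)   # exercised only on CUDA builds
        else:
            place = core.CPUPlace()
        self.assertIsNotNone(place)

if __name__ == '__main__':
    unittest.main()
```

Note that because the helper's name starts with `test`, unittest will also collect it as a (trivially passing) test method; renaming it to something like `_use_cudnn` would avoid that, but the behaviour above matches the patch as written.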
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
index 3ddafbbc57b29d506158bcb57188ab96f814e0d3..c783a142467f3f6a9cd210425acfc526a32a6f71 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
@@ -200,14 +200,29 @@ class TestParallelExecutorBase(unittest.TestCase):
     def check_network_convergence(self,
                                   method,
                                   memory_opt=True,
-                                  iter=10,
+                                  iter=50,
                                   batch_size=None,
                                   allow_op_delay=False,
-                                  feed_dict=None):
+                                  feed_dict=None,
+                                  seed=None,
+                                  use_parallel_executor=True):
+        def run_executor(exe, feed, fetch_list, program=None):
+            if isinstance(exe, fluid.ParallelExecutor):
+                res = exe.run(fetch_list=fetch_list, feed=feed)
+            elif isinstance(exe, fluid.Executor):
+                if program is None:
+                    program = fluid.default_main_program()
+                res = exe.run(program=program, feed=feed, fetch_list=fetch_list)
+            else:
+                raise ValueError('Unknown executor type')
+            return res
+
         main = fluid.Program()
         startup = fluid.Program()
         startup.random_seed = 1  # Fix random seed
         with fluid.program_guard(main, startup):
+            if seed is not None:
+                startup.random_seed = seed
             loss = method(use_feed=feed_dict is not None)
             adam = fluid.optimizer.Adam()
             adam.minimize(loss)
@@ -217,18 +232,24 @@ class TestParallelExecutorBase(unittest.TestCase):
         startup_exe = fluid.Executor(place)
         startup_exe.run(startup)
 
-        exe = fluid.ParallelExecutor(
-            True, loss_name=loss.name, allow_op_delay=allow_op_delay)
+        if use_parallel_executor:
+            exe = fluid.ParallelExecutor(
+                True, loss_name=loss.name, allow_op_delay=allow_op_delay)
+        else:
+            exe = fluid.Executor(place=place)
+
         if batch_size is not None:
             batch_size *= fluid.core.get_cuda_device_count()
         begin = time.time()
-        first_loss, = exe.run([loss.name], feed=feed_dict)
+        first_loss, = run_executor(
+            exe=exe, feed=feed_dict, fetch_list=[loss.name])
         first_loss = numpy.array(first_loss)
 
         for i in xrange(iter):
-            exe.run([], feed=feed_dict)
+            run_executor(exe=exe, feed=feed_dict, fetch_list=[])
 
-        last_loss, = exe.run([loss.name], feed=feed_dict)
+        last_loss, = run_executor(
+            exe=exe, feed=feed_dict, fetch_list=[loss.name])
         end = time.time()
 
         if batch_size is not None:
@@ -239,6 +260,7 @@ class TestParallelExecutorBase(unittest.TestCase):
 
         print first_loss, last_loss
         # self.assertGreater(first_loss[0], last_loss[0])
+        return first_loss, last_loss
 
 
 class TestMNIST(TestParallelExecutorBase):
@@ -268,6 +290,27 @@ class TestMNIST(TestParallelExecutorBase):
             simple_fc_net, feed_dict={"image": img,
                                       "label": label})
 
+    def test_simple_fc_parallel_accuracy(self):
+        img = numpy.zeros(shape=[32, 784], dtype='float32')
+        label = numpy.ones(shape=[32, 1], dtype='int64')
+        single_first_loss, single_last_loss = self.check_network_convergence(
+            method=simple_fc_net,
+            seed=1000,
+            feed_dict={"image": img,
+                       "label": label},
+            use_parallel_executor=False)
+        parallel_first_loss, parallel_last_loss = self.check_network_convergence(
+            method=simple_fc_net,
+            seed=1000,
+            feed_dict={"image": img,
+                       "label": label},
+            use_parallel_executor=True)
+
+        for p_f in parallel_first_loss:
+            self.assertAlmostEqual(p_f, single_first_loss[0], delta=1e-6)
+        for p_l in parallel_last_loss:
+            self.assertAlmostEqual(p_l, single_last_loss[0], delta=1e-6)
+
     def test_batchnorm_fc(self):
         self.check_network_convergence(fc_with_batchnorm)
         img = numpy.zeros(shape=[32, 784], dtype='float32')
@@ -496,10 +539,10 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
                                           share_vars_from=train_exe)
 
             for i in xrange(5):
-                test_loss, = test_exe.run([loss.name], feed_dict=feed_dict)
+                test_loss, = test_exe.run([loss.name], feed=feed_dict)
                 test_loss = numpy.array(test_loss)
 
-                train_loss, = train_exe.run([loss.name], feed_dict=feed_dict)
+                train_loss, = train_exe.run([loss.name], feed=feed_dict)
                 train_loss = numpy.array(train_loss)
                 self.assertTrue(
                     numpy.allclose(
diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_op.py b/python/paddle/fluid/tests/unittests/test_pool2d_op.py
index 328a9ffd25b9fce3fd45bbe847e365f090acd17c..f7e1e8573290766cde0c35816d687e7ba6fa4220 100644
--- a/python/paddle/fluid/tests/unittests/test_pool2d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_pool2d_op.py
@@ -109,8 +109,11 @@ class TestPool2d_Op(OpTest):
 
         self.outputs = {'Out': output}
 
+    def testcudnn(self):
+        return core.is_compiled_with_cuda() and self.use_cudnn
+
     def test_check_output(self):
-        if self.use_cudnn:
+        if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_output_with_place(place, atol=1e-5)
         else:
@@ -119,7 +122,7 @@ class TestPool2d_Op(OpTest):
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
-        if self.use_cudnn and self.pool_type != "max":
+        if self.testcudnn() and self.pool_type != "max":
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
                 place, set(['X']), 'Out', max_relative_error=0.07)
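The parallel-vs-single accuracy test above relies on pinning `startup.random_seed` before the network is built, so both runs start from identical parameter values. A minimal sketch of that seeding pattern (toy network, hypothetical layer sizes, assuming a CPU build of `paddle.fluid`):

```python
import paddle.fluid as fluid

main = fluid.Program()
startup = fluid.Program()
startup.random_seed = 1000   # pin the seed before any parameter is created

with fluid.program_guard(main, startup):
    x = fluid.layers.data(name="x", shape=[784], dtype="float32")
    hidden = fluid.layers.fc(input=x, size=10)
    loss = fluid.layers.mean(hidden)
    fluid.optimizer.Adam().minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)   # parameter initialization is now reproducible run to run
```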
diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_op.py b/python/paddle/fluid/tests/unittests/test_pool3d_op.py
index 15a8ac5e2029eec204d061d1832df3df90339697..aaa94842513691c836e04353aa4bc5ce5e66c5c3 100644
--- a/python/paddle/fluid/tests/unittests/test_pool3d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_pool3d_op.py
@@ -118,15 +118,18 @@ class TestPool3d_Op(OpTest):
 
         self.outputs = {'Out': output.astype('float32')}
 
+    def testcudnn(self):
+        return core.is_compiled_with_cuda() and self.use_cudnn
+
     def test_check_output(self):
-        if self.use_cudnn:
+        if self.testcudnn():
             place = core.CUDAPlace(0)
             self.check_output_with_place(place, atol=1e-5)
         else:
             self.check_output()
 
     def test_check_grad(self):
-        if self.use_cudnn and self.pool_type != "max":
+        if self.testcudnn() and self.pool_type != "max":
             place = core.CUDAPlace(0)
             self.check_grad_with_place(
                 place, set(['X']), 'Out', max_relative_error=0.07)
diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py
index 887bdfe8b3608878bace5b857a71ada123b74b2f..eb49a53e54f4bdb6bcd6cb1991423970f29997bb 100644
--- a/python/paddle/fluid/tests/unittests/test_split_op.py
+++ b/python/paddle/fluid/tests/unittests/test_split_op.py
@@ -19,7 +19,7 @@ from op_test import OpTest
 
 class TestSplitOp(OpTest):
     def setUp(self):
-        self.op_type = "split"
+        self._set_op_type()
         axis = 1
         x = np.random.random((4, 5, 6)).astype('float32')
         out = np.split(x, [2, 3], axis)
@@ -28,6 +28,9 @@ class TestSplitOp(OpTest):
         self.outputs = {'Out': [('out%d' % i, out[i]) \
                                 for i in xrange(len(out))]}
 
+    def _set_op_type(self):
+        self.op_type = "split"
+
     def test_check_output(self):
         self.check_output()
 
@@ -35,5 +38,10 @@ class TestSplitOp(OpTest):
         self.check_grad(['X'], ['out0', 'out1', 'out2'])
 
 
+class TestSplitByrefOp(TestSplitOp):
+    def _set_op_type(self):
+        self.op_type = "split_byref"
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
index 37c4296f9bcea7e16daa46f778934331513c30c4..00c2a3b9928d1ca5f3e8cd5e87ba7ad4108e9dad 100644
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -124,7 +124,7 @@ def test(word_idx):
         re.compile("aclImdb/test/neg/.*\.txt$"), word_idx)
 
 
-def word_dict():
+def word_dict(cutoff=150):
     """
     Build a word dictionary from the corpus.
 
@@ -132,7 +132,7 @@
     :rtype: dict
     """
    return build_dict(
-        re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
+        re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), cutoff)
 
 
 def fetch():
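Finally, the `imdb.word_dict` change above exposes the previously hard-coded frequency cutoff as a parameter. A usage sketch (the call downloads the aclImdb corpus on first use; 500 is just an illustrative threshold):

```python
import paddle.v2.dataset.imdb as imdb

default_vocab = imdb.word_dict()            # same vocabulary as before (cutoff=150)
smaller_vocab = imdb.word_dict(cutoff=500)  # raise the frequency threshold, keep fewer words
assert len(smaller_vocab) <= len(default_vocab)
```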