Unverified commit 501a51fc, authored by Charles-hit, committed by GitHub

[PRIM][IR] Migrate vjp rules to new ir in non-primitive mode (#55647)

* [prim][newir] add basic framework for primitive

* support desctensor in new ir

* add vjp interface

* support vjp in new ir

* support vjp in new ir

* polish vjp interface

* fix stop_gradients set

* fix vjp dispatch

* add comment

* add vjp test for new ir

* add test for tanh vjp


* add eager and static backends to wrap the lower level api

* support call_vjp pybind

* polish code and add test for vjp

* remove useless code

* polish code

* remove useless code

* support mean vjp

* add test for mean vjp and support has_vjp function

* fix call_vjp

* polish code

* add primitive ops set for backend

* add vjp test for tanh_

* fix inference CI

* fix inference ci

* modify fluid cmake

* remove useless deps

* add cmake

---------
Co-authored-by: cxxly <chenxx_id@163.com>
Co-authored-by: zhangbo9674 <zhangbo54@baidu.com>
Parent 8542deab
......@@ -10,5 +10,6 @@ add_subdirectory(prim)
add_subdirectory(jit)
add_subdirectory(ir)
add_subdirectory(ir_adaptor)
add_subdirectory(primitive)
# NOTE: please add subdirectory inference at last.
add_subdirectory(inference)
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h"
#include "paddle/fluid/primitive/type/desc_tensor.h"
namespace phi {
......@@ -40,6 +41,8 @@ template class TypeInfoTraits<phi::TensorBase, paddle::framework::Strings>;
template class TypeInfoTraits<phi::TensorBase, paddle::framework::FeedList>;
template class TypeInfoTraits<phi::TensorBase, egr::VariableCompatTensor>;
template class TypeInfoTraits<phi::TensorBase, paddle::prim::DescTensor>;
template class TypeInfoTraits<phi::TensorBase,
paddle::primitive::experimental::DescTensor>;
template class TypeInfoTraits<phi::TensorBase,
paddle::framework::VariableRefArray>;
......
......@@ -52,5 +52,12 @@ file(GLOB PD_DIALECT_SRCS "*.cc")
cc_library(
pd_dialect
SRCS ${PD_DIALECT_SRCS} ${op_source_file}
DEPS framework_proto phi phi_utils pd_interface pd_trait ir)
DEPS framework_proto
phi
phi_utils
pd_interface
pd_trait
ir
primitive_vjp_experimental
type_info)
target_include_directories(pd_dialect PRIVATE ${PD_DIALECT_BINARY_DIR})
......@@ -17,7 +17,11 @@ import os
import yaml
from op_build_gen import gen_build_func_str
from op_interface_gen import gen_exclusive_interface_str, gen_op_infer_meta_str
from op_interface_gen import (
gen_exclusive_interface_str,
gen_op_infer_meta_str,
vjp_interface_gen_op_list,
)
from op_member_func_gen import gen_op_get_inputs_outputs_str
from op_verify_gen import gen_verify_func_str
......@@ -43,6 +47,7 @@ H_FILE_TEMPLATE = """#ifdef GET_OP_LIST
#include "paddle/fluid/ir/dialect/op_yaml_info_util.h"
#include "paddle/fluid/ir/interface/op_yaml_info.h"
#include "paddle/fluid/ir/interface/infermeta.h"
#include "paddle/fluid/ir/interface/vjp.h"
#include "paddle/fluid/ir/trait/inplace.h"
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/core/infermeta_utils.h"
......@@ -303,6 +308,9 @@ class OpInfoParser:
else:
self.infer_meta_func = None
# parse backward name
self.backward_name = self.parse_backward_name()
# parse inplace && view
self.inplace_map = self.parse_op_inplace_info()
self.view_map = self.parse_op_view_info()
......@@ -612,6 +620,12 @@ class OpInfoParser:
else:
return None
def parse_backward_name(self):
if 'backward' in self.op_yaml_item:
return self.op_yaml_item['backward']
else:
return None
def get_phi_dtype_name(self, name):
name = name.replace('Scalar', 'phi::Scalar')
name = name.replace('IntArray', 'phi::IntArray')
......@@ -720,6 +734,11 @@ def OpGenerator(
if op_info.infer_meta_func:
op_interfaces += ["InferMetaInterface"]
if (
op_info.backward_name
and op_info.op_phi_name[0] in vjp_interface_gen_op_list
):
op_interfaces += ["VjpInterface"]
exclusive_interface_str = gen_exclusive_interface_str(op_info)
# If op has inplace info, we will generate inplace op and non-inplace op.
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# generator interfaces
from vjp_interface_gen_op_list import vjp_interface_gen_op_list
OP_INFER_SHAPE_TEMPLATE = """
void {op_name}::InferMeta( phi::InferMetaContext *infer_meta ) {{
......@@ -38,4 +39,6 @@ def gen_exclusive_interface_str(op_info):
exclusive_interface_str += (
" static void InferMeta( phi::InferMetaContext *infer_meta );"
)
if op_info.op_phi_name[0] in vjp_interface_gen_op_list:
exclusive_interface_str += "\n static std::vector<std::vector<ir::OpResult>> Vjp(ir::Operation* op, const std::vector<std::vector<ir::OpResult>>& out_grads, const std::vector<std::vector<int>>& stop_gradients);"
return exclusive_interface_str
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =====================================
# VjpInterface gen op list
# =====================================
# We don't support automatic code generation of vjp functions yet, so this
# whitelist controls which ops get generated Vjp methods.
# TODO(wanghao107): remove this file once Vjp method code generation is
# supported.
vjp_interface_gen_op_list = ["tanh", "mean"]
......@@ -53,5 +53,23 @@ ir::OpResult full(std::vector<int64_t> shape,
return full_op.out();
}
ir::OpResult tanh_grad(ir::OpResult out, ir::OpResult grad_out) {
paddle::dialect::TanhGradOp tanh_grad_op =
APIBuilder::Instance().GetBuilder()->Build<paddle::dialect::TanhGradOp>(
out, grad_out);
return tanh_grad_op.result(0);
}
ir::OpResult mean_grad(ir::OpResult x,
ir::OpResult out_grad,
std::vector<int64_t> axis,
bool keepdim,
bool reduce_all) {
paddle::dialect::MeanGradOp mean_grad_op =
APIBuilder::Instance().GetBuilder()->Build<paddle::dialect::MeanGradOp>(
x, out_grad, axis, keepdim, reduce_all);
return mean_grad_op.result(0);
}
} // namespace dialect
} // namespace paddle
......@@ -39,5 +39,12 @@ ir::OpResult full(std::vector<int64_t> shape,
phi::DataType dtype = phi::DataType::FLOAT32,
phi::Place place = phi::CPUPlace());
ir::OpResult tanh_grad(ir::OpResult out, ir::OpResult grad_out);
ir::OpResult mean_grad(ir::OpResult x,
ir::OpResult out_grad,
std::vector<int64_t> axis = {},
bool keepdim = false,
bool reduce_all = false);
} // namespace dialect
} // namespace paddle
......@@ -91,6 +91,9 @@ class APIBuilder {
ctx_ = ir::IrContext::Instance();
ctx_->GetOrRegisterDialect<paddle::dialect::PaddleDialect>();
}
APIBuilder(const APIBuilder&) = delete;
ir::IrContext* ctx_;
std::shared_ptr<ir::Builder> builder_;
};
......
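For orientation, here is a minimal sketch (not part of this diff) of how the singleton APIBuilder above is used to emit ops into a program. It mirrors the pattern of the VJP unit test added later in this commit; the helper name BuildTanhProgram is illustrative.
// Illustrative only: build a tiny program through the shared APIBuilder,
// following the pattern used by the new VJP unit test in this commit.
#include <memory>
#include <vector>
#include "paddle/fluid/ir/dialect/pd_dialect.h"
#include "paddle/fluid/ir/dialect/pd_op.h"
#include "paddle/ir/core/program.h"
void BuildTanhProgram(ir::Program* program) {  // hypothetical helper
  // Point the shared builder at the target program, then emit ops through it.
  paddle::dialect::APIBuilder::Instance().SetProgram(program);
  std::shared_ptr<ir::Builder> builder =
      paddle::dialect::APIBuilder::Instance().GetBuilder();
  paddle::dialect::FullOp full_op = builder->Build<paddle::dialect::FullOp>(
      std::vector<int64_t>{1}, 1.0, phi::DataType::FLOAT32, phi::CPUPlace());
  builder->Build<paddle::dialect::TanhOp>(full_op.out());
}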
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/ir/dialect/pd_attribute.h"
#include "paddle/fluid/ir/dialect/pd_op.h"
#include "paddle/fluid/primitive/rule/vjp/vjp.h"
#include "paddle/fluid/primitive/type/desc_tensor.h"
#include "paddle/ir/core/op_base.h"
// TODO(wanghao107)
// this file will be generated in pd_op.cc
namespace paddle {
namespace dialect {
std::vector<std::vector<ir::OpResult>> TanhOp::Vjp(
ir::Operation* op,
const std::vector<std::vector<ir::OpResult>>& out_grads,
const std::vector<std::vector<int>>& stop_gradients) {
TanhOp op_obj = op->dyn_cast<TanhOp>();
Tensor out(
std::make_shared<primitive::experimental::DescTensor>(op_obj.out()));
Tensor grad_out(
std::make_shared<primitive::experimental::DescTensor>(out_grads[0][0]));
std::vector<std::vector<Tensor>> tensor_res =
primitive::experimental::tanh_vjp(out, grad_out, stop_gradients);
std::vector<std::vector<ir::OpResult>> res(1, std::vector<ir::OpResult>(1));
if (!stop_gradients[0][0]) {
res[0][0] = std::static_pointer_cast<primitive::experimental::DescTensor>(
tensor_res[0][0].impl())
->getValue()
.dyn_cast<ir::OpResult>();
}
return res;
}
std::vector<std::vector<ir::OpResult>> Tanh_Op::Vjp(
ir::Operation* op,
const std::vector<std::vector<ir::OpResult>>& out_grads,
const std::vector<std::vector<int>>& stop_gradients) {
// TODO(wanghao107)
// Inplace is not supported yet, so the non-inplace vjp is used for now.
// Support inplace in the future.
Tanh_Op op_obj = op->dyn_cast<Tanh_Op>();
Tensor out(
std::make_shared<primitive::experimental::DescTensor>(op_obj.out()));
Tensor grad_out(
std::make_shared<primitive::experimental::DescTensor>(out_grads[0][0]));
std::vector<std::vector<Tensor>> tensor_res =
primitive::experimental::tanh_vjp(out, grad_out, stop_gradients);
std::vector<std::vector<ir::OpResult>> res(1, std::vector<ir::OpResult>(1));
if (!stop_gradients[0][0]) {
res[0][0] = std::static_pointer_cast<primitive::experimental::DescTensor>(
tensor_res[0][0].impl())
->getValue()
.dyn_cast<ir::OpResult>();
}
return res;
}
std::vector<std::vector<ir::OpResult>> MeanOp::Vjp(
ir::Operation* op,
const std::vector<std::vector<ir::OpResult>>& out_grads,
const std::vector<std::vector<int>>& stop_gradients) {
MeanOp op_obj = op->dyn_cast<MeanOp>();
Tensor x(std::make_shared<primitive::experimental::DescTensor>(op_obj.x()));
Tensor out_grad(
std::make_shared<primitive::experimental::DescTensor>(out_grads[0][0]));
std::vector<int64_t> axis =
op->attribute("axis")
.dyn_cast<paddle::dialect::IntArrayAttribute>()
.data()
.GetData();
bool keepdim = op->attribute("keepdim").dyn_cast<ir::BoolAttribute>().data();
bool reduce_all = false;
std::vector<std::vector<Tensor>> tensor_res =
primitive::experimental::mean_vjp(
x, out_grad, axis, keepdim, reduce_all, stop_gradients);
std::vector<std::vector<ir::OpResult>> res(1, std::vector<ir::OpResult>(1));
if (!stop_gradients[0][0]) {
res[0][0] = std::static_pointer_cast<primitive::experimental::DescTensor>(
tensor_res[0][0].impl())
->getValue()
.dyn_cast<ir::OpResult>();
}
return res;
}
} // namespace dialect
} // namespace paddle
......@@ -20,21 +20,24 @@ namespace dialect {
class VjpInterface : public ir::OpInterfaceBase<VjpInterface> {
public:
struct Concept {
explicit Concept(std::vector<std::vector<ir::Value>> (*vjp)(
std::vector<std::vector<ir::Value>> out_grads,
explicit Concept(std::vector<std::vector<ir::OpResult>> (*vjp)(
ir::Operation* op,
const std::vector<std::vector<ir::OpResult>>& out_grads,
const std::vector<std::vector<int>>& stop_gradients))
: vjp_(vjp) {}
std::vector<std::vector<ir::Value>> (*vjp_)(
std::vector<std::vector<ir::Value>> out_grads,
std::vector<std::vector<ir::OpResult>> (*vjp_)(
ir::Operation* op,
const std::vector<std::vector<ir::OpResult>>& out_grads,
const std::vector<std::vector<int>>& stop_gradients);
};
template <class ConcreteOp>
struct Model : public Concept {
static std::vector<std::vector<ir::Value>> Vjp(
std::vector<std::vector<ir::Value>> out_grads,
static std::vector<std::vector<ir::OpResult>> Vjp(
ir::Operation* op,
const std::vector<std::vector<ir::OpResult>>& out_grads,
const std::vector<std::vector<int>>& stop_gradients) {
return ConcreteOp::Vjp(out_grads, stop_gradients);
return ConcreteOp::Vjp(op, out_grads, stop_gradients);
}
Model() : Concept(Vjp) {}
......@@ -43,10 +46,11 @@ class VjpInterface : public ir::OpInterfaceBase<VjpInterface> {
VjpInterface(ir::Operation* op, Concept* impl)
: ir::OpInterfaceBase<VjpInterface>(op), impl_(impl) {}
std::vector<std::vector<ir::Value>> Vjp(
std::vector<std::vector<ir::Value>> out_grads,
std::vector<std::vector<ir::OpResult>> Vjp(
ir::Operation* op,
const std::vector<std::vector<ir::OpResult>>& out_grads,
const std::vector<std::vector<int>>& stop_gradients) {
return impl_->vjp_(out_grads, stop_gradients);
return impl_->vjp_(op, out_grads, stop_gradients);
}
private:
......
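A minimal sketch (not part of this diff) of how a caller dispatches through the VjpInterface defined above. It mirrors the pybind binding and the C++ unit test added in this commit; the helper name CallVjpIfRegistered is illustrative.
// Illustrative only: look up the vjp implementation registered for an op and
// invoke it; returns an empty result when no vjp rule is registered.
#include <vector>
#include "paddle/fluid/ir/interface/vjp.h"
#include "paddle/ir/core/ir_context.h"
#include "paddle/ir/core/operation.h"
std::vector<std::vector<ir::OpResult>> CallVjpIfRegistered(
    ir::Operation* op,
    const std::vector<std::vector<ir::OpResult>>& out_grads,
    const std::vector<std::vector<int>>& stop_gradients) {
  ir::IrContext* ctx = ir::IrContext::Instance();
  ir::OpInfo op_info = ctx->GetRegisteredOpInfo(op->name());
  auto impl = op_info.GetInterfaceImpl<paddle::dialect::VjpInterface>();
  if (impl == nullptr) {
    return {};  // equivalent to has_vjp(op) == false
  }
  return impl->vjp_(op, out_grads, stop_gradients);
}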
add_subdirectory(backend)
add_subdirectory(rule)
# Paddle Primitive Operator System and Combined Strategy Design
if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
primitive_backend_eager_experimental
SRCS eager_backend.cc
DEPS final_dygraph_function eager_utils phi)
endif()
cc_library(
primitive_backend_static_experimental
SRCS static_backend.cc
DEPS pd_dialect)
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/primitive/backend/eager_backend.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/primitive/primitive/primitive.h"
namespace paddle {
namespace primitive {
namespace backend {
namespace experimental {} // namespace experimental
} // namespace backend
} // namespace primitive
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/phi/api/include/tensor.h"
namespace paddle {
namespace primitive {
namespace backend {
namespace experimental {} // namespace experimental
} // namespace backend
} // namespace primitive
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/primitive/backend/static_backend.h"
#include "paddle/fluid/ir/dialect/pd_api.h"
#include "paddle/fluid/primitive/primitive/primitive.h"
#include "paddle/fluid/primitive/type/desc_tensor.h"
namespace paddle {
namespace primitive {
namespace backend {
namespace experimental {
using DescTensor = paddle::primitive::experimental::DescTensor;
template <>
Tensor tanh_grad<DescTensor>(const Tensor& out, const Tensor& grad_out) {
ir::OpResult out_res = std::static_pointer_cast<DescTensor>(out.impl())
->getValue()
.dyn_cast<ir::OpResult>();
ir::OpResult grad_out_res =
std::static_pointer_cast<DescTensor>(grad_out.impl())
->getValue()
.dyn_cast<ir::OpResult>();
ir::OpResult op_res = paddle::dialect::tanh_grad(out_res, grad_out_res);
return Tensor(std::make_shared<primitive::experimental::DescTensor>(op_res));
}
template <>
Tensor mean_grad<DescTensor>(const Tensor& x,
const Tensor& out_grad,
std::vector<int64_t> axis,
bool keepdim,
bool reduce_all) {
ir::OpResult x_res = std::static_pointer_cast<DescTensor>(x.impl())
->getValue()
.dyn_cast<ir::OpResult>();
ir::OpResult out_grad_res =
std::static_pointer_cast<DescTensor>(out_grad.impl())
->getValue()
.dyn_cast<ir::OpResult>();
ir::OpResult op_res = paddle::dialect::mean_grad(
x_res, out_grad_res, axis, keepdim, reduce_all);
return Tensor(std::make_shared<primitive::experimental::DescTensor>(op_res));
}
} // namespace experimental
} // namespace backend
} // namespace primitive
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/phi/api/include/tensor.h"
namespace paddle {
namespace primitive {
namespace backend {
namespace experimental {
using Tensor = paddle::Tensor;
template <typename T>
Tensor tanh_grad(const Tensor& out, const Tensor& grad_out);
template <typename T>
Tensor mean_grad(const Tensor& x,
const Tensor& out_grad,
std::vector<int64_t> axis = {},
bool keepdim = false,
bool reduce_all = false);
} // namespace experimental
} // namespace backend
} // namespace primitive
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace paddle {
namespace primitive {
namespace experimental {}
} // namespace primitive
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/primitive/backend/eager_backend.h"
#include "paddle/fluid/primitive/backend/static_backend.h"
namespace paddle {
namespace primitive {
namespace experimental {
// Why does this file exist?
// It divides the primitive op set by backend and is included by the vjp
// composite rules and the composite op rules.
} // namespace experimental
} // namespace primitive
} // namespace paddle
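A minimal sketch (illustration only) of what dividing the primitive op set per backend means in practice: a vjp rule selects the backend through the template argument, as rule/vjp/vjp.cc below does with DescTensor for the static backend. The wrapper name TanhGradStatic is hypothetical.
#include "paddle/fluid/primitive/backend/static_backend.h"
#include "paddle/fluid/primitive/type/desc_tensor.h"
namespace paddle {
namespace primitive {
namespace experimental {
// Hypothetical helper: route tanh_grad to the static (DescTensor) backend.
// An eager backend implementation would be selected with a different
// template argument once it is provided.
Tensor TanhGradStatic(const Tensor& out, const Tensor& grad_out) {
  return backend::experimental::tanh_grad<DescTensor>(out, grad_out);
}
}  // namespace experimental
}  // namespace primitive
}  // namespace paddle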
file(GLOB VJP_SRCS "*.cc")
cc_library(
primitive_vjp_experimental
SRCS ${VJP_SRCS}
DEPS primitive_backend_static_experimental)
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <vector>
#include "paddle/fluid/ir/dialect/pd_api.h"
#include "paddle/fluid/primitive/backend/static_backend.h"
#include "paddle/fluid/primitive/rule/vjp/vjp.h"
#include "paddle/fluid/primitive/type/desc_tensor.h"
#include "paddle/ir/core/operation.h"
// TODO(wanghao107):
// op's vjp will be auto generated.
namespace paddle {
namespace primitive {
namespace experimental {
std::vector<std::vector<paddle::Tensor>> tanh_vjp(
const Tensor& out,
const Tensor& grad_out,
const std::vector<std::vector<int>>& stop_gradients) {
std::vector<std::vector<paddle::Tensor>> vjp_res(
1, std::vector<paddle::Tensor>(1));
// get tanh_grad res.
Tensor op_res =
backend::experimental::tanh_grad<primitive::experimental::DescTensor>(
out, grad_out);
// set op stop_gradient info
// TODO(wanghao107): Replace with more generic code.
// Support setting stop_gradients for all ops.
ir::Operation* grad_op =
std::static_pointer_cast<primitive::experimental::DescTensor>(
op_res.impl())
->getValue()
.dyn_cast<ir::OpResult>()
.owner();
uint32_t num_res = grad_op->num_results();
std::vector<ir::Attribute> ir_stop_gradients(num_res);
for (size_t i = 0; i < num_res; i++) {
if (stop_gradients[0][i]) {
ir_stop_gradients[i] =
ir::BoolAttribute::get(ir::IrContext::Instance(), true);
} else {
ir_stop_gradients[i] =
ir::BoolAttribute::get(ir::IrContext::Instance(), false);
}
}
grad_op->set_attribute(
"stop_gradient",
ir::ArrayAttribute::get(ir::IrContext::Instance(), ir_stop_gradients));
// construct vjp result by op result and stop_gradients info
if (!stop_gradients[0][0]) {
vjp_res[0][0] = op_res;
}
return vjp_res;
}
std::vector<std::vector<paddle::Tensor>> mean_vjp(
const Tensor& x,
const Tensor& out_grad,
std::vector<int64_t> axis,
bool keepdim,
bool reduce_all,
const std::vector<std::vector<int>>& stop_gradients) {
std::vector<std::vector<paddle::Tensor>> vjp_res(
1, std::vector<paddle::Tensor>(1));
// get mean_grad res.
Tensor op_res =
backend::experimental::mean_grad<primitive::experimental::DescTensor>(
x, out_grad, axis, keepdim, reduce_all);
// set op stop_gradient info
// TODO(wanghao107): Replace with more generic code.
// Support setting stop_gradients for all ops.
ir::Operation* grad_op =
std::static_pointer_cast<primitive::experimental::DescTensor>(
op_res.impl())
->getValue()
.dyn_cast<ir::OpResult>()
.owner();
uint32_t num_res = grad_op->num_results();
std::vector<ir::Attribute> ir_stop_gradients(num_res);
for (size_t i = 0; i < num_res; i++) {
if (stop_gradients[0][i]) {
ir_stop_gradients[i] =
ir::BoolAttribute::get(ir::IrContext::Instance(), true);
} else {
ir_stop_gradients[i] =
ir::BoolAttribute::get(ir::IrContext::Instance(), false);
}
}
grad_op->set_attribute(
"stop_gradient",
ir::ArrayAttribute::get(ir::IrContext::Instance(), ir_stop_gradients));
// construct vjp result by op result and stop_gradients info
if (!stop_gradients[0][0]) {
vjp_res[0][0] = op_res;
}
return vjp_res;
}
} // namespace experimental
} // namespace primitive
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif
#include <math.h>
#include <vector>
#include "paddle/fluid/primitive/primitive/primitive.h"
#include "paddle/ir/core/value.h"
#include "paddle/phi/api/include/tensor.h"
namespace paddle {
namespace primitive {
namespace experimental {
// TODO(wanghao107):
// op's vjp will be auto generated.
std::vector<std::vector<paddle::Tensor>> tanh_vjp(
const Tensor& out,
const Tensor& grad_out,
const std::vector<std::vector<int>>& stop_gradients);
std::vector<std::vector<paddle::Tensor>> mean_vjp(
const Tensor& x,
const Tensor& out_grad,
std::vector<int64_t> axis,
bool keepdim,
bool reduce_all,
const std::vector<std::vector<int>>& stop_gradients);
namespace details {
// NOTE: this namespace will store
// primitive ops grad composite rules.
} // namespace details
} // namespace experimental
} // namespace primitive
} // namespace paddle
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/ir/dialect/pd_type.h"
#include "paddle/fluid/ir/dialect/utils.h"
#include "paddle/ir/core/value.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/extended_tensor.h"
#include "paddle/phi/core/utils/data_type.h"
namespace paddle {
namespace primitive {
namespace experimental {
class DescTensor : public phi::ExtendedTensor,
public phi::TypeInfoTraits<phi::TensorBase, DescTensor> {
public:
explicit DescTensor(ir::Value value)
: value_(value),
dims_(value.type().dyn_cast<dialect::DenseTensorType>().dims()) {}
static const char* name() { return "DescTensor"; }
const phi::DDim& dims() const override { return dims_; }
int64_t numel() const override { return product(dims()); }
DataType dtype() const override {
return paddle::dialect::TransToPhiDataType(value_.type());
}
ir::Value getValue() const { return value_; }
const phi::Place& place() const override { return place_; }
bool initialized() const override { return value_.impl() != nullptr; }
private:
ir::Value value_;
mutable phi::DDim dims_;
phi::Place place_;
};
} // namespace experimental
} // namespace primitive
} // namespace paddle
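A minimal sketch (illustration only) of the DescTensor round trip that the vjp rules and the static backend in this commit rely on: wrap an ir::Value as a paddle::Tensor, and recover the ir::OpResult from a Tensor impl. The helper names are hypothetical.
#include <memory>
#include "paddle/fluid/primitive/type/desc_tensor.h"
#include "paddle/phi/api/include/tensor.h"
// Hypothetical helpers showing the two directions of the conversion.
paddle::Tensor WrapAsTensor(ir::Value value) {
  return paddle::Tensor(
      std::make_shared<paddle::primitive::experimental::DescTensor>(value));
}
ir::OpResult UnwrapOpResult(const paddle::Tensor& tensor) {
  return std::static_pointer_cast<paddle::primitive::experimental::DescTensor>(
             tensor.impl())
      ->getValue()
      .dyn_cast<ir::OpResult>();
}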
......@@ -195,6 +195,7 @@ limitations under the License. */
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/fluid/ir/interface/vjp.h"
#include "paddle/fluid/prim/utils/eager/eager_tensor_operants.h"
#include "paddle/fluid/prim/utils/static/static_tensor_operants.h"
#include "paddle/fluid/pybind/eager_utils.h"
......@@ -687,6 +688,69 @@ static int GetNCCLVersion() {
}
#endif
void BindVjp(pybind11::module *m) {
m->def(
"call_vjp",
[](ir::Operation &fwd_op,
const std::vector<std::vector<ir::OpResult>> &out_grads,
const std::vector<std::vector<int>> &stop_gradients) {
py::list res;
ir::IrContext *ctx = ir::IrContext::Instance();
ir::OpInfo fwd_op_info = ctx->GetRegisteredOpInfo(fwd_op.name());
auto vjp_interface_impl =
fwd_op_info.GetInterfaceImpl<paddle::dialect::VjpInterface>();
if (vjp_interface_impl == nullptr) {
PADDLE_THROW(phi::errors::InvalidArgument(
"The vjp function is not registered in %s op ", fwd_op.name()));
}
std::vector<std::vector<ir::OpResult>> vjp_res =
vjp_interface_impl->vjp_(&fwd_op, out_grads, stop_gradients);
PADDLE_ENFORCE_EQ(
stop_gradients.size(),
vjp_res.size(),
phi::errors::InvalidArgument(
"The size of stop_gradients should be the same as vjp_res "
"size."
"But the size of stop_gradients: %d, vjp_res size: %d",
stop_gradients.size(),
vjp_res.size()));
for (size_t i = 0; i < vjp_res.size(); ++i) {
PADDLE_ENFORCE_EQ(stop_gradients[i].size(),
vjp_res[i].size(),
phi::errors::InvalidArgument(
"The size of stop_gradients[%d] should be the "
"same as vjp_res[%d] "
"size."
"But the size of stop_gradients[%d]: %d, "
"vjp_res[%d] size: %d",
i,
i,
i,
stop_gradients[i].size(),
i,
vjp_res[i].size()));
py::list sub_res;
for (size_t j = 0; j < vjp_res[i].size(); ++j) {
if (stop_gradients[i][j]) {
sub_res.append(nullptr);
} else {
sub_res.append(vjp_res[i][j]);
}
}
res.append(sub_res);
}
return res;
});
m->def("has_vjp", [](ir::Operation &fwd_op) {
ir::IrContext *ctx = ir::IrContext::Instance();
ir::OpInfo fwd_op_info = ctx->GetRegisteredOpInfo(fwd_op.name());
auto vjp_interface_impl =
fwd_op_info.GetInterfaceImpl<paddle::dialect::VjpInterface>();
if (vjp_interface_impl == nullptr) return false;
return true;
});
}
PYBIND11_MODULE(libpaddle, m) {
BindImperative(&m);
BindEager(&m);
......@@ -2846,6 +2910,7 @@ All parameter, weight, gradient are variables in Paddle.
#endif
BindNewIR(&m);
BindVjp(&m);
}
} // namespace pybind
} // namespace paddle
......@@ -1082,6 +1082,9 @@ void BuildProgram(ir::Builder &builder) { // NOLINT
}
// TODO(wilber): Add a normal test.
// TODO(wanghao107) fix this test on
// mac_py3 CI
#if !defined(__APPLE__)
TEST(pattern_rewrite, Patterns) {
ir::IrContext *ctx = ir::IrContext::Instance();
auto *test_dialect = ctx->GetOrRegisterDialect<Conv2dFusionTestDialect>();
......@@ -1111,3 +1114,4 @@ TEST(pattern_rewrite, Patterns) {
CHECK_EQ(pm.Run(&program), true);
}
#endif
......@@ -61,3 +61,12 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
init_env_utils
python)
endif()
# skip win32 since wget is not installed by default on windows machine.
if(NOT WIN32)
cc_test(
test_vjp_new_ir
SRCS test_vjp.cc
DEPS phi_kernel_adaptor pd_dialect ir)
endif()
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/new_executor/new_ir_interpreter.h"
#include "paddle/fluid/framework/new_executor/standalone_executor.h"
#include "paddle/fluid/ir/dialect/pd_dialect.h"
#include "paddle/fluid/ir/dialect/pd_op.h"
#include "paddle/fluid/ir/dialect/pd_type.h"
#include "paddle/fluid/ir/dialect/utils.h"
#include "paddle/fluid/ir/transforms/pd_op_to_kernel_pass.h"
#include "paddle/fluid/platform/init_phi.h"
#include "paddle/ir/core/block.h"
#include "paddle/ir/core/builtin_attribute.h"
#include "paddle/ir/core/builtin_op.h"
#include "paddle/ir/core/ir_context.h"
#include "paddle/ir/core/program.h"
#include "paddle/ir/core/utils.h"
DECLARE_FILE_SYMBOLS(kernel_dialect);
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(tanh, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(tanh_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(mean_grad, CPU, ALL_LAYOUT);
namespace paddle {
namespace framework {
TEST(VJP, TanhBackwardTest) {
ir::IrContext* ctx = ir::IrContext::Instance();
ir::Program program((ctx));
paddle::dialect::APIBuilder::Instance().SetProgram(&program);
std::shared_ptr<ir::Builder> builder =
paddle::dialect::APIBuilder::Instance().GetBuilder();
paddle::dialect::FullOp op1 = builder->Build<paddle::dialect::FullOp>(
std::vector<int64_t>{1}, 1.0, phi::DataType::FLOAT32, phi::CPUPlace());
paddle::dialect::TanhOp op2 =
builder->Build<paddle::dialect::TanhOp>(op1.out());
paddle::dialect::FullOp op3 = builder->Build<paddle::dialect::FullOp>(
std::vector<int64_t>{1}, 2.0, phi::DataType::FLOAT32, phi::CPUPlace());
std::vector<std::vector<int>> stop_gradients{{0}};
std::vector<std::vector<ir::OpResult>> out_grads{{op3.out()}};
ir::OpInfo op2_info = ctx->GetRegisteredOpInfo("pd.tanh");
auto tanh_vjp_interface_impl =
op2_info.GetInterfaceImpl<paddle::dialect::VjpInterface>();
tanh_vjp_interface_impl->vjp_(op2.operation(), out_grads, stop_gradients);
auto kernel_program = paddle::dialect::PdOpLowerToKernelPass(&program);
auto place = platform::CPUPlace();
Scope scope;
ProgramDesc prog_desc;
InterpreterCore test_core(place, {}, std::move(kernel_program), &scope);
std::stringstream os;
os << reinterpret_cast<NewIRInterpreter*>(
const_cast<InterpreterBaseImpl*>(test_core.Impl()));
std::string prefix_str = os.str();
test_core.SetSkipGcVars(
{prefix_str + "_inner_var_1", prefix_str + "_inner_var_3"});
test_core.BetaRun({});
auto out_tensor =
test_core.local_scope() == nullptr
? scope.FindVar(prefix_str + "_inner_var_1")->Get<phi::DenseTensor>()
: test_core.local_scope()
->FindVar(prefix_str + "_inner_var_1")
->Get<phi::DenseTensor>();
auto grad_out_tensor =
test_core.local_scope() == nullptr
? scope.FindVar(prefix_str + "_inner_var_3")->Get<phi::DenseTensor>()
: test_core.local_scope()
->FindVar(prefix_str + "_inner_var_3")
->Get<phi::DenseTensor>();
ASSERT_NEAR(out_tensor.data<float>()[0], 0.76159, 1e-5);
ASSERT_NEAR(grad_out_tensor.data<float>()[0], 0.83995, 1e-5);
}
TEST(VJP, Tanh_BackwardTest) {
ir::IrContext* ctx = ir::IrContext::Instance();
ir::Program program((ctx));
paddle::dialect::APIBuilder::Instance().SetProgram(&program);
std::shared_ptr<ir::Builder> builder =
paddle::dialect::APIBuilder::Instance().GetBuilder();
paddle::dialect::FullOp op1 = builder->Build<paddle::dialect::FullOp>(
std::vector<int64_t>{1}, 1.0, phi::DataType::FLOAT32, phi::CPUPlace());
paddle::dialect::Tanh_Op op2 =
builder->Build<paddle::dialect::Tanh_Op>(op1.out());
paddle::dialect::FullOp op3 = builder->Build<paddle::dialect::FullOp>(
std::vector<int64_t>{1}, 2.0, phi::DataType::FLOAT32, phi::CPUPlace());
std::vector<std::vector<int>> stop_gradients{{0}};
std::vector<std::vector<ir::OpResult>> out_grads{{op3.out()}};
ir::OpInfo op2_info = ctx->GetRegisteredOpInfo("pd.tanh_");
auto tanh_vjp_interface_impl =
op2_info.GetInterfaceImpl<paddle::dialect::VjpInterface>();
tanh_vjp_interface_impl->vjp_(op2.operation(), out_grads, stop_gradients);
auto kernel_program = paddle::dialect::PdOpLowerToKernelPass(&program);
auto place = platform::CPUPlace();
Scope scope;
ProgramDesc prog_desc;
InterpreterCore test_core(place, {}, std::move(kernel_program), &scope);
std::stringstream os;
os << reinterpret_cast<NewIRInterpreter*>(
const_cast<InterpreterBaseImpl*>(test_core.Impl()));
std::string prefix_str = os.str();
test_core.SetSkipGcVars(
{prefix_str + "_inner_var_0", prefix_str + "_inner_var_2"});
test_core.BetaRun({});
auto out_tensor =
test_core.local_scope() == nullptr
? scope.FindVar(prefix_str + "_inner_var_0")->Get<phi::DenseTensor>()
: test_core.local_scope()
->FindVar(prefix_str + "_inner_var_0")
->Get<phi::DenseTensor>();
auto grad_out_tensor =
test_core.local_scope() == nullptr
? scope.FindVar(prefix_str + "_inner_var_2")->Get<phi::DenseTensor>()
: test_core.local_scope()
->FindVar(prefix_str + "_inner_var_2")
->Get<phi::DenseTensor>();
ASSERT_NEAR(out_tensor.data<float>()[0], 0.76159, 1e-5);
ASSERT_NEAR(grad_out_tensor.data<float>()[0], 0.83995, 1e-5);
}
TEST(VJP, MeanBackwardTest) {
ir::IrContext* ctx = ir::IrContext::Instance();
ir::Program program((ctx));
paddle::dialect::APIBuilder::Instance().SetProgram(&program);
std::shared_ptr<ir::Builder> builder =
paddle::dialect::APIBuilder::Instance().GetBuilder();
paddle::dialect::FullOp op1 = builder->Build<paddle::dialect::FullOp>(
std::vector<int64_t>{2, 2}, 2.0, phi::DataType::FLOAT32, phi::CPUPlace());
paddle::dialect::MeanOp op2 =
builder->Build<paddle::dialect::MeanOp>(op1.out());
paddle::dialect::FullOp op3 = builder->Build<paddle::dialect::FullOp>(
std::vector<int64_t>{}, 1.0, phi::DataType::FLOAT32, phi::CPUPlace());
std::vector<std::vector<int>> stop_gradients{{0}};
std::vector<std::vector<ir::OpResult>> out_grads{{op3.out()}};
ir::OpInfo op2_info = ctx->GetRegisteredOpInfo("pd.mean");
auto mean_vjp_interface_impl =
op2_info.GetInterfaceImpl<paddle::dialect::VjpInterface>();
mean_vjp_interface_impl->vjp_(op2.operation(), out_grads, stop_gradients);
auto kernel_program = paddle::dialect::PdOpLowerToKernelPass(&program);
auto place = platform::CPUPlace();
Scope scope;
ProgramDesc prog_desc;
InterpreterCore test_core(place, {}, std::move(kernel_program), &scope);
std::stringstream os;
os << reinterpret_cast<NewIRInterpreter*>(
const_cast<InterpreterBaseImpl*>(test_core.Impl()));
std::string prefix_str = os.str();
test_core.SetSkipGcVars(
{prefix_str + "_inner_var_1", prefix_str + "_inner_var_3"});
test_core.BetaRun({});
auto out_tensor =
test_core.local_scope() == nullptr
? scope.FindVar(prefix_str + "_inner_var_1")->Get<phi::DenseTensor>()
: test_core.local_scope()
->FindVar(prefix_str + "_inner_var_1")
->Get<phi::DenseTensor>();
auto grad_out_tensor =
test_core.local_scope() == nullptr
? scope.FindVar(prefix_str + "_inner_var_3")->Get<phi::DenseTensor>()
: test_core.local_scope()
->FindVar(prefix_str + "_inner_var_3")
->Get<phi::DenseTensor>();
ASSERT_EQ(out_tensor.data<float>()[0], 2.0);
ASSERT_EQ(grad_out_tensor.data<float>()[0], 0.25);
ASSERT_EQ(grad_out_tensor.data<float>()[1], 0.25);
ASSERT_EQ(grad_out_tensor.data<float>()[2], 0.25);
ASSERT_EQ(grad_out_tensor.data<float>()[3], 0.25);
}
} // namespace framework
} // namespace paddle
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
from paddle import ir
from paddle.fluid.core import call_vjp, has_vjp
paddle.enable_static()
def get_ir_program():
main_program, start_program = (
paddle.static.Program(),
paddle.static.Program(),
)
with paddle.static.program_guard(main_program, start_program):
x = paddle.static.data('x', [4, 4], 'float32')
x.stop_gradient = False
paddle.tanh(x)
paddle.tensor.fill_constant(shape=[4, 4], dtype='float32', value=2.0)
newir_program = ir.translate_to_new_ir(main_program.desc)
return newir_program
class TestTanhVjp(unittest.TestCase):
def test_tanh_vjp1(self):
newir_program = get_ir_program()
tanh_op = newir_program.block().get_ops()[-2]
fill_constant_op = newir_program.block().get_ops()[-1]
out_grads = [[fill_constant_op.result(0)]]
stop_gradients = [[0]]
with paddle.ir.core.program_guard(newir_program):
grad_outs = call_vjp(tanh_op, out_grads, stop_gradients)
self.assertEqual(
grad_outs[0][0].get_defining_op().name(), "pd.tanh_grad"
)
self.assertEqual(
grad_outs[0][0]
.get_defining_op()
.operands()[0]
.source()
.get_defining_op()
.name(),
"pd.tanh",
)
self.assertEqual(
grad_outs[0][0]
.get_defining_op()
.operands()[1]
.source()
.get_defining_op()
.name(),
"pd.full",
)
self.assertEqual(len(newir_program.block().get_ops()), 4)
def test_tanh_vjp2(self):
newir_program = get_ir_program()
tanh_op = newir_program.block().get_ops()[-2]
fill_constant_op = newir_program.block().get_ops()[-1]
out_grads = [[fill_constant_op.result(0)]]
stop_gradients = [[1]]
with paddle.ir.core.program_guard(newir_program):
grad_outs = call_vjp(tanh_op, out_grads, stop_gradients)
self.assertEqual(grad_outs[0][0], None)
class TestMeanVjp(unittest.TestCase):
def test_mean_vjp1(self):
main_program, start_program = (
paddle.static.Program(),
paddle.static.Program(),
)
with paddle.static.program_guard(main_program, start_program):
x = paddle.static.data('x', [4, 4], 'float32')
x.stop_gradient = False
paddle.mean(x, axis=[0, 1])
paddle.tensor.fill_constant(shape=[1], dtype='float32', value=2.0)
newir_program = ir.translate_to_new_ir(main_program.desc)
fill_constant_op = newir_program.block().get_ops()[-1]
mean_op = newir_program.block().get_ops()[-2]
out_grads = [[fill_constant_op.result(0)]]
stop_gradients = [[0]]
with paddle.ir.core.program_guard(newir_program):
grad_outs = call_vjp(mean_op, out_grads, stop_gradients)
self.assertEqual(
grad_outs[0][0].get_defining_op().name(), "pd.mean_grad"
)
self.assertEqual(
grad_outs[0][0]
.get_defining_op()
.operands()[0]
.source()
.get_defining_op()
.name(),
"builtin.get_parameter",
)
self.assertEqual(
grad_outs[0][0]
.get_defining_op()
.operands()[1]
.source()
.get_defining_op()
.name(),
"pd.full",
)
self.assertEqual(len(newir_program.block().get_ops()), 4)
def test_mean_vjp2(self):
main_program, start_program = (
paddle.static.Program(),
paddle.static.Program(),
)
with paddle.static.program_guard(main_program, start_program):
x = paddle.static.data('x', [4, 4], 'float32')
x.stop_gradient = False
paddle.mean(x, axis=[0, 1])
paddle.tensor.fill_constant(shape=[1], dtype='float32', value=2.0)
newir_program = ir.translate_to_new_ir(main_program.desc)
fill_constant_op = newir_program.block().get_ops()[-1]
mean_op = newir_program.block().get_ops()[-2]
out_grads = [[fill_constant_op.result(0)]]
stop_gradients = [[1]]
with paddle.ir.core.program_guard(newir_program):
grad_outs = call_vjp(mean_op, out_grads, stop_gradients)
self.assertEqual(grad_outs[0][0], None)
class TesthasVjp(unittest.TestCase):
def test_has_vjp(self):
main_program, start_program = (
paddle.static.Program(),
paddle.static.Program(),
)
with paddle.static.program_guard(main_program, start_program):
x = paddle.static.data('x', [4, 4], 'float32')
x.stop_gradient = False
paddle.mean(x, axis=[0, 1])
paddle.tensor.fill_constant(shape=[1], dtype='float32', value=2.0)
newir_program = ir.translate_to_new_ir(main_program.desc)
fill_constant_op = newir_program.block().get_ops()[-1]
mean_op = newir_program.block().get_ops()[-2]
self.assertEqual(has_vjp(fill_constant_op), False)
self.assertEqual(has_vjp(mean_op), True)
if __name__ == "__main__":
unittest.main()