未验证 提交 661dbdbe 编写于 作者: H Huihuang Zheng 提交者: GitHub

Modify ShareTensorWithCinnBuffer by callback to save memory (#37493)

Modify ShareTensorWithCinnBuffer by callback to save memory
上级 8a4460f5
......@@ -209,6 +209,7 @@ std::unique_ptr<CinnCompiledObject> CinnCompiler::CompileGraph(
std::make_unique<GraphCompiler>(target, scope, cinn_graph);
GraphCompiler::CompileOptions options;
options.with_instantiate_variables = false;
options.with_buffer_handle_instruction_inserted = true;
auto compiled_res =
graph_compiler->Build(options, std::move(fetch_ids), stream);
auto compiled_obj = std::make_unique<CinnCompiledObject>();
......
......@@ -13,7 +13,10 @@
// limitations under the License.
#include "paddle/fluid/operators/cinn_launch_op.h"
#include <functional>
#include <vector>
#include "paddle/fluid/string/string_helper.h"
DECLARE_bool(cudnn_deterministic);
......@@ -108,33 +111,9 @@ std::unordered_set<std::string> CinnLaunchContext::GetInternalVariableNames() {
return all_parameters;
}
// Allocates the underlying float buffer of |paddle_tensor| on |place|, sized
// by the shape recorded for the corresponding variable in the CINN scope.
// - var_name: the Paddle-side name for external variables; when
//   is_internal_var is true it is already the CINN-side name.
// - is_internal_var: when false, the variable must be used by CINN and the
//   name is translated through paddle2cinn_varmap_.
// NOTE(review): this diff removes the function entirely — allocation now
// happens lazily through the external_malloc callback installed in
// ShareTensorWithCinnBuffer, which is how the commit saves memory.
void CinnLaunchContext::MutableTensorData(const std::string& var_name,
const platform::Place& place,
LoDTensor* paddle_tensor,
bool is_internal_var) {
auto cinn_name = var_name;
if (!is_internal_var) {
// External variables must be known to CINN before we can map their name.
PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
platform::errors::InvalidArgument(
"Paddle variable(%s) not used by cinn", var_name));
cinn_name = paddle2cinn_varmap_.at(var_name);
}
auto cinn_tensor = GetCinnTensor(cinn_name);
// TODO(CtfGo): support mutable corresponding c++ type after CINN ready
VLOG(4) << "Only support float in cinn_launch op now.";
// Eagerly allocates memory with the CINN-declared dimensions; only float is
// supported at this point (see TODO above).
paddle_tensor->mutable_data<float>(
framework::make_ddim(cinn_tensor->shape().data()), place);
}
void CinnLaunchContext::CheckTensorEquivalent(const std::string& paddle_name,
const LoDTensor& paddle_tensor,
const CinnTensor& cinn_tensor) {
PADDLE_ENFORCE_EQ(
paddle_tensor.IsInitialized(), true,
platform::errors::InvalidArgument(
"Tensor in variable(%s) is not initialized.", paddle_name));
// check dimension
auto cinn_dims = framework::make_ddim(cinn_tensor->shape().data());
PADDLE_ENFORCE_EQ(paddle_tensor.dims(), cinn_dims,
......@@ -147,27 +126,39 @@ void CinnLaunchContext::CheckTensorEquivalent(const std::string& paddle_name,
}
// Binds an input/output (external) Paddle variable to its CINN argument slot.
// The tensor is resized (not allocated) to the CINN-declared shape if it is
// not yet initialized, then registered via SetArgument with
// free_mem_callback=false, i.e. the buffer is NOT freed by CINN because the
// data must survive for the Paddle-side caller.
// NOTE(review): this span is diff residue — the pre-change body
// (CheckTensorEquivalent(..., GetCinnTensor(cinn_name)) and
// `return SetArgument(cinn_name, paddle_tensor);`) is interleaved with the
// post-change body below it; only one of the two `return SetArgument` paths
// exists in either real version of the file.
void CinnLaunchContext::AssignExternalVariable(const std::string& paddle_name,
const platform::Place& place,
LoDTensor* paddle_tensor) {
PADDLE_ENFORCE_EQ(IsVariableUsed(paddle_name), true,
platform::errors::InvalidArgument(
"Paddle variable(%s) not used by cinn", paddle_name));
const auto& cinn_name = paddle2cinn_varmap_.at(paddle_name);
// Old version: check against CINN tensor and register immediately.
CheckTensorEquivalent(paddle_name, *paddle_tensor, GetCinnTensor(cinn_name));
return SetArgument(cinn_name, paddle_tensor);
// New version: adopt the CINN shape lazily when the tensor is uninitialized.
CinnTensor cinn_tensor = GetCinnTensor(cinn_name);
if (!paddle_tensor->IsInitialized()) {
paddle_tensor->Resize(framework::make_ddim(cinn_tensor->shape().data()));
}
CheckTensorEquivalent(paddle_name, *paddle_tensor, cinn_tensor);
return SetArgument(cinn_name, place, /* free_mem_callback = */ false,
paddle_tensor);
}
// Binds an internal (temporary) variable, addressed directly by its CINN
// name, to its argument slot. Unlike external variables it passes
// free_mem_callback=true, so the external_free callback may clear the tensor
// once CINN is done with it — the memory-saving point of this commit.
// NOTE(review): like AssignExternalVariable above, this span interleaves the
// pre-change body (CheckTensorEquivalent + two-argument SetArgument) with the
// post-change body; only one return path exists in either real version.
void CinnLaunchContext::AssignInternalVariable(const std::string& cinn_name,
const platform::Place& place,
LoDTensor* paddle_tensor) {
PADDLE_ENFORCE_GT(cinn_variable_names_.count(cinn_name), 0,
platform::errors::InvalidArgument(
// NOTE(review): "socpe" is a typo for "scope" in the original message;
// left untouched here because error strings are runtime behavior.
"Variable(%s) not found in cinn socpe.", cinn_name));
// Old version: check and register immediately.
CheckTensorEquivalent(cinn_name, *paddle_tensor, GetCinnTensor(cinn_name));
return SetArgument(cinn_name, paddle_tensor);
// New version: resize uninitialized tensors to the CINN shape first.
CinnTensor cinn_tensor = GetCinnTensor(cinn_name);
if (!paddle_tensor->IsInitialized()) {
paddle_tensor->Resize(framework::make_ddim(cinn_tensor->shape().data()));
}
CheckTensorEquivalent(cinn_name, *paddle_tensor, cinn_tensor);
return SetArgument(cinn_name, place, /* free_mem_callback = */ true,
paddle_tensor);
}
// Wraps a Paddle LoDTensor in a cinn_buffer_t without copying data.
// Post-change behavior: instead of handing CINN a raw pointer up front,
// allocation is deferred to an external_malloc callback (mutable_data on
// first use) and release is optionally delegated to an external_free
// callback (tensor->clear() when free_mem_callback is true) — this is the
// "save memory" mechanism named in the commit title.
// NOTE(review): the old one-parameter signature line and the old eager
// `cinn_buffer->memory = ...` assignment are diff residue interleaved with
// the new code, and a raw `@@` hunk marker from the web page sits mid-body.
// NOTE(review): the callbacks capture `tensor` by raw pointer — correctness
// presumably relies on the tensor outliving the buffer (hold_buffers_ and the
// scope keep them alive); verify against the runtime's buffer lifetime.
// NOTE(review): `new std::function<...>` is an owning raw pointer; presumably
// cinn_buffer_t's destructor frees external_malloc/external_free — confirm,
// otherwise this leaks.
std::unique_ptr<cinn_buffer_t> CinnLaunchContext::ShareTensorWithCinnBuffer(
LoDTensor* tensor) {
const platform::Place& place, bool free_mem_callback, LoDTensor* tensor) {
// convert paddle dimensions array to cinn format
std::vector<cinn_dimension_t> cinn_dims(tensor->dims().size());
for (auto i = 0; i < tensor->dims().size(); ++i) {
......@@ -177,19 +168,42 @@ std::unique_ptr<cinn_buffer_t> CinnLaunchContext::ShareTensorWithCinnBuffer(
auto cinn_buffer = std::make_unique<cinn_buffer_t>();
// assign size and memory
cinn_buffer->resize(cinn_dims.data(), cinn_dims.size());
// Old version: eagerly expose the already-allocated float data.
cinn_buffer->memory = reinterpret_cast<uint8_t*>(tensor->data<float>());
// New version: allocate lazily on first CINN access.
cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
[place, tensor](void* ctx, cinn_buffer_t* buffer) {
buffer->memory =
reinterpret_cast<uint8_t*>(tensor->mutable_data<float>(place));
return 0;
});
if (free_mem_callback) {
// Internal/temporary variables: release Paddle memory when CINN is done.
cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
[tensor](void* ctx, cinn_buffer_t* buffer) {
tensor->clear();
return 0;
});
return cinn_buffer;
}
// External variables: keep the data alive for the Paddle-side consumer.
cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
[](void* ctx, cinn_buffer_t* buffer) {
// Do nothing
return 0;
});
return cinn_buffer;
}
// Registers (cinn_name -> cinn_buffer_t*) in name2argument_ and takes
// ownership of the buffer in hold_buffers_ so it lives as long as this
// context. place/free_mem_callback are forwarded to
// ShareTensorWithCinnBuffer to set up the lazy alloc/free callbacks.
// NOTE(review): the single-argument ShareTensorWithCinnBuffer call and the
// VLOG lines printing the tensor type are pre-change diff residue interleaved
// with the post-change call and the trimmed VLOG below them.
void CinnLaunchContext::SetArgument(const std::string& cinn_name,
const platform::Place& place,
bool free_mem_callback,
LoDTensor* paddle_tensor) {
// Old version of the call:
auto buffer = ShareTensorWithCinnBuffer(paddle_tensor);
// New version of the call:
auto buffer =
ShareTensorWithCinnBuffer(place, free_mem_callback, paddle_tensor);
name2argument_.emplace(cinn_name, buffer.get());
hold_buffers_.emplace_back(std::move(buffer));
VLOG(4) << "SetArgument-" << name2argument_.size() << ": "
<< "name(" << cinn_name << "), "
<< "type(" << framework::DataTypeToString(paddle_tensor->type())
<< "), dims(" << paddle_tensor->dims() << ").";
<< "name(" << cinn_name << "), dims(" << paddle_tensor->dims()
<< ").";
}
const std::map<std::string, cinn_pod_value_t>&
......
......@@ -49,16 +49,13 @@ class CinnLaunchContext {
// Return whether a Paddle variable used on compiled kernels
bool IsVariableUsed(const std::string& var_name);
// Allocate buffer to a Paddle tensor with assginment information from CINN
void MutableTensorData(const std::string& var_name,
const platform::Place& place, LoDTensor* paddle_tensor,
bool is_internal_var = false);
// Assign tensor buffer to input or output variables
void AssignExternalVariable(const std::string& var_name, LoDTensor* tensor);
void AssignExternalVariable(const std::string& var_name,
const platform::Place& place, LoDTensor* tensor);
// Assign tensor buffer to internal variables
void AssignInternalVariable(const std::string& var_name, LoDTensor* tensor);
void AssignInternalVariable(const std::string& var_name,
const platform::Place& place, LoDTensor* tensor);
// Extract internal variable names from CinnScope
// by excluding used input and output variables
......@@ -83,10 +80,12 @@ class CinnLaunchContext {
// Share the buffer of a Paddle tensor to CINN by delivering memory address
// to a cinn_buffer_t object
std::unique_ptr<cinn_buffer_t> ShareTensorWithCinnBuffer(LoDTensor* tensor);
std::unique_ptr<cinn_buffer_t> ShareTensorWithCinnBuffer(
const platform::Place& place, bool free_mem_callback, LoDTensor* tensor);
// Set an argument with (cinn name)->(paddle tensor) pair
void SetArgument(const std::string& cinn_name, LoDTensor* paddle_tensor);
void SetArgument(const std::string& cinn_name, const platform::Place& place,
bool free_mem_callback, LoDTensor* paddle_tensor);
private:
// a variable name map from paddle to cinn
......@@ -198,7 +197,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
}
launch_context->AssignExternalVariable(
var_name, scope.GetVar(var_name)->GetMutable<LoDTensor>());
var_name, place, scope.GetVar(var_name)->GetMutable<LoDTensor>());
}
// 3.2 Prepare output variables: all output variables should
......@@ -215,11 +214,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
"Output variable(%s) not used by cinn", var_name));
auto* tensor = scope.GetVar(var_name)->GetMutable<LoDTensor>();
if (!tensor->IsInitialized()) {
launch_context->MutableTensorData(var_name, place, tensor);
}
launch_context->AssignExternalVariable(
var_name, scope.GetVar(var_name)->GetMutable<LoDTensor>());
launch_context->AssignExternalVariable(var_name, place, tensor);
}
// 3.3 Prepare internal or temporary variables: Create a temporary
......@@ -232,8 +227,7 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
framework::Scope* temp_scope = scope.NewTmpScope().release();
for (const auto& var_name : internal_variable_names) {
auto* tensor = temp_scope->Var(var_name)->GetMutable<LoDTensor>();
launch_context->MutableTensorData(var_name, place, tensor, true);
launch_context->AssignInternalVariable(var_name, tensor);
launch_context->AssignInternalVariable(var_name, place, tensor);
}
// Step 4. Set CINN runtime FLAGS, such as FLAGS_cinn_cudnn_deterministic.
......
......@@ -222,30 +222,9 @@ TEST(CinnLaunchContextTest, TestGetInternalVariableNames) {
auto launch_context =
std::make_unique<CinnLaunchContext>(GetDefaultCompiledObj());
auto internal_variable_names = launch_context->GetInternalVariableNames();
ASSERT_EQ(internal_variable_names.size(), 1);
EXPECT_EQ(*internal_variable_names.begin(), "cinn_var2");
}
// Unit test for the (now removed) MutableTensorData: verifies that external
// and internal variables get allocated with the CINN-declared shapes and
// that an unknown name raises EnforceNotMet.
// NOTE(review): this whole TEST is deleted by the commit; the final asserts
// on internal_variable_names (size == 3, contains "cinn_var2") are diff
// residue belonging to the new version of the *previous* test,
// TestGetInternalVariableNames — `internal_variable_names` is not even
// declared in this TEST's body.
TEST(CinnLaunchContextTest, TestMutableTensorData) {
platform::CPUPlace place;
framework::Scope scope;
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
auto launch_context =
std::make_unique<CinnLaunchContext>(GetDefaultCompiledObj());
// mutable_data on external variable
ASSERT_NO_THROW(launch_context->MutableTensorData("var1", place, tensor1));
ASSERT_TRUE(tensor1->IsInitialized());
ASSERT_EQ(tensor1->dims(), framework::make_ddim({3, 4}));
ASSERT_THROW(launch_context->MutableTensorData("not_exist", place, tensor1),
paddle::platform::EnforceNotMet);
// mutable_data on internal variable
ASSERT_NO_THROW(
launch_context->MutableTensorData("cinn_var2", place, tensor2, true));
ASSERT_TRUE(tensor2->IsInitialized());
ASSERT_EQ(tensor2->dims(), framework::make_ddim({6, 7, 8}));
// Residue from the new TestGetInternalVariableNames (see NOTE above):
ASSERT_EQ(internal_variable_names.size(), 3);
EXPECT_NE(internal_variable_names.find("cinn_var2"),
internal_variable_names.end());
}
TEST(CinnLaunchContextTest, TestCheckTensorEquivalent) {
......@@ -255,12 +234,9 @@ TEST(CinnLaunchContextTest, TestCheckTensorEquivalent) {
framework::Scope scope;
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
// CheckTensorEquivalent: tensor is not initialized
ASSERT_THROW(launch_context->AssignExternalVariable("var1", tensor1),
paddle::platform::EnforceNotMet);
// CheckTensorEquivalent: tensor dimension not equivalent
tensor1->mutable_data<float>(framework::make_ddim({3, 5}), place);
ASSERT_THROW(launch_context->AssignExternalVariable("var1", tensor1),
ASSERT_THROW(launch_context->AssignExternalVariable("var1", place, tensor1),
paddle::platform::EnforceNotMet);
}
......@@ -272,11 +248,12 @@ TEST(CinnLaunchContextTest, TestAssignVariablePreCondition) {
auto* tensor4 = scope.Var("var4")->GetMutable<LoDTensor>();
// not used
ASSERT_THROW(launch_context->AssignExternalVariable("var4", tensor4),
ASSERT_THROW(launch_context->AssignExternalVariable("var4", place, tensor4),
paddle::platform::EnforceNotMet);
// not found
ASSERT_THROW(launch_context->AssignExternalVariable("cinn_var4", tensor4),
paddle::platform::EnforceNotMet);
ASSERT_THROW(
launch_context->AssignExternalVariable("cinn_var4", place, tensor4),
paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchContextTest, TestSetArgument) {
......@@ -286,22 +263,25 @@ TEST(CinnLaunchContextTest, TestSetArgument) {
platform::CPUPlace place;
framework::Scope scope;
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
tensor1->mutable_data<float>(framework::make_ddim({3, 4}), place);
auto* data1 = tensor1->data<float>();
float* data1 =
tensor1->mutable_data<float>(framework::make_ddim({3, 4}), place);
data1[0] = 9.99f;
data1[10] = 19.99f;
// assign external variable
ASSERT_NO_THROW(launch_context->AssignExternalVariable("var1", tensor1));
ASSERT_NO_THROW(
launch_context->AssignExternalVariable("var1", place, tensor1));
auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
tensor2->mutable_data<float>(framework::make_ddim({6, 7, 8}), place);
ASSERT_NO_THROW(launch_context->AssignInternalVariable("cinn_var2", tensor2));
ASSERT_NO_THROW(
launch_context->AssignInternalVariable("cinn_var2", place, tensor2));
// FinalizeArguments not missed check
ASSERT_THROW(launch_context->FinalizeArguments(),
paddle::platform::EnforceNotMet);
auto* tensor3 = scope.Var("var3")->GetMutable<LoDTensor>();
tensor3->mutable_data<float>(framework::make_ddim({10, 16}), place);
ASSERT_NO_THROW(launch_context->AssignExternalVariable("var3", tensor3));
ASSERT_NO_THROW(
launch_context->AssignExternalVariable("var3", place, tensor3));
auto name2argument = launch_context->FinalizeArguments();
ASSERT_EQ(name2argument.size(), 3);
......@@ -310,6 +290,8 @@ TEST(CinnLaunchContextTest, TestSetArgument) {
auto* cinn_buffer =
static_cast<cinn_buffer_t*>(name2argument.at("cinn_var1"));
ASSERT_EQ(cinn_buffer->memory, nullptr);
cinn_buffer->external_malloc->operator()(nullptr, cinn_buffer);
ASSERT_NE(cinn_buffer->memory, nullptr);
ASSERT_EQ(cinn_buffer->num_elements(), 12);
auto* shadow_data = reinterpret_cast<float*>(cinn_buffer->memory);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册