Unverified commit 22708640, authored by wangchaochaohu, committed by GitHub

Fusion group optimization for CUDA codegen (#23940)

Parent commit 94dfb7d7
...@@ -71,6 +71,8 @@ static bool HasInput(Node* n, std::string name) {
 std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
     SubGraph* subgraph) {
   std::unordered_map<std::string, int> var_ids = EncodeVarNodes(subgraph);
+  std::vector<Node*> intermediate_out_nodes =
+      subgraph->GetIntermediateOutVarNodes();
   std::vector<OperationExpression> expressions;
   for (auto* node : subgraph->SortedNodes()) {
     if (node && node->IsOp() && node->Op()) {
...@@ -81,7 +83,8 @@ std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
       // - X, Y in forward operations
       // - X, Y, Out, out@GRAD in backward operations
       std::vector<int> input_ids;
-      auto operation = OperationMap::Instance().Get(op->Type());
+      std::string op_name = op->Type();
+      auto operation = OperationMap::Instance().Get(op_name);
       std::vector<std::string> input_names = operation.input_names;
       for (auto& name : input_names) {
...@@ -105,6 +108,7 @@ std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
       std::vector<int> output_ids;
       std::vector<std::string> output_names =
           OperationMap::Instance().Get(op->Type()).output_names;
+      std::unordered_map<int, bool> intermediate_state;
       for (auto& name : output_names) {
         PADDLE_ENFORCE_NE(
...@@ -112,12 +116,21 @@ std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
             platform::errors::InvalidArgument(
                 "Output(%s) of operation %s is not set.", name, op->Type()));
         output_ids.push_back(var_ids[op->Output(name)[0]]);
+        bool enable_intermediate = false;
+        for (auto* n : intermediate_out_nodes) {
+          if (n->Name() == op->Output(name)[0]) {
+            enable_intermediate = true;
+            break;
+          }
+        }
+        intermediate_state[var_ids[op->Output(name)[0]]] = enable_intermediate;
       }
       std::string lhs_type = ExtractDataType(node->outputs);
       std::string rhs_type = ExtractDataType(node->inputs);
-      auto expression = OperationExpression(node->Name(), input_ids, output_ids,
-                                            rhs_type, lhs_type);
+      auto expression =
+          OperationExpression(node->Name(), input_ids, output_ids, rhs_type,
+                              lhs_type, intermediate_state);
       expression.SetAttr(attr);
       expressions.push_back(expression);
     }
...@@ -133,13 +146,17 @@ std::string CodeGenerator::Generate(
   // TODO(liuyiqun): Check whether all expressions are elementwise operations.
   std::set<int> input_ids = std::move(DistilInputIds(expressions));
   std::set<int> output_ids = std::move(DistilOutputIds(expressions));
+  std::set<int> intermediate_ids =
+      std::move(DistilIntermediateIds(expressions));
   std::unordered_map<int, std::string> dtypes =
       std::move(DistilDtypes(expressions));
   TemplateVariable template_var;
   template_var.Add("func_name", func_name);
-  template_var.Add("parameters", EmitParameters(input_ids, output_ids, dtypes));
+  template_var.Add("parameters", EmitParameters(input_ids, output_ids,
+                                                intermediate_ids, dtypes));
   template_var.Add("compute_body",
-                   EmitComputeBody(expressions, input_ids, output_ids, dtypes));
+                   EmitComputeBody(expressions, input_ids, output_ids,
+                                   intermediate_ids, dtypes));
   std::set<std::string> all_dtype;
   for (const auto& type : dtypes) {
...@@ -185,6 +202,19 @@ std::set<int> CodeGenerator::DistilOutputIds(
   return output_ids;
 }
+std::set<int> CodeGenerator::DistilIntermediateIds(
+    const std::vector<OperationExpression>& expressions) {
+  std::set<int> intermediate_ids;
+  // Use std::set to remove repeated ids and get an ordered list.
+  for (size_t i = 0; i < expressions.size(); i++) {
+    for (auto id : expressions[i].GetOutputIds()) {
+      auto intermediate_state = expressions[i].GetIntermediateState();
+      if (intermediate_state[id]) intermediate_ids.insert(id);
+    }
+  }
+  return intermediate_ids;
+}
 std::unordered_map<int, std::string> CodeGenerator::DistilDtypes(
     const std::vector<OperationExpression>& expressions) {
   std::unordered_map<int, std::string> dtypes;
...@@ -218,6 +248,7 @@ std::unordered_map<int, std::string> CodeGenerator::DistilDtypes(
 // Get the parameter list code from the expression information.
 std::string CodeGenerator::EmitParameters(
     const std::set<int>& input_ids, const std::set<int>& output_ids,
+    const std::set<int>& intermediate_ids,
     const std::unordered_map<int, std::string>& dtypes) const {
   std::stringstream ret;
   ret << "int N, ";
...@@ -226,25 +257,28 @@ std::string CodeGenerator::EmitParameters(
   // from the input list.
   for (auto id : input_ids) {
     if (output_ids.find(id) == output_ids.end()) {
-      ret << dtypes.at(id) << "* " << ArgName(id) << ", ";
+      ret << "const " << dtypes.at(id) << "* __restrict__ " << ArgName(id)
+          << ", ";
     }
   }
   size_t index = 0;
   for (auto id : output_ids) {
-    ret << dtypes.at(id) << "* " << ArgName(id);
-    if (index != output_ids.size() - 1) {
-      ret << ", ";
+    if (intermediate_ids.find(id) == intermediate_ids.end()) {
+      ret << dtypes.at(id) << "* " << ArgName(id);
+      if (index != output_ids.size() - 1) {
+        ret << ", ";
+      }
     }
     index++;
   }
   return ret.str();
 }
 std::string CodeGenerator::EmitComputeBody(
     const std::vector<OperationExpression>& expressions,
     const std::set<int>& input_ids, const std::set<int>& output_ids,
+    const std::set<int>& intermediate_ids,
     const std::unordered_map<int, std::string>& dtypes) const {
   std::ostringstream compute;
   std::unordered_set<int> used;
...@@ -258,14 +292,17 @@ std::string CodeGenerator::EmitComputeBody(
   for (auto id : input_ids) {
     if (output_ids.find(id) == output_ids.end() &&
         used.find(id) != used.end()) {
-      load << dtypes.at(id) << " " << TmpName(id) << " = " << VarName(id)
+      load << dtypes.at(id) << " " << TmpName(id) << " = "
+           << "__ldg(&" << VarName(id) << ")"
            << ";";
     }
   }
   // Store temporary variables to memory.
   std::ostringstream store;
   for (auto id : output_ids) {
-    store << VarName(id) << " = " << TmpName(id) << ";";
+    if (intermediate_ids.find(id) == intermediate_ids.end()) {
+      store << VarName(id) << " = " << TmpName(id) << ";";
+    }
   }
   return load.str() + compute.str() + store.str();
...@@ -285,32 +322,7 @@ std::unordered_map<std::string, int> CodeGenerator::EncodeVarNodes(
       var_ids[in->Name()] = id++;
     }
   }
-  // Numbering internal vars.
-  for (auto* node : subgraph->SortedNodes()) {
-    if (node && node->IsVar() && node->Var()) {
-      bool is_found = false;
-      for (auto* in : input_var_nodes) {
-        if (node == in) {
-          is_found = true;
-          break;
-        }
-      }
-      if (is_found) {
-        continue;
-      }
-      for (auto* out : output_var_nodes) {
-        if (node == out) {
-          is_found = true;
-          break;
-        }
-      }
-      PADDLE_ENFORCE_EQ(
-          is_found, true,
-          platform::errors::Unimplemented(
-              "Subgraph with internal var nodes (%s) is not supported yet.",
-              node->Name()));
-    }
-  }
   // Encoding output vars.
   for (auto* out : output_var_nodes) {
     VLOG(3) << "Encoding output names:" << out->Name() << ", id:" << id;
......
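To make the effect of the new parameters concrete, below is a hand-written sketch of the kind of kernel the generator is now expected to emit for a small subgraph computing out = (x + y) * z, where the sum is an intermediate output. The function name, argument ids, and loop shape are assumptions for illustration, not the generator's literal output; the point is that read-only inputs become const T* __restrict__ parameters loaded through __ldg, and the intermediate value never appears in the parameter list or the store section.

```cuda
// Illustrative sketch only (assumed names and ids); the real kernel text is
// produced by filling the CUDA template with EmitParameters/EmitComputeBody.
extern "C" __global__ void FusedElementwise0(int N,
                                             const float* __restrict__ arg0,
                                             const float* __restrict__ arg1,
                                             const float* __restrict__ arg2,
                                             float* arg4) {
  for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < N;
       idx += gridDim.x * blockDim.x) {
    // Load section: read-only cached loads of the input values.
    float tmp0 = __ldg(&arg0[idx]);
    float tmp1 = __ldg(&arg1[idx]);
    float tmp2 = __ldg(&arg2[idx]);
    // Compute section: the intermediate sum (id 3) stays in a register.
    float tmp3 = tmp0 + tmp1;
    float tmp4 = tmp3 * tmp2;
    // Store section: only non-intermediate outputs are written to memory.
    arg4[idx] = tmp4;
  }
}
```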
...@@ -43,17 +43,21 @@ class CodeGenerator {
       const std::vector<OperationExpression>& expressions);
   std::set<int> DistilOutputIds(
       const std::vector<OperationExpression>& expressions);
+  std::set<int> DistilIntermediateIds(
+      const std::vector<OperationExpression>& expressions);
   std::unordered_map<int, std::string> DistilDtypes(
       const std::vector<OperationExpression>& expressions);
   // Get the parameter list code from the expression information.
   std::string EmitParameters(
       const std::set<int>& input_ids, const std::set<int>& output_ids,
+      const std::set<int>& intermediate_ids,
       const std::unordered_map<int, std::string>& dtypes) const;
   std::string EmitComputeBody(
       const std::vector<OperationExpression>& expressions,
       const std::set<int>& input_ids, const std::set<int>& output_ids,
+      const std::set<int>& intermediate_ids,
       const std::unordered_map<int, std::string>& dtypes) const;
   // Encode all var nodes in the subgraph with a unique number.
......
...@@ -149,8 +149,6 @@ std::string OperationExpression::GetRHS(std::unordered_set<int>* used,
             "Expected %d-th input id > 0 for operation < %s "
             ">. Received %d.",
             index, op_type_, input_ids_[index]));
-      // TODO(wangchaochaohu): Here fp16 convert to float to do comupte, we
-      // need to add general fp16 compute later.
       var_name = TmpName(input_ids_[index]);
       rhs.replace(pos, length + 3, var_name);
       used->insert(input_ids_[index]);
......
...@@ -46,14 +46,19 @@ class OperationExpression {
  public:
   explicit OperationExpression(std::string op_type, std::vector<int> input_ids,
                                std::vector<int> output_ids,
-                               std::string rhs_type, std::string lhs_type)
+                               std::string rhs_type, std::string lhs_type,
+                               std::unordered_map<int, bool> intermediate_state)
       : op_type_(op_type),
         input_ids_(input_ids),
         output_ids_(output_ids),
         rhs_type_(rhs_type),
-        lhs_type_(lhs_type) {}
+        lhs_type_(lhs_type),
+        intermediate_state_(intermediate_state) {}
   std::string GetOpType() const { return op_type_; }
+  std::unordered_map<int, bool> GetIntermediateState() const {
+    return intermediate_state_;
+  }
   std::vector<int> GetInputIds() const { return input_ids_; }
   std::vector<int> GetOutputIds() const { return output_ids_; }
   std::string GetRHSType() const { return rhs_type_; }
...@@ -78,6 +83,7 @@ class OperationExpression {
   AttributeMap attr_;
   std::string rhs_type_;
   std::string lhs_type_;
+  std::unordered_map<int, bool> intermediate_state_;
 };
 class TemplateVariable {
......
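A minimal sketch of how the extended constructor is fed from ConvertToExpressions above; the op type, ids, and dtypes are made-up values. Output id 2 is flagged as an intermediate output, so DistilIntermediateIds will collect it and the emitted kernel will neither declare it as a parameter nor store it.

```cpp
// Hypothetical ids for one elementwise_add whose output is only consumed
// inside the fused subgraph.
std::vector<int> input_ids = {0, 1};
std::vector<int> output_ids = {2};
std::unordered_map<int, bool> intermediate_state = {{2, true}};

OperationExpression expr("elementwise_add", input_ids, output_ids,
                         /*rhs_type=*/"float", /*lhs_type=*/"float",
                         intermediate_state);
// expr.GetIntermediateState()[2] is true, so id 2 lands in intermediate_ids.
```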
...@@ -269,6 +269,22 @@ __CUDA_FP16_DECL__ __half hsqrt(const __half a) {
   __APPROX_FCAST(sqrt);
 }
+#if defined(__cplusplus) && (__CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__))
+#if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)
+#define __LDG_PTR "l"
+#else
+#define __LDG_PTR "r"
+#endif /*(defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)*/
+__CUDA_FP16_DECL__ __half __ldg(const __half *ptr)
+{
+  __half ret;
+  asm ("ld.global.nc.b16 %0, [%1];" : "=h"(__HALF_TO_US(ret)) : __LDG_PTR(ptr));
+  return ret;
+}
+#undef __LDG_PTR
+#endif /*defined(__cplusplus) && (__CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__))*/
 __device__ inline __half Exp(const __half x) { return hexp(x); }
 __device__ inline __half Log(const __half x) { return hlog(x); }
 __device__ inline __half Sqrt(const __half x) { return hsqrt(x); }
......
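The __ldg overload added above mirrors the read-only data cache load intrinsic that CUDA already provides for other scalar types on newer architectures; it is needed because the generated kernels are compiled as free-standing device code and now read their inputs through __ldg, including fp16 inputs. A minimal host-compiled sketch of the same idea, with assumed kernel and variable names:

```cuda
#include <cuda_fp16.h>

// Scale an fp16 array, loading inputs through the read-only data cache
// (assumes an architecture where __ldg is available, as guarded above).
__global__ void ScaleHalf(const __half* __restrict__ x, __half* y,
                          float alpha, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    // __ldg issues an ld.global.nc load, like the inline PTX added above.
    float v = __half2float(__ldg(&x[i]));
    y[i] = __float2half(v * alpha);
  }
}
```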
...@@ -48,13 +48,18 @@ int FusionGroupPass::DetectFusionGroup(Graph* graph, int type) const {
   int num_subgraphs = 0;
   size_t min_subgraph_size = 2;
-  bool save_intermediate_out = true;
+  bool save_intermediate_out = false;
   for (auto& vec : subgraphs) {
     fusion_group::SubGraph subgraph(
         type, "", save_intermediate_out,
         std::unordered_set<Node*>(vec.begin(), vec.end()));
     VLOG(3) << "subgraph: {\n" << DebugString(subgraph.SortedNodes()) << "}\n";
+    // In an elementwise fused kernel, execution is memory-bound, so we drop
+    // intermediate output ids to use less memory and less time.
+    if (subgraph.RemoveIntermediateOut()) {
+      subgraph.DetectIntermediateOutWithGraph(graph);
+    }
     if (subgraph.IsValid(min_subgraph_size)) {
       subgraph.SetFuncName("FusedElementwise" + std::to_string(index++));
       if (GenerateCode(&subgraph)) {
...@@ -106,6 +111,8 @@ void FusionGroupPass::InsertFusionGroupOp(
       subgraph->GetInputVarNodes();
   const std::vector<Node*>& output_vars_of_subgraph =
       subgraph->GetOutputVarNodes();
+  const std::vector<Node*> intermediate_vars_of_subgraph =
+      subgraph->GetIntermediateOutVarNodes();
   std::unordered_set<Node*> external_nodes;
   OpDesc op_desc;
...@@ -122,9 +129,18 @@ void FusionGroupPass::InsertFusionGroupOp(
   std::vector<std::string> output_names;
   std::vector<std::string> outs_data_types;
+  std::vector<Node*> output_var_without_intermediate;
   for (auto* n : output_vars_of_subgraph) {
-    output_names.push_back(n->Name());
-    outs_data_types.push_back(DataTypeToString(n->Var()->GetDataType()));
+    auto it_input =
+        find(input_vars_of_subgraph.begin(), input_vars_of_subgraph.end(), n);
+    auto it_intermediate = find(intermediate_vars_of_subgraph.begin(),
+                                intermediate_vars_of_subgraph.end(), n);
+    if (it_intermediate == intermediate_vars_of_subgraph.end() &&
+        it_input == input_vars_of_subgraph.end()) {
+      output_names.push_back(n->Name());
+      outs_data_types.push_back(DataTypeToString(n->Var()->GetDataType()));
+      output_var_without_intermediate.push_back(n);
+    }
     external_nodes.insert(n);
   }
...@@ -141,7 +157,7 @@ void FusionGroupPass::InsertFusionGroupOp(
     IR_NODE_LINK_TO(in, fusion_group_node);
   }
-  for (auto* out : output_vars_of_subgraph) {
+  for (auto* out : output_var_without_intermediate) {
     IR_NODE_LINK_TO(fusion_group_node, out);
   }
......
...@@ -54,6 +54,7 @@ void OperationMap::Insert(int type, int num_operands, std::string op_type,
   std::string grad_op_type = op_type + "_grad";
   // grad_inputs = inputs + outputs + grad of outputs
   std::vector<std::string> grad_input_names = input_names;
   for (auto name : output_names) {
     grad_input_names.push_back(name);
   }
......
...@@ -19,6 +19,8 @@ limitations under the License. */
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/ir/fusion_group/operation.h"
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/ir/subgraph_detector.h"
...@@ -64,6 +66,7 @@ class SubGraph {
   }
   int GetType() const { return type_; }
+  bool RemoveIntermediateOut() { return !save_intermediate_out_; }
   void SetFuncName(std::string func_name) { func_name_ = func_name; }
   std::string GetFuncName() const { return func_name_; }
...@@ -133,30 +136,45 @@ class SubGraph {
         }
       }
     }
-    if (save_intermediate_out_) {
-      return output_vars_all;
-    }
-    std::vector<Node*> output_vars_outside;
-    for (auto* n : output_vars_all) {
-      // If one of the var_node's outputs is the input of some operator
-      // outside the subgraph, it is considered the output var node of the
-      // subgraph.
-      bool is_found = true;
-      if (n->outputs.size() == 0U) {
-        is_found = false;
-      }
-      for (auto* out : n->outputs) {
-        if (!Has(out)) {
-          is_found = false;
-        }
-      }
-      if (!is_found) {
-        output_vars_outside.push_back(n);
-      }
-    }
-    return output_vars_outside;
-  }
+    return output_vars_all;
+  }
+
+  std::vector<Node*> GetIntermediateOutVarNodes() {
+    return intermediate_out_nodes_;
+  }
+
+  void DetectIntermediateOutWithGraph(Graph* graph) {
+    auto graph_nodes = graph->Nodes();
+    for (auto* n : SortedNodes()) {
+      bool enable_remove = true;
+      if (n && n->IsVar() && n->Var()) {
+        bool leaf_graph = true;
+        for (auto* node : graph_nodes) {
+          if (node->IsOp()) {
+            auto inputs = node->inputs;
+            for (auto* in : inputs) {
+              if (in == n) {
+                if (!Has(node)) enable_remove = false;
+                leaf_graph = false;
+              }
+            }
+          }
+          if (!enable_remove) {
+            break;
+          }
+        }
+        if (leaf_graph) enable_remove = false;
+      } else {
+        enable_remove = false;
+      }
+      if (enable_remove) {
+        intermediate_out_nodes_.push_back(n);
+      }
+    }
+  }
  private:
...@@ -218,6 +236,7 @@ class SubGraph {
   bool save_intermediate_out_{true};
   std::unordered_set<Node*> nodes_set_;
+  std::vector<Node*> intermediate_out_nodes_{};
   bool is_sorted_{false};
   std::vector<Node*> sorted_nodes_;
 };
......
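The rule implemented by DetectIntermediateOutWithGraph can be read as: an output var node may be dropped from the kernel's outputs only if every op in the whole graph that consumes it also belongs to the subgraph, and it has at least one consumer (a leaf var of the whole graph must still be written). Below is a simplified restatement under those assumptions, using the same Node helpers; the function name is illustrative, not part of this patch.

```cpp
// Sketch: true when `var` can stay in registers instead of being stored.
bool IsRemovableIntermediate(Node* var,
                             const std::unordered_set<Node*>& subgraph_nodes,
                             const std::unordered_set<Node*>& graph_nodes) {
  if (!(var && var->IsVar() && var->Var())) return false;
  bool has_consumer = false;
  for (Node* node : graph_nodes) {
    if (!node->IsOp()) continue;
    for (Node* in : node->inputs) {
      if (in == var) {
        has_consumer = true;
        if (subgraph_nodes.count(node) == 0) return false;  // used outside
      }
    }
  }
  return has_consumer;  // a var with no consumers is a graph output; keep it
}
```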