diff --git a/paddle/fluid/framework/new_executor/data_transfer.cc b/paddle/fluid/framework/new_executor/data_transfer.cc
index 51cf78d1cdf27250e4cf83cbba54895f7dfeab07..581701c1e119c876464fd62c25ab2815c20cf1e9 100644
--- a/paddle/fluid/framework/new_executor/data_transfer.cc
+++ b/paddle/fluid/framework/new_executor/data_transfer.cc
@@ -149,7 +149,8 @@ std::shared_ptr<OperatorBase> TransferLayout(const std::string& var_name,
   // 2. Construct VariableNameMap
   VariableNameMap in_name_map = {{"X", {var_name}}};
   VariableNameMap out_name_map = {{"Out", {*new_var_name}}};
-  AttributeMap attr_map = {{"dst_layout", static_cast<int>(out_layout)}};
+  AttributeMap attr_map = {{"src_layout", static_cast<int>(in_layout)},
+                           {"dst_layout", static_cast<int>(out_layout)}};
 
   // 3. Create transfer_layout_op
   std::string op_type("transfer_layout");
@@ -157,8 +158,9 @@ std::shared_ptr<OperatorBase> TransferLayout(const std::string& var_name,
   auto op = std::shared_ptr<OperatorBase>(
       op_info.Creator()(op_type, in_name_map, out_name_map, attr_map));
 
-  VLOG(3) << string::Sprintf("Insert %s(%s) with %s -> %s(%s).", op_type,
-                             var_name, in_layout, *new_var_name, out_layout);
+  VLOG(3) << string::Sprintf("Insert %s for variable %s(%s) -> %s(%s).",
+                             op_type, var_name, in_layout, *new_var_name,
+                             out_layout);
 
   return op;
 }
@@ -242,6 +244,7 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
 void ApplyDataTransform(const OpKernelType& expected_kernel_key,
                         const platform::Place& place,
                         VariableValueMap* ins_map_temp,
+                        VariableValueMap* outs_map_temp,
                         VariableScope* var_scope, OpFuncNode* op_func_node,
                         std::vector<OpFuncNode>* new_op_func_nodes,
                         bool use_local_scope) {
@@ -251,6 +254,7 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
                           "op_base in apply_data_transform."));
 
   VariableNameMap new_ins(op_base->Inputs());
+  VariableNameMap new_outs(op_base->Outputs());
   // record the no need transform variable index.
   std::unordered_set<int> no_data_transform_index;
@@ -258,7 +262,7 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
   for (auto& var_name_item : *ins_map_temp) {
     for (size_t i = 0; i < var_name_item.second.size(); ++i) {
       auto var = var_name_item.second[i];
-      auto& var_name = new_ins[var_name_item.first].at(i);
+      auto var_name = new_ins[var_name_item.first].at(i);
       const Tensor* tensor_in;
       if (var->IsType<LoDTensor>() || var->IsType<phi::SelectedRows>()) {
         tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
@@ -287,6 +291,28 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
             var_scope->VarId(new_var_name);
         var_name_item.second[i] = var_scope->Var(new_var_name);
         new_ins[var_name_item.first][i] = new_var_name;
+        for (auto& pair : new_outs) {
+          for (size_t j = 0; j < pair.second.size(); ++j) {
+            VLOG(4) << pair.second[j] << " " << var_name;
+            if (pair.second[j] == var_name) {
+              VLOG(4) << "Found inplace between input(" << var_name_item.first
+                      << ") and output(" << pair.first
+                      << "), the variable name is " << var_name;
+              (*outs_map_temp)[pair.first][j] = var_scope->Var(new_var_name);
+              new_outs[pair.first][j] = new_var_name;
+              op_func_node
+                  ->inplace_back_map[var_scope->GetIdByName(new_var_name)] =
+                  var_scope->GetIdByName(var_name);
+              op_func_node->output_index[pair.first][j] =
+                  var_scope->VarId(new_var_name);
+              // NOTE(zhiqiu): an inplace op with a data transfer also changes
+              // its original output afterwards, so record the original
+              // output as well
+              op_func_node->output_index[pair.first].push_back(
+                  var_scope->VarId(var_name));
+            }
+          }
+        }
         // NOTE(Aurelius84): avoid deepcopy twice if we already insert data
         // transfer op.
         if (op_base->Type() == "fetch_v2") {
@@ -306,7 +332,7 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
     // with instruction. (hot fix, it is not good design here)
     op_func_node->operator_base_ =
         std::shared_ptr<OperatorBase>(framework::OpRegistry::CreateOp(
-            op_base->Type(), new_ins, op_base->Outputs(), op_base->Attrs()));
+            op_base->Type(), new_ins, new_outs, op_base->Attrs()));
   op_func_node->no_data_transform_index = std::move(no_data_transform_index);
 }
diff --git a/paddle/fluid/framework/new_executor/data_transfer.h b/paddle/fluid/framework/new_executor/data_transfer.h
index 7744e955c857874f7a01190092cb299f93175e43..1c48018927934cf5987f9bee232ecf9d9ba38bd1 100644
--- a/paddle/fluid/framework/new_executor/data_transfer.h
+++ b/paddle/fluid/framework/new_executor/data_transfer.h
@@ -54,6 +54,7 @@ class DataTranferHelper {
 void ApplyDataTransform(const OpKernelType& expected_kernel_key,
                         const platform::Place& place,
                         VariableValueMap* ins_map_temp,
+                        VariableValueMap* outs_map_temp,
                         VariableScope* var_scope, OpFuncNode* op_func_node,
                         std::vector<OpFuncNode>* op_func_nodes,
                         bool use_local_scope = true);
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index b36ff519ce18722160f1e98a4d64167f4945ae22..d956f23242d4b52c9def4f9a5daa244cc6cb9523 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -457,6 +457,21 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
 
   VLOG(4) << "End run " << place << " " << op->DebugStringEx(global_scope_);
 
+  if (!instr_node.InplaceBackMap().empty()) {
+    auto& m = instr_node.InplaceBackMap();
+    // NOTE(zhiqiu): same logic as TransferInplaceVarsBack() in operator.cc
+    for (auto& p : m) {
+      auto* transformed_tensor = GetMutableLoDTensorOrSelectedRowsValueFromVar(
+          global_scope_->Var(p.first));
+      auto* original_tensor = GetMutableLoDTensorOrSelectedRowsValueFromVar(
+          global_scope_->Var(p.second));
+      original_tensor->ShareDataWith(*transformed_tensor);
+      VLOG(4) << "Transfer inplace variable back from "
+              << global_scope_->GetNameById(p.first) << " to "
+              << global_scope_->GetNameById(p.second);
+    }
+  }
+
   /*For profiling/benchmark only*/
   if (FLAGS_benchmark) {
     instr_node.DeviceContext().Wait();
diff --git a/paddle/fluid/framework/new_executor/interpretercore_util.cc b/paddle/fluid/framework/new_executor/interpretercore_util.cc
index a045d6c7f4a65fdda83578f319b75788d0f68f95..b89683dc4399167e3c702dadb1bc9e25f31c0677 100644
--- a/paddle/fluid/framework/new_executor/interpretercore_util.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore_util.cc
@@ -138,7 +138,9 @@ get_unused_vars(const BlockDesc& block,
     size_t op_idx = name_op_idx_pair.second;
 
     result[ops[op_idx].get()].emplace_back(name);
+    VLOG(4) << ops[op_idx].get()->Type() << " " << name;
   }
+  VLOG(4) << "gc map size:" << result.size();
   return result;
 }
 
@@ -311,8 +313,8 @@ void build_op_func_list(const platform::Place& place,
   operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
       main_program, block.ID(), ops_unique);
 
-  std::vector<std::shared_ptr<OperatorBase>>
-      ops;  // its elements will be moved to vec_func_list
+  // its elements will be moved to vec_func_list
+  std::vector<std::shared_ptr<OperatorBase>> ops;
   for (auto& op_unique : ops_unique) {
     ops.emplace_back(std::move(op_unique));
   }
@@ -348,34 +350,28 @@ void build_op_func_list(const platform::Place& place,
     op_func_node.operator_base_ = ops[i];
     op_func_node.input_index = ins_name2id;
     op_func_node.output_index = outs_name2id;
+    VLOG(4)
+        << "Start run " << place << " " << op->DebugStringEx(local_scope);
 
-    if (dynamic_cast<const framework::OperatorWithKernel*>(op) == nullptr) {
+    if (dynamic_cast<framework::OperatorWithKernel*>(op) == nullptr) {
+      // op is not an OperatorWithKernel, so directly run OperatorBase::Run()
       deal_operator_base(place, var_scope, ops[i], &op_func_node, local_scope);
+      VLOG(4) << "End run " << place << " "
+              << op_func_node.operator_base_->DebugStringEx(local_scope);
     } else {
-      auto op_with_kernel =
-          static_cast<const framework::OperatorWithKernel*>(op);
+      auto op_with_kernel = const_cast<framework::OperatorWithKernel*>(
+          static_cast<const framework::OperatorWithKernel*>(op));
       // construct RuntimeContext and analysis KernelType
       RuntimeContext runtime_context({}, {});
       runtime_context.inputs.swap(ins_map);
       runtime_context.outputs.swap(outs_map);
-      // see OperatorWithKernel::RunImpl in operator.cc for why
-      if (!(op->HasAttr(kAllKernelsMustComputeRuntimeShape) &&
-            op->Attr<bool>(kAllKernelsMustComputeRuntimeShape))) {
-        InterpretercoreInferShapeContext infer_shape_ctx(*op, runtime_context);
-        // TODO(Aurelius84): In case of control flow ops, they are NOT
-        // inheritted
-        // from OperatorWithKernel.
-        op_with_kernel->Info().infer_shape_(&infer_shape_ctx);
-      }
-
       platform::DeviceContextPool& pool =
           platform::DeviceContextPool::Instance();
       auto* dev_ctx = pool.Get(place);
       Scope scope;
 
       auto expected_kernel_key = op_with_kernel->GetExpectedKernelType(
           ExecutionContext(*op, scope, *dev_ctx, runtime_context));
+      op_with_kernel->ResetKernelType(new OpKernelType(expected_kernel_key));
 
       // change device by the device_guard()
       apply_device_guard(op, place, &expected_kernel_key);
@@ -383,13 +379,16 @@ void build_op_func_list(const platform::Place& place,
 
       // step 3. apply data transforms and insert data transfer ops
       VariableValueMap& ins_map_temp = runtime_context.inputs;
+      VariableValueMap& outs_map_temp = runtime_context.outputs;
 
       // NOTE(zhiqiu): op_func_node->operator_base_ maybe changed in
       // ApplyDataTransform
-      ApplyDataTransform(expected_kernel_key, place, &ins_map_temp, var_scope,
-                         &op_func_node, vec_func_list, use_local_scope);
-      op_with_kernel = static_cast<const framework::OperatorWithKernel*>(
-          op_func_node.operator_base_.get());
+      ApplyDataTransform(expected_kernel_key, place, &ins_map_temp,
+                         &outs_map_temp, var_scope, &op_func_node,
+                         vec_func_list, use_local_scope);
+      op_with_kernel = const_cast<framework::OperatorWithKernel*>(
+          static_cast<const framework::OperatorWithKernel*>(
+              op_func_node.operator_base_.get()));
 
       // step 4. Run op kernel
       VLOG(3) << op_with_kernel->Type()
@@ -412,6 +411,16 @@ void build_op_func_list(const platform::Place& place,
       auto exec_ctx =
           ExecutionContext(*op_with_kernel, scope, *dev_ctx, runtime_context);
 
+      // see OperatorWithKernel::RunImpl in operator.cc for why
+      if (!(op->HasAttr(kAllKernelsMustComputeRuntimeShape) &&
+            op->Attr<bool>(kAllKernelsMustComputeRuntimeShape))) {
+        InterpretercoreInferShapeContext infer_shape_ctx(*op, runtime_context);
+        // TODO(Aurelius84): In case of control flow ops, they are NOT
+        // inherited from OperatorWithKernel.
+        op_with_kernel->Info().infer_shape_(&infer_shape_ctx);
+      }
+
       auto run_phi_kernel = false;
       if (phi::KernelFactory::Instance().HasCompatiblePhiKernel(
               op_with_kernel->Type())) {
@@ -476,9 +485,28 @@ void build_op_func_list(const platform::Place& place,
             op_func_node, place, outputs_names, &runtime_context.outputs,
             var_scope, vec_func_list, local_scope);
       }
+      if (!op_func_node.inplace_back_map.empty()) {
+        auto& m = op_func_node.inplace_back_map;
+        // NOTE(zhiqiu): same logic as TransferInplaceVarsBack() in operator.cc
+        for (auto& p : m) {
+          auto* transformed_tensor =
+              GetMutableLoDTensorOrSelectedRowsValueFromVar(
+                  var_scope->Var(p.first));
+          auto* original_tensor = GetMutableLoDTensorOrSelectedRowsValueFromVar(
+              var_scope->Var(p.second));
+          original_tensor->ShareDataWith(*transformed_tensor);
+          VLOG(4) << "Transfer inplace variable back from "
+                  << var_scope->GetNameById(p.first) << " to "
+                  << var_scope->GetNameById(p.second);
+        }
+      }
     }
+
+    VLOG(4) << "End run " << place << " "
+            << op_func_node.operator_base_->DebugStringEx(local_scope);
+
     vec_func_list->emplace_back(op_func_node);
+
+    // gc---------------------------------------------------------------------------
     auto iter = unused_var_map.find(op);
     if (iter == unused_var_map.end()) {
@@ -514,10 +542,7 @@ void build_op_func_list(const platform::Place& place,
                 framework::ToTypeName(var->Type()), var_name));
       }
     }
-    delete garbages;  // free mem
-
-    VLOG(3) << "run " << op->Type() << " done.";
   }
 }
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc
index 35bac4393170331486298a29f1b6be26065ad864..ccdd9dc9d50ced8d1fb0ec57b24ee878637dd5a4 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.cc
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc
@@ -692,6 +692,10 @@ phi::Kernel* Instruction::PhiKernel() const { return op_func_node_.pt_kernel_; }
 
 OpFuncType Instruction::KernelType() const { return op_func_node_.type_; }
 
+const std::map<int, int>& Instruction::InplaceBackMap() const {
+  return op_func_node_.inplace_back_map;
+}
+
 OperatorBase* Instruction::OpBase() const {
   auto op_base = op_func_node_.operator_base_;
   PADDLE_ENFORCE_NOT_NULL(op_base, platform::errors::PreconditionNotMet(
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
index dc34bd2c69411837b6130b87dba1753687cf82f8..5704fa414bbb2b195c66a7d85e0cd587403e04fc 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -297,6 +297,8 @@ struct OpFuncNode {
   std::map<std::string, std::vector<int>> output_index;
   std::unordered_set<int> no_data_transform_index;
 
+  std::map<int, int> inplace_back_map;
+
   OpKernelComputeFunc kernel_func_;
   platform::DeviceContext* dev_ctx_;  // not owned
 
@@ -325,6 +327,8 @@ class Instruction {
 
   OpFuncType KernelType() const;
 
+  const std::map<int, int>& InplaceBackMap() const;
+
   OperatorBase* OpBase() const;
 
   NextInstruction& NextInstructions();
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 4048995a44c8e745629116471a36b8d50f9a8551..71fc059728956b6178572a0dd8dbae85327c34fd 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -664,6 +664,10 @@ class OperatorWithKernel : public OperatorBase {
 
   const OpKernelType* kernel_type() const { return kernel_type_.get(); }
 
+  void ResetKernelType(OpKernelType* kernel_type) {
+    kernel_type_.reset(kernel_type);
+  }
+
  private:
   void RunImpl(const Scope& scope, const platform::Place& place) const final;
 
   void RunImpl(const Scope& scope, const platform::Place& place,
diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc
index 5194c8772e47bca5ec728079b4b2dce883e39c22..36a0d53e052453f97a4d510cad5587614c9796c7 100644
--- a/paddle/fluid/operators/batch_norm_op.cc
+++ b/paddle/fluid/operators/batch_norm_op.cc
@@ -94,7 +94,8 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const {
           "must smaller than or equal to 5. But received: the shape of input X "
           "= [%s], the dimension of input X = [%d]",
           x_dims, x_dims.size()));
-
+  VLOG(4) << ctx->IsRunMKLDNNKernel();
+  VLOG(4) << data_layout;
   const int64_t C = ((ctx->IsRunMKLDNNKernel() == true) ||
                              (data_layout == DataLayout::kNCHW)
                          ? x_dims[1]
@@ -136,6 +137,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const {
                           C, bias_dim[0]));
   }
   ctx->SetOutputDim("Y", x_dims);
+  VLOG(4) << x_dims;
   ctx->SetOutputDim("MeanOut", {C});
   ctx->SetOutputDim("VarianceOut", {C});
   ctx->SetOutputDim("SavedMean", {C});
diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
index 41dc774c1111955cde4bd7ad8d68b46469edf1cd..900d3e54c797184697daede09d674ffdef7d96bd 100644
--- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
@@ -203,14 +203,12 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto *y = ctx.Output<Tensor>("Y");
     auto *batch_mean = ctx.Output<Tensor>("SavedMean");
     auto *batch_variance = ctx.Output<Tensor>("SavedVariance");
-
     BatchNormMKLDNNHandler<T> handler(ctx, mkldnn_engine, x, global_stats,
                                       test_mode);
 
     auto src_memory = handler.AcquireSrcMemory(x);
     auto scaleshift_memory = handler.AcquireScaleShiftMemory(scale, shift);
     auto dst_memory = handler.AcquireDstMemory(y);
-
     auto batch_norm_p = handler.AcquireForwardPrimitive();
 
     std::shared_ptr<dnnl::memory> mean_memory;
@@ -300,7 +298,6 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     auto diff_src_memory = handler.AcquireDiffSrcMemory(diff_x);
     auto diff_scaleshift_memory =
         handler.AcquireDiffScaleShiftMemory(diff_scaleshift_data.data());
-
     // finally create batch_norm backward primitive
     auto batch_norm_bwd_p = handler.AcquireBackwardPrimitive();
diff --git a/paddle/fluid/operators/transfer_layout_op.cc b/paddle/fluid/operators/transfer_layout_op.cc
index bf3a985923f87db35702bb443ab1651a4db86001..96946ee15f41a5d70484562ec2cb490e0ed681c0 100644
--- a/paddle/fluid/operators/transfer_layout_op.cc
+++ b/paddle/fluid/operators/transfer_layout_op.cc
@@ -16,6 +16,8 @@
 
 #include <string>
 
+#include "paddle/fluid/framework/op_version_registry.h"
+
 namespace paddle {
 namespace framework {
 class OpDesc;
@@ -95,8 +97,9 @@ class TransferLayoutKernel {
     auto *x = ctx.InputVar("X");
     auto *out = ctx.OutputVar("Out");
     auto &dev_ctx = ctx.device_context();
+    auto src_layout = ctx.Attr<int>("src_layout");
     auto dst_layout = ctx.Attr<int>("dst_layout");
-    TransferLayoutFunctor(x, out, dev_ctx, dst_layout)();
+    TransferLayoutFunctor(x, out, dev_ctx, src_layout, dst_layout)();
   }
 };
 
@@ -105,6 +108,14 @@ class TransferLayoutOpProtoMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     AddInput("X", "(LoDTensor) The input Tensor");
     AddOutput("Out", "(LoDTensor) The Output Tensor with desired layout");
+    // NOTE(zhiqiu): in most cases, the src_layout is not needed, as the op
+    // can use the layout of input X.
+    // However, in some mkldnn kernels, the src layout computed by
+    // GetKernelTypeForVar differs from the layout of tensor X.
+    AddAttr<int>("src_layout",
+                 "kAnyLayout = 0, kNHWC = 1, kNCHW = 2, kMKLDNN = 3, default "
+                 "-1 means unspecified and use the tensor's layout.")
+        .SetDefault(-1);
     AddAttr<int>("dst_layout",
                  "kAnyLayout = 0, kNHWC = 1, kNCHW = 2, kMKLDNN = 3");
     AddComment(R"DOC(
@@ -126,3 +137,8 @@ REGISTER_OPERATOR(
 // dtype is not important
 REGISTER_OP_CPU_KERNEL_FUNCTOR(transfer_layout, float,
                                ops::TransferLayoutKernel);
+REGISTER_OP_VERSION(transfer_layout)
+    .AddCheckpoint(
+        R"ROC(refine transfer_layout, add src_layout attribute)ROC",
+        paddle::framework::compatible::OpVersionDesc().NewAttr(
+            "src_layout", "(int) the layout of the input tensor", -1));
diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h
index f2d75b4194adf4a80d15d7ff6be18e1c5d14fc49..06bf54e998cc2f080f828c0a723f8e6631eb70e2 100644
--- a/paddle/fluid/operators/transfer_layout_op.h
+++ b/paddle/fluid/operators/transfer_layout_op.h
@@ -39,8 +39,12 @@ class TransferLayoutFunctor {
  public:
   TransferLayoutFunctor(const framework::Variable *in, framework::Variable *out,
                         const platform::DeviceContext &dev_ctx,
-                        const int dst_layout)
-      : in_(in), out_(out), dev_ctx_(dev_ctx), dst_layout_(dst_layout) {}
+                        const int src_layout, const int dst_layout)
+      : in_(in),
+        out_(out),
+        dev_ctx_(dev_ctx),
+        src_layout_(src_layout),
+        dst_layout_(dst_layout) {}
 
   void operator()() const {
     auto &in_tensor = *framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_);
@@ -50,7 +54,8 @@ class TransferLayoutFunctor {
     out_tensor.set_layout(out_layout);
 
 #ifdef PADDLE_WITH_MKLDNN
-    auto in_layout = in_tensor.layout();
+    auto in_layout = static_cast<DataLayout>(src_layout_);
+    VLOG(4) << in_layout << "->" << out_layout << " " << in_tensor.layout();
     if (in_layout == DataLayout::kMKLDNN || out_layout == DataLayout::kMKLDNN) {
       PADDLE_ENFORCE_NE(
           in_layout, out_layout,
@@ -68,6 +73,7 @@ class TransferLayoutFunctor {
       // For NHWC data we need reshape of tensors as MKL-DNN
       // is expecting NHWC dims description order
       if (in_layout == DataLayout::kNHWC) {
+        VLOG(4) << "kNHWC";
         platform::MatchShapeToLayout(&out_tensor, in_layout, out_layout);
         paddle::platform::MKLDNNDeviceContext::tls()
             .set_cur_paddle_data_layout(in_layout);
@@ -75,6 +81,7 @@ class TransferLayoutFunctor {
         out_tensor.set_layout(DataLayout::kMKLDNN);
         out_tensor.set_format(out_format);
       } else {
+        VLOG(4) << "kNCHW";
         // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
         // Do transform via MKLDNN lib
         paddle::framework::innerTransDataLayoutFromMKLDNN(
@@ -123,6 +130,7 @@ class TransferLayoutFunctor {
   const framework::Variable *in_;
   framework::Variable *out_;
   const platform::DeviceContext &dev_ctx_;
+  const int src_layout_;
   const int dst_layout_;
 };
diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 5605d326f2cfa53e5f3f8aba1b65d1a2cd3e8893..9aa362546ec23e978fff618c3b5d07796e02aaf0 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -531,6 +531,7 @@ Eigen::GpuDevice* CUDADeviceContext::eigen_device() const {
 }
 
 void CUDADeviceContext::Wait() const {
+  VLOG(4) << "CUDA context(" << this << ") Wait";
   if (thread_ctx_.count(this)) {
     context()->Stream()->Wait();
     return;
diff --git a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
index 1e856a0fe900fca423333f2d859af40db49e8f24..cff4f7f41d02b9b9b47fbb359e455cbb5c9ae27d 100644
--- a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
+++ b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
@@ -352,5 +352,23 @@ class TestException(unittest.TestCase):
                 self.fetch_vars.name))
 
 
+class TestInplaceApiWithDataTransform(unittest.TestCase):
+    def test_increment(self):
+        if paddle.fluid.core.is_compiled_with_cuda():
+            with paddle.fluid.device_guard("gpu:0"):
+                x = paddle.fluid.layers.fill_constant([1], "float32", 0)
+            with paddle.fluid.device_guard("cpu"):
+                x = paddle.increment(x)
+            exe = paddle.static.Executor(paddle.CUDAPlace(0))
+            os.environ['FLAGS_USE_STANDALONE_EXECUTOR'] = '1'
+
+            for i in range(10):
+                a, = exe.run(paddle.static.default_main_program(),
+                             fetch_list=[x])
+                self.assertEqual(a[0], 1)
+
+            del os.environ['FLAGS_USE_STANDALONE_EXECUTOR']
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py b/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py
index 0f64f7f5d8d10078b9743231f597ec0bc46d2966..94644cf2fec1d5357bf9b6ab23443cbbfec5cce4 100644
--- a/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py
+++ b/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py
@@ -30,6 +30,7 @@ class TestTransferLayoutOpkNCHWTokNHWC(OpTest):
         self.inputs = {'X': ipt.astype('float32')}
         self.outputs = {'Out': ipt.transpose([0, 2, 3, 1])}
         self.attrs = {
+            'src_layout': 0,
             'dst_layout': 1  # kNHWC
        }
        self.op_type = 'transfer_layout'
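
To make the `inplace_back_map` mechanism in this change concrete: when a data-transfer op replaces an inplace op's input with a transferred copy (e.g. the GPU->CPU case exercised by `TestInplaceApiWithDataTransform`), the kernel writes into the copy, and the executor then walks the map (transferred var id -> original var id) and shares the result back, mirroring `TransferInplaceVarsBack()`. The following is a minimal standalone C++ sketch of that idea; the `ToyTensor` type and two-slot scope are illustrative stand-ins, not Paddle's `Tensor`/`VariableScope`:

```cpp
// Minimal standalone sketch (toy types, NOT Paddle's Tensor/VariableScope).
#include <iostream>
#include <map>
#include <memory>
#include <vector>

// A tensor stand-in whose storage can be shared, like Tensor::ShareDataWith.
struct ToyTensor {
  std::shared_ptr<std::vector<float>> data;
  void ShareDataWith(const ToyTensor& other) { data = other.data; }
};

int main() {
  // scope[0] is the original variable x; scope[1] is the transferred copy
  // that an inserted data-transfer op would create.
  std::vector<ToyTensor> scope(2);
  scope[0].data = std::make_shared<std::vector<float>>(1, 0.0f);
  scope[1].data = std::make_shared<std::vector<float>>(*scope[0].data);

  // inplace_back_map: id of transferred variable -> id of original variable.
  std::map<int, int> inplace_back_map{{1, 0}};

  // The inplace kernel (think paddle.increment) runs on the transferred copy.
  (*scope[1].data)[0] += 1.0f;

  // Share the result back so the original variable observes the update,
  // same as the loop added to RunInstruction()/build_op_func_list() above.
  for (const auto& p : inplace_back_map) {
    scope[p.second].ShareDataWith(scope[p.first]);
  }

  std::cout << (*scope[0].data)[0] << std::endl;  // prints 1
  return 0;
}
```

Without the write-back step, the increment would land only in the transferred copy and the fetched `x` would stay 0, which is exactly the behavior the new unit test guards against.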