diff --git a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc index d786159078dbcdd7e65d917214b344d86c12e897..87750d713c6d459c5e62f6a85687da0b4d07a7e0 100644 --- a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc +++ b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc @@ -368,6 +368,7 @@ void ProcessInputNode( in_var_type == framework::proto::VarType::FP32) { if (WeightsShouldNotConvert(in_node)) return; in_var->SetDataType(to_type); + in_var_type = to_type; } else if (!in_var->Persistable() && IsFloatVarType(in_var_type) && in_var_type != to_type) { AddCastOp(graph, diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index bc330354e71fcb724251e3a80183f6d683a02590..3948ca8a59fd59d72a3e3dce8003138ab65363a1 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h" +#include <string> #include <unordered_set> #include "paddle/fluid/framework/data_layout.h" @@ -113,6 +114,7 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) { reserve_cpu_weights = true; } + std::unordered_set<std::string> visited; for (auto *node : paddle::framework::ir::TopologySortOperations(graph)) { if (!node->IsOp()) continue; if (node->Op()->Type() == "feed" || node->Op()->Type() == "fetch") continue; @@ -126,6 +128,8 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) { } continue; } + if (visited.count(var_name)) continue; + visited.insert(var_name); auto *var = scope->FindLocalVar(var_name); PADDLE_ENFORCE_NOT_NULL(var, platform::errors::PreconditionNotMet(