From 6af0593c6a0602ee8b277bdcab98a6f8d6499467 Mon Sep 17 00:00:00 2001 From: Siddharth Goyal Date: Tue, 15 May 2018 15:31:36 -0700 Subject: [PATCH] Add FP16 option to load_combine op (#10601) --- paddle/fluid/operators/load_combine_op.cc | 36 +++++--- .../operators/save_load_combine_op_test.cc | 90 ++++++++++++++++++- 2 files changed, 113 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/operators/load_combine_op.cc b/paddle/fluid/operators/load_combine_op.cc index b5522dd24..0522a9419 100644 --- a/paddle/fluid/operators/load_combine_op.cc +++ b/paddle/fluid/operators/load_combine_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include - +#include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" @@ -31,6 +31,7 @@ class LoadCombineOp : public framework::OperatorBase { void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { auto filename = Attr("file_path"); + auto load_as_fp16 = Attr("load_as_fp16"); std::ifstream fin(filename); PADDLE_ENFORCE(static_cast(fin), @@ -59,17 +60,25 @@ class LoadCombineOp : public framework::OperatorBase { // Get data from fin to tensor DeserializeFromStream(fin, tensor, dev_ctx); - if (platform::is_gpu_place(place)) { - // copy CPU to GPU - framework::LoDTensor cpu_tensor; - cpu_tensor.ShareDataWith(*tensor); - cpu_tensor.set_lod(tensor->lod()); - - // reset tensor + auto in_dtype = framework::ToDataType(tensor->type()); + auto out_dtype = + load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype; + + if (in_dtype != out_dtype) { + // convert to float16 tensor + auto in_kernel_type = framework::OpKernelType(in_dtype, place); + auto out_kernel_type = framework::OpKernelType(out_dtype, place); + framework::LoDTensor fp16_tensor; + // copy LoD info to the new tensor + fp16_tensor.set_lod(tensor->lod()); + framework::TransDataType(in_kernel_type, out_kernel_type, *tensor, + &fp16_tensor); + + // reset output tensor out_var->Clear(); tensor = out_var->GetMutable(); - tensor->set_lod(cpu_tensor.lod()); - TensorCopy(cpu_tensor, place, dev_ctx, tensor); + tensor->set_lod(fp16_tensor.lod()); + tensor->ShareDataWith(fp16_tensor); } } } @@ -82,6 +91,13 @@ class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker { "Out", "(vector) The output LoDTensors that will be read from the input file.") .AsDuplicable(); + AddAttr( + "load_as_fp16", + "(boolean, default false)" + "If true, the tensor will be first loaded and then " + "converted to float16 data type. Otherwise, the tensor will be " + "directly loaded without data type conversion.") + .SetDefault(false); AddAttr("file_path", "(string) " "LoDTensors will be loaded from \"file_path\".") diff --git a/paddle/fluid/operators/save_load_combine_op_test.cc b/paddle/fluid/operators/save_load_combine_op_test.cc index 47618c51d..4743e0d94 100644 --- a/paddle/fluid/operators/save_load_combine_op_test.cc +++ b/paddle/fluid/operators/save_load_combine_op_test.cc @@ -139,8 +139,9 @@ TEST(SaveLoadCombineOp, CPU) { CheckValues(expect4, actual4, expect_lod4, actual_lod4, numel4); } -// FP16 version of SaveLoadCombineOp Test -TEST(SaveLoadCombineFP16Op, CPU) { +// FP16 version of SaveLoadCombineOp Test, only altering the saving aspect +// to save as FP16. +TEST(SaveCombineFP16Op, CPU) { paddle::framework::Scope scope; paddle::platform::CPUPlace place; @@ -169,7 +170,7 @@ TEST(SaveLoadCombineFP16Op, CPU) { 20, 50, lod4, "test_var4", place, &scope, &expect_lod4); // Set attributes - std::string filename = "check_tensor_fp16.ls"; + std::string filename = "check_tensor_fp16_save.ls"; paddle::framework::AttributeMap attrs; attrs.insert({"file_path", std::string(filename)}); attrs.insert({"save_as_fp16", true}); @@ -216,6 +217,89 @@ TEST(SaveLoadCombineFP16Op, CPU) { actual_lod4, numel4); } +// FP16 version of SaveLoadCombineOp Test, only altering the loading aspect +// to load tensors with FP16 precision. +TEST(LoadCombineFP16Op, CPU) { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + + std::vector lod1 = {0, 1, 2, 3, 10}; + int numel1 = 100; + paddle::framework::LoD expect_lod1; + float* expect1 = CreateForSaveCombineOp( + 10, 10, lod1, "test_var1", place, &scope, &expect_lod1); + + std::vector lod2 = {0, 2, 5, 10}; + int numel2 = 200; + paddle::framework::LoD expect_lod2; + float* expect2 = CreateForSaveCombineOp( + 10, 20, lod2, "test_var2", place, &scope, &expect_lod2); + + std::vector lod3 = {0, 20}; + int numel3 = 4000; + paddle::framework::LoD expect_lod3; + float* expect3 = CreateForSaveCombineOp( + 20, 200, lod3, "test_var3", place, &scope, &expect_lod3); + + std::vector lod4 = {0, 1, 20}; + int numel4 = 1000; + paddle::framework::LoD expect_lod4; + float* expect4 = CreateForSaveCombineOp( + 20, 50, lod4, "test_var4", place, &scope, &expect_lod4); + + // Set attributes + std::string filename = "check_tensor_fp16_load.ls"; + paddle::framework::AttributeMap attrs; + attrs.insert({"file_path", std::string(filename)}); + + // Run the save_combine_op + auto save_combine_op = paddle::framework::OpRegistry::CreateOp( + "save_combine", + {{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs); + save_combine_op->Run(scope, place); + + // Set up output vars + auto load_var1 = scope.Var("out_var1"); + auto load_var2 = scope.Var("out_var2"); + auto load_var3 = scope.Var("out_var3"); + auto load_var4 = scope.Var("out_var4"); + + attrs.insert({"load_as_fp16", true}); + // Run the load_combine_op + auto load_combine_op = paddle::framework::OpRegistry::CreateOp( + "load_combine", {}, + {{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs); + load_combine_op->Run(scope, place); + + auto* target1 = load_var1->GetMutable(); + auto* target2 = load_var2->GetMutable(); + auto* target3 = load_var3->GetMutable(); + auto* target4 = load_var4->GetMutable(); + + paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4; + paddle::platform::float16* actual1 = + GetValuesAfterLoadCombineOp(target1, scope, + &actual_lod1); + paddle::platform::float16* actual2 = + GetValuesAfterLoadCombineOp(target2, scope, + &actual_lod2); + paddle::platform::float16* actual3 = + GetValuesAfterLoadCombineOp(target3, scope, + &actual_lod3); + paddle::platform::float16* actual4 = + GetValuesAfterLoadCombineOp(target4, scope, + &actual_lod4); + + CheckValues(expect1, actual1, expect_lod1, + actual_lod1, numel1); + CheckValues(expect2, actual2, expect_lod2, + actual_lod2, numel2); + CheckValues(expect3, actual3, expect_lod3, + actual_lod3, numel3); + CheckValues(expect4, actual4, expect_lod4, + actual_lod4, numel4); +} + // Test with original SaveLoadTest TEST(SaveLoadTestWithCombineOp, CPU) { paddle::framework::Scope scope; -- GitLab