Add FP16 option in save_combine_op (#10471)

* Add FP16 option in save_combine_op * Fix casting issue

Add FP16 option in save_combine_op (#10471)
* Add FP16 option in save_combine_op * Fix casting issue
283c4dbe · Siddharth Goyal · GitHub · 28a6037b · 283c4dbe · 283c4dbe
Showing with 141 addition and 35 deletion

paddle/fluid/operators/save_combine_op.cc paddle/fluid/operators/save_combine_op.cc +26 -2

paddle/fluid/operators/save_load_combine_op_test.cc paddle/fluid/operators/save_load_combine_op_test.cc +115 -33

未找到文件。
--- a/paddle/fluid/operators/save_combine_op.cc
+++ b/paddle/fluid/operators/save_combine_op.cc
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <numeric>
 #include <sstream>
 #include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -69,6 +70,7 @@ class SaveCombineOp : public framework::OperatorBase {
               const platform::Place &place) const override {
    auto filename = Attr<std::string>("file_path");
    auto overwrite = Attr<bool>("overwrite");
+    auto save_as_fp16 = Attr<bool>("save_as_fp16");

    bool is_present = FileExists(filename);
    if (is_present && !overwrite) {
@@ -100,9 +102,25 @@ class SaveCombineOp : public framework::OperatorBase {
                     inp_var_names[i]);

      auto &tensor = var->Get<framework::LoDTensor>();
-      // Serialize tensor
+      // Serialize tensors one by one
+
+      // Check types to see if a fp16 transformation is required
+      auto in_dtype = framework::ToDataType(tensor.type());
+      auto out_dtype =
+          save_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
+
+      if (in_dtype != out_dtype) {
+        auto in_kernel_type = framework::OpKernelType(in_dtype, place);
+        auto out_kernel_type = framework::OpKernelType(out_dtype, place);
+        framework::LoDTensor out;
+        // copy LoD info to the new tensor
+        out.set_lod(tensor.lod());
+        framework::TransDataType(in_kernel_type, out_kernel_type, tensor, &out);
+        framework::SerializeToStream(fout, out, dev_ctx);
+      } else {
        framework::SerializeToStream(fout, tensor, dev_ctx);
      }
+    }
    fout.close();
  }
 };
@@ -125,6 +143,12 @@ to a file on disk.
                  "(boolean, default true)"
                  "Overwrite the output file if it exists.")
        .SetDefault(true);
+    AddAttr<bool>("save_as_fp16",
+                  "(boolean, default false)"
+                  "If true, the tensor will be converted to float16 data "
+                  "type and then saved. Otherwise, the tensor will be "
+                  "directly saved without data type conversion.")
+        .SetDefault(false);
    AddAttr<std::string>(
        "file_path",
        "(string)"

--- a/paddle/fluid/operators/save_load_combine_op_test.cc
+++ b/paddle/fluid/operators/save_load_combine_op_test.cc
@@ -17,11 +17,13 @@ limitations under the License. */
 #include <vector>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/float16.h"

 USE_NO_KERNEL_OP(save_combine);
 USE_NO_KERNEL_OP(load_combine);

-int* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
+template <typename T, typename U>
+T* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
                          std::string var_name,
                          const paddle::platform::CPUPlace& place,
                          paddle::framework::Scope* scope,
@@ -34,9 +36,10 @@ int* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
    (*expect_lod)[0].push_back(lod_info[i]);
  }
  tensor->set_lod(*expect_lod);
-  int* expect = tensor->mutable_data<int>(place);
+  T* expect = tensor->mutable_data<T>(place);
  for (int64_t i = 0; i < tensor->numel(); ++i) {
-    expect[i] = static_cast<int>(i);
+    expect[i] = static_cast<T>(
+        static_cast<U>(i));  // For FP16, we intend to do float(float16(i))
  }
  return expect;
 }
@@ -48,18 +51,20 @@ paddle::framework::LoDTensor* GeneratePlaceholderBeforeLoad(
  return target;
 }

-int* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target,
+template <typename T>
+T* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target,
                               const paddle::framework::Scope& scope,
                               paddle::framework::LoD* actual_lod) {
-  int* actual = target->data<int>();
+  T* actual = target->data<T>();
  *actual_lod = target->lod();
  return actual;
 }

-void CheckValues(int* expect, int* actual, paddle::framework::LoD expect_lod,
-                 paddle::framework::LoD actual_lod, const int& numel) {
-  for (int64_t i = 0; i < numel; ++i) {
-    EXPECT_EQ(expect[i], actual[i]);
+template <typename T, typename U>
+void CheckValues(T* expect, U* actual, const paddle::framework::LoD& expect_lod,
+                 const paddle::framework::LoD& actual_lod, const int& numel) {
+  for (int i = 0; i < numel; ++i) {
+    EXPECT_EQ(expect[i], static_cast<T>(actual[i]));
  }
  EXPECT_EQ(expect_lod.size(), actual_lod.size());
  for (size_t i = 0; i < expect_lod.size(); ++i) {
@@ -78,26 +83,26 @@ TEST(SaveLoadCombineOp, CPU) {
  std::vector<int> lod1 = {0, 1, 2, 3, 10};
  int numel1 = 100;
  paddle::framework::LoD expect_lod1;
-  int* expect1 = CreateForSaveCombineOp(10, 10, lod1, "test_var1", place,
-                                        &scope, &expect_lod1);
+  int* expect1 = CreateForSaveCombineOp<int, int>(10, 10, lod1, "test_var1",
+                                                  place, &scope, &expect_lod1);

  std::vector<int> lod2 = {0, 2, 5, 10};
  int numel2 = 200;
  paddle::framework::LoD expect_lod2;
-  int* expect2 = CreateForSaveCombineOp(10, 20, lod2, "test_var2", place,
-                                        &scope, &expect_lod2);
+  int* expect2 = CreateForSaveCombineOp<int, int>(10, 20, lod2, "test_var2",
+                                                  place, &scope, &expect_lod2);

  std::vector<int> lod3 = {0, 2, 3, 20};
  int numel3 = 4000;
  paddle::framework::LoD expect_lod3;
-  int* expect3 = CreateForSaveCombineOp(20, 200, lod3, "test_var3", place,
-                                        &scope, &expect_lod3);
+  int* expect3 = CreateForSaveCombineOp<int, int>(20, 200, lod3, "test_var3",
+                                                  place, &scope, &expect_lod3);

  std::vector<int> lod4 = {0, 1, 20};
  int numel4 = 1000;
  paddle::framework::LoD expect_lod4;
-  int* expect4 = CreateForSaveCombineOp(20, 50, lod4, "test_var4", place,
-                                        &scope, &expect_lod4);
+  int* expect4 = CreateForSaveCombineOp<int, int>(20, 50, lod4, "test_var4",
+                                                  place, &scope, &expect_lod4);

  // Set attributes
  std::string filename = "check_tensor.ls";
@@ -123,15 +128,92 @@ TEST(SaveLoadCombineOp, CPU) {
  load_combine_op->Run(scope, place);

  paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
-  int* actual1 = GetValuesAfterLoadCombineOp(target1, scope, &actual_lod1);
-  int* actual2 = GetValuesAfterLoadCombineOp(target2, scope, &actual_lod2);
-  int* actual3 = GetValuesAfterLoadCombineOp(target3, scope, &actual_lod3);
-  int* actual4 = GetValuesAfterLoadCombineOp(target4, scope, &actual_lod4);
-
-  CheckValues(expect1, actual1, expect_lod1, actual_lod1, numel1);
-  CheckValues(expect2, actual2, expect_lod2, actual_lod2, numel2);
-  CheckValues(expect3, actual3, expect_lod3, actual_lod3, numel3);
-  CheckValues(expect4, actual4, expect_lod4, actual_lod4, numel4);
+  int* actual1 = GetValuesAfterLoadCombineOp<int>(target1, scope, &actual_lod1);
+  int* actual2 = GetValuesAfterLoadCombineOp<int>(target2, scope, &actual_lod2);
+  int* actual3 = GetValuesAfterLoadCombineOp<int>(target3, scope, &actual_lod3);
+  int* actual4 = GetValuesAfterLoadCombineOp<int>(target4, scope, &actual_lod4);
+
+  CheckValues<int, int>(expect1, actual1, expect_lod1, actual_lod1, numel1);
+  CheckValues<int, int>(expect2, actual2, expect_lod2, actual_lod2, numel2);
+  CheckValues<int, int>(expect3, actual3, expect_lod3, actual_lod3, numel3);
+  CheckValues<int, int>(expect4, actual4, expect_lod4, actual_lod4, numel4);
+}
+
+// FP16 version of SaveLoadCombineOp Test
+TEST(SaveLoadCombineFP16Op, CPU) {
+  paddle::framework::Scope scope;
+  paddle::platform::CPUPlace place;
+
+  std::vector<int> lod1 = {0, 1, 2, 3, 10};
+  int numel1 = 100;
+  paddle::framework::LoD expect_lod1;
+  float* expect1 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      10, 10, lod1, "test_var1", place, &scope, &expect_lod1);
+
+  std::vector<int> lod2 = {0, 2, 5, 10};
+  int numel2 = 200;
+  paddle::framework::LoD expect_lod2;
+  float* expect2 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      10, 20, lod2, "test_var2", place, &scope, &expect_lod2);
+
+  std::vector<int> lod3 = {0, 20};
+  int numel3 = 4000;
+  paddle::framework::LoD expect_lod3;
+  float* expect3 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      20, 200, lod3, "test_var3", place, &scope, &expect_lod3);
+
+  std::vector<int> lod4 = {0, 1, 20};
+  int numel4 = 1000;
+  paddle::framework::LoD expect_lod4;
+  float* expect4 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      20, 50, lod4, "test_var4", place, &scope, &expect_lod4);
+
+  // Set attributes
+  std::string filename = "check_tensor_fp16.ls";
+  paddle::framework::AttributeMap attrs;
+  attrs.insert({"file_path", std::string(filename)});
+  attrs.insert({"save_as_fp16", true});
+
+  // Run the save_combine_op
+  auto save_combine_op = paddle::framework::OpRegistry::CreateOp(
+      "save_combine",
+      {{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs);
+  save_combine_op->Run(scope, place);
+
+  // Set up output vars
+  auto target1 = GeneratePlaceholderBeforeLoad("out_var1", &scope);
+  auto target2 = GeneratePlaceholderBeforeLoad("out_var2", &scope);
+  auto target3 = GeneratePlaceholderBeforeLoad("out_var3", &scope);
+  auto target4 = GeneratePlaceholderBeforeLoad("out_var4", &scope);
+
+  // Run the load_combine_op
+  auto load_combine_op = paddle::framework::OpRegistry::CreateOp(
+      "load_combine", {},
+      {{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs);
+  load_combine_op->Run(scope, place);
+
+  paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
+  paddle::platform::float16* actual1 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target1, scope,
+                                                             &actual_lod1);
+  paddle::platform::float16* actual2 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target2, scope,
+                                                             &actual_lod2);
+  paddle::platform::float16* actual3 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target3, scope,
+                                                             &actual_lod3);
+  paddle::platform::float16* actual4 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target4, scope,
+                                                             &actual_lod4);
+
+  CheckValues<float, paddle::platform::float16>(expect1, actual1, expect_lod1,
+                                                actual_lod1, numel1);
+  CheckValues<float, paddle::platform::float16>(expect2, actual2, expect_lod2,
+                                                actual_lod2, numel2);
+  CheckValues<float, paddle::platform::float16>(expect3, actual3, expect_lod3,
+                                                actual_lod3, numel3);
+  CheckValues<float, paddle::platform::float16>(expect4, actual4, expect_lod4,
+                                                actual_lod4, numel4);
 }

 // Test with original SaveLoadTest
@@ -141,7 +223,7 @@ TEST(SaveLoadTestWithCombineOp, CPU) {

  auto var = scope.Var("test_var");
  auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
-  tensor->Resize({3, 10});
+  tensor->Resize({3, 4000});
  paddle::framework::LoD expect_lod;
  expect_lod.resize(1);
  expect_lod[0].push_back(0);