add dist kv with quant infer op test=serving

b9590df6 · wangjiawei04 · 24c78e3b · b9590df6
显示空白变更内容
内联并排

Showing 1 changed file with 22 additions and 14 deletions

core/general-server/op/general_dist_kv_quant_infer_op.cpp core/general-server/op/general_dist_kv_quant_infer_op.cpp +22 -14

未找到文件。
--- a/core/general-server/op/general_dist_kv_quant_infer_op.cpp
+++ b/core/general-server/op/general_dist_kv_quant_infer_op.cpp
@@ -23,8 +23,8 @@
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
-#include "core/util/include/timer.h"
 #include "core/predictor/tools/quant.h"
+#include "core/util/include/timer.h"
 namespace baidu {
 namespace paddle_serving {
@@ -145,13 +145,21 @@ int GeneralDistKVQuantInferOp::inference() {
               values[cube_val_idx].buff.size());
      } else {
        // min (float), max (float), num, num, num... (Byte)
-        size_t num_of_float = values[cube_val_idx].buff.size() - 2 * sizeof(float);
-        float* float_ptr = new float[num_of_float];
-        char* src_ptr = new char[values[cube_val_idx].buff.size()];
-        memcpy(src_ptr, values[cube_val_idx].buff.data(), values[cube_val_idx].buff.size());
-        float* minmax = reinterpret_cast<float*>(src_ptr);
-        dequant(src_ptr + 2*sizeof(float), float_ptr, minmax[0], minmax[1], num_of_float, cube_quant_bits);
-        memcpy(data_ptr, float_ptr, sizeof(float)*num_of_float);
+        size_t num_of_float =
+            values[cube_val_idx].buff.size() - 2 * sizeof(float);
+        float *float_ptr = new float[num_of_float];
+        char *src_ptr = new char[values[cube_val_idx].buff.size()];
+        memcpy(src_ptr,
+               values[cube_val_idx].buff.data(),
+               values[cube_val_idx].buff.size());
+        float *minmax = reinterpret_cast<float *>(src_ptr);
+        dequant(src_ptr + 2 * sizeof(float),
+                float_ptr,
+                minmax[0],
+                minmax[1],
+                num_of_float,
+                cube_quant_bits);
+        memcpy(data_ptr, float_ptr, sizeof(float) * num_of_float);
        delete float_ptr;
        delete src_ptr;
      }