Unverified · commit fde5ac94 · authored by guo ran, committed by GitHub

Merge pull request #20 from Oneflow-Inc/dev_ldp

sync with the latest oneflow master branch
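Judging from the hunks below, the sync covers three recurring API migrations in the user-op framework: kernel registrations now match on user_op::HobDeviceTag() instead of user_op::HobDeviceType(), op attributes are declared with the templated .Attr<T>("name") form instead of .Attr("name", UserOpAttrType::k...), and the GPU Memcpy<DeviceType::kGPU> calls no longer pass an explicit cudaMemcpyHostToDevice direction.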
......@@ -24,7 +24,7 @@ class LogisticKernel final : public user_op::OpKernel {
#define REGISTER_LOGISTIC_KERNEL(device, dtype) \
REGISTER_USER_KERNEL("logistic") \
.SetCreateFn<LogisticKernel<device, dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == device) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == device) \
& (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \
.SetInplaceProposalFn([](const user_op::InferContext&, \
user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \
......
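A minimal sketch of the new kernel-registration style applied in the hunk above; the op name "my_op" and the MyKernel class are hypothetical placeholders, and only the HobDeviceTag()/HobDataType() matching predicate comes from this diff:

    // Sketch only: "my_op" and MyKernel are hypothetical; the matching
    // predicate mirrors the post-sync style shown in the hunk above.
    #define REGISTER_MY_KERNEL(device, dtype)                                   \
      REGISTER_USER_KERNEL("my_op")                                             \
          .SetCreateFn<MyKernel<device, dtype>>()                               \
          .SetIsMatchedHob((user_op::HobDeviceTag() == device)                  \
                           & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value));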
......@@ -319,13 +319,11 @@ class YoloBoxDiffKernel final : public user_op::OpKernel {
Memcpy<DeviceType::kGPU>(
ctx->device_ctx(), reinterpret_cast<void*>(buf_manager.AnchorBoxesTmpPtr()),
reinterpret_cast<void*>(anchor_boxes.data()),
- buf_manager.AnchorBoxesTmpElemCnt() * sizeof(int32_t),
- cudaMemcpyHostToDevice);
+ buf_manager.AnchorBoxesTmpElemCnt() * sizeof(int32_t));
Memcpy<DeviceType::kGPU>(ctx->device_ctx(),
reinterpret_cast<void*>(buf_manager.BoxMaskTmpPtr()),
reinterpret_cast<void*>(box_mask.data()),
- buf_manager.BoxMaskTmpElemCnt() * sizeof(int32_t),
- cudaMemcpyHostToDevice);
+ buf_manager.BoxMaskTmpElemCnt() * sizeof(int32_t));
Memset<DeviceType::kGPU>(ctx->device_ctx(), statistics_info->mut_dptr<float>(), 0, statistics_info->shape().elem_cnt() * sizeof(float));
FOR_RANGE(int32_t, im_index, 0, bbox->shape().At(0)) {
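The Memcpy calls above drop the trailing cudaMemcpyHostToDevice argument; the sync presumably moves to a Memcpy<DeviceType::kGPU> overload that no longer takes an explicit cudaMemcpyKind (an assumption based only on the signatures visible in this diff). Before and after, with dst, src and byte_size as placeholder names:

    // old call shape (explicit copy direction)
    Memcpy<DeviceType::kGPU>(ctx->device_ctx(), dst, src, byte_size, cudaMemcpyHostToDevice);
    // new call shape (direction argument removed)
    Memcpy<DeviceType::kGPU>(ctx->device_ctx(), dst, src, byte_size);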
......@@ -380,7 +378,7 @@ class YoloBoxDiffKernel final : public user_op::OpKernel {
#define REGISTER_YOLO_BOX_DIFF_GPU_KERNEL(dtype) \
REGISTER_USER_KERNEL("yolo_box_diff") \
.SetCreateFn<YoloBoxDiffKernel<dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::kGPU) \
& (user_op::HobDataType("bbox", 0) == GetDataType<dtype>::value) \
& (user_op::HobDataType("gt_boxes", 0) == GetDataType<dtype>::value)) \
.SetInferTmpSizeFn([](user_op::InferContext* ctx) { \
......
......@@ -13,14 +13,14 @@ REGISTER_USER_OP("yolo_box_diff")
.Output("neg_inds")
.Output("valid_num")
.Output("statistics_info")
.Attr("image_height", UserOpAttrType::kAtInt32)
.Attr("image_width", UserOpAttrType::kAtInt32)
.Attr("layer_height", UserOpAttrType::kAtInt32)
.Attr("layer_width", UserOpAttrType::kAtInt32)
.Attr("ignore_thresh", UserOpAttrType::kAtFloat)
.Attr("truth_thresh", UserOpAttrType::kAtFloat)
.Attr("anchor_boxes", UserOpAttrType::kAtListInt32)
.Attr("box_mask", UserOpAttrType::kAtListInt32)
.Attr<int32_t>("image_height")
.Attr<int32_t>("image_width")
.Attr<int32_t>("layer_height")
.Attr<int32_t>("layer_width")
.Attr<float>("ignore_thresh")
.Attr<float>("truth_thresh")
.Attr<std::vector<int32_t>>("anchor_boxes")
.Attr<std::vector<int32_t>>("box_mask")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
// input: bbox : (n, r, 4) r = h*w*3
const user_op::TensorDesc* bbox_desc = ctx->TensorDesc4ArgNameAndIndex("bbox", 0);
......
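The attribute declarations in this and the following op registrations switch from the enum-based form to the templated form. A hedged side-by-side sketch; "example_op" and its attribute names are illustrative, only the .Attr call shapes come from the diff:

    // old style: attribute type passed as a UserOpAttrType enum value
    REGISTER_USER_OP("example_op")
        .Attr("image_height", UserOpAttrType::kAtInt32)
        .Attr("anchor_boxes", UserOpAttrType::kAtListInt32);

    // new style: attribute type passed as a template parameter
    REGISTER_USER_OP("example_op")
        .Attr<int32_t>("image_height")
        .Attr<std::vector<int32_t>>("anchor_boxes");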
......@@ -142,8 +142,7 @@ class YoloDetectGpuKernel final : public user_op::OpKernel {
Memcpy<DeviceType::kGPU>(
ctx->device_ctx(), reinterpret_cast<void*>(buf_manager.AnchorBoxesTmpPtr()),
reinterpret_cast<void*>(anchor_boxes.data()),
- buf_manager.AnchorBoxesTmpElemCnt() * sizeof(int32_t),
- cudaMemcpyHostToDevice);
+ buf_manager.AnchorBoxesTmpElemCnt() * sizeof(int32_t));
FOR_RANGE(int32_t, im_index, 0, bbox->shape().At(0)) {
const T* probs_ptr = probs->dptr<T>() + im_index * probs->shape().Count(1);
......@@ -175,7 +174,7 @@ class YoloDetectGpuKernel final : public user_op::OpKernel {
#define REGISTER_YOLO_DETECT_GPU_KERNEL(dtype) \
REGISTER_USER_KERNEL("yolo_detect") \
.SetCreateFn<YoloDetectGpuKernel<dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::kGPU) \
& (user_op::HobDataType("bbox", 0) == GetDataType<dtype>::value) \
& (user_op::HobDataType("probs", 0) == GetDataType<dtype>::value)) \
.SetInferTmpSizeFn([](user_op::InferContext* ctx) { \
......
......@@ -9,14 +9,14 @@ REGISTER_USER_OP("yolo_detect")
.Output("out_bbox")
.Output("out_probs")
.Output("valid_num")
.Attr("image_height", UserOpAttrType::kAtInt32)
.Attr("image_width", UserOpAttrType::kAtInt32)
.Attr("layer_height", UserOpAttrType::kAtInt32)
.Attr("layer_width", UserOpAttrType::kAtInt32)
.Attr("prob_thresh", UserOpAttrType::kAtFloat)
.Attr("num_classes", UserOpAttrType::kAtInt32)
.Attr("anchor_boxes", UserOpAttrType::kAtListInt32)
.Attr("max_out_boxes", UserOpAttrType::kAtInt32)
.Attr<int32_t>("image_height")
.Attr<int32_t>("image_width")
.Attr<int32_t>("layer_height")
.Attr<int32_t>("layer_width")
.Attr<float>("prob_thresh")
.Attr<int32_t>("num_classes")
.Attr<std::vector<int32_t>>("anchor_boxes")
.Attr<int32_t>("max_out_boxes")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
// bbox : (n, h*w*3, 4) probs : (n, h*w*3, 81)
// out_bbox : (n, max_out_boxes, 4) out_probs : (n, max_out_boxes, 81)
......
......@@ -6,9 +6,9 @@ REGISTER_USER_OP("yolo_nms")
.Input("bbox")
.Input("probs")
.Output("out")
.Attr("iou_threshold", UserOpAttrType::kAtFloat)
.Attr("keep_n", UserOpAttrType::kAtInt32)
.Attr("batch_dims", UserOpAttrType::kAtInt32)
.Attr<float>("iou_threshold")
.Attr<int32_t>("keep_n")
.Attr<int32_t>("batch_dims")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
Shape* bbox_shape = ctx->Shape4ArgNameAndIndex("bbox", 0);
DimVector dim_vec(bbox_shape->NumAxes() - 1);
......
......@@ -140,7 +140,7 @@ class YoloNmsGpuKernel final : public user_op::OpKernel {
#define REGISTER_YOLO_NMS_GPU_KERNEL(dtype) \
REGISTER_USER_KERNEL("yolo_nms") \
.SetCreateFn<YoloNmsGpuKernel<dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::kGPU) \
& (user_op::HobDataType("out", 0) == DataType::kInt8) \
& (user_op::HobDataType("bbox", 0) == GetDataType<dtype>::value)) \
.SetInferTmpSizeFn([](user_op::InferContext* ctx) { \
......
......@@ -31,10 +31,10 @@ class DecodeOpKernelState final : public user_op::OpKernelState {
REGISTER_USER_OP("yolo_predict_decoder")
.Output("out")
.Output("origin_image_info")
.Attr("batch_size", UserOpAttrType::kAtInt32)
.Attr("image_height", UserOpAttrType::kAtInt32)
.Attr("image_width", UserOpAttrType::kAtInt32)
.Attr("image_paths", UserOpAttrType::kAtListString)
.Attr<int32_t>("batch_size")
.Attr<int32_t>("image_height")
.Attr<int32_t>("image_width")
.Attr<std::vector<std::string>>("image_paths")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
Shape* out_shape = ctx->Shape4ArgNameAndIndex("out", 0);
Shape* origin_image_info_shape = ctx->Shape4ArgNameAndIndex("origin_image_info", 0);
......
......@@ -114,7 +114,7 @@ class YoloProbLossKernel final : public user_op::OpKernel {
#define REGISTER_YOLO_PROB_LOSS_KERNEL(dtype) \
REGISTER_USER_KERNEL("yolo_prob_loss") \
.SetCreateFn<YoloProbLossKernel<dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::kGPU) \
& (user_op::HobDataType("bbox_objness", 0) == GetDataType<dtype>::value) \
& (user_op::HobDataType("bbox_clsprob", 0) == GetDataType<dtype>::value)) \
.SetInferTmpSizeFn([](const oneflow::user_op::InferContext*) { return 0; });
......
......@@ -11,7 +11,7 @@ REGISTER_USER_OP("yolo_prob_loss")
.Input("valid_num")
.Output("bbox_objness_out")
.Output("bbox_clsprob_out")
.Attr("num_classes", UserOpAttrType::kAtInt32)
.Attr<int32_t>("num_classes")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
// input: bbox_objness : (n, r, 1) r = h*w*3
const user_op::TensorDesc* bbox_objness_desc =
......
......@@ -32,16 +32,16 @@ REGISTER_USER_OP("yolo_train_decoder")
.Output("data")
.Output("ground_truth")
.Output("gt_valid_num")
.Attr("batch_size", UserOpAttrType::kAtInt32)
.Attr("image_height", UserOpAttrType::kAtInt32)
.Attr("image_width", UserOpAttrType::kAtInt32)
.Attr("classes", UserOpAttrType::kAtInt32)
.Attr("num_boxes", UserOpAttrType::kAtInt32)
.Attr("hue", UserOpAttrType::kAtFloat)
.Attr("jitter", UserOpAttrType::kAtFloat)
.Attr("saturation", UserOpAttrType::kAtFloat)
.Attr("exposure", UserOpAttrType::kAtFloat)
.Attr("image_path_file", UserOpAttrType::kAtString)
.Attr<int32_t>("batch_size")
.Attr<int32_t>("image_height")
.Attr<int32_t>("image_width")
.Attr<int32_t>("classes")
.Attr<int32_t>("num_boxes")
.Attr<float>("hue")
.Attr<float>("jitter")
.Attr<float>("saturation")
.Attr<float>("exposure")
.Attr<std::string>("image_path_file")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
user_op::TensorDesc* data_desc = ctx->TensorDesc4ArgNameAndIndex("data", 0);
user_op::TensorDesc* ground_truth_desc = ctx->TensorDesc4ArgNameAndIndex("ground_truth", 0);
......