Unverified · commit fde5ac94 · authored by guo ran, committed by GitHub

Merge pull request #20 from Oneflow-Inc/dev_ldp

sync with the latest oneflow master branch
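Judging from the hunks below, the sync covers three recurring API migrations in the user-op framework: kernel registrations now match on user_op::HobDeviceTag() instead of user_op::HobDeviceType(), op attributes are declared with the templated .Attr<T>("name") form instead of .Attr("name", UserOpAttrType::k...), and the GPU Memcpy<DeviceType::kGPU> calls no longer pass an explicit cudaMemcpyHostToDevice direction.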
......@@ -24,7 +24,7 @@ class LogisticKernel final : public user_op::OpKernel {
#define REGISTER_LOGISTIC_KERNEL(device, dtype) \
REGISTER_USER_KERNEL("logistic") \
.SetCreateFn<LogisticKernel<device, dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == device) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == device) \
& (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \
.SetInplaceProposalFn([](const user_op::InferContext&, \
user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \
......
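A minimal sketch of the new kernel-registration style applied in the hunk above; the op name "my_op" and the MyKernel class are hypothetical placeholders, and only the HobDeviceTag()/HobDataType() matching predicate comes from this diff:

    // Sketch only: "my_op" and MyKernel are hypothetical; the matching
    // predicate mirrors the post-sync style shown in the hunk above.
    #define REGISTER_MY_KERNEL(device, dtype)                                   \
      REGISTER_USER_KERNEL("my_op")                                             \
          .SetCreateFn<MyKernel<device, dtype>>()                               \
          .SetIsMatchedHob((user_op::HobDeviceTag() == device)                  \
                           & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value));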
......@@ -319,13 +319,11 @@ class YoloBoxDiffKernel final : public user_op::OpKernel {
Memcpy<DeviceType::kGPU>(
ctx->device_ctx(), reinterpret_cast<void*>(buf_manager.AnchorBoxesTmpPtr()),
reinterpret_cast<void*>(anchor_boxes.data()),
- buf_manager.AnchorBoxesTmpElemCnt() * sizeof(int32_t),
- cudaMemcpyHostToDevice);
+ buf_manager.AnchorBoxesTmpElemCnt() * sizeof(int32_t));
Memcpy<DeviceType::kGPU>(ctx->device_ctx(),
reinterpret_cast<void*>(buf_manager.BoxMaskTmpPtr()),
reinterpret_cast<void*>(box_mask.data()),
- buf_manager.BoxMaskTmpElemCnt() * sizeof(int32_t),
- cudaMemcpyHostToDevice);
+ buf_manager.BoxMaskTmpElemCnt() * sizeof(int32_t));
Memset<DeviceType::kGPU>(ctx->device_ctx(), statistics_info->mut_dptr<float>(), 0, statistics_info->shape().elem_cnt() * sizeof(float));
FOR_RANGE(int32_t, im_index, 0, bbox->shape().At(0)) {
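The Memcpy calls above drop the trailing cudaMemcpyHostToDevice argument; the sync presumably moves to a Memcpy<DeviceType::kGPU> overload that no longer takes an explicit cudaMemcpyKind (an assumption based only on the signatures visible in this diff). Before and after, with dst, src and byte_size as placeholder names:

    // old call shape (explicit copy direction)
    Memcpy<DeviceType::kGPU>(ctx->device_ctx(), dst, src, byte_size, cudaMemcpyHostToDevice);
    // new call shape (direction argument removed)
    Memcpy<DeviceType::kGPU>(ctx->device_ctx(), dst, src, byte_size);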
......@@ -380,7 +378,7 @@ class YoloBoxDiffKernel final : public user_op::OpKernel {
#define REGISTER_YOLO_BOX_DIFF_GPU_KERNEL(dtype) \
REGISTER_USER_KERNEL("yolo_box_diff") \
.SetCreateFn<YoloBoxDiffKernel<dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::kGPU) \
& (user_op::HobDataType("bbox", 0) == GetDataType<dtype>::value) \
& (user_op::HobDataType("gt_boxes", 0) == GetDataType<dtype>::value)) \
.SetInferTmpSizeFn([](user_op::InferContext* ctx) { \
......
......@@ -13,14 +13,14 @@ REGISTER_USER_OP("yolo_box_diff")
.Output("neg_inds")
.Output("valid_num")
.Output("statistics_info")
.Attr("image_height", UserOpAttrType::kAtInt32)
.Attr("image_width", UserOpAttrType::kAtInt32)
.Attr("layer_height", UserOpAttrType::kAtInt32)
.Attr("layer_width", UserOpAttrType::kAtInt32)
.Attr("ignore_thresh", UserOpAttrType::kAtFloat)
.Attr("truth_thresh", UserOpAttrType::kAtFloat)
.Attr("anchor_boxes", UserOpAttrType::kAtListInt32)
.Attr("box_mask", UserOpAttrType::kAtListInt32)
.Attr<int32_t>("image_height")
.Attr<int32_t>("image_width")
.Attr<int32_t>("layer_height")
.Attr<int32_t>("layer_width")
.Attr<float>("ignore_thresh")
.Attr<float>("truth_thresh")
.Attr<std::vector<int32_t>>("anchor_boxes")
.Attr<std::vector<int32_t>>("box_mask")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
// input: bbox : (n, r, 4) r = h*w*3
const user_op::TensorDesc* bbox_desc = ctx->TensorDesc4ArgNameAndIndex("bbox", 0);
......
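The attribute declarations in this and the following op registrations switch from the enum-based form to the templated form. A hedged side-by-side sketch; "example_op" and its attribute names are illustrative, only the .Attr call shapes come from the diff:

    // old style: attribute type passed as a UserOpAttrType enum value
    REGISTER_USER_OP("example_op")
        .Attr("image_height", UserOpAttrType::kAtInt32)
        .Attr("anchor_boxes", UserOpAttrType::kAtListInt32);

    // new style: attribute type passed as a template parameter
    REGISTER_USER_OP("example_op")
        .Attr<int32_t>("image_height")
        .Attr<std::vector<int32_t>>("anchor_boxes");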
......@@ -142,8 +142,7 @@ class YoloDetectGpuKernel final : public user_op::OpKernel {
Memcpy<DeviceType::kGPU>(
ctx->device_ctx(), reinterpret_cast<void*>(buf_manager.AnchorBoxesTmpPtr()),
reinterpret_cast<void*>(anchor_boxes.data()),
- buf_manager.AnchorBoxesTmpElemCnt() * sizeof(int32_t),
- cudaMemcpyHostToDevice);
+ buf_manager.AnchorBoxesTmpElemCnt() * sizeof(int32_t));
FOR_RANGE(int32_t, im_index, 0, bbox->shape().At(0)) {
const T* probs_ptr = probs->dptr<T>() + im_index * probs->shape().Count(1);
......@@ -175,7 +174,7 @@ class YoloDetectGpuKernel final : public user_op::OpKernel {
#define REGISTER_YOLO_DETECT_GPU_KERNEL(dtype) \
REGISTER_USER_KERNEL("yolo_detect") \
.SetCreateFn<YoloDetectGpuKernel<dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::kGPU) \
& (user_op::HobDataType("bbox", 0) == GetDataType<dtype>::value) \
& (user_op::HobDataType("probs", 0) == GetDataType<dtype>::value)) \
.SetInferTmpSizeFn([](user_op::InferContext* ctx) { \
......
......@@ -9,14 +9,14 @@ REGISTER_USER_OP("yolo_detect")
.Output("out_bbox")
.Output("out_probs")
.Output("valid_num")
.Attr("image_height", UserOpAttrType::kAtInt32)
.Attr("image_width", UserOpAttrType::kAtInt32)
.Attr("layer_height", UserOpAttrType::kAtInt32)
.Attr("layer_width", UserOpAttrType::kAtInt32)
.Attr("prob_thresh", UserOpAttrType::kAtFloat)
.Attr("num_classes", UserOpAttrType::kAtInt32)
.Attr("anchor_boxes", UserOpAttrType::kAtListInt32)
.Attr("max_out_boxes", UserOpAttrType::kAtInt32)
.Attr<int32_t>("image_height")
.Attr<int32_t>("image_width")
.Attr<int32_t>("layer_height")
.Attr<int32_t>("layer_width")
.Attr<float>("prob_thresh")
.Attr<int32_t>("num_classes")
.Attr<std::vector<int32_t>>("anchor_boxes")
.Attr<int32_t>("max_out_boxes")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
// bbox : (n, h*w*3, 4) probs : (n, h*w*3, 81)
// out_bbox : (n, max_out_boxes, 4) out_probs : (n, max_out_boxes, 81)
......
......@@ -6,9 +6,9 @@ REGISTER_USER_OP("yolo_nms")
.Input("bbox")
.Input("probs")
.Output("out")
.Attr("iou_threshold", UserOpAttrType::kAtFloat)
.Attr("keep_n", UserOpAttrType::kAtInt32)
.Attr("batch_dims", UserOpAttrType::kAtInt32)
.Attr<float>("iou_threshold")
.Attr<int32_t>("keep_n")
.Attr<int32_t>("batch_dims")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
Shape* bbox_shape = ctx->Shape4ArgNameAndIndex("bbox", 0);
DimVector dim_vec(bbox_shape->NumAxes() - 1);
......
......@@ -140,7 +140,7 @@ class YoloNmsGpuKernel final : public user_op::OpKernel {
#define REGISTER_YOLO_NMS_GPU_KERNEL(dtype) \
REGISTER_USER_KERNEL("yolo_nms") \
.SetCreateFn<YoloNmsGpuKernel<dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::kGPU) \
& (user_op::HobDataType("out", 0) == DataType::kInt8) \
& (user_op::HobDataType("bbox", 0) == GetDataType<dtype>::value)) \
.SetInferTmpSizeFn([](user_op::InferContext* ctx) { \
......
......@@ -31,10 +31,10 @@ class DecodeOpKernelState final : public user_op::OpKernelState {
REGISTER_USER_OP("yolo_predict_decoder")
.Output("out")
.Output("origin_image_info")
.Attr("batch_size", UserOpAttrType::kAtInt32)
.Attr("image_height", UserOpAttrType::kAtInt32)
.Attr("image_width", UserOpAttrType::kAtInt32)
.Attr("image_paths", UserOpAttrType::kAtListString)
.Attr<int32_t>("batch_size")
.Attr<int32_t>("image_height")
.Attr<int32_t>("image_width")
.Attr<std::vector<std::string>>("image_paths")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
Shape* out_shape = ctx->Shape4ArgNameAndIndex("out", 0);
Shape* origin_image_info_shape = ctx->Shape4ArgNameAndIndex("origin_image_info", 0);
......
......@@ -114,7 +114,7 @@ class YoloProbLossKernel final : public user_op::OpKernel {
#define REGISTER_YOLO_PROB_LOSS_KERNEL(dtype) \
REGISTER_USER_KERNEL("yolo_prob_loss") \
.SetCreateFn<YoloProbLossKernel<dtype>>() \
- .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \
+ .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::kGPU) \
& (user_op::HobDataType("bbox_objness", 0) == GetDataType<dtype>::value) \
& (user_op::HobDataType("bbox_clsprob", 0) == GetDataType<dtype>::value)) \
.SetInferTmpSizeFn([](const oneflow::user_op::InferContext*) { return 0; });
......
......@@ -11,7 +11,7 @@ REGISTER_USER_OP("yolo_prob_loss")
.Input("valid_num")
.Output("bbox_objness_out")
.Output("bbox_clsprob_out")
.Attr("num_classes", UserOpAttrType::kAtInt32)
.Attr<int32_t>("num_classes")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
// input: bbox_objness : (n, r, 1) r = h*w*3
const user_op::TensorDesc* bbox_objness_desc =
......
......@@ -32,16 +32,16 @@ REGISTER_USER_OP("yolo_train_decoder")
.Output("data")
.Output("ground_truth")
.Output("gt_valid_num")
.Attr("batch_size", UserOpAttrType::kAtInt32)
.Attr("image_height", UserOpAttrType::kAtInt32)
.Attr("image_width", UserOpAttrType::kAtInt32)
.Attr("classes", UserOpAttrType::kAtInt32)
.Attr("num_boxes", UserOpAttrType::kAtInt32)
.Attr("hue", UserOpAttrType::kAtFloat)
.Attr("jitter", UserOpAttrType::kAtFloat)
.Attr("saturation", UserOpAttrType::kAtFloat)
.Attr("exposure", UserOpAttrType::kAtFloat)
.Attr("image_path_file", UserOpAttrType::kAtString)
.Attr<int32_t>("batch_size")
.Attr<int32_t>("image_height")
.Attr<int32_t>("image_width")
.Attr<int32_t>("classes")
.Attr<int32_t>("num_boxes")
.Attr<float>("hue")
.Attr<float>("jitter")
.Attr<float>("saturation")
.Attr<float>("exposure")
.Attr<std::string>("image_path_file")
.SetTensorDescInferFn([](user_op::InferContext* ctx) -> Maybe<void> {
user_op::TensorDesc* data_desc = ctx->TensorDesc4ArgNameAndIndex("data", 0);
user_op::TensorDesc* ground_truth_desc = ctx->TensorDesc4ArgNameAndIndex("ground_truth", 0);
......