提交 0e2deaa5 编写于 作者: Y Yang Yang

Merge remote-tracking branch 'pr/8364' into backward_on_parallel_do

...@@ -37,9 +37,8 @@ class Vector { ...@@ -37,9 +37,8 @@ class Vector {
// Fill vector with value. The vector size is `count`. // Fill vector with value. The vector size is `count`.
explicit Vector(size_t count, const T& value = T()) { explicit Vector(size_t count, const T& value = T()) {
if (count == 0) {
InitEmpty(); InitEmpty();
} else { if (count != 0) {
resize(count); resize(count);
T* ptr = begin(); T* ptr = begin();
for (size_t i = 0; i < count; ++i) { for (size_t i = 0; i < count; ++i) {
...@@ -122,6 +121,10 @@ class Vector { ...@@ -122,6 +121,10 @@ class Vector {
const T* begin() const { return &this->operator[](0); } const T* begin() const { return &this->operator[](0); }
const T* end() const { return &this->operator[](size()); } const T* end() const { return &this->operator[](size()); }
// Read-only pointer to the first element; forwards to the const overload of
// begin().
const T* cbegin() const {
  return this->begin();
}
// Read-only pointer one past the last element; forwards to the const overload
// of end().
const T* cend() const {
  return this->end();
}
const T& back() const { const T& back() const {
auto it = end(); auto it = end();
--it; --it;
...@@ -244,7 +247,9 @@ class Vector { ...@@ -244,7 +247,9 @@ class Vector {
bool operator==(const Vector<T>& other) const { bool operator==(const Vector<T>& other) const {
if (size() != other.size()) return false; if (size() != other.size()) return false;
for (auto it1 = begin(), it2 = other.begin(); it1 < end(); ++it1, ++it2) { auto it1 = cbegin();
auto it2 = other.cbegin();
for (; it1 < cend(); ++it1, ++it2) {
if (*it1 != *it2) { if (*it1 != *it2) {
return false; return false;
} }
......
...@@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) { ...@@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
tmp.push_back(i); tmp.push_back(i);
} }
ASSERT_EQ(tmp.size(), 10); ASSERT_EQ(tmp.size(), 10UL);
vec<int> tmp2; vec<int> tmp2;
tmp2 = tmp; tmp2 = tmp;
ASSERT_EQ(tmp2.size(), 10); ASSERT_EQ(tmp2.size(), 10UL);
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
ASSERT_EQ(tmp2[i], i); ASSERT_EQ(tmp2[i], i);
ASSERT_EQ(tmp2[i], tmp[i]); ASSERT_EQ(tmp2[i], tmp[i]);
...@@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) { ...@@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
tmp.push_back(i); tmp.push_back(i);
} }
ASSERT_EQ(tmp.size(), 10); ASSERT_EQ(tmp.size(), 10UL);
paddle::platform::CUDAPlace gpu(0); paddle::platform::CUDAPlace gpu(0);
multiply_10<<<1, 1, 0, GetCUDAStream(gpu)>>>(tmp.MutableData(gpu)); multiply_10<<<1, 1, 0, GetCUDAStream(gpu)>>>(tmp.MutableData(gpu));
...@@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) { ...@@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
tmp.push_back(i); tmp.push_back(i);
} }
ASSERT_EQ(tmp.size(), 10); ASSERT_EQ(tmp.size(), 10UL);
paddle::platform::CUDAPlace gpu0(0); paddle::platform::CUDAPlace gpu0(0);
paddle::platform::SetDeviceId(0); paddle::platform::SetDeviceId(0);
multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0)); multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0));
...@@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) { ...@@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) {
ASSERT_EQ(tmp[i], i * 100); ASSERT_EQ(tmp[i], i * 100);
} }
} }
// Exercises the fill constructor Vector(count, value): the vector must report
// `count` elements and every element must compare equal to `value`.
TEST(mixed_vector, InitWithCount) {
  // Named `filled` rather than `vec` so it does not shadow the `vec<T>` alias
  // used by the other tests in this file.
  paddle::framework::Vector<int> filled(10, 10);
  // Check the size first: without it, a constructor that allocated the wrong
  // number of elements would go unnoticed by the per-element loop below.
  ASSERT_EQ(filled.size(), 10UL);
  for (int i = 0; i < 10; ++i) {
    ASSERT_EQ(filled[i], 10);
  }
}
...@@ -38,22 +38,22 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { ...@@ -38,22 +38,22 @@ class MultiClassNMSOp : public framework::OperatorWithKernel {
auto box_dims = ctx->GetInputDim("BBoxes"); auto box_dims = ctx->GetInputDim("BBoxes");
auto score_dims = ctx->GetInputDim("Scores"); auto score_dims = ctx->GetInputDim("Scores");
PADDLE_ENFORCE_EQ(box_dims.size(), 2, PADDLE_ENFORCE_EQ(box_dims.size(), 3,
"The rank of Input(BBoxes) must be 2."); "The rank of Input(BBoxes) must be 3.");
PADDLE_ENFORCE_EQ(score_dims.size(), 3, PADDLE_ENFORCE_EQ(score_dims.size(), 3,
"The rank of Input(Scores) must be 3."); "The rank of Input(Scores) must be 3.");
PADDLE_ENFORCE_EQ(box_dims[1], 4, PADDLE_ENFORCE_EQ(box_dims[2], 4,
"The 2nd dimension of Input(BBoxes) must be 4, " "The 2nd dimension of Input(BBoxes) must be 4, "
"represents the layout of coordinate " "represents the layout of coordinate "
"[xmin, ymin, xmax, ymax]"); "[xmin, ymin, xmax, ymax]");
PADDLE_ENFORCE_EQ(box_dims[0], score_dims[2], PADDLE_ENFORCE_EQ(box_dims[1], score_dims[2],
"The 1st dimensiong of Input(BBoxes) must be equal to " "The 1st dimensiong of Input(BBoxes) must be equal to "
"3rd dimension of Input(Scores), which represents the " "3rd dimension of Input(Scores), which represents the "
"predicted bboxes."); "predicted bboxes.");
// Here the box_dims[0] is not the real dimension of output. // Here the box_dims[1] is not the real dimension of output.
// It will be rewritten in the computing kernel. // It will be rewritten in the computing kernel.
ctx->SetOutputDim("Out", {box_dims[0], 6}); ctx->SetOutputDim("Out", {box_dims[1], 6});
} }
protected: protected:
...@@ -260,15 +260,20 @@ class MultiClassNMSKernel : public framework::OpKernel<T> { ...@@ -260,15 +260,20 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int64_t batch_size = score_dims[0]; int64_t batch_size = score_dims[0];
int64_t class_num = score_dims[1]; int64_t class_num = score_dims[1];
int64_t predict_dim = score_dims[2]; int64_t predict_dim = score_dims[2];
int64_t box_dim = boxes->dims()[2];
std::vector<std::map<int, std::vector<int>>> all_indices; std::vector<std::map<int, std::vector<int>>> all_indices;
std::vector<size_t> batch_starts = {0}; std::vector<size_t> batch_starts = {0};
for (int64_t i = 0; i < batch_size; ++i) { for (int64_t i = 0; i < batch_size; ++i) {
Tensor ins_score = scores->Slice(i, i + 1); Tensor ins_score = scores->Slice(i, i + 1);
ins_score.Resize({class_num, predict_dim}); ins_score.Resize({class_num, predict_dim});
Tensor ins_boxes = boxes->Slice(i, i + 1);
ins_boxes.Resize({predict_dim, box_dim});
std::map<int, std::vector<int>> indices; std::map<int, std::vector<int>> indices;
int num_nmsed_out = 0; int num_nmsed_out = 0;
MultiClassNMS(ctx, ins_score, *boxes, indices, num_nmsed_out); MultiClassNMS(ctx, ins_score, ins_boxes, indices, num_nmsed_out);
all_indices.push_back(indices); all_indices.push_back(indices);
batch_starts.push_back(batch_starts.back() + num_nmsed_out); batch_starts.push_back(batch_starts.back() + num_nmsed_out);
} }
...@@ -282,11 +287,15 @@ class MultiClassNMSKernel : public framework::OpKernel<T> { ...@@ -282,11 +287,15 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
for (int64_t i = 0; i < batch_size; ++i) { for (int64_t i = 0; i < batch_size; ++i) {
Tensor ins_score = scores->Slice(i, i + 1); Tensor ins_score = scores->Slice(i, i + 1);
ins_score.Resize({class_num, predict_dim}); ins_score.Resize({class_num, predict_dim});
Tensor ins_boxes = boxes->Slice(i, i + 1);
ins_boxes.Resize({predict_dim, box_dim});
int64_t s = batch_starts[i]; int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1]; int64_t e = batch_starts[i + 1];
if (e > s) { if (e > s) {
Tensor out = outs->Slice(s, e); Tensor out = outs->Slice(s, e);
MultiClassOutput(ins_score, *boxes, all_indices[i], &out); MultiClassOutput(ins_score, ins_boxes, all_indices[i], &out);
} }
} }
} }
...@@ -303,9 +312,9 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -303,9 +312,9 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
MultiClassNMSOpMaker(OpProto* proto, OpAttrChecker* op_checker) MultiClassNMSOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("BBoxes", AddInput("BBoxes",
"(Tensor) A 2-D Tensor with shape [M, 4] represents the " "(Tensor) A 3-D Tensor with shape [N, M, 4] represents the "
"predicted locations of M bounding bboxes. Each bounding box " "predicted locations of M bounding bboxes, N is the batch size. "
"has four coordinate values and the layout is " "Each bounding box has four coordinate values and the layout is "
"[xmin, ymin, xmax, ymax]."); "[xmin, ymin, xmax, ymax].");
AddInput("Scores", AddInput("Scores",
"(Tensor) A 3-D Tensor with shape [N, C, M] represents the " "(Tensor) A 3-D Tensor with shape [N, C, M] represents the "
......
...@@ -137,7 +137,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold, ...@@ -137,7 +137,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold,
det_outs = [] det_outs = []
lod = [0] lod = [0]
for n in range(batch_size): for n in range(batch_size):
nmsed_outs, nmsed_num = multiclass_nms(boxes, scores[n], background, nmsed_outs, nmsed_num = multiclass_nms(boxes[n], scores[n], background,
score_threshold, nms_threshold, score_threshold, nms_threshold,
nms_top_k, keep_top_k) nms_top_k, keep_top_k)
lod.append(lod[-1] + nmsed_num) lod.append(lod[-1] + nmsed_num)
...@@ -145,7 +145,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold, ...@@ -145,7 +145,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold,
for c, indices in nmsed_outs.iteritems(): for c, indices in nmsed_outs.iteritems():
for idx in indices: for idx in indices:
xmin, ymin, xmax, ymax = boxes[idx][:] xmin, ymin, xmax, ymax = boxes[n][idx][:]
det_outs.append([c, scores[n][c][idx], xmin, ymin, xmax, ymax]) det_outs.append([c, scores[n][c][idx], xmin, ymin, xmax, ymax])
return det_outs, lod return det_outs, lod
...@@ -179,9 +179,9 @@ class TestMulticlassNMSOp(OpTest): ...@@ -179,9 +179,9 @@ class TestMulticlassNMSOp(OpTest):
scores = np.reshape(scores, (N, M, C)) scores = np.reshape(scores, (N, M, C))
scores = np.transpose(scores, (0, 2, 1)) scores = np.transpose(scores, (0, 2, 1))
boxes = np.random.random((M, BOX_SIZE)).astype('float32') boxes = np.random.random((N, M, BOX_SIZE)).astype('float32')
boxes[:, 0:2] = boxes[:, 0:2] * 0.5 boxes[:, :, 0:2] = boxes[:, :, 0:2] * 0.5
boxes[:, 2:4] = boxes[:, 2:4] * 0.5 + 0.5 boxes[:, :, 2:4] = boxes[:, :, 2:4] * 0.5 + 0.5
nmsed_outs, lod = batched_multiclass_nms(boxes, scores, background, nmsed_outs, lod = batched_multiclass_nms(boxes, scores, background,
score_threshold, nms_threshold, score_threshold, nms_threshold,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册