提交 9d3e2cc5 编写于 作者: W w-adamski 提交者: Liangliang He

Added floordiv() operator (#336)

* Added floordiv() operator

* Restored original indentation

* Update eltwise.cl

Update eltwise.cl according to the recent fix.
上级 e93296e0
...@@ -20,7 +20,7 @@ Operator lists ...@@ -20,7 +20,7 @@ Operator lists
"DEPTHWISE_CONV_2D","Y","Only multiplier = 1 is supported; Fusion is supported." "DEPTHWISE_CONV_2D","Y","Only multiplier = 1 is supported; Fusion is supported."
"DEPTH_TO_SPACE","Y","" "DEPTH_TO_SPACE","Y",""
"DEQUANTIZE","Y","Model quantization will be supported later." "DEQUANTIZE","Y","Model quantization will be supported later."
"ELEMENT_WISE","Y","ADD/MUL/DIV/MIN/MAX/NEG/ABS/SQR_DIFF/POW/RSQRT/SQRT/EQUAL" "ELEMENT_WISE","Y","ADD/MUL/DIV/MIN/MAX/NEG/ABS/SQR_DIFF/POW/RSQRT/SQRT/EQUAL/FLOOR_DIV"
"EMBEDDING_LOOKUP","Y","" "EMBEDDING_LOOKUP","Y",""
"EXPANDDIMS","Y","Only CPU and TensorFlow is supported." "EXPANDDIMS","Y","Only CPU and TensorFlow is supported."
"FILL","Y","Only CPU and TensorFlow is supported." "FILL","Y","Only CPU and TensorFlow is supported."
......
...@@ -136,6 +136,23 @@ inline void TensorGeneralBroadcastEltwise( ...@@ -136,6 +136,23 @@ inline void TensorGeneralBroadcastEltwise(
} }
} }
break; break;
// FLOOR_DIV: element-wise floor(a / b) with general (per-index) shape
// broadcasting. GetIndex maps the flat output position to each input's
// (possibly broadcast) index; IncreaseIndex advances the multi-dim cursor.
// `swapped` means the caller exchanged input0/input1 to canonicalize the
// broadcast, so the division order is reversed here to keep the original
// operand order: floor(original_lhs / original_rhs).
// NOTE(review): if T is an integer type, input0/input1 truncates toward
// zero before std::floor is applied, so negative quotients differ from true
// floor division — presumably T is float here; confirm against callers.
case FLOOR_DIV:
  if (!swapped) {
    for (index_t i = 0; i < output_size; ++i) {
      const index_t idx0 = GetIndex(input0_shape, out_index);
      const index_t idx1 = GetIndex(input1_shape, out_index);
      output[i] = std::floor(input0[idx0] / input1[idx1]);
      IncreaseIndex(output_shape, &out_index);
    }
  } else {
    for (index_t i = 0; i < output_size; ++i) {
      const index_t idx0 = GetIndex(input0_shape, out_index);
      const index_t idx1 = GetIndex(input1_shape, out_index);
      // Operands were swapped by the caller: divide input1 by input0.
      output[i] = std::floor(input1[idx1] / input0[idx0]);
      IncreaseIndex(output_shape, &out_index);
    }
  }
  break;
case MIN: case MIN:
for (index_t i = 0; i < output_size; ++i) { for (index_t i = 0; i < output_size; ++i) {
const index_t idx0 = GetIndex(input0_shape, out_index); const index_t idx0 = GetIndex(input0_shape, out_index);
...@@ -270,6 +287,25 @@ inline void TensorBroadcastEltwise(const EltwiseType type, ...@@ -270,6 +287,25 @@ inline void TensorBroadcastEltwise(const EltwiseType type,
} }
} }
break; break;
// FLOOR_DIV with leading-dimension broadcasting: input0 has diff_size
// outer slices of common_size elements each, input1 holds one slice of
// common_size elements that is reused for every outer slice.
// `swapped` reverses the division order to preserve the caller's original
// operand order. The two loops are collapsed for OpenMP since every
// output element is independent.
case FLOOR_DIV:
  if (!swapped) {
#pragma omp parallel for collapse(2) schedule(runtime)
    for (index_t d = 0; d < diff_size; ++d) {
      for (index_t i = 0; i < common_size; ++i) {
        output[i + d * common_size] =
            std::floor(input0[i + d * common_size] / input1[i]);
      }
    }
  } else {
#pragma omp parallel for collapse(2) schedule(runtime)
    for (index_t d = 0; d < diff_size; ++d) {
      for (index_t i = 0; i < common_size; ++i) {
        // Swapped operands: broadcast slice is the dividend.
        output[i + d * common_size] =
            std::floor(input1[i] / input0[i + d * common_size]);
      }
    }
  }
  break;
case MIN: case MIN:
#pragma omp parallel for collapse(2) schedule(runtime) #pragma omp parallel for collapse(2) schedule(runtime)
for (index_t d = 0; d < diff_size; ++d) { for (index_t d = 0; d < diff_size; ++d) {
...@@ -405,6 +441,19 @@ inline void TensorEltwise(const EltwiseType type, ...@@ -405,6 +441,19 @@ inline void TensorEltwise(const EltwiseType type,
} }
} }
break; break;
// FLOOR_DIV for two same-shaped tensors: output[i] = floor(a[i] / b[i]).
// `swapped` reverses the division order so the result matches the
// caller's original operand order. Iterations are independent, hence the
// parallel-for.
case FLOOR_DIV:
  if (!swapped) {
#pragma omp parallel for schedule(runtime)
    for (index_t i = 0; i < size; ++i) {
      output[i] = std::floor(input0[i] / input1[i]);
    }
  } else {
#pragma omp parallel for schedule(runtime)
    for (index_t i = 0; i < size; ++i) {
      // Swapped operands: divide input1 by input0.
      output[i] = std::floor(input1[i] / input0[i]);
    }
  }
  break;
case MIN: case MIN:
#pragma omp parallel for schedule(runtime) #pragma omp parallel for schedule(runtime)
for (index_t i = 0; i < size; ++i) { for (index_t i = 0; i < size; ++i) {
...@@ -525,6 +574,19 @@ inline void TensorScalarEltwise(const EltwiseType type, ...@@ -525,6 +574,19 @@ inline void TensorScalarEltwise(const EltwiseType type,
} }
} }
break; break;
// FLOOR_DIV between a tensor and a scalar (input1 is a single value).
// Not swapped: every element is divided by the scalar.
// Swapped: the scalar is divided by every element (caller exchanged the
// operands, so reversing here restores the original order).
case FLOOR_DIV:
  if (!swapped) {
#pragma omp parallel for schedule(runtime)
    for (index_t i = 0; i < size; ++i) {
      output[i] = std::floor(input0[i] / input1);
    }
  } else {
#pragma omp parallel for schedule(runtime)
    for (index_t i = 0; i < size; ++i) {
      output[i] = std::floor(input1 / input0[i]);
    }
  }
  break;
case MIN: case MIN:
#pragma omp parallel for schedule(runtime) #pragma omp parallel for schedule(runtime)
for (index_t i = 0; i < size; ++i) { for (index_t i = 0; i < size; ++i) {
...@@ -694,6 +756,33 @@ inline void TensorEltwisePerChannel(const EltwiseType type, ...@@ -694,6 +756,33 @@ inline void TensorEltwisePerChannel(const EltwiseType type,
} }
} }
break; break;
// FLOOR_DIV with per-channel broadcasting: input0 is a full
// (batch, channel, image) tensor; input1 holds one value per channel
// (optionally per batch when batch1 > 1) that is applied to the whole
// image plane of that channel. `swapped` reverses the division order to
// preserve the caller's original operand order.
case FLOOR_DIV:
  if (!swapped) {
#pragma omp parallel for collapse(2) schedule(runtime)
    for (index_t b = 0; b < batch0; ++b) {
      for (index_t c = 0; c < channel; ++c) {
        // Plane of image_size elements for (batch b, channel c).
        const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
        // Per-channel vector; advanced per batch only when input1 has
        // a batch dimension (batch1 > 1).
        const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
        DstType *out_ptr = output + ((b * channel) + c) * image_size;
        for (index_t i = 0; i < image_size; ++i) {
          out_ptr[i] = std::floor(in0_ptr[i] / in1_ptr[c]);
        }
      }
    }
  } else {
#pragma omp parallel for collapse(2) schedule(runtime)
    for (index_t b = 0; b < batch0; ++b) {
      for (index_t c = 0; c < channel; ++c) {
        const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
        const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
        DstType *out_ptr = output + ((b * channel) + c) * image_size;
        for (index_t i = 0; i < image_size; ++i) {
          // Swapped operands: per-channel value is the dividend.
          out_ptr[i] = std::floor(in1_ptr[c] / in0_ptr[i]);
        }
      }
    }
  }
  break;
case MIN: case MIN:
#pragma omp parallel for collapse(2) schedule(runtime) #pragma omp parallel for collapse(2) schedule(runtime)
for (index_t b = 0; b < batch0; ++b) { for (index_t b = 0; b < batch0; ++b) {
......
...@@ -30,7 +30,8 @@ enum EltwiseType { ...@@ -30,7 +30,8 @@ enum EltwiseType {
SQR_DIFF = 8, SQR_DIFF = 8,
POW = 9, POW = 9,
EQUAL = 10, EQUAL = 10,
NONE = 11, FLOOR_DIV = 11,
NONE = 12,
}; };
inline bool IsLogicalType(EltwiseType type) { return type == EQUAL; } inline bool IsLogicalType(EltwiseType type) { return type == EQUAL; }
......
...@@ -223,6 +223,10 @@ TEST_F(EltwiseOpTest, CPUSimpleScalarScalar) { ...@@ -223,6 +223,10 @@ TEST_F(EltwiseOpTest, CPUSimpleScalarScalar) {
ops::EltwiseType::PROD, 1, 2, 2); ops::EltwiseType::PROD, 1, 2, 2);
SimpleScalarScalar<DeviceType::CPU, float, float>( SimpleScalarScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, 1, 2, 0.5); ops::EltwiseType::DIV, 1, 2, 0.5);
SimpleScalarScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, 1, 2, 0);
SimpleScalarScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, 1, -2, -1);
SimpleScalarScalar<DeviceType::CPU, float, float>( SimpleScalarScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, 1, 2, 1); ops::EltwiseType::MIN, 1, 2, 1);
SimpleScalarScalar<DeviceType::CPU, float, float>( SimpleScalarScalar<DeviceType::CPU, float, float>(
...@@ -249,6 +253,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) { ...@@ -249,6 +253,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) {
SimpleTensorScalar<DeviceType::CPU, float, float>( SimpleTensorScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2, ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2,
{1, 2, 3, 4, 5, 6}); {1, 2, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 3,
{0, 1, 2, 2, 3, 4});
SimpleTensorScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, -3,
{-1, -2, -2, -3, -4, -4});
SimpleTensorScalar<DeviceType::CPU, float, float>( SimpleTensorScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1,
{1, 1, 1, 1, 1, 1}); {1, 1, 1, 1, 1, 1});
...@@ -281,6 +291,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorScalar) { ...@@ -281,6 +291,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorScalar) {
SimpleTensorScalar<DeviceType::GPU, float, float>( SimpleTensorScalar<DeviceType::GPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2, ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2,
{1, 2, 3, 4, 5, 6}); {1, 2, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 3,
{0, 1, 2, 2, 3, 4});
SimpleTensorScalar<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, -3,
{-1, -2, -2, -3, -4, -4});
SimpleTensorScalar<DeviceType::GPU, float, float>( SimpleTensorScalar<DeviceType::GPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1,
{1, 1, 1, 1, 1, 1}); {1, 1, 1, 1, 1, 1});
...@@ -317,6 +333,22 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { ...@@ -317,6 +333,22 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {2, 2, 2, 2, 3}, {0, 1, 1, 2, 1, 3, 3, 4, 4, 3});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {-2, -2, -2, -2, -3},
{-1, -1, -2, -2, -2, -3, -4, -4, -5, -4});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{2, 2, 2, 3, 3, 2, 2, 2, 2, 2}, {0, 0, 0, 0, 1, 0, 0, 0, 1, 2});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{-2, -2, -2, -3, -3, -2, -2, -2, -2, -2},
{-1, -1, -1, -1, -2, -1, -1, -1, -1, -2});
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5});
...@@ -349,6 +381,21 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { ...@@ -349,6 +381,21 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, ops::EltwiseType::DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{5}, {2, 2, 2, 2, 3}, {0, 1, 1, 2, 1, 3, 3, 4, 4, 3});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{5}, {-2, -2, -2, -2, -3}, {-1, -1, -2, -2, -2, -3, -4, -4, -5, -4});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{2, 2, 2, 3, 3, 2, 2, 2, 2, 2}, {0, 0, 0, 0, 1, 0, 0, 0, 1, 2});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{-2, -2, -2, -3, -3, -2, -2, -2, -2, -2},
{-1, -1, -1, -1, -2, -1, -1, -1, -1, -2});
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, ops::EltwiseType::MIN, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5});
...@@ -382,6 +429,22 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) { ...@@ -382,6 +429,22 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
SimpleTensorEltwise<DeviceType::GPU, float, float>( SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {2, 2, 2, 2, 3}, {0, 1, 1, 2, 1, 3, 3, 4, 4, 3});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {-2, -2, -2, -2, -3},
{-1, -1, -2, -2, -2, -3, -4, -4, -5, -4});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{2, 2, 2, 3, 3, 2, 2, 2, 2, 2}, {0, 0, 0, 0, 1, 0, 0, 0, 1, 2});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{-2, -2, -2, -3, -3, -2, -2, -2, -2, -2},
{-1, -1, -1, -1, -2, -1, -1, -1, -1, -2});
SimpleTensorEltwise<DeviceType::GPU, float, float>( SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5});
...@@ -410,6 +473,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) { ...@@ -410,6 +473,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) {
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3},
{1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 2, 1, 3}, {2, 3, 4, 5, 6, 7},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {2, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 2, 1, 3}, {-2, -3, -4, -5, -6, -7},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {-2, -2, -2, -2, -2, -2});
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
...@@ -442,6 +511,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) { ...@@ -442,6 +511,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) {
SimpleTensorEltwise<DeviceType::GPU, float, float>( SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3},
{1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 2, 1, 3}, {2, 3, 4, 5, 6, 7},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {2, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 2, 1, 3}, {-2, -3, -4, -5, -6, -7},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {-2, -2, -2, -2, -2, -2});
SimpleTensorEltwise<DeviceType::GPU, float, float>( SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
...@@ -767,6 +842,12 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) { ...@@ -767,6 +842,12 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) {
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3}); {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3});
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {2, 3}, {1, 1, 2, 3}, {0, 1, 1, 1, 1, 2});
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {-2, -3}, {1, 1, 2, 3}, {-1, -1, -2, -2, -2, -2});
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2}); {1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2});
...@@ -794,6 +875,12 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) { ...@@ -794,6 +875,12 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) {
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3}); {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {2, 3}, {1, 1, 2, 3}, {0, 1, 1, 1, 1, 2});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {-2, -3}, {1, 1, 2, 3}, {-1, -1, -2, -2, -2, -2});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2}); {1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2});
......
...@@ -81,13 +81,19 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS ...@@ -81,13 +81,19 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
#else #else
out = pow(in0, in1); out = pow(in0, in1);
#endif #endif
// ELTWISE_TYPE 11 == FLOOR_DIV (matches EltwiseType::FLOOR_DIV on the
// host side). SWAPPED is defined when the host exchanged the operands,
// so the division order is reversed to keep the original operand order.
#elif ELTWISE_TYPE == 11
#ifdef SWAPPED
  out = floor(in1 / in0);
#else
  out = floor(in0 / in1);
#endif
#endif #endif
#if ((INPUT_TYPE == 1 || INPUT_TYPE == 4) && \ #if ((INPUT_TYPE == 1 || INPUT_TYPE == 4) && \
(ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \ (ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9)) || \ ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9)) || \
((INPUT_TYPE != 1 || INPUT_TYPE != 4) && \ ((INPUT_TYPE != 1 || INPUT_TYPE != 4) && \
(ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9)) (ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 || ELTWISE_TYPE == 11))
const int remain_channel = channel - 4 * chan_idx; const int remain_channel = channel - 4 * chan_idx;
if (remain_channel < 4) { if (remain_channel < 4) {
switch (remain_channel) { switch (remain_channel) {
......
...@@ -58,6 +58,10 @@ void ScalarEltwise(const T* in0, ...@@ -58,6 +58,10 @@ void ScalarEltwise(const T* in0,
else else
out[0] = in0[0] / in1[0]; out[0] = in0[0] / in1[0];
break; break;
// FLOOR_DIV of two scalars: floor(lhs / rhs), with the operand order
// restored when the caller set `swapped`.
case FLOOR_DIV:
  out[0] = std::floor(swapped ? in1[0] / in0[0] :
                                in0[0] / in1[0]);
  break;
case MIN: case MIN:
out[0] = std::min(in1[0], in0[0]); out[0] = std::min(in1[0], in0[0]);
break; break;
......
...@@ -63,6 +63,10 @@ TEST_F(ScalarMathOpTest, SimpleCPU) { ...@@ -63,6 +63,10 @@ TEST_F(ScalarMathOpTest, SimpleCPU) {
ops::EltwiseType::PROD, 3, -2, 3, -6); ops::EltwiseType::PROD, 3, -2, 3, -6);
ScalarMathTest<DeviceType::CPU, float, float>( ScalarMathTest<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, 3, -2, 1, -1.5); ops::EltwiseType::DIV, 3, -2, 1, -1.5);
ScalarMathTest<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, 3, -2, 1, -2);
ScalarMathTest<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, 3, 2, 1, 1);
ScalarMathTest<DeviceType::CPU, float, float>( ScalarMathTest<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, 3, -2, 1, -2); ops::EltwiseType::MIN, 3, -2, 1, -2);
ScalarMathTest<DeviceType::CPU, float, float>( ScalarMathTest<DeviceType::CPU, float, float>(
......
...@@ -79,6 +79,7 @@ class EltwiseType(Enum): ...@@ -79,6 +79,7 @@ class EltwiseType(Enum):
SQR_DIFF = 8 SQR_DIFF = 8
POW = 9 POW = 9
EQUAL = 10 EQUAL = 10
FLOOR_DIV = 11
class ReduceType(Enum): class ReduceType(Enum):
......
...@@ -113,6 +113,7 @@ TFSupportedOps = [ ...@@ -113,6 +113,7 @@ TFSupportedOps = [
'ArgMax', 'ArgMax',
'Split', 'Split',
'FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxVars',
'FloorDiv',
'Sqrt', 'Sqrt',
] ]
...@@ -185,6 +186,7 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -185,6 +186,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.Abs.name: EltwiseType.ABS, TFOpType.Abs.name: EltwiseType.ABS,
TFOpType.Pow.name: EltwiseType.POW, TFOpType.Pow.name: EltwiseType.POW,
TFOpType.RealDiv.name: EltwiseType.DIV, TFOpType.RealDiv.name: EltwiseType.DIV,
TFOpType.FloorDiv.name: EltwiseType.FLOOR_DIV,
TFOpType.SquaredDifference.name: EltwiseType.SQR_DIFF, TFOpType.SquaredDifference.name: EltwiseType.SQR_DIFF,
TFOpType.Square.name: EltwiseType.POW, TFOpType.Square.name: EltwiseType.POW,
TFOpType.Rsqrt.name: EltwiseType.POW, TFOpType.Rsqrt.name: EltwiseType.POW,
...@@ -264,6 +266,7 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -264,6 +266,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.ArgMax.name: self.convert_argmax, TFOpType.ArgMax.name: self.convert_argmax,
TFOpType.Split.name: self.convert_split, TFOpType.Split.name: self.convert_split,
TFOpType.FakeQuantWithMinMaxVars.name: self.convert_fake_quantize, TFOpType.FakeQuantWithMinMaxVars.name: self.convert_fake_quantize,
TFOpType.FloorDiv.name: self.convert_elementwise,
TFOpType.Sqrt.name: self.convert_elementwise, TFOpType.Sqrt.name: self.convert_elementwise,
} }
self._option = option self._option = option
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册