diff --git a/src/operators/kernel/arm/concat_kernel.cpp b/src/operators/kernel/arm/concat_kernel.cpp index 8cdf6cb01afa85239bfd0d48bbce02790ba5250d..3c6a6f151f9b05ad0b69b40298ee5a47797d70af 100644 --- a/src/operators/kernel/arm/concat_kernel.cpp +++ b/src/operators/kernel/arm/concat_kernel.cpp @@ -27,7 +27,11 @@ bool ConcatKernel::Init(ConcatParam *param) { template <> void ConcatKernel::Compute(const ConcatParam ¶m) { - ConcatCompute(param); + if (param.Inputs()[0]->type() == typeid(int8_t)) { + ConcatCompute(param); + } else { + ConcatCompute(param); + } param.Out()->set_lod(param.Inputs()[0]->lod()); } diff --git a/src/operators/kernel/central-arm-func/concat_arm_func.h b/src/operators/kernel/central-arm-func/concat_arm_func.h index 57a22aafa5e0bc75c1041c379c2229deaa310ffe..4b22857302d11e4a7861282b3088ebe23bea0537 100644 --- a/src/operators/kernel/central-arm-func/concat_arm_func.h +++ b/src/operators/kernel/central-arm-func/concat_arm_func.h @@ -57,8 +57,8 @@ template void ConcatCompute(const ConcatParam ¶m) { auto inputs = param.Inputs(); auto *out = param.Out(); - int64_t axis = param.Axis(); - out->mutable_data(); + int axis = param.Axis(); + out->mutable_data

(); /// Sometimes direct copies will be faster, this maybe need deeply analysis. if (axis == 0 && inputs.size() < 10) { @@ -66,12 +66,12 @@ void ConcatCompute(const ConcatParam ¶m) { for (auto *in : inputs) { auto in_stride = framework::stride_numel(in->dims()); auto out_stride = framework::stride_numel(out->dims()); - auto dst = out->data() + output_offset; - auto src = in->data(); + auto dst = out->data

() + output_offset; + auto src = in->data

(); PADDLE_MOBILE_ENFORCE( in_stride.size() == out_stride.size(), "src and dst tensor should have the same dims size."); - memory::Copy(dst, src, sizeof(float) * in_stride[0]); + memory::Copy(dst, src, sizeof(P) * in_stride[0]); output_offset += in_stride[0]; } } else { @@ -79,8 +79,8 @@ void ConcatCompute(const ConcatParam ¶m) { for (int j = 0; j < inputs.size(); ++j) { inputs_concat[j] = *inputs[j]; } - ConcatFunctor concat_functor; - concat_functor(inputs_concat, static_cast(axis), out); + ConcatFunctor

concat_functor; + concat_functor(inputs_concat, axis, out); } } diff --git a/test/operators/test_concat_op.cpp b/test/operators/test_concat_op.cpp index 1a347a9c37a96f3c31506d0b45f95e05b64292ff..88ec06be6f1b5197669f7c580d935bb9d2475c5a 100644 --- a/test/operators/test_concat_op.cpp +++ b/test/operators/test_concat_op.cpp @@ -12,76 +12,125 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include +#include +#include +#include "../test_helper.h" #include "../test_include.h" #include "operators/concat_op.h" +namespace paddle_mobile { +using framework::AttributeMap; +using framework::DDim; +using framework::LoDTensor; +using framework::Scope; +using framework::make_ddim; + +template +void concat(const std::vector &input, LoDTensor &output, int axis) { + int num = input.size(); + + int rows = 1; + auto dim_0 = input[0].dims(); + for (int i = 0; i < axis; ++i) { + rows *= dim_0[i]; + } + int out_rows = rows, out_cols = 0; + + std::vector input_cols(input.size()); + for (int i = 0; i < num; ++i) { + int t_cols = input[i].numel() / rows; + out_cols += t_cols; + input_cols[i] = t_cols; + } + + // computation + auto output_data = output.data(); + int col_idx = 0; + for (int j = 0; j < num; ++j) { + int col_len = input_cols[j]; + auto input_data = input[j].data(); + for (int k = 0; k < out_rows; ++k) { + memcpy(output_data + k * out_cols + col_idx, input_data + k * col_len, + sizeof(T) * col_len); + } + col_idx += col_len; + } +} + +template +int TestConcatOP() { + DDim inputA_shape = make_ddim({10, 4, 2, 2}); + DDim inputB_shape = make_ddim({20, 4, 2, 2}); + DDim inputC_shape = make_ddim({30, 4, 2, 2}); + DDim inputD_shape = make_ddim({40, 4, 2, 2}); + DDim output_shape = make_ddim({100, 4, 2, 2}); + int axis_v = 0; + VariableNameMap inputs; + VariableNameMap outputs; + std::vector input_tensors; + auto scope = std::make_shared(); + inputs["X"] = + std::vector({"inputA", "inputB", "inputC", "inputD"}); + outputs["Out"] = std::vector({"output"}); + + auto inputA_var = scope.get()->Var("inputA"); + auto inputA = inputA_var->template GetMutable(); + SetupTensor(inputA, inputA_shape, -127, 127); + input_tensors.push_back(std::move(*inputA)); + + auto inputB_var = scope.get()->Var("inputB"); + auto inputB = inputB_var->template GetMutable(); + SetupTensor(inputB, inputB_shape, -127, 127); + input_tensors.push_back(std::move(*inputB)); + + auto inputC_var = scope.get()->Var("inputC"); + auto inputC = inputC_var->template GetMutable(); + SetupTensor(inputC, inputC_shape, -127, 127); + input_tensors.push_back(std::move(*inputC)); + + auto inputD_var = scope.get()->Var("inputD"); + auto inputD = inputD_var->template GetMutable(); + SetupTensor(inputD, inputD_shape, -127, 127); + input_tensors.push_back(std::move(*inputD)); + + auto output_var = scope.get()->Var("output"); + AttributeMap attrs; + attrs["axis"].Set(axis_v); + + auto *op = new operators::ConcatOp("concat", inputs, outputs, + attrs, scope); + op->InferShape(); + op->Run(); + auto output = output_var->template Get(); + const T *output_data = output->data(); + LoDTensor output_cmp; + output_cmp.mutable_data(output_shape); + concat(input_tensors, output_cmp, axis_v); + const T *output_cmp_data = output_cmp.data(); + // compare + int eq = 0; + int neq = 0; + for (int i = 0; i < output->numel(); ++i) { + PADDLE_MOBILE_ENFORCE(output_data[i] == output_cmp_data[i], + "The execution of test_concat_op is failed!"); + if (output_data[i] == output_cmp_data[i]) { + ++eq; + } else { + ++neq; + } + } + std::cout << "eq = " << eq << ", neq = " << neq << std::endl; + + delete op; + return 0; +} +} // namespace paddle_mobile + int main() { - paddle_mobile::framework::Loader loader; - auto program = loader.Load(g_googlenet); - PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, - "program file read fail"); - - Executor4Test> - executor(program, "concat"); - - // 1. input_tensors; - vector input_tensors; - - Tensor input1; - auto input1_data = CreateInput(&input1, {4, 10, 2, 2}, 0, 1); - input_tensors.push_back(input1); - Tensor input2; - auto input2_data = CreateInput(&input2, {4, 20, 2, 2}, 0, 1); - input_tensors.push_back(input2); - Tensor input3; - auto input3_data = CreateInput(&input3, {4, 30, 2, 2}, 0, 1); - input_tensors.push_back(input3); - Tensor input4; - auto input4_data = CreateInput(&input4, {4, 40, 2, 2}, 0, 1); - input_tensors.push_back(input4); - // 2. input_names - vector input_names({ - "conv2d_3.tmp_1", - "conv2d_5.tmp_1", - "conv2d_7.tmp_1", - "conv2d_8.tmp_1", - }); - - // 3. output_names - vector output_names({"concat_0.tmp_0"}); - - // 4. out_dims; - vector out_ddims; - auto out_ddim = paddle_mobile::framework::make_ddim({3, 100, 2, 2}); - out_ddims.push_back(out_ddim); - - auto output = executor.Predict(input_tensors, input_names, - output_names, out_ddims); - - auto output0_data = output[0]->data(); - - // 5. test one example. - int input_n = 1; - int input_c = 2; - int input_h = 0; - int input_w = 1; - int stride0 = input3.numel() / input3.dims()[0]; - int stride1 = input3.numel() / input3.dims()[0] / input3.dims()[1]; - int stride2 = input3.dims()[3]; - /// inputx1 (4,10,2,2), - /// inputx2 (4,20,2,2), - /// inputx3 (4,30,2,2), - /// inputx4 (4,40,2,2), - /// axis = 1 - /// output (4,100,2,2) - int input_index = - input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w; - int output_index = input_n * 100 * 2 * 2 + - (input_c + input1.dims()[1] + input2.dims()[1]) * 2 * 2 + - input_h * 2 + input_w; - - DLOG << " input3 [1, 2,0,1] = " << input3_data[input_index]; - DLOG << " output [1,32,0,1] = " << output0_data[output_index]; + paddle_mobile::PaddleMobile paddle_mobile; + paddle_mobile.SetThreadNum(4); + paddle_mobile::TestConcatOP(); + paddle_mobile::TestConcatOP(); return 0; } diff --git a/test/operators/test_fusion_fc_op.cpp b/test/operators/test_fusion_fc_op.cpp index 34de0292d0e5bc460f402c08a22fe60b02e7ab0c..05c694a77dc6b3e631ea3134e05757944447d807 100644 --- a/test/operators/test_fusion_fc_op.cpp +++ b/test/operators/test_fusion_fc_op.cpp @@ -17,8 +17,10 @@ limitations under the License. */ #include "../test_helper.h" #include "../test_include.h" #include "framework/operator.h" -#include "operators/fusion_fc_int8_op.h" #include "operators/fusion_fc_op.h" +#ifdef FUSION_FC_INT8_OP +#include "operators/fusion_fc_int8_op.h" +#endif #define a(i, j) a[(i)*lda + (j)] #define b(i, j) b[(i)*ldb + (j)]