neon_struct.h 2.1 KB
Newer Older
1
/**
 * \file dnn/src/arm_common/neon_struct.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
12
#pragma once
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
#include "src/arm_common/simd_macro/marm_neon.h"
namespace megdnn {
namespace {
/**
 * Exposes the vdotq_s32_h() helper as a static member function named impl
 * (presumably so the operation can be supplied as a template type argument;
 * confirm against callers).
 *
 * All four arguments are taken by non-const reference and handed to
 * vdotq_s32_h() unchanged.
 * NOTE(review): whether the helper writes through \p c or \p temp is not
 * visible from this header; check its definition before relying on either.
 *
 * \return whatever vdotq_s32_h(a, b, c, temp) returns.
 */
struct Vdotq_s32_h {
    static int32x4_t impl(
            int8x16_t& a, int8x16_t& b, int32x4_t& c, int16x8_t& temp) {
        const int32x4_t result = vdotq_s32_h(a, b, c, temp);
        return result;
    }
};
/**
 * Exposes the vdot2_s32_h() helper as a static member function named impl.
 *
 * Unlike Vdotq_s32_h, the arguments here are received by value, so the
 * caller's own vectors are never touched by this wrapper; the local copies
 * are forwarded to vdot2_s32_h() in the same order.
 *
 * \return whatever vdot2_s32_h(a, b, c, temp) returns.
 */
struct Vdot2_s32_h {
    static int32x4_t impl(
            int8x8_t a, int8x8_t b, int32x4_t c, int16x8_t temp) {
        const int32x4_t result = vdot2_s32_h(a, b, c, temp);
        return result;
    }
};

/**
 * Wrapper around the vmlal_s16 intrinsic with the accumulator passed last.
 *
 * Only the low halves of \p a and \p b (as selected by vget_low_s16) take
 * part in the operation; their high halves are ignored. The accumulator
 * \p c is passed as the first argument of vmlal_s16.
 *
 * \return the result of vmlal_s16(c, low(a), low(b)).
 */
struct Vmlal_s16 {
    static int32x4_t impl(int16x8_t a, int16x8_t b, int32x4_t c) {
        const int16x4_t a_low = vget_low_s16(a);
        const int16x4_t b_low = vget_low_s16(b);
        return vmlal_s16(c, a_low, b_low);
    }
};

/**
 * Exposes the vld1q_s8 intrinsic as a static member function named impl.
 *
 * \param ptr address handed unchanged to vld1q_s8.
 * \return the int8x16_t produced by vld1q_s8(ptr).
 */
struct Vld1q_s8 {
    static int8x16_t impl(const int8_t* ptr) {
        const int8x16_t loaded = vld1q_s8(ptr);
        return loaded;
    }
};
37 38 39
/**
 * Exposes the vld1q_f32 intrinsic as a static member function named impl.
 *
 * \param ptr address handed unchanged to vld1q_f32.
 * \return the float32x4_t produced by vld1q_f32(ptr).
 */
struct Vld1q_f32 {
    static float32x4_t impl(const float32_t* ptr) {
        const float32x4_t loaded = vld1q_f32(ptr);
        return loaded;
    }
};
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
/**
 * Exposes the vld1_s8 intrinsic as a static member function named impl.
 *
 * \param ptr address handed unchanged to vld1_s8.
 * \return the int8x8_t produced by vld1_s8(ptr).
 */
struct Vld1_s8 {
    static int8x8_t impl(const int8_t* ptr) {
        const int8x8_t loaded = vld1_s8(ptr);
        return loaded;
    }
};
/**
 * Exposes the vldq_dup_4s8_8s16() helper as a static member function named
 * impl.
 *
 * \param ptr address handed unchanged to vldq_dup_4s8_8s16.
 * \return the int16x8_t produced by the helper.
 * NOTE(review): the helper's name suggests it reads four int8 values and
 * yields eight int16 lanes, but its definition is not in this header;
 * confirm before relying on that.
 */
struct Vldq_dup_4s8_8s16 {
    static int16x8_t impl(const int8_t* ptr) {
        const int16x8_t widened = vldq_dup_4s8_8s16(ptr);
        return widened;
    }
};

/**
 * Exposes the vldq_tbl_low_s8() helper as a static member function named
 * impl.
 *
 * \param ptr address handed unchanged to vldq_tbl_low_s8.
 * \param idx index vector handed unchanged to vldq_tbl_low_s8.
 * \return the int8x8_t produced by the helper.
 */
struct Vldq_tbl_low_s8 {
    static int8x8_t impl(const int8_t* ptr, uint8x16_t idx) {
        const int8x8_t picked = vldq_tbl_low_s8(ptr, idx);
        return picked;
    }
};

/**
 * Exposes the vld1_dup_s8_s16() helper as a static member function named
 * impl.
 *
 * \param ptr address handed unchanged to vld1_dup_s8_s16.
 * \return the int16x8_t produced by the helper.
 */
struct Vld1_dup_s8_s16 {
    static int16x8_t impl(const int8_t* ptr) {
        const int16x8_t widened = vld1_dup_s8_s16(ptr);
        return widened;
    }
};
56 57 58 59 60 61 62

/**
 * Exposes vfmaq_laneq_f32 as a static member function template named impl.
 *
 * The lane index is a compile-time template parameter and is forwarded as
 * the last argument of vfmaq_laneq_f32. With the standard ACLE intrinsic
 * this corresponds to a + b * v[lane]; NOTE(review): the included
 * marm_neon.h may supply its own definition on some targets, so confirm
 * there.
 *
 * \tparam lane lane of \p v selected for the operation.
 * \return the result of vfmaq_laneq_f32(a, b, v, lane).
 */
struct Vfmaq_laneq_f32 {
    template <const int lane>
    static float32x4_t impl(float32x4_t a, float32x4_t b, float32x4_t v) {
        const float32x4_t fused = vfmaq_laneq_f32(a, b, v, lane);
        return fused;
    }
};
63 64 65 66 67 68 69 70
#if __ARM_FEATURE_DOTPROD
/**
 * Exposes vdotq_laneq_s32 as a static member function template named impl.
 *
 * The lane index is a compile-time template parameter and is forwarded as
 * the last argument of vdotq_laneq_s32, with \p a in the accumulator
 * position.
 *
 * \tparam lane lane of \p v selected for the operation.
 * \return the result of vdotq_laneq_s32(a, b, v, lane).
 */
struct Vdotq_laneq_s32 {
    template <const int lane>
    static int32x4_t impl(int32x4_t a, int8x16_t b, int8x16_t v) {
        const int32x4_t dotted = vdotq_laneq_s32(a, b, v, lane);
        return dotted;
    }
};
#endif
71

72
}  // namespace
73 74 75
}  // namespace megdnn

// vim: syntax=cpp.doxygen