jitcode.h 3.8 KB
Newer Older
T
tensor-tang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#pragma once

T
tensor-tang 已提交
17
#include <string>
T
tensor-tang 已提交
18
#include <type_traits>
T
tensor-tang 已提交
19
#include "paddle/fluid/operators/jit/gen_base.h"
T
tensor-tang 已提交
20
#include "paddle/fluid/platform/cpu_info.h"
T
tensor-tang 已提交
21 22 23 24 25 26 27

#define XBYAK_USE_MMAP_ALLOCATOR
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"

namespace paddle {
namespace operators {
T
tensor-tang 已提交
28 29
namespace jit {
namespace gen {
T
tensor-tang 已提交
30 31 32 33

// Application Binary Interface: register codes used for the first six
// integer/pointer arguments of a generated function. The order
// RDI, RSI, RDX, RCX, R8, R9 matches the System V AMD64 argument sequence;
// no alternative (e.g. Windows x64) mapping is defined here.
constexpr Xbyak::Operand::Code abi_param1 = Xbyak::Operand::RDI;
constexpr Xbyak::Operand::Code abi_param2 = Xbyak::Operand::RSI;
constexpr Xbyak::Operand::Code abi_param3 = Xbyak::Operand::RDX;
constexpr Xbyak::Operand::Code abi_param4 = Xbyak::Operand::RCX;
constexpr Xbyak::Operand::Code abi_param5 = Xbyak::Operand::R8;
constexpr Xbyak::Operand::Code abi_param6 = Xbyak::Operand::R9;
T
tensor-tang 已提交
36

T
tensor-tang 已提交
37 38 39 40 41 42 43 44 45 46 47
// General-purpose registers that JitCode::preCode() pushes, in this order,
// and JitCode::postCode() pops in reverse order, so that generated code may
// clobber them freely.
constexpr Xbyak::Operand::Code g_abi_regs[] = {
    Xbyak::Operand::RBX,  //
    Xbyak::Operand::RBP,  //
    Xbyak::Operand::R12,  //
    Xbyak::Operand::R13,  //
    Xbyak::Operand::R14,  //
    Xbyak::Operand::R15,  //
};

// Number of entries in g_abi_regs.
constexpr int num_g_abi_regs =
    static_cast<int>(sizeof(g_abi_regs) / sizeof(g_abi_regs[0]));

using reg64_t = const Xbyak::Reg64;
using reg32_t = const Xbyak::Reg32;
using xmm_t = const Xbyak::Xmm;
using ymm_t = const Xbyak::Ymm;
using zmm_t = const Xbyak::Zmm;
48
using opmask_t = const Xbyak::Opmask;
T
tensor-tang 已提交
49 50 51
using Label = Xbyak::Label;

// Tags naming the operation a generated kernel is specialised for.
// The numeric values are spelled out so they are visible at a glance; they
// are the same values the enumerators would receive implicitly.
enum operand_type {
  MUL = 0,
  MAX = 1,
  ADD = 2,
  SUB = 3,
  RELU = 4,
  EXP = 5,
  SQUARE = 6,
  SIGMOID = 7,
  TANH = 8,
  IDENTITY = 9
};

// Expands, inside a class body, to an override of name() that returns the
// macro argument spelled as a string (e.g. DECLARE_JIT_CODE(Foo) -> "Foo").
#define DECLARE_JIT_CODE(codename)    \
  std::string name() const override { \
    return std::string(#codename);    \
  }
T
tensor-tang 已提交
66

T
tensor-tang 已提交
67
class JitCode : public GenBase, public Xbyak::CodeGenerator {
T
tensor-tang 已提交
68
 public:
T
tensor-tang 已提交
69
  explicit JitCode(size_t code_size, void* code_ptr = nullptr)
T
tensor-tang 已提交
70 71 72
      : Xbyak::CodeGenerator(
            (code_size % 4096 != 0 ? (code_size / 4096 + 1) * 4096 : code_size),
            code_ptr) {}
T
tensor-tang 已提交
73 74

  virtual void genCode() = 0;
T
tensor-tang 已提交
75

T
tensor-tang 已提交
76
  size_t getSize() const override { return CodeGenerator::getSize(); }
77
  const unsigned char* getCodeInternal() const override {
T
tensor-tang 已提交
78 79 80
    const Xbyak::uint8* code = CodeGenerator::getCode();
    return code;
  }
T
tensor-tang 已提交
81 82 83 84 85 86 87 88 89 90

 protected:
  Xbyak::Reg64 param1{abi_param1};
  const int EVEX_max_8b_offt = 0x200;
  const Xbyak::Reg64 reg_EVEX_max_8b_offt = rbp;

  virtual void preCode() {
    for (int i = 0; i < num_g_abi_regs; ++i) {
      push(Xbyak::Reg64(g_abi_regs[i]));
    }
T
tensor-tang 已提交
91
    if (platform::MayIUse(platform::avx512f)) {
T
tensor-tang 已提交
92 93 94 95 96 97 98 99 100 101
      mov(reg_EVEX_max_8b_offt, 2 * EVEX_max_8b_offt);
    }
  }
  virtual void postCode() {
    for (int i = 0; i < num_g_abi_regs; ++i) {
      pop(Xbyak::Reg64(g_abi_regs[num_g_abi_regs - 1 - i]));
    }
    ret();
  }
  void L(const char* label) { Xbyak::CodeGenerator::L(label); }
102
  void L(Xbyak::Label& label) { Xbyak::CodeGenerator::L(label); }  // NOLINT
T
tensor-tang 已提交
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
  // Enhanced vector extension
  Xbyak::Address EVEX_compress_addr(Xbyak::Reg64 base, int offt,
                                    bool bcast = false) {
    int scale = 0;
    // Learn from https://github.com/intel/mkl-dnn
    if (EVEX_max_8b_offt <= offt && offt < 3 * EVEX_max_8b_offt) {
      offt = offt - 2 * EVEX_max_8b_offt;
      scale = 1;
    } else if (3 * EVEX_max_8b_offt <= offt && offt < 5 * EVEX_max_8b_offt) {
      offt = offt - 4 * EVEX_max_8b_offt;
      scale = 2;
    }
    auto re = Xbyak::RegExp() + base + offt;
    if (scale) {
      re = re + reg_EVEX_max_8b_offt * scale;
    }
    if (bcast) {
      return zword_b[re];
    } else {
      return zword[re];
    }
  }
T
tensor-tang 已提交
125 126
};

T
tensor-tang 已提交
127 128
}  // namespace gen
}  // namespace jit
T
tensor-tang 已提交
129 130
}  // namespace operators
}  // namespace paddle