blas.h 3.7 KB
Newer Older
T
tensor-tang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#pragma once

#include <string>
18
#include "glog/logging.h"
T
tensor-tang 已提交
19
#include "paddle/fluid/operators/jit/gen/jitcode.h"
T
tensor-tang 已提交
20 21 22

namespace paddle {
namespace operators {
T
tensor-tang 已提交
23 24
namespace jit {
namespace gen {
T
tensor-tang 已提交
25 26 27 28

// function: vec = Operand(vec(or scalar), vec(or scalar)) (maybe with relu)
class VXXJitCode : public JitCode {
 public:
T
tensor-tang 已提交
29 30 31 32 33 34 35 36
  explicit VXXJitCode(int d, operand_type type, int scalar_index,
                      bool with_relu, size_t code_size = 256 * 1024,
                      void* code_ptr = nullptr)
      : JitCode(code_size, code_ptr),
        num_(d),
        type_(type),
        scalar_index_(scalar_index),
        with_relu_(with_relu) {
T
tensor-tang 已提交
37 38
    if (!(type_ == operand_type::MUL || type_ == operand_type::ADD ||
          type_ == operand_type::SUB)) {
39 40
      LOG(FATAL) << "Do not support this operand type: " << type_;
    }
T
tensor-tang 已提交
41 42 43 44
    this->genCode();
  }

  virtual const char* name() const {
T
tensor-tang 已提交
45 46 47 48 49 50
    std::string base = "VXXJitCode";
    if (scalar_index_ == 1) {
      base += "_Scalar";
    } else {
      base += "_Vec";
    }
51
    if (type_ == operand_type::MUL) {
T
tensor-tang 已提交
52
      base += "_Mul";
53
    } else if (type_ == operand_type::ADD) {
T
tensor-tang 已提交
54
      base += "_Add";
T
tensor-tang 已提交
55 56
    } else if (type_ == operand_type::SUB) {
      base += "_SUB";
T
tensor-tang 已提交
57 58 59 60 61 62 63
    }
    if (scalar_index_ == 2) {
      base += "_Scalar";
    } else {
      base += "_Vec";
    }
    base += (with_relu_ ? "_Relu" : "");
T
tensor-tang 已提交
64
    base += "_D" + std::to_string(num_);
T
tensor-tang 已提交
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
    return base.c_str();
  }
  void genCode() override;

 private:
  int num_;
  operand_type type_;
  int scalar_index_;
  bool with_relu_;
  reg64_t param1{abi_param1};
  reg64_t param2{abi_param2};
  reg64_t param3{abi_param3};

  xmm_t xmm_src1 = xmm_t(0);
  xmm_t xmm_src2 = xmm_t(1);
  xmm_t xmm_dst = xmm_t(2);
  xmm_t xmm_zero = xmm_t(3);

  ymm_t ymm_src1 = ymm_t(0);
  ymm_t ymm_src2 = ymm_t(1);
  ymm_t ymm_dst = ymm_t(2);
  ymm_t ymm_zero = ymm_t(3);
};

89 90 91 92 93 94 95 96
#define DECLARE_BLAS_JITCODE(name, op_type, scalar_idx, with_relu)             \
  class name##JitCode : public VXXJitCode {                                    \
   public:                                                                     \
    explicit name##JitCode(int d, size_t code_size, void* code_ptr = nullptr)  \
        : VXXJitCode(d, op_type, scalar_idx, with_relu, code_size, code_ptr) { \
    }                                                                          \
  };

97 98 99 100 101 102
DECLARE_BLAS_JITCODE(VMul, operand_type::MUL, 0, false);
DECLARE_BLAS_JITCODE(VAdd, operand_type::ADD, 0, false);
DECLARE_BLAS_JITCODE(VSub, operand_type::SUB, 0, false);
DECLARE_BLAS_JITCODE(VAddRelu, operand_type::ADD, 0, true);
DECLARE_BLAS_JITCODE(VScal, operand_type::MUL, 1, false);
DECLARE_BLAS_JITCODE(VAddBias, operand_type::ADD, 1, false);
103 104

#undef DECLARE_BLAS_JITCODE
T
tensor-tang 已提交
105

T
tensor-tang 已提交
106 107 108 109 110 111 112 113 114 115 116 117
// JIT code generator for the operation summarized as
//   nChw16c = nChw16c .* NC
// The leading int argument is accepted but ignored; code_size and code_ptr
// are handed to the JitCode base class, and the code is generated as part of
// construction.
class NCHW16CMulNCJitCode : public JitCode {
 public:
  DECLARE_JIT_CODE(NCHW16CMulNCJitCode);

  void genCode() override;

  explicit NCHW16CMulNCJitCode(int /*d, unused*/, size_t code_size,
                               void* code_ptr = nullptr)
      : JitCode(code_size, code_ptr) {
    genCode();
  }
};

T
tensor-tang 已提交
118 119
}  // namespace gen
}  // namespace jit
T
tensor-tang 已提交
120 121
}  // namespace operators
}  // namespace paddle