fpga_common.cpp 5.2 KB
Newer Older
Z
zhangyang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "fpga/common/fpga_common.h"
#include <algorithm>
#include <cstring>
#include <map>
J
jameswu2014 已提交
18
#include <utility>
Z
zhangyang 已提交
19 20 21 22 23 24 25
#include "fpga/common/config.h"
#include "fpga/common/driver.h"

namespace paddle_mobile {
namespace fpga {

// Converts a single-precision float to a half-precision bit pattern.
//
// The result is the raw 16-bit encoding (1 sign bit, 5 exponent bits,
// 10 mantissa bits) returned in an int16_t. Mantissa bits that do not fit are
// truncated, not rounded.
//
//   biased fp32 exponent < 103    -> signed zero (too small for fp16)
//   103 .. 112                    -> fp16 subnormal
//   113 .. 142                    -> fp16 normal
//   143 .. 254                    -> signed infinity (too large for fp16)
//   255                           -> infinity / NaN, top 10 mantissa bits kept
//
// Note: a NaN whose payload lives only in the low 13 mantissa bits collapses
// to infinity because those bits are dropped.
int16_t fp32_2_fp16(float fp32_num) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "fp32_2_fp16 expects a 32-bit float");

  // memcpy is the well-defined way to read the bits; going through an
  // int32_t pointer would violate the strict-aliasing rules.
  uint32_t bits = 0;
  std::memcpy(&bits, &fp32_num, sizeof(bits));

  // Handle the sign separately so that every magnitude branch, including
  // overflow and inf/NaN, keeps the sign of the input.
  const uint32_t sign = (bits >> 16) & 0x8000u;
  const int32_t exponent = static_cast<int32_t>((bits >> 23) & 0xffu);
  const uint32_t mantissa = bits & 0x007fffffu;

  uint32_t magnitude = 0;
  if (exponent < 103) {
    // Below the smallest fp16 subnormal: flush to zero.
    magnitude = 0;
  } else if (exponent < 113) {
    // Subnormal: the implicit leading one becomes an explicit mantissa bit.
    magnitude =
        (0x0400u >> (113 - exponent)) + (mantissa >> (126 - exponent));
  } else if (exponent <= 142) {
    // Normal: re-bias the exponent (127 -> 15) and drop 13 mantissa bits.
    magnitude =
        (static_cast<uint32_t>(exponent - 112) << 10) + (mantissa >> 13);
  } else if (exponent < 255) {
    // Finite but too large for fp16: saturate to infinity.
    magnitude = 0x7C00u;
  } else {
    // Infinity or NaN.
    magnitude = 0x7C00u + (mantissa >> 13);
  }

  return static_cast<int16_t>(static_cast<uint16_t>(sign | magnitude));
}

// Normalizes the 10-bit mantissa of an fp16 subnormal into fp32 exponent and
// mantissa bits (sign excluded).
//
// `i` is expected to be in [1, 1023]. The mantissa is shifted left until its
// leading one reaches bit 23; each shift lowers the exponent by one. The
// leading one is then cleared (it becomes the implicit bit) and the exponent
// is biased by 0x38800000 (113 << 23).
//
// Returns 0 when `i` has no bit set in its low 11 bits (for example i == 0):
// no bit could ever reach position 23, so the normalization loop would never
// terminate.
int32_t convertmantissa(int32_t i) {
  // Unsigned arithmetic keeps the shifts and the exponent wrap-around well
  // defined.
  uint32_t mantissa = static_cast<uint32_t>(i) << 13;
  if ((mantissa & 0x00ffffffu) == 0) {
    return 0;
  }

  uint32_t exponent = 0;
  while ((mantissa & 0x00800000u) == 0) {
    exponent -= 0x00800000u;
    mantissa <<= 1;
  }
  mantissa &= ~0x00800000u;
  exponent += 0x38800000u;
  return static_cast<int32_t>(mantissa | exponent);
}

// Expands a half-precision bit pattern (1 sign bit, 5 exponent bits,
// 10 mantissa bits, passed as a raw int16_t) to a single-precision float.
//
//   exponent == 0, mantissa == 0  -> signed zero
//   exponent == 0, mantissa != 0  -> subnormal, normalized by convertmantissa
//   exponent 1 .. 30              -> normal, exponent re-biased by +112
//   exponent == 31                -> infinity / NaN
float fp16_2_fp32(int16_t fp16_num) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "fp16_2_fp32 expects a 32-bit float");

  // Work on the unsigned bit pattern so no step depends on signed shifts or
  // on narrowing large constants into int32_t.
  const uint32_t half = static_cast<uint16_t>(fp16_num);
  const uint32_t sign = (half & 0x8000u) << 16;
  const uint32_t exponent = (half >> 10) & 0x1fu;
  const uint32_t mantissa = half & 0x3ffu;

  uint32_t magnitude = 0;
  if (exponent == 0) {
    // Zero stays zero; convertmantissa is only called for a non-zero
    // mantissa.
    if (mantissa != 0) {
      magnitude = static_cast<uint32_t>(
          convertmantissa(static_cast<int32_t>(mantissa)));
    }
  } else if (exponent < 31) {
    // 112 = 127 - 15, the difference between the fp32 and fp16 biases.
    magnitude = ((exponent + 112u) << 23) | (mantissa << 13);
  } else {
    magnitude = 0x7f800000u | (mantissa << 13);
  }

  // memcpy is the well-defined way to reinterpret the bits as a float;
  // casting an int32_t pointer to float* violates strict aliasing.
  const uint32_t bits = sign | magnitude;
  float fp32_num = 0.0f;
  std::memcpy(&fp32_num, &bits, sizeof(fp32_num));
  return fp32_num;
}

// Bookkeeping for fpga_malloc/fpga_free: maps each pointer returned by
// fpga_malloc to the byte size that was requested for it. fpga_free erases the
// entry and refuses to release pointers that are not present here.
static std::map<void *, size_t> memory_map;

// Opens the device by forwarding to driver::open_device_driver() and returns
// that call's result unchanged.
int open_device() { return driver::open_device_driver(); }

// Closes the device by forwarding to driver::close_device_driver() and returns
// that call's result unchanged.
int close_device() { return driver::close_device_driver(); }

// Allocates `size` bytes and records the allocation in memory_map.
//
// A request for 0 bytes is rounded up to 1 byte. On PADDLE_MOBILE_ZU5 builds
// the memory comes from driver::fpga_malloc_driver, otherwise from malloc.
//
// Returns the allocated pointer, or nullptr if the underlying allocator
// failed. A failed allocation is not recorded: fpga_free ignores nullptr, so
// such an entry could never be removed from memory_map again.
void *fpga_malloc(size_t size) {
  // Running total of allocated bytes; only consumed by the disabled trace
  // below.
  static uint64_t counter = 0;
  if (size == 0) {
    size = 1;
  }
#ifdef PADDLE_MOBILE_ZU5
  auto ptr = driver::fpga_malloc_driver(size);
#else
  auto ptr = malloc(size);
#endif
  if (ptr == nullptr) {
    return nullptr;
  }
  counter += size;
  memory_map.insert(std::make_pair(ptr, size));
  //  DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total "
  //       << counter << " bytes";
  return ptr;
}

void fpga_free(void *ptr) {
Z
zhangyang 已提交
150 151 152
  if (ptr == nullptr) {
    return;
  }
Z
zhangyang 已提交
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
  static uint64_t counter = 0;
  size_t size = 0;
  auto iter = memory_map.find(ptr);  // std::map<void *, size_t>::iterator
  if (iter != memory_map.end()) {
    size = iter->second;
    memory_map.erase(iter);
#ifdef PADDLE_MOBILE_ZU5
    driver::fpga_free_driver(ptr);
#else
    free(ptr);
#endif
    counter += size;
    //    DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total "
    //         << counter << " bytes";
  } else {
168
    DLOG << "Address: " << ptr << "  Invalid pointer";
Z
zhangyang 已提交
169 170 171 172
  }
}
// Copies `num` bytes from `src` to `dest` with memcpy.
//
// The PADDLE_MOBILE_ZU5 build and every other build perform the same plain
// memcpy; the driver-side copy (driver::fpga_copy_driver) is not used.
void fpga_copy(void *dest, const void *src, size_t num) {
  memcpy(dest, src, num);
}

// Forwards (address, size) to driver::fpga_flush_driver on PADDLE_MOBILE_ZU5
// builds and returns its result. On every other build nothing is done and 0
// is returned.
int fpga_flush(void *address, size_t size) {
#ifndef PADDLE_MOBILE_ZU5
  return 0;
#else
  return driver::fpga_flush_driver(address, size);
#endif
}
// Forwards (address, size) to driver::fpga_invalidate_driver on
// PADDLE_MOBILE_ZU5 builds and returns its result. On every other build
// nothing is done and 0 is returned.
int fpga_invalidate(void *address, size_t size) {
#ifndef PADDLE_MOBILE_ZU5
  return 0;
#else
  return driver::fpga_invalidate_driver(address, size);
#endif
}
194 195
// Returns the result of driver::vaddr_to_paddr_driver(address) on
// PADDLE_MOBILE_ZU5 builds. On every other build no translation is available
// and 0 is returned.
uint64_t vaddr_to_paddr(void *address) {
#ifndef PADDLE_MOBILE_ZU5
  return 0;
#else
  return driver::vaddr_to_paddr_driver(address);
#endif
}
201 202

// Returns the packed 32-bit version word of the "slave" variant.
//
// The word holds four byte-sized fields, most significant first:
// first (1), second (2), the per-variant build number (35) and a final field
// that is 1 for the master word and 2 for the slave word. The master word is
// computed for reference but only the slave word is returned.
uint32_t paddle_mobile_version() {
  const uint32_t first = 1;
  const uint32_t second = 2;
  const auto pack = [first, second](uint32_t third,
                                    uint32_t fourth) -> uint32_t {
    return (first << 24) | (second << 16) | (third << 8) | fourth;
  };

  const uint32_t master_word = pack(35, 1);
  const uint32_t slave_word = pack(35, 2);
  (void)master_word;  // kept for reference, not part of the result

  return slave_word;
}

Z
zhangyang 已提交
213 214
}  // namespace fpga
}  // namespace paddle_mobile