fpga_common.cpp 5.2 KB
Newer Older
Z
zhangyang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "fpga/common/fpga_common.h"
#include <algorithm>
#include <cstring>
#include <map>
#include "fpga/common/config.h"
#include "fpga/common/driver.h"

namespace paddle_mobile {
namespace fpga {

// Converts a single-precision float to the bit pattern of a half-precision
// float (1 sign bit, 5 exponent bits, 10 mantissa bits), returned as int16_t.
//
// Mantissa bits that do not fit are truncated (no rounding). Magnitudes too
// small for a half subnormal become signed zero; magnitudes too large become
// signed infinity. Infinity and NaN inputs keep their sign and the top ten
// mantissa bits.
int16_t fp32_2_fp16(float fp32_num) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "fp32_2_fp16 expects a 32-bit float");
  // memcpy reads the bit pattern without violating strict aliasing.
  uint32_t bits = 0;
  std::memcpy(&bits, &fp32_num, sizeof(bits));

  const uint32_t sign_fp16 = (bits >> 16) & 0x8000u;  // sign moved to bit 15
  const uint32_t e_fp32 = (bits >> 23) & 0xffu;       // biased fp32 exponent
  const uint32_t m_fp32 = bits & 0x007fffffu;         // 23-bit mantissa

  uint32_t magnitude = 0;
  if (e_fp32 < 103) {
    // Below the smallest half subnormal (2^-24): flush to zero.
    magnitude = 0;
  } else if (e_fp32 < 113) {
    // Half subnormal: the implicit leading one becomes an explicit mantissa
    // bit, shifted right together with the original mantissa.
    magnitude = (0x0400u >> (113 - e_fp32)) + (m_fp32 >> (126 - e_fp32));
  } else if (e_fp32 <= 142) {
    // Normal range: rebias the exponent (127 -> 15) and drop 13 mantissa bits.
    magnitude = ((e_fp32 - 112) << 10) + (m_fp32 >> 13);
  } else if (e_fp32 < 255) {
    // Finite but too large for half precision: saturate to infinity.
    magnitude = 0x7C00u;
  } else {
    // Infinity or NaN: all-ones exponent plus the top mantissa bits.
    magnitude = 0x7C00u + (m_fp32 >> 13);
  }

  // The sign is applied uniformly, so negative overflow / -inf / negative NaN
  // keep bit 15 set instead of collapsing onto the positive encoding.
  return static_cast<int16_t>(static_cast<uint16_t>(sign_fp16 | magnitude));
}

// Normalizes the mantissa of a half-precision subnormal and returns the
// exponent and mantissa bits of the equivalent single-precision value
// (i * 2^-24), without a sign bit.
//
// `i` is expected to be a non-zero 10-bit mantissa (1..1023), which is what
// fp16_2_fp32 passes. Zero has no leading one to normalize, so it is mapped
// to 0 up front; without that guard the loop below would never terminate.
int32_t convertmantissa(int32_t i) {
  if (i == 0) {
    return 0;
  }
  int32_t m = i << 13;  // align the 10-bit mantissa with the 23-bit field
  int32_t e = 0;
  // Shift left until the leading one reaches bit 23 (the implicit-one
  // position), lowering the exponent by one for every shift.
  while (!(m & 0x00800000)) {
    e -= 0x00800000;
    m <<= 1;
  }
  m &= ~0x00800000;  // drop the now-implicit leading one
  e += 0x38800000;   // exponent bias adjustment: (127 - 14) << 23
  return m | e;
}

// Expands a half-precision bit pattern (1 sign bit, 5 exponent bits,
// 10 mantissa bits, carried in an int16_t) to the single-precision float with
// the same value. Zeros, subnormals, infinities and NaNs keep their sign; every
// half value is exactly representable, so the conversion is lossless.
float fp16_2_fp32(int16_t fp16_num) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "fp16_2_fp32 expects a 32-bit float");
  // Work on unsigned bits so no step depends on signed shifts or narrowing.
  const uint32_t half = static_cast<uint16_t>(fp16_num);
  const uint32_t e_fp16 = (half >> 10) & 0x1fu;  // biased half exponent
  uint32_t m_fp16 = half & 0x3ffu;               // 10-bit mantissa

  uint32_t bits = (half & 0x8000u) << 16;  // sign moved to bit 31
  if (e_fp16 == 0) {
    if (m_fp16 != 0) {
      // Subnormal: shift the mantissa until its leading one reaches bit 10,
      // lowering the exponent once per shift. 113 is the fp32 exponent for
      // 2^-14, the scale of a half subnormal whose bit 10 would be set.
      uint32_t e_fp32 = 113;
      while (!(m_fp16 & 0x400u)) {
        m_fp16 <<= 1;
        --e_fp32;
      }
      bits |= (e_fp32 << 23) | ((m_fp16 & 0x3ffu) << 13);
    }
    // else: signed zero, only the sign bit is set.
  } else if (e_fp16 == 31) {
    // Infinity or NaN: all-ones exponent, mantissa payload preserved.
    bits |= 0x7F800000u | (m_fp16 << 13);
  } else {
    // Normal number: rebias the exponent (15 -> 127) and widen the mantissa.
    bits |= ((e_fp16 + 112) << 23) | (m_fp16 << 13);
  }

  // memcpy writes the bit pattern without violating strict aliasing.
  float fp32_num = 0.0f;
  std::memcpy(&fp32_num, &bits, sizeof(fp32_num));
  return fp32_num;
}

// Bookkeeping table for allocations made through fpga_malloc: each returned
// pointer is stored together with the size that was requested, and fpga_free
// looks the pointer up here (and erases the entry) before releasing it.
static std::map<void *, size_t> memory_map;

// Opens the device by delegating to driver::open_device_driver and hands the
// driver's return value back to the caller unchanged.
int open_device() { return driver::open_device_driver(); }

// Closes the device by delegating to driver::close_device_driver and hands the
// driver's return value back to the caller unchanged.
int close_device() { return driver::close_device_driver(); }

// Allocates `size` bytes and records the allocation in memory_map so that
// fpga_free can later validate and release it.
//
// A request for 0 bytes is bumped to 1 byte. With PADDLE_MOBILE_ZU5 defined the
// memory comes from driver::fpga_malloc_driver, otherwise from malloc.
// Returns nullptr when the underlying allocator returns nullptr; such a failed
// allocation is neither counted nor recorded in memory_map.
void *fpga_malloc(size_t size) {
  static uint64_t counter = 0;  // running total of bytes handed out
  if (size == 0) {
    size = 1;
  }
#ifdef PADDLE_MOBILE_ZU5
  auto ptr = driver::fpga_malloc_driver(size);
#else
  auto ptr = malloc(size);
#endif
  // malloc reports failure as nullptr; presumably the driver allocator does
  // too (TODO confirm). Recording a null key would leave a stale entry behind,
  // because fpga_free returns early for nullptr and never erases it.
  if (ptr == nullptr) {
    return nullptr;
  }
  counter += size;
  memory_map.insert(std::make_pair(ptr, size));
  //  DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total "
  //       << counter << " bytes";
  return ptr;
}

void fpga_free(void *ptr) {
Z
zhangyang 已提交
149 150 151
  if (ptr == nullptr) {
    return;
  }
Z
zhangyang 已提交
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
  static uint64_t counter = 0;
  size_t size = 0;
  auto iter = memory_map.find(ptr);  // std::map<void *, size_t>::iterator
  if (iter != memory_map.end()) {
    size = iter->second;
    memory_map.erase(iter);
#ifdef PADDLE_MOBILE_ZU5
    driver::fpga_free_driver(ptr);
#else
    free(ptr);
#endif
    counter += size;
    //    DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total "
    //         << counter << " bytes";
  } else {
167
    DLOG << "Address: " << ptr << "  Invalid pointer";
Z
zhangyang 已提交
168 169 170 171
  }
}
// Copies `num` bytes from `src` to `dest` using memcpy.
void fpga_copy(void *dest, const void *src, size_t num) {
  // The driver-side copy (driver::fpga_copy_driver) is disabled, so builds with
  // and without PADDLE_MOBILE_ZU5 perform the same plain memcpy.
  memcpy(dest, src, num);
}

// Forwards the range (`address`, `size`) to driver::fpga_flush_driver and
// returns its result when PADDLE_MOBILE_ZU5 is defined. In every other build
// the call does nothing and returns 0.
int fpga_flush(void *address, size_t size) {
#ifndef PADDLE_MOBILE_ZU5
  static_cast<void>(address);
  static_cast<void>(size);
  return 0;
#else
  return driver::fpga_flush_driver(address, size);
#endif
}
// Forwards the range (`address`, `size`) to driver::fpga_invalidate_driver and
// returns its result when PADDLE_MOBILE_ZU5 is defined. In every other build
// the call does nothing and returns 0.
int fpga_invalidate(void *address, size_t size) {
#ifndef PADDLE_MOBILE_ZU5
  static_cast<void>(address);
  static_cast<void>(size);
  return 0;
#else
  return driver::fpga_invalidate_driver(address, size);
#endif
}
193 194
// Returns the result of driver::vaddr_to_paddr_driver for `address` when
// PADDLE_MOBILE_ZU5 is defined. In every other build no translation is
// attempted and the function returns 0.
uint64_t vaddr_to_paddr(void *address) {
#ifndef PADDLE_MOBILE_ZU5
  static_cast<void>(address);
  return 0;
#else
  return driver::vaddr_to_paddr_driver(address);
#endif
}
200 201 202 203 204 205 206 207 208 209 210 211

// Returns the version word packed one field per byte, most significant first:
// first (1), second (2), third (34), fourth (2), i.e. 0x01022202.
//
// NOTE(review): the previous code also built a "master" word that differed
// only in the fourth field (1 instead of 2) but never used it; this function
// has always returned the "slave" word. Confirm that is the intended value.
uint32_t paddle_mobile_version() {
  const uint32_t first = 1;
  const uint32_t second = 2;
  const uint32_t v_slave = 34;
  const uint32_t fourth_slave = 2;

  return first << 24 | second << 16 | v_slave << 8 | fourth_slave;
}

Z
zhangyang 已提交
212 213
}  // namespace fpga
}  // namespace paddle_mobile