/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <csignal>
#include <fstream>
#include <string>

#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/string/split.h"
#include "paddle/phi/backends/cpu/cpu_info.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#endif
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/cupti.h"
#endif
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/os_info.h"
#include "paddle/fluid/platform/place.h"

#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/fluid/platform/device/xpu/xpu_info.h"
#endif

#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#endif

#ifdef WITH_WIN_DUMP_DBG
#include <stdio.h>
#include <time.h>
#ifndef NOMINMAX
#define NOMINMAX  // msvc max/min macro conflict with std::min/max
#endif
#include <windows.h>

#include "DbgHelp.h"
#endif

#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#endif

#include "paddle/fluid/memory/memory.h"
#include "paddle/phi/common/memory_utils.h"
#include "paddle/phi/core/custom_kernel.h"

DECLARE_int32(paddle_num_threads);
PADDLE_DEFINE_EXPORTED_int32(
    multiple_of_cupti_buffer_size,
    1,
    "Multiple of the CUPTI device buffer size. If the timestamps have "
    "been dropped when you are profiling, try increasing this value.");

namespace paddle {
namespace framework {

#ifdef _WIN32
#define strdup _strdup
#endif

std::once_flag gflags_init_flag;
std::once_flag glog_init_flag;
std::once_flag memory_method_init_flag;

bool InitGflags(std::vector<std::string> args) {
  bool succeeded = false;
  std::call_once(gflags_init_flag, [&]() {
    FLAGS_logtostderr = true;
    // NOTE(zhiqiu): dummy is needed, since the function
    // ParseNewCommandLineFlags in gflags.cc starts processing
    // commandline strings from idx 1.
    // The reason is that it assumes the first entry (idx 0) is
    // the name of the executable file.
    args.insert(args.begin(), "dummy");
    std::vector<char *> argv;
    std::string line;
    int argc = args.size();
    for (auto &arg : args) {
      argv.push_back(const_cast<char *>(arg.data()));
      line += arg;
      line += ' ';
    }
    VLOG(1) << "Before Parse: argc is " << argc
            << ", Init commandline: " << line;

    char **arr = argv.data();
    ::GFLAGS_NAMESPACE::AllowCommandLineReparsing();
    ::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &arr, true);
    succeeded = true;

    VLOG(1) << "After Parse: argc is " << argc;
  });
  return succeeded;
}
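
// Illustrative call (an assumption about typical usage, not taken from this
// file): a C++ embedder or the Python frontend forwards leftover command-line
// strings here once, e.g.
//   paddle::framework::InitGflags({"--multiple_of_cupti_buffer_size=4"});
// Later calls skip parsing because of gflags_init_flag and simply return
// false.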

#ifdef PADDLE_WITH_CUDA
void InitCupti() {
#ifdef PADDLE_WITH_CUPTI
  if (FLAGS_multiple_of_cupti_buffer_size == 1) return;
  size_t attrValue = 0, attrValueSize = sizeof(size_t);
#define MULTIPLY_ATTR_VALUE(attr)                                      \
  {                                                                    \
    PADDLE_ENFORCE_EQ(                                                 \
        !platform::dynload::cuptiActivityGetAttribute(                 \
            attr, &attrValueSize, &attrValue),                         \
        true,                                                          \
        platform::errors::Unavailable("Get cupti attribute failed.")); \
    attrValue *= FLAGS_multiple_of_cupti_buffer_size;                  \
    LOG(WARNING) << "Set " #attr " " << attrValue << " byte";          \
    PADDLE_ENFORCE_EQ(                                                 \
        !platform::dynload::cuptiActivitySetAttribute(                 \
            attr, &attrValueSize, &attrValue),                         \
        true,                                                          \
        platform::errors::Unavailable("Set cupti attribute failed.")); \
  }
  MULTIPLY_ATTR_VALUE(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE);
  MULTIPLY_ATTR_VALUE(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE_CDP);
#if CUDA_VERSION >= 9000
  MULTIPLY_ATTR_VALUE(CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE);
#endif
#undef MULTIPLY_ATTR_VALUE
#endif
}
#endif
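
// Summary of the block above: when FLAGS_multiple_of_cupti_buffer_size > 1,
// InitCupti() reads each CUPTI activity buffer-size attribute, multiplies it
// by the flag value, and writes it back, trading larger profiler buffers for
// fewer dropped activity records.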

#ifdef PADDLE_WITH_CUSTOM_DEVICE
void LoadCustomDevice(const std::string &library_dir) {
  LOG(INFO) << "Try loading custom device libs from: [" << library_dir << "]";
  std::vector<std::string> libs = phi::ListAllLibraries(library_dir);
  for (const auto &lib_path : libs) {
    auto dso_handle = dlopen(lib_path.c_str(), RTLD_NOW);
    PADDLE_ENFORCE_NOT_NULL(
        dso_handle,
        platform::errors::InvalidArgument(
            "Fail to open library: %s with error: %s", lib_path, dlerror()));

    phi::LoadCustomRuntimeLib(lib_path, dso_handle);
  }
  phi::CustomKernelMap::Instance().RegisterCustomKernels();
  LOG(INFO) << "Finished in LoadCustomDevice with libs_path: [" << library_dir
            << "]";
}
#endif
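
// Illustrative setup (an assumption about deployment, not specified in this
// file):
//   export CUSTOM_DEVICE_ROOT=/path/to/custom/device/plugins
// InitDevices() below reads this environment variable and, when it is set and
// non-empty, calls LoadCustomDevice() to dlopen every library in that
// directory and register its custom runtime and kernels.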

static std::once_flag init_devices_flag;

void InitDevices() {
  std::call_once(init_devices_flag, []() {
    // set name at the entry point of Paddle
    platform::SetCurrentThreadName("MainThread");
// CUPTI attribute should be set before any CUDA context is created (see CUPTI
// documentation about CUpti_ActivityAttribute).
#ifdef PADDLE_WITH_CUDA
    InitCupti();
#endif
    /* Init all available devices by default. */
    std::vector<int> devices;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    try {
      // use user specified GPUs in single-node multi-process mode.
      devices = platform::GetSelectedDevices();
    } catch (const std::exception &exp) {
      LOG(WARNING) << "Compiled with WITH_GPU, but no GPU found in runtime.";
    }
#endif
#ifdef PADDLE_WITH_XPU
    try {
      // use user specified XPUs in single-node multi-process mode.
      devices = platform::GetXPUSelectedDevices();
    } catch (const std::exception &exp) {
      LOG(WARNING) << "Compiled with WITH_XPU, but no XPU found in runtime.";
    }
#endif
#ifdef PADDLE_WITH_ASCEND_CL
    // NOTE(zhiqiu): use singleton to explicitly init and finalize ACL
    platform::AclInstance::Instance();  // NOLINT
    try {
      // use user specified NPUs in single-node multi-process mode.
      devices = platform::GetSelectedNPUDevices();
    } catch (const std::exception &exp) {
      LOG(WARNING) << "Compiled with PADDLE_WITH_ASCEND_CL, but no NPU found "
                      "in runtime.";
    }
#endif
#ifdef PADDLE_WITH_IPU
    try {
      // use user specified IPUs.
      devices = platform::GetSelectedIPUDevices();
    } catch (const std::exception &exp) {
      LOG(WARNING)
          << "Compiled with PADDLE_WITH_IPU, but no IPU found in runtime.";
    }
#endif
#ifdef PADDLE_WITH_MLU
    try {
      // use user specified MLUs in single-node multi-process mode.
      devices = platform::GetMLUSelectedDevices();
    } catch (const std::exception &exp) {
      LOG(WARNING) << "Compiled with WITH_MLU, but no MLU found in runtime.";
    }
#endif
    InitDevices(devices);
  });
}
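
// Behavioral note derived from the code above: if the build enables an
// accelerator but none is visible at runtime, the matching catch block only
// logs a warning and `devices` stays empty, so initialization falls through
// to a CPU-only place list in InitDevices(devices) below.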

void InitDevices(const std::vector<int> devices) {
  std::vector<platform::Place> places;

  for (size_t i = 0; i < devices.size(); ++i) {
    // In multi process multi gpu mode, we may have gpuid = 7
    // but count = 1.
    if (devices[i] < 0) {
      LOG(WARNING) << "Invalid devices id.";
      continue;
    }

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    places.emplace_back(platform::CUDAPlace(devices[i]));
#endif
#ifdef PADDLE_WITH_XPU
    places.emplace_back(platform::XPUPlace(devices[i]));
#endif
#ifdef PADDLE_WITH_IPU
    places.emplace_back(platform::IPUPlace(devices[i]));
#endif
#ifdef PADDLE_WITH_ASCEND_CL
    places.emplace_back(platform::NPUPlace(devices[i]));
#endif
#ifdef PADDLE_WITH_MLU
    places.emplace_back(platform::MLUPlace(devices[i]));
#endif
  }
  places.emplace_back(platform::CPUPlace());
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  places.emplace_back(platform::CUDAPinnedPlace());
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
  const char *custom_kernel_root_p = std::getenv("CUSTOM_DEVICE_ROOT");
  if (!custom_kernel_root_p) {
    VLOG(3) << "Env [CUSTOM_DEVICE_ROOT] is not set.";
  } else {
    std::string custom_kernel_root(custom_kernel_root_p);
    if (!custom_kernel_root.empty()) {
      LOG(INFO) << "ENV [CUSTOM_DEVICE_ROOT]=" << custom_kernel_root;
      LoadCustomDevice(custom_kernel_root);

      auto device_types = phi::DeviceManager::GetAllCustomDeviceTypes();
      for (auto &dev_type : device_types) {
        auto device_list = phi::DeviceManager::GetSelectedDeviceList(dev_type);
        LOG(INFO) << "CustomDevice: " << dev_type
                  << ", visible devices count: " << device_list.size();
        for (auto &dev_id : device_list) {
          places.push_back(platform::CustomPlace(dev_type, dev_id));
        }
      }
    } else {
      VLOG(3) << "ENV [CUSTOM_DEVICE_ROOT] is empty.";
    }
  }
#endif
  platform::DeviceContextPool::Init(places, platform::EmplaceExternalContext);

#ifndef PADDLE_WITH_MKLDNN
  platform::SetNumThreads(FLAGS_paddle_num_threads);
#endif
}
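
// Note derived from the code above: platform::CPUPlace is always appended,
// and platform::CUDAPinnedPlace is appended for CUDA/HIP builds, so
// DeviceContextPool::Init always receives at least one CPU place even when
// no accelerator places were added.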

#ifndef _WIN32
// Description Quoted from
// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html
const struct {
  int signal_number;
  const char *name;
  const char *error_string;
} SignalErrorStrings[] = {
    {SIGSEGV, "SIGSEGV", "Segmentation fault"},
    {SIGILL, "SIGILL", "Illegal instruction"},
    {SIGFPE, "SIGFPE", "Erroneous arithmetic operation"},
    {SIGABRT, "SIGABRT", "Process abort signal"},
    {SIGBUS, "SIGBUS", "Access to an undefined portion of a memory object"},
    {SIGTERM, "SIGTERM", "Termination signal"},
};

bool StartsWith(const char *str, const char *prefix) {
  size_t len_prefix = strlen(prefix);
  size_t len_str = strlen(str);
  return len_str < len_prefix ? false : memcmp(prefix, str, len_prefix) == 0;
}

const char *ParseSignalErrorString(const std::string &str) {
  for (size_t i = 0;
       i < (sizeof(SignalErrorStrings) / sizeof(*(SignalErrorStrings)));
       ++i) {
    if (std::string::npos != str.find(SignalErrorStrings[i].name)) {
      return SignalErrorStrings[i].error_string;
    }
  }
  return "Unknown signal";
}
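
// Worked example (illustrative): a glog failure line such as
//   "*** SIGSEGV (@0x0) received by PID ..."
// contains the substring "SIGSEGV", so ParseSignalErrorString() returns
// "Segmentation fault"; lines naming no known signal map to "Unknown signal".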

// Handle SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, and SIGTERM.
void SignalHandle(const char *data, int size) {
  try {
    // NOTE1: The messages dumped by the glog FailureSignalHandler
    //   are handled line by line.
    auto signal_msg_dumper_ptr = SignalMessageDumper::Instance().Get();
    // NOTE2: we only deal with the time info and signal info here;
    //   the stack trace will be generated by paddle itself.
    if (StartsWith(data, "*** Aborted at")) {
      *signal_msg_dumper_ptr << "\n  [TimeInfo: " << std::string(data, size - 1)
                             << "]\n";
    } else if (StartsWith(data, "***")) {
      std::string signal_info(data, size - 1);
      std::string useless_substr("; stack trace:");
      size_t start_pos = signal_info.rfind(useless_substr);
      signal_info.replace(start_pos, useless_substr.length(), "");
      *signal_msg_dumper_ptr << "  [SignalInfo: " << signal_info << "]\n";

      // NOTE3: Final signal error message print.
      // Here we do not throw an exception,
      // otherwise it will cause "terminate called recursively".
      std::ostringstream sout;
      sout << "\n\n--------------------------------------\n";
      sout << "C++ Traceback (most recent call last):";
      sout << "\n--------------------------------------\n";
      auto traceback = platform::GetCurrentTraceBackString(/*for_signal=*/true);
      if (traceback.empty()) {
        sout
            << "No stack trace in paddle, may be caused by external reasons.\n";
      } else {
        sout << traceback;
      }

      sout << "\n----------------------\nError Message "
              "Summary:\n----------------------\n";
      sout << platform::errors::Fatal(
                  "`%s` is detected by the operating system.",
                  ParseSignalErrorString(signal_info))
                  .to_string();
      std::cout << sout.str() << (*signal_msg_dumper_ptr).str() << std::endl;
    }
  } catch (...) {
    // Since the program has already triggered a system error,
    // no further processing is required here; the glog FailureSignalHandler
    // will kill the program through the default signal handler.
  }
}
#endif  // _WIN32

void DisableSignalHandler() {
#ifndef _WIN32
  for (size_t i = 0;
       i < (sizeof(SignalErrorStrings) / sizeof(*(SignalErrorStrings)));
       ++i) {
    int signal_number = SignalErrorStrings[i].signal_number;
    struct sigaction sig_action;
    memset(&sig_action, 0, sizeof(sig_action));
    sigemptyset(&sig_action.sa_mask);
    sig_action.sa_handler = SIG_DFL;
    sigaction(signal_number, &sig_action, NULL);
  }
#endif
}
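
// Usage note (an assumption, not stated in this file): an embedding
// application that installs its own signal handlers can call
// DisableSignalHandler() to restore SIG_DFL for every signal listed in
// SignalErrorStrings, replacing the glog failure handler for those signals.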

#ifdef WITH_WIN_DUMP_DBG
typedef BOOL(WINAPI *MINIDUMP_WRITE_DUMP)(
    IN HANDLE hProcess,
    IN DWORD ProcessId,
    IN HANDLE hFile,
    IN MINIDUMP_TYPE DumpType,
    IN CONST PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam,
    OPTIONAL IN PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam,
    OPTIONAL IN PMINIDUMP_CALLBACK_INFORMATION CallbackParam OPTIONAL);
void CreateDumpFile(LPCSTR lpstrDumpFilePathName,
                    EXCEPTION_POINTERS *pException) {
  HANDLE hDumpFile = CreateFile(lpstrDumpFilePathName,
                                GENERIC_WRITE,
                                0,
                                NULL,
                                CREATE_ALWAYS,
                                FILE_ATTRIBUTE_NORMAL,
                                NULL);
  MINIDUMP_EXCEPTION_INFORMATION dumpInfo;
  dumpInfo.ExceptionPointers = pException;
  dumpInfo.ThreadId = GetCurrentThreadId();
  dumpInfo.ClientPointers = TRUE;
  MINIDUMP_WRITE_DUMP MiniDumpWriteDump_;
  HMODULE hDbgHelp = LoadLibrary("DBGHELP.DLL");
  MiniDumpWriteDump_ =
      (MINIDUMP_WRITE_DUMP)GetProcAddress(hDbgHelp, "MiniDumpWriteDump");
  MiniDumpWriteDump_(GetCurrentProcess(),
                     GetCurrentProcessId(),
                     hDumpFile,
                     MiniDumpWithPrivateReadWriteMemory,
                     &dumpInfo,
                     NULL,
                     NULL);
  CloseHandle(hDumpFile);
}

LONG ApplicationCrashHandler(EXCEPTION_POINTERS *pException) {
  time_t time_seconds = time(0);
  struct tm now_time;
  localtime_s(&now_time, &time_seconds);

  char buf[1024];
  sprintf_s(buf,
            "C:\\Paddle%04d%02d%02d-%02d%02d%02d.dmp",
            1900 + now_time.tm_year,
            1 + now_time.tm_mon,
            now_time.tm_mday,
            now_time.tm_hour,
            now_time.tm_min,
            now_time.tm_sec);

  CreateDumpFile(buf, pException);
  return EXCEPTION_EXECUTE_HANDLER;
}
#endif

void InitGLOG(const std::string &prog_name) {
  std::call_once(glog_init_flag, [&]() {
// glog does not keep its own copy of ARGV[0],
// so use strdup to allocate a new string for it.
#ifdef WITH_WIN_DUMP_DBG
    SetUnhandledExceptionFilter(
        (LPTOP_LEVEL_EXCEPTION_FILTER)ApplicationCrashHandler);
#endif
    google::InitGoogleLogging(strdup(prog_name.c_str()));
#ifndef _WIN32
    google::InstallFailureSignalHandler();
    google::InstallFailureWriter(&SignalHandle);
#endif
  });
}
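
// Usage sketch (illustrative, assuming the conventional glog setup): call
// this once near process start with the program name, e.g.
//   paddle::framework::InitGLOG(argv[0]);
// so that the failure writer (SignalHandle) and, for WITH_WIN_DUMP_DBG
// builds, the unhandled-exception filter are installed before worker threads
// start.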

void InitMemoryMethod() {
  std::call_once(memory_method_init_flag, [&]() {
    auto &memory_utils = phi::MemoryUtils::Instance();
    auto memory_method = std::make_unique<phi::MemoryInterface>();
    memory_method->alloc = paddle::memory::Alloc;
    memory_method->alloc_with_stream = paddle::memory::Alloc;
    memory_method->alloc_shared = paddle::memory::AllocShared;
    memory_method->alloc_shared_with_stream = paddle::memory::AllocShared;
    memory_method->in_same_stream = paddle::memory::InSameStream;
    memory_method->allocation_deleter =
        paddle::memory::allocation::Allocator::AllocationDeleter;
#if defined(PADDLE_WITH_CUSTOM_DEVICE) || defined(PADDLE_WITH_CUDA) || \
    defined(PADDLE_WITH_HIP)
    memory_method->copy_with_stream =
        paddle::memory::Copy<phi::Place, phi::Place>;
#endif
    memory_method->copy = paddle::memory::Copy<phi::Place, phi::Place>;
    memory_method->device_memory_stat_current_value =
        paddle::memory::DeviceMemoryStatCurrentValue;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    memory_method->gpu_memory_usage = paddle::platform::GpuMemoryUsage;
#endif
    memory_method->init_devices = InitDevices;
    memory_utils.Init(std::move(memory_method));
  });
}
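
// Note (an assumption based on the names used here): this wires fluid's
// allocation, copy, and memory-stat functions into phi::MemoryUtils so that
// phi code can allocate and copy memory through these callbacks instead of
// calling paddle::memory directly.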

}  // namespace framework
}  // namespace paddle