shell.cc 12.1 KB
Newer Older
D
dongdaxiang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15
#include <array>
16
#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
17
#include "paddle/fluid/framework/io/shell.h"
18

19 20
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/timer.h"
D
dongdaxiang 已提交
21 22 23 24 25 26

namespace paddle {
namespace framework {

std::shared_ptr<FILE> shell_fopen(const std::string& path,
                                  const std::string& mode) {
G
gongweibao 已提交
27
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
D
dongdaxiang 已提交
28 29
  return nullptr;
#else
D
dongdaxiang 已提交
30 31 32 33 34
  if (shell_verbose()) {
    LOG(INFO) << "Opening file[" << path << "] with mode[" << mode << "]";
  }
  FILE* fp;
  if (!(fp = fopen(path.c_str(), mode.c_str()))) {
35 36
    PADDLE_THROW(platform::errors::Unavailable(
        "Failed to open file, path[%s], mode[%s].", path, mode));
D
dongdaxiang 已提交
37 38 39 40 41 42
  }
  return {fp, [path](FILE* fp) {
            if (shell_verbose()) {
              LOG(INFO) << "Closing file[" << path << "]";
            }
            if (0 != fclose(fp)) {
43 44
              PADDLE_THROW(platform::errors::Unavailable(
                  "Failed to close file, path[%s].", path));
D
dongdaxiang 已提交
45 46
            }
          }};
47
#endif
D
dongdaxiang 已提交
48 49 50 51 52 53
}

// Closes every file descriptor >= 3 that is currently open in the calling
// process by enumerating /proc/self/fd; fds 0, 1 and 2 are left alone.
//
// The implementation is meant to be async signal safe (it runs in a freshly
// forked / vforked child), so it only issues raw system calls: no memory
// allocation, no logging and no exceptions. Mostly copied from CPython.
//
// Returns 0 on success and -1 if /proc/self/fd cannot be opened or read.
// On _WIN32 / __APPLE__ / PADDLE_ARM builds it does nothing and returns 0.
static int close_open_fds_internal() {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
  return 0;
#else
  // Record layout written by SYS_getdents64. The one-byte d_type sits between
  // d_reclen and d_name; leaving it out makes d_name point at the type byte,
  // so no entry would ever look like a number and nothing would be closed.
  struct linux_dirent64 {
    unsigned long long d_ino;  // NOLINT
    long long d_off;           // NOLINT
    unsigned short d_reclen;   // NOLINT
    unsigned char d_type;
    char d_name[256];
  };

  const int dir_fd = open("/proc/self/fd", O_RDONLY);
  if (dir_fd < 0) {
    return -1;
  }

  // Stack buffer only: heap allocation is not allowed in this context.
  alignas(linux_dirent64) char buffer[sizeof(linux_dirent64)];

  for (;;) {
    const long bytes =  // NOLINT
        syscall(SYS_getdents64, dir_fd, buffer, sizeof(buffer));
    if (bytes < 0) {
      close(dir_fd);
      return -1;
    }
    if (bytes == 0) {
      break;  // end of directory
    }

    long offset = 0;  // NOLINT
    while (offset < bytes) {
      const linux_dirent64* entry =
          reinterpret_cast<const linux_dirent64*>(buffer + offset);

      // Parse the leading decimal digits of the entry name as an fd number;
      // "." and ".." yield no digits and are skipped below.
      int fd = 0;
      const char* s = entry->d_name;
      while (*s >= '0' && *s <= '9') {
        fd = fd * 10 + (*s - '0');
        s++;
      }

      // Keep the standard streams and the directory handle being iterated.
      if (s != entry->d_name && fd != dir_fd && fd >= 3) {
        close(fd);
      }

      offset += entry->d_reclen;
    }
  }

  close(dir_fd);
  return 0;
#endif
}

109 110 111 112
static int shell_popen_fork_internal(const char* real_cmd,
                                     bool do_read,
                                     int parent_end,
                                     int child_end,
G
gongweibao 已提交
113 114
                                     bool redirect_stderr = false) {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
D
dongdaxiang 已提交
115 116
  return 0;
#else
D
dongdaxiang 已提交
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
  int child_pid = -1;
  // Too frequent calls to fork() makes openmpi very slow. Use vfork() instead.
  // But vfork() is very dangerous. Be careful.
  if ((child_pid = vfork()) < 0) {
    return -1;
  }

  // The following code is async signal safe (No memory allocation, no access to
  // global data, etc.)
  if (child_pid != 0) {
    return child_pid;
  }

  int child_std_end = do_read ? 1 : 0;
  close(parent_end);

  if (child_end != child_std_end) {
T
Thunderbrook 已提交
134
    PCHECK(dup2(child_end, child_std_end) == child_std_end);
G
gongweibao 已提交
135 136 137
    if (redirect_stderr && do_read) {
      PCHECK(dup2(child_end, 2) == 2);
    }
D
dongdaxiang 已提交
138 139 140 141
    close(child_end);
  }

  close_open_fds_internal();
142 143

#if defined(PADDLE_WITH_MUSL)
144
  PCHECK(execl("/bin/sh", "sh", "-c", real_cmd, nullptr) >= 0);
145
#else
146
  PCHECK(execl("/bin/bash", "bash", "-c", real_cmd, nullptr) >= 0);
147
#endif
G
gongweibao 已提交
148 149
  // Note: just for compilation. the child don't run this line.
  _exit(0);
150
#endif
D
dongdaxiang 已提交
151 152
}

G
gongweibao 已提交
153
// Drains `fp` in 4 KiB chunks, appending everything that was read to
// `*output` (existing contents of `*output` are kept).
//
// Returns 0 when reading stopped because end-of-file was reached, and -1 when
// fread() stopped delivering data for any other reason. Data read before a
// failure stays in `*output`. `fp` and `output` must be non-null.
static int read_from_pipe(FILE* fp, std::string* output) {
  std::array<char, 4096> buf;
  for (;;) {
    // fread() reports an element count as size_t and never a negative value,
    // so a zero count is the only stop condition (EOF or error).
    const size_t n = fread(buf.data(), 1, buf.size(), fp);
    if (n == 0) {
      break;
    }
    output->append(buf.data(), n);
  }

  return feof(fp) ? 0 : -1;
}

D
dongdaxiang 已提交
171
std::shared_ptr<FILE> shell_popen(const std::string& cmd,
172 173 174 175
                                  const std::string& mode,
                                  int* err_no,
                                  int* status,
                                  bool redirect_stderr) {
G
gongweibao 已提交
176
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
D
dongdaxiang 已提交
177 178
  return nullptr;
#else
D
dongdaxiang 已提交
179 180 181 182
  bool do_read = mode == "r";
  bool do_write = mode == "w";
  if (!(do_read || do_write)) {
    *err_no = -1;
183
    return nullptr;
D
dongdaxiang 已提交
184 185
  }

G
gongweibao 已提交
186
  VLOG(3) << "Opening pipe[" << cmd << "] with mode[" << mode << "]";
D
dongdaxiang 已提交
187 188 189 190 191 192

  std::string real_cmd = "set -o pipefail; " + cmd;

  int pipe_fds[2];
  if (pipe(pipe_fds) != 0) {
    *err_no = -1;
193
    return nullptr;
D
dongdaxiang 已提交
194 195 196 197 198 199 200 201 202 203 204 205
  }
  int parent_end = 0;
  int child_end = 0;

  if (do_read) {
    parent_end = pipe_fds[0];
    child_end = pipe_fds[1];
  } else if (do_write) {
    parent_end = pipe_fds[1];
    child_end = pipe_fds[0];
  }

G
gongweibao 已提交
206 207 208
  sighandler_t old_handler;
  old_handler = signal(SIGCHLD, SIG_DFL);

D
dongdaxiang 已提交
209
  fcntl(parent_end, F_SETFD, FD_CLOEXEC);
G
gongweibao 已提交
210 211 212 213 214 215

  int child_pid = shell_popen_fork_internal(
      real_cmd.c_str(), do_read, parent_end, child_end, redirect_stderr);

  close(child_end);

216 217
  FILE* fp = nullptr;
  if ((fp = fdopen(parent_end, mode.c_str())) == nullptr) {
D
dongdaxiang 已提交
218
    *err_no = -1;
G
gongweibao 已提交
219
    signal(SIGCHLD, old_handler);
220
    return nullptr;
D
dongdaxiang 已提交
221 222
  }

G
gongweibao 已提交
223 224 225
  return {fp, [cmd, child_pid, old_handler, err_no, status](FILE* fp) {
            VLOG(3) << "Closing pipe[" << cmd << "]";
            if (fclose(fp)) {
D
dongdaxiang 已提交
226 227
              *err_no = -1;
            }
G
gongweibao 已提交
228

D
dongdaxiang 已提交
229
            int wstatus = -1;
G
gongweibao 已提交
230 231
            // don't do this before parent read data from child pipe
            // or when get the large data, it will hang!
D
dongdaxiang 已提交
232
            waitpid(child_pid, &wstatus, 0);
G
gongweibao 已提交
233 234 235 236 237 238

            if (status) {
              *status = wstatus;
            }

            if (WIFEXITED(wstatus) || wstatus == (128 + SIGPIPE) * 256) {
D
dongdaxiang 已提交
239
            } else {
G
gongweibao 已提交
240
              PADDLE_ENFORCE_NE(
241 242
                  errno,
                  ECHILD,
G
gongweibao 已提交
243
                  platform::errors::Fatal("Must not be ECHILD errno here!"));
D
dongdaxiang 已提交
244 245
              *err_no = -1;
            }
G
gongweibao 已提交
246 247

            signal(SIGCHLD, old_handler);
D
dongdaxiang 已提交
248
          }};
249
#endif
D
dongdaxiang 已提交
250 251
}

252
// Forks the shell process that backs shell_p2open().
//
// In the child, pipein_fds[1] becomes stdout and pipeout_fds[0] becomes
// stdin, every other inherited descriptor is closed, and `real_cmd` is run
// with `/bin/sh -c`.
//
// Returns the child's pid to the parent, or -1 if fork() failed. The child
// never returns from this function: if wiring up the pipes or the exec fails
// it terminates with exit code 127. Returns 0 without doing anything on
// _WIN32 / __APPLE__ / PADDLE_ARM builds.
static int shell_p2open_fork_internal(const char* real_cmd,
                                      int pipein_fds[2],     // NOLINT
                                      int pipeout_fds[2]) {  // NOLINT
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
  return 0;
#else
  int child_pid = -1;
  if ((child_pid = fork()) < 0) {
    return -1;
  }

  if (child_pid != 0) {
    return child_pid;
  }

  // Child from here on. Every failure path must end the process with
  // _exit(): returning would let the child carry on executing the parent's
  // code as a second copy of the program, and exit() would run the parent's
  // atexit handlers and flush its stdio buffers a second time.
  close(pipein_fds[0]);
  close(pipeout_fds[1]);

  if (pipein_fds[1] != 1) {
    if (dup2(pipein_fds[1], 1) != 1) {
      _exit(127);
    }
    close(pipein_fds[1]);
  }

  if (pipeout_fds[0] != 0) {
    if (dup2(pipeout_fds[0], 0) != 0) {
      _exit(127);
    }
    close(pipeout_fds[0]);
  }

  close_open_fds_internal();

  // execl() only returns when it failed.
  execl("/bin/sh", "sh", "-c", real_cmd, nullptr);
  _exit(127);
#endif
}

std::pair<std::shared_ptr<FILE>, std::shared_ptr<FILE>> shell_p2open(
    const std::string& cmd) {
G
gongweibao 已提交
294
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
D
dongdaxiang 已提交
295
  return {};
D
dongdaxiang 已提交
296
#else
D
dongdaxiang 已提交
297 298 299 300 301 302 303 304 305
  if (shell_verbose()) {
    LOG(INFO) << "Opening bidirectional pipe[" << cmd << "]";
  }

  std::string real_cmd = "set -o pipefail; " + cmd;

  int pipein_fds[2];
  int pipeout_fds[2];
  if (pipe(pipein_fds) != 0) {
306
    return {nullptr, nullptr};
D
dongdaxiang 已提交
307 308
  }
  if (pipe(pipeout_fds) != 0) {
309
    return {nullptr, nullptr};
D
dongdaxiang 已提交
310 311 312 313 314 315 316 317 318 319 320
  }

  int child_pid =
      shell_p2open_fork_internal(real_cmd.c_str(), pipein_fds, pipeout_fds);

  close(pipein_fds[1]);
  close(pipeout_fds[0]);
  fcntl(pipein_fds[0], F_SETFD, FD_CLOEXEC);
  fcntl(pipeout_fds[1], F_SETFD, FD_CLOEXEC);

  std::shared_ptr<int> child_life = {
321
      nullptr, [child_pid, cmd](void*) {
D
dongdaxiang 已提交
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337
        if (shell_verbose()) {
          LOG(INFO) << "Closing bidirectional pipe[" << cmd << "]";
        }

        int wstatus, ret;

        do {
          PCHECK((ret = waitpid(child_pid, &wstatus, 0)) >= 0 ||
                 (ret == -1 && errno == EINTR));
        } while (ret == -1 && errno == EINTR);

        PCHECK(wstatus == 0 || wstatus == (128 + SIGPIPE) * 256 ||
               (wstatus == -1 && errno == ECHILD))
            << "status[" << wstatus << "], cmd[" << cmd << "]";

        if (wstatus == -1 && errno == ECHILD) {
J
jiaqi 已提交
338 339
          // temporarily remove this warning
          // LOG(WARNING) << "errno is ECHILD";
D
dongdaxiang 已提交
340 341 342 343
        }
      }};

  FILE* in_fp;
344
  PCHECK((in_fp = fdopen(pipein_fds[0], "r")) != nullptr);
D
dongdaxiang 已提交
345
  FILE* out_fp;
346
  PCHECK((out_fp = fdopen(pipeout_fds[1], "w")) != nullptr);
D
dongdaxiang 已提交
347 348
  return {{in_fp, [child_life](FILE* fp) { PCHECK(fclose(fp) == 0); }},
          {out_fp, [child_life](FILE* fp) { PCHECK(fclose(fp) == 0); }}};
349
#endif
D
dongdaxiang 已提交
350 351
}

G
gongweibao 已提交
352 353 354 355 356 357 358 359 360 361 362 363 364 365
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
#else
// Folds the error flag reported while running a command and the raw wait
// status of its child into one result code:
//   - a non-zero `err_no` is returned unchanged;
//   - otherwise the child's exit code if it exited normally;
//   - otherwise -1.
static int _get_err_no(int err_no, int status) {
  if (err_no != 0) {
    return err_no;
  }
  return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
#endif

366 367 368 369
static int _shell_execute_cmd(const std::string& cmd,
                              std::string* output,
                              int time_out,
                              int sleep_inter,
G
gongweibao 已提交
370 371
                              bool redirect_stderr = false) {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
372 373 374
  PADDLE_THROW(platform::errors::Unimplemented(
      "This function(shell_get_command_output) is not implemented under _WIN32 "
      "or __APPLE__."));
D
dongdaxiang 已提交
375
#else
D
dongdaxiang 已提交
376
  int err_no = 0;
G
gongweibao 已提交
377 378
  int status = 0;
  int cmd_status = 0;
379
  platform::Timer timer;
D
dongdaxiang 已提交
380
  do {
G
gongweibao 已提交
381 382
    VLOG(3) << "exec cmd:[" << cmd << "]";

D
dongdaxiang 已提交
383
    err_no = 0;
G
gongweibao 已提交
384 385 386
    status = 0;
    *output = "";
    auto pipe = shell_popen(cmd, "r", &err_no, &status, redirect_stderr);
D
dongdaxiang 已提交
387

388
    if (err_no == 0) {
G
gongweibao 已提交
389 390 391 392 393
      // read file
      err_no = read_from_pipe(&*pipe, output);
      if (err_no) {
        LOG(WARNING) << "status[" << status << "], cmd[" << cmd << "]"
                     << ", err_no[" << err_no << "]";
D
dongdaxiang 已提交
394
      }
395 396
    }

G
gongweibao 已提交
397 398 399 400 401 402 403 404 405 406 407
    // close file and etc.
    pipe = nullptr;
    if (err_no) {
      LOG(WARNING) << "status[" << status << "], cmd[" << cmd << "]"
                   << ", err_no[" << err_no << "]";
    }

    cmd_status = _get_err_no(err_no, status);
    // cmd run ok!
    if (cmd_status == 0) {
      return cmd_status;
D
dongdaxiang 已提交
408
    }
409

G
gongweibao 已提交
410
    // time out
411
    timer.Pause();
G
gongweibao 已提交
412 413
    if ((time_out > 0 && timer.ElapsedMS() >= time_out) || time_out == 0) {
      break;
414 415 416
    }
    timer.Resume();

G
gongweibao 已提交
417 418 419 420 421 422 423 424 425
    if (sleep_inter > 0) {
      usleep(sleep_inter * 1000);
    }
  } while (cmd_status);

  // log when check timeout!
  if (time_out != 0) {
    *output += string::Sprintf(
        " _shell_execute_cmd execute cmd:%s ElapsedMS:%d, err_no:%d status:%d",
426 427 428 429
        cmd,
        timer.ElapsedMS(),
        err_no,
        cmd_status);
G
gongweibao 已提交
430 431 432 433
    LOG(WARNING) << *output;
  }

  return cmd_status;
434

435
#endif
D
dongdaxiang 已提交
436
}
D
dongdaxiang 已提交
437

438 439
// Executes `cmd` via _shell_execute_cmd() and returns whatever that call left
// in its output buffer. `time_out` and `sleep_inter` are passed through
// unchanged; the numeric result of the run is not reported to the caller.
std::string shell_get_command_output(const std::string& cmd,
                                     int time_out,
                                     int sleep_inter) {
  std::string captured;
  static_cast<void>(_shell_execute_cmd(cmd, &captured, time_out, sleep_inter));
  return captured;
}

446 447
std::vector<std::string> shell_execute_cmd(const std::string& cmd,
                                           int time_out,
G
gongweibao 已提交
448 449 450 451 452 453 454 455
                                           int sleep_inter,
                                           bool redirect_stderr) {
  std::string output;
  int ret =
      _shell_execute_cmd(cmd, &output, time_out, sleep_inter, redirect_stderr);
  return std::vector<std::string>({string::Sprintf("%d", ret), output});
}

456 457
}  // end namespace framework
}  // end namespace paddle