// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
#include "paddle/fluid/framework/io/shell.h"

#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/timer.h"

namespace paddle {
namespace framework {

std::shared_ptr<FILE> shell_fopen(const std::string& path,
                                  const std::string& mode) {
G
gongweibao 已提交
26
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
D
dongdaxiang 已提交
27 28
  return nullptr;
#else
D
dongdaxiang 已提交
29 30 31 32 33
  if (shell_verbose()) {
    LOG(INFO) << "Opening file[" << path << "] with mode[" << mode << "]";
  }
  FILE* fp;
  if (!(fp = fopen(path.c_str(), mode.c_str()))) {
34 35
    PADDLE_THROW(platform::errors::Unavailable(
        "Failed to open file, path[%s], mode[%s].", path, mode));
D
dongdaxiang 已提交
36 37 38 39 40 41
  }
  return {fp, [path](FILE* fp) {
            if (shell_verbose()) {
              LOG(INFO) << "Closing file[" << path << "]";
            }
            if (0 != fclose(fp)) {
42 43
              PADDLE_THROW(platform::errors::Unavailable(
                  "Failed to close file, path[%s].", path));
D
dongdaxiang 已提交
44 45
            }
          }};
46
#endif
D
dongdaxiang 已提交
47 48 49 50 51 52
}

// Close all open file descriptors numbered 3 and above, except the one used
// to scan /proc/self/fd.
// The implementation is async signal safe: it is called in a freshly forked
// child before exec, so it only issues raw system calls (no allocation, no
// logging, no exceptions).
// Mostly copied from CPython code.
//
// Returns 0 on success (and always on platforms where the body is compiled
// out), -1 if /proc/self/fd cannot be opened or read.
static int close_open_fds_internal() {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
  return 0;
#else
  // Record layout written by SYS_getdents64. The one-byte d_type field sits
  // between d_reclen and d_name; leaving it out shifts d_name by one byte, so
  // every name would appear to start with a non-digit and nothing would be
  // closed.
  struct linux_dirent64 {
    unsigned long long d_ino;  // NOLINT
    long long d_off;           // NOLINT
    unsigned short d_reclen;   // NOLINT
    unsigned char d_type;
    char d_name[256];
  };

  const int dir_fd = open("/proc/self/fd", O_RDONLY);
  if (dir_fd < 0) {
    return -1;
  }

  // Aligned so that the records can be accessed through linux_dirent64*.
  alignas(linux_dirent64) char buffer[sizeof(linux_dirent64)];

  for (;;) {
    const int bytes =
        static_cast<int>(syscall(SYS_getdents64, dir_fd, buffer,
                                 sizeof(buffer)));
    if (bytes < 0) {
      close(dir_fd);  // do not leak the directory descriptor on error
      return -1;
    }
    if (bytes == 0) {
      break;  // end of directory
    }

    int offset = 0;
    while (offset < bytes) {
      const linux_dirent64* entry =
          reinterpret_cast<const linux_dirent64*>(buffer + offset);

      // Parse the entry name as a decimal fd number; "." and ".." yield no
      // digits and are skipped by the `s != entry->d_name` test below.
      int fd = 0;
      const char* s = entry->d_name;
      while (*s >= '0' && *s <= '9') {
        fd = fd * 10 + (*s - '0');
        s++;
      }

      if (s != entry->d_name && fd != dir_fd && fd >= 3) {
        close(fd);
      }

      if (entry->d_reclen == 0) {
        break;  // malformed record; avoid spinning forever
      }
      offset += entry->d_reclen;
    }
  }

  close(dir_fd);
  return 0;
#endif
}

static int shell_popen_fork_internal(const char* real_cmd, bool do_read,
G
gongweibao 已提交
108 109 110
                                     int parent_end, int child_end,
                                     bool redirect_stderr = false) {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
D
dongdaxiang 已提交
111 112
  return 0;
#else
D
dongdaxiang 已提交
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
  int child_pid = -1;
  // Too frequent calls to fork() makes openmpi very slow. Use vfork() instead.
  // But vfork() is very dangerous. Be careful.
  if ((child_pid = vfork()) < 0) {
    return -1;
  }

  // The following code is async signal safe (No memory allocation, no access to
  // global data, etc.)
  if (child_pid != 0) {
    return child_pid;
  }

  int child_std_end = do_read ? 1 : 0;
  close(parent_end);

  if (child_end != child_std_end) {
T
Thunderbrook 已提交
130
    PCHECK(dup2(child_end, child_std_end) == child_std_end);
G
gongweibao 已提交
131 132 133
    if (redirect_stderr && do_read) {
      PCHECK(dup2(child_end, 2) == 2);
    }
D
dongdaxiang 已提交
134 135 136 137
    close(child_end);
  }

  close_open_fds_internal();
138 139 140 141

#if defined(PADDLE_WITH_MUSL)
  PCHECK(execl("/bin/sh", "sh", "-c", real_cmd, NULL) >= 0);
#else
T
Thunderbrook 已提交
142
  PCHECK(execl("/bin/bash", "bash", "-c", real_cmd, NULL) >= 0);
143
#endif
G
gongweibao 已提交
144 145
  // Note: just for compilation. the child don't run this line.
  _exit(0);
146
#endif
D
dongdaxiang 已提交
147 148
}

// Appends everything readable from `fp` to `*output`, pulling 4096 bytes at a
// time until fread() yields nothing more.
// Returns 0 if the stream stopped at end-of-file, -1 otherwise.
static int read_from_pipe(FILE* fp, std::string* output) {
  char chunk[4096];
  for (;;) {
    const int got = fread(chunk, 1, sizeof(chunk), fp);
    if (got <= 0) {
      break;
    }
    output->append(chunk, got);
  }

  return feof(fp) ? 0 : -1;
}

std::shared_ptr<FILE> shell_popen(const std::string& cmd,
G
gongweibao 已提交
168 169 170
                                  const std::string& mode, int* err_no,
                                  int* status, bool redirect_stderr) {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
D
dongdaxiang 已提交
171 172
  return nullptr;
#else
D
dongdaxiang 已提交
173 174 175 176 177 178 179
  bool do_read = mode == "r";
  bool do_write = mode == "w";
  if (!(do_read || do_write)) {
    *err_no = -1;
    return NULL;
  }

G
gongweibao 已提交
180
  VLOG(3) << "Opening pipe[" << cmd << "] with mode[" << mode << "]";
D
dongdaxiang 已提交
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199

  std::string real_cmd = "set -o pipefail; " + cmd;

  int pipe_fds[2];
  if (pipe(pipe_fds) != 0) {
    *err_no = -1;
    return NULL;
  }
  int parent_end = 0;
  int child_end = 0;

  if (do_read) {
    parent_end = pipe_fds[0];
    child_end = pipe_fds[1];
  } else if (do_write) {
    parent_end = pipe_fds[1];
    child_end = pipe_fds[0];
  }

G
gongweibao 已提交
200 201 202
  sighandler_t old_handler;
  old_handler = signal(SIGCHLD, SIG_DFL);

D
dongdaxiang 已提交
203
  fcntl(parent_end, F_SETFD, FD_CLOEXEC);
G
gongweibao 已提交
204 205 206 207 208 209 210

  int child_pid = shell_popen_fork_internal(
      real_cmd.c_str(), do_read, parent_end, child_end, redirect_stderr);

  close(child_end);

  FILE* fp = NULL;
D
dongdaxiang 已提交
211 212
  if ((fp = fdopen(parent_end, mode.c_str())) == NULL) {
    *err_no = -1;
G
gongweibao 已提交
213
    signal(SIGCHLD, old_handler);
D
dongdaxiang 已提交
214 215 216
    return NULL;
  }

G
gongweibao 已提交
217 218 219
  return {fp, [cmd, child_pid, old_handler, err_no, status](FILE* fp) {
            VLOG(3) << "Closing pipe[" << cmd << "]";
            if (fclose(fp)) {
D
dongdaxiang 已提交
220 221
              *err_no = -1;
            }
G
gongweibao 已提交
222

D
dongdaxiang 已提交
223
            int wstatus = -1;
G
gongweibao 已提交
224 225
            // don't do this before parent read data from child pipe
            // or when get the large data, it will hang!
D
dongdaxiang 已提交
226
            waitpid(child_pid, &wstatus, 0);
G
gongweibao 已提交
227 228 229 230 231 232

            if (status) {
              *status = wstatus;
            }

            if (WIFEXITED(wstatus) || wstatus == (128 + SIGPIPE) * 256) {
D
dongdaxiang 已提交
233
            } else {
G
gongweibao 已提交
234 235 236
              PADDLE_ENFORCE_NE(
                  errno, ECHILD,
                  platform::errors::Fatal("Must not be ECHILD errno here!"));
D
dongdaxiang 已提交
237 238
              *err_no = -1;
            }
G
gongweibao 已提交
239 240

            signal(SIGCHLD, old_handler);
D
dongdaxiang 已提交
241
          }};
242
#endif
D
dongdaxiang 已提交
243 244 245 246
}

// Forks the shell child used by shell_p2open().
//
//   pipein_fds   pipe the parent reads from: the child's stdout is wired to
//                pipein_fds[1]
//   pipeout_fds  pipe the parent writes to: the child's stdin is wired to
//                pipeout_fds[0]
//
// Returns the child's pid in the parent, -1 if fork() fails, and 0 on
// platforms where the body is compiled out. The child never returns: it
// either execs the shell or terminates with _exit(127).
//
// NOTE(review): this helper always execs /bin/sh, while
// shell_popen_fork_internal uses /bin/bash unless PADDLE_WITH_MUSL is
// defined -- confirm the difference is intentional.
static int shell_p2open_fork_internal(const char* real_cmd, int pipein_fds[2],
                                      int pipeout_fds[2]) {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
  return 0;
#else
  const int child_pid = fork();
  if (child_pid < 0) {
    return -1;
  }
  if (child_pid != 0) {
    return child_pid;
  }

  // ---- child ----
  // From here on every failure must end the process with _exit(): returning
  // would let the child continue executing the parent's code, and exit()
  // would run atexit handlers and flush stdio buffers inherited from the
  // parent.
  close(pipein_fds[0]);
  close(pipeout_fds[1]);

  if (pipein_fds[1] != 1) {
    if (dup2(pipein_fds[1], 1) != 1) {
      _exit(127);
    }
    close(pipein_fds[1]);
  }

  if (pipeout_fds[0] != 0) {
    if (dup2(pipeout_fds[0], 0) != 0) {
      _exit(127);
    }
    close(pipeout_fds[0]);
  }

  close_open_fds_internal();

  // The variadic argument list must end with a null pointer of type char*.
  execl("/bin/sh", "sh", "-c", real_cmd, static_cast<char*>(nullptr));
  // Only reached when execl() failed.
  _exit(127);
#endif
}

// Runs `cmd` in a child shell (prefixed with "set -o pipefail; ") with both
// of its standard streams piped to the caller.
//
// Returns {in, out}: `in` reads the child's stdout, `out` writes to the
// child's stdin. Returns a pair of null pointers if a pipe or the child
// process cannot be created, and an empty pair on platforms where the body
// is compiled out.
//
// Both streams share ownership of a waiter object; once the last of the two
// streams is released, the waiter blocks in waitpid() for the child and
// PCHECKs that it ended with status 0 or with (128 + SIGPIPE) * 256.
std::pair<std::shared_ptr<FILE>, std::shared_ptr<FILE>> shell_p2open(
    const std::string& cmd) {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
  return {};
#else
  if (shell_verbose()) {
    LOG(INFO) << "Opening bidirectional pipe[" << cmd << "]";
  }

  std::string real_cmd = "set -o pipefail; " + cmd;

  int pipein_fds[2];
  int pipeout_fds[2];
  if (pipe(pipein_fds) != 0) {
    return {nullptr, nullptr};
  }
  if (pipe(pipeout_fds) != 0) {
    // Do not leak the first pipe when the second one cannot be created.
    close(pipein_fds[0]);
    close(pipein_fds[1]);
    return {nullptr, nullptr};
  }

  const int child_pid =
      shell_p2open_fork_internal(real_cmd.c_str(), pipein_fds, pipeout_fds);
  if (child_pid < 0) {
    // No child exists. Waiting on pid -1 later would reap an arbitrary,
    // unrelated child, so release the pipes and report failure instead.
    close(pipein_fds[0]);
    close(pipein_fds[1]);
    close(pipeout_fds[0]);
    close(pipeout_fds[1]);
    return {nullptr, nullptr};
  }

  // The parent keeps pipein_fds[0] (read) and pipeout_fds[1] (write).
  close(pipein_fds[1]);
  close(pipeout_fds[0]);
  fcntl(pipein_fds[0], F_SETFD, FD_CLOEXEC);
  fcntl(pipeout_fds[1], F_SETFD, FD_CLOEXEC);

  // Owns no object; exists only so that its deleter runs once both streams
  // below have been released.
  std::shared_ptr<int> child_life(
      nullptr, [child_pid, cmd](void*) {
        if (shell_verbose()) {
          LOG(INFO) << "Closing bidirectional pipe[" << cmd << "]";
        }

        int wstatus = -1;
        int ret = 0;

        // Retry waitpid() while it is interrupted by a signal.
        do {
          PCHECK((ret = waitpid(child_pid, &wstatus, 0)) >= 0 ||
                 (ret == -1 && errno == EINTR));
        } while (ret == -1 && errno == EINTR);

        PCHECK(wstatus == 0 || wstatus == (128 + SIGPIPE) * 256 ||
               (wstatus == -1 && errno == ECHILD))
            << "status[" << wstatus << "], cmd[" << cmd << "]";
      });

  FILE* in_fp;
  PCHECK((in_fp = fdopen(pipein_fds[0], "r")) != NULL);
  FILE* out_fp;
  PCHECK((out_fp = fdopen(pipeout_fds[1], "w")) != NULL);

  std::shared_ptr<FILE> in_stream(
      in_fp, [child_life](FILE* stream) { PCHECK(fclose(stream) == 0); });
  std::shared_ptr<FILE> out_stream(
      out_fp, [child_life](FILE* stream) { PCHECK(fclose(stream) == 0); });
  return {in_stream, out_stream};
#endif
}

#if !(defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM))
// Folds a pipe-level error code and a waitpid()-style `status` into a single
// result: a non-zero `err_no` is returned unchanged; otherwise the result is
// the child's exit code when it exited normally, and -1 when it did not.
static int _get_err_no(int err_no, int status) {
  if (err_no != 0) {
    return err_no;
  }
  return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
#endif

static int _shell_execute_cmd(const std::string& cmd, std::string* output,
                              int time_out, int sleep_inter,
                              bool redirect_stderr = false) {
#if defined(_WIN32) || defined(__APPLE__) || defined(PADDLE_ARM)
362 363 364
  PADDLE_THROW(platform::errors::Unimplemented(
      "This function(shell_get_command_output) is not implemented under _WIN32 "
      "or __APPLE__."));
D
dongdaxiang 已提交
365
#else
D
dongdaxiang 已提交
366
  int err_no = 0;
G
gongweibao 已提交
367 368
  int status = 0;
  int cmd_status = 0;
369
  platform::Timer timer;
D
dongdaxiang 已提交
370
  do {
G
gongweibao 已提交
371 372
    VLOG(3) << "exec cmd:[" << cmd << "]";

D
dongdaxiang 已提交
373
    err_no = 0;
G
gongweibao 已提交
374 375 376
    status = 0;
    *output = "";
    auto pipe = shell_popen(cmd, "r", &err_no, &status, redirect_stderr);
D
dongdaxiang 已提交
377

378
    if (err_no == 0) {
G
gongweibao 已提交
379 380 381 382 383
      // read file
      err_no = read_from_pipe(&*pipe, output);
      if (err_no) {
        LOG(WARNING) << "status[" << status << "], cmd[" << cmd << "]"
                     << ", err_no[" << err_no << "]";
D
dongdaxiang 已提交
384
      }
385 386
    }

G
gongweibao 已提交
387 388 389 390 391 392 393 394 395 396 397
    // close file and etc.
    pipe = nullptr;
    if (err_no) {
      LOG(WARNING) << "status[" << status << "], cmd[" << cmd << "]"
                   << ", err_no[" << err_no << "]";
    }

    cmd_status = _get_err_no(err_no, status);
    // cmd run ok!
    if (cmd_status == 0) {
      return cmd_status;
D
dongdaxiang 已提交
398
    }
399

G
gongweibao 已提交
400
    // time out
401
    timer.Pause();
G
gongweibao 已提交
402 403
    if ((time_out > 0 && timer.ElapsedMS() >= time_out) || time_out == 0) {
      break;
404 405 406
    }
    timer.Resume();

G
gongweibao 已提交
407 408 409 410 411 412 413 414 415 416 417 418 419 420
    if (sleep_inter > 0) {
      usleep(sleep_inter * 1000);
    }
  } while (cmd_status);

  // log when check timeout!
  if (time_out != 0) {
    *output += string::Sprintf(
        " _shell_execute_cmd execute cmd:%s ElapsedMS:%d, err_no:%d status:%d",
        cmd, timer.ElapsedMS(), err_no, cmd_status);
    LOG(WARNING) << *output;
  }

  return cmd_status;
421

422
#endif
D
dongdaxiang 已提交
423
}
// Executes `cmd` via _shell_execute_cmd() and returns the text it collected.
// The command's result code is discarded; `time_out` and `sleep_inter` are
// passed through unchanged.
std::string shell_get_command_output(const std::string& cmd, int time_out,
                                     int sleep_inter) {
  std::string captured;
  static_cast<void>(
      _shell_execute_cmd(cmd, &captured, time_out, sleep_inter));
  return captured;
}

std::vector<std::string> shell_execute_cmd(const std::string& cmd, int time_out,
                                           int sleep_inter,
                                           bool redirect_stderr) {
  std::string output;
  int ret =
      _shell_execute_cmd(cmd, &output, time_out, sleep_inter, redirect_stderr);
  return std::vector<std::string>({string::Sprintf("%d", ret), output});
}

}  // end namespace framework
}  // end namespace paddle