提交 12291656 编写于 作者: E Eric Blake

save: let iohelper work on O_DIRECT fds

Required for a coming patch where iohelper will operate on O_DIRECT
fds.  There, the user-space memory must be aligned to file system
boundaries (at least 512 bytes, but using page-aligned works better,
and some file systems prefer 64k).  This is made tougher by the fact
that VIR_ALLOC won't work on void *, while posix_memalign won't work
on char * and isn't available everywhere.

This patch makes some simplifying assumptions - namely, output
to an O_DIRECT fd will only be attempted on an empty seekable
file (hence, no need to worry about preserving existing data
on a partial block, and ftruncate will work to undo the effects
of having to round up the size of the last block written), and
input from an O_DIRECT fd will only be attempted on a complete
seekable file with the only possible short read at EOF.

* configure.ac (AC_CHECK_FUNCS_ONCE): Check for posix_memalign.
* src/util/iohelper.c (runIO): Use aligned memory, and handle
quirks of O_DIRECT on last write.
上级 1eb66479
...@@ -121,9 +121,9 @@ AC_CHECK_SIZEOF([long]) ...@@ -121,9 +121,9 @@ AC_CHECK_SIZEOF([long])
dnl Availability of various common functions (non-fatal if missing), dnl Availability of various common functions (non-fatal if missing),
dnl and various less common threadsafe functions dnl and various less common threadsafe functions
AC_CHECK_FUNCS_ONCE([cfmakeraw regexec sched_getaffinity getuid getgid \ AC_CHECK_FUNCS_ONCE([cfmakeraw geteuid getgid getgrnam_r getmntent_r \
geteuid initgroups posix_fallocate mmap kill \ getpwuid_r getuid initgroups kill mmap posix_fallocate posix_memalign \
getmntent_r getgrnam_r getpwuid_r]) regexec sched_getaffinity])
dnl Availability of pthread functions (if missing, win32 threading is dnl Availability of pthread functions (if missing, win32 threading is
dnl assumed). Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE. dnl assumed). Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE.
......
...@@ -74,17 +74,32 @@ cleanup: ...@@ -74,17 +74,32 @@ cleanup:
static int static int
runIO(const char *path, int fd, int oflags, unsigned long long length) runIO(const char *path, int fd, int oflags, unsigned long long length)
{ {
char *buf = NULL; void *base = NULL; /* Location to be freed */
char *buf = NULL; /* Aligned location within base */
size_t buflen = 1024*1024; size_t buflen = 1024*1024;
intptr_t alignMask = 64*1024 - 1;
int ret = -1; int ret = -1;
int fdin, fdout; int fdin, fdout;
const char *fdinname, *fdoutname; const char *fdinname, *fdoutname;
unsigned long long total = 0; unsigned long long total = 0;
bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0);
bool shortRead = false; /* true if we hit a short read */
off_t end = 0;
if (VIR_ALLOC_N(buf, buflen) < 0) { #if HAVE_POSIX_MEMALIGN
if (posix_memalign(&base, alignMask + 1, buflen)) {
virReportOOMError(); virReportOOMError();
goto cleanup; goto cleanup;
} }
buf = base;
#else
if (VIR_ALLOC_N(buf, buflen + alignMask) < 0) {
virReportOOMError();
goto cleanup;
}
base = buf;
buf = (char *) (((intptr_t) base + alignMask) & alignMask);
#endif
switch (oflags & O_ACCMODE) { switch (oflags & O_ACCMODE) {
case O_RDONLY: case O_RDONLY:
...@@ -92,12 +107,26 @@ runIO(const char *path, int fd, int oflags, unsigned long long length) ...@@ -92,12 +107,26 @@ runIO(const char *path, int fd, int oflags, unsigned long long length)
fdinname = path; fdinname = path;
fdout = STDOUT_FILENO; fdout = STDOUT_FILENO;
fdoutname = "stdout"; fdoutname = "stdout";
/* To make the implementation simpler, we give up on any
* attempt to use O_DIRECT in a non-trivial manner. */
if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0 || length)) {
virReportSystemError(end < 0 ? errno : EINVAL, "%s",
_("O_DIRECT read needs entire seekable file"));
goto cleanup;
}
break; break;
case O_WRONLY: case O_WRONLY:
fdin = STDIN_FILENO; fdin = STDIN_FILENO;
fdinname = "stdin"; fdinname = "stdin";
fdout = fd; fdout = fd;
fdoutname = path; fdoutname = path;
/* To make the implementation simpler, we give up on any
* attempt to use O_DIRECT in a non-trivial manner. */
if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
virReportSystemError(end < 0 ? errno : EINVAL, "%s",
_("O_DIRECT write needs empty seekable file"));
goto cleanup;
}
break; break;
case O_RDWR: case O_RDWR:
...@@ -124,12 +153,29 @@ runIO(const char *path, int fd, int oflags, unsigned long long length) ...@@ -124,12 +153,29 @@ runIO(const char *path, int fd, int oflags, unsigned long long length)
} }
if (got == 0) if (got == 0)
break; /* End of file before end of requested data */ break; /* End of file before end of requested data */
if (got < buflen || (buflen & alignMask)) {
/* O_DIRECT can handle at most one short read, at end of file */
if (direct && shortRead) {
virReportSystemError(EINVAL, "%s",
_("Too many short reads for O_DIRECT"));
}
shortRead = true;
}
total += got; total += got;
if (fdout == fd && direct && shortRead) {
end = total;
memset(buf + got, 0, buflen - got);
got = (got + alignMask) & ~alignMask;
}
if (safewrite(fdout, buf, got) < 0) { if (safewrite(fdout, buf, got) < 0) {
virReportSystemError(errno, _("Unable to write %s"), fdoutname); virReportSystemError(errno, _("Unable to write %s"), fdoutname);
goto cleanup; goto cleanup;
} }
if (end && ftruncate(fd, end) < 0) {
virReportSystemError(errno, _("Unable to truncate %s"), fdoutname);
goto cleanup;
}
} }
ret = 0; ret = 0;
...@@ -141,7 +187,7 @@ cleanup: ...@@ -141,7 +187,7 @@ cleanup:
ret = -1; ret = -1;
} }
VIR_FREE(buf); VIR_FREE(base);
return ret; return ret;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册