- 15 6月, 2018 1 次提交
-
-
由 Davidlohr Bueso 提交于
Both smatch and coverity are reporting potential issues with spectre variant 1 with the 'semnum' index within the sma->sems array, ie: ipc/sem.c:388 sem_lock() warn: potential spectre issue 'sma->sems' ipc/sem.c:641 perform_atomic_semop_slow() warn: potential spectre issue 'sma->sems' ipc/sem.c:721 perform_atomic_semop() warn: potential spectre issue 'sma->sems' Avoid any possible speculation by using array_index_nospec() thus ensuring the semnum value is bounded to [0, sma->sem_nsems). With the exception of sem_lock() all of these are slowpaths. Link: http://lkml.kernel.org/r/20180423171131.njs4rfm2yzyeg6do@linux-n805Signed-off-by: NDavidlohr Bueso <dbueso@suse.de> Reported-by: NDan Carpenter <dan.carpenter@oracle.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: "Gustavo A. R. Silva" <gustavo@embeddedor.com> Cc: Manfred Spraul <manfred@colorfullife.com> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 13 6月, 2018 1 次提交
-
-
由 Kees Cook 提交于
The kvmalloc() function has a 2-factor argument form, kvmalloc_array(). This patch replaces cases of: kvmalloc(a * b, gfp) with: kvmalloc_array(a * b, gfp) as well as handling cases of: kvmalloc(a * b * c, gfp) with: kvmalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kvmalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kvmalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kvmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kvmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kvmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kvmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kvmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kvmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kvmalloc( - sizeof(u8) * COUNT + COUNT , ...) | kvmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kvmalloc( - sizeof(char) * COUNT + COUNT , ...) | kvmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kvmalloc + kvmalloc_array ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kvmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kvmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kvmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kvmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kvmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kvmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kvmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kvmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kvmalloc(C1 * C2 * C3, ...) | kvmalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kvmalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kvmalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kvmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kvmalloc(sizeof(THING) * C2, ...) | kvmalloc(sizeof(TYPE) * C2, ...) | kvmalloc(C1 * C2 * C3, ...) | kvmalloc(C1 * C2, ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - (E1) * E2 + E1, E2 , ...) | - kvmalloc + kvmalloc_array ( - (E1) * (E2) + E1, E2 , ...) | - kvmalloc + kvmalloc_array ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: NKees Cook <keescook@chromium.org>
-
- 20 4月, 2018 4 次提交
-
-
由 Arnd Bergmann 提交于
Three ipc syscalls (mq_timedsend, mq_timedreceive and and semtimedop) take a timespec argument. After we move 32-bit architectures over to useing 64-bit time_t based syscalls, we need seperate entry points for the old 32-bit based interfaces. This changes the #ifdef guards for the existing 32-bit compat syscalls to check for CONFIG_COMPAT_32BIT_TIME instead, which will then be enabled on all existing 32-bit architectures. Signed-off-by: NArnd Bergmann <arnd@arndb.de>
-
由 Arnd Bergmann 提交于
This is a preparatation for changing over __kernel_timespec to 64-bit times, which involves assigning new system call numbers for mq_timedsend(), mq_timedreceive() and semtimedop() for compatibility with future y2038 proof user space. The existing ABIs will remain available through compat code. Signed-off-by: NArnd Bergmann <arnd@arndb.de>
-
由 Arnd Bergmann 提交于
The shmid64_ds/semid64_ds/msqid64_ds data structures have been extended to contain extra fields for storing the upper bits of the time stamps, this patch does the other half of the job and and fills the new fields on 32-bit architectures as well as 32-bit tasks running on a 64-bit kernel in compat mode. There should be no change for native 64-bit tasks. Signed-off-by: NArnd Bergmann <arnd@arndb.de>
-
由 Arnd Bergmann 提交于
In some places, we still used get_seconds() instead of ktime_get_real_seconds(), and I'm changing the remaining ones now to all use ktime_get_real_seconds() so we use the full available range for timestamps instead of overflowing the 'unsigned long' return value in year 2106 on 32-bit kernels. Signed-off-by: NArnd Bergmann <arnd@arndb.de>
-
- 12 4月, 2018 1 次提交
-
-
由 Davidlohr Bueso 提交于
There is a permission discrepancy when consulting shm ipc object metadata between /proc/sysvipc/sem (0444) and the SEM_STAT semctl command. The later does permission checks for the object vs S_IRUGO. As such there can be cases where EACCESS is returned via syscall but the info is displayed anyways in the procfs files. While this might have security implications via info leaking (albeit no writing to the sma metadata), this behavior goes way back and showing all the objects regardless of the permissions was most likely an overlook - so we are stuck with it. Furthermore, modifying either the syscall or the procfs file can cause userspace programs to break (ie ipcs). Some applications require getting the procfs info (without root privileges) and can be rather slow in comparison with a syscall -- up to 500x in some reported cases for shm. This patch introduces a new SEM_STAT_ANY command such that the sem ipc object permissions are ignored, and only audited instead. In addition, I've left the lsm security hook checks in place, as if some policy can block the call, then the user has no other choice than just parsing the procfs file. Link: http://lkml.kernel.org/r/20180215162458.10059-3-dave@stgolabs.netSigned-off-by: NDavidlohr Bueso <dbueso@suse.de> Reported-by: NRobert Kettler <robert.kettler@outlook.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Kees Cook <keescook@chromium.org> Cc: Manfred Spraul <manfred@colorfullife.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Michal Hocko <mhocko@kernel.org> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 03 4月, 2018 3 次提交
-
-
由 Dominik Brodowski 提交于
Provide ksys_semctl() and compat_ksys_semctl() wrappers to avoid in-kernel calls to these syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_semctl() and compat_sys_semctl(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: NDominik Brodowski <linux@dominikbrodowski.net>
-
由 Dominik Brodowski 提交于
Provide ksys_semget() wrapper to avoid in-kernel calls to this syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_semget(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: NDominik Brodowski <linux@dominikbrodowski.net>
-
由 Dominik Brodowski 提交于
Provide ksys_semtimedop() and compat_ksys_semtimedop() wrappers to avoid in-kernel calls to these syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_semtimedop() and compat_sys_semtimedop(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: NDominik Brodowski <linux@dominikbrodowski.net>
-
- 28 3月, 2018 2 次提交
-
-
由 Eric W. Biederman 提交于
After the last round of cleanups the shm, sem, and msg associate operations just became trivial wrappers around the appropriate security method. Simplify things further by just calling the security method directly. Signed-off-by: N"Eric W. Biederman" <ebiederm@xmission.com>
-
由 Eric W. Biederman 提交于
Today the last process to update a semaphore is remembered and reported in the pid namespace of that process. If there are processes in any other pid namespace querying that process id with GETPID the result will be unusable nonsense as it does not make any sense in your own pid namespace. Due to ipc_update_pid I don't think you will be able to get System V ipc semaphores into a troublesome cache line ping-pong. Using struct pids from separate process are not a problem because they do not share a cache line. Using struct pid from different threads of the same process are unlikely to be a problem as the reference count update can be avoided. Further linux futexes are a much better tool for the job of mutual exclusion between processes than System V semaphores. So I expect programs that are performance limited by their interprocess mutual exclusion primitive will be using futexes. So while it is possible that enhancing the storage of the last rocess of a System V semaphore from an integer to a struct pid will cause a performance regression because of the effect of frequently updating the pid reference count. I don't expect that to happen in practice. This change updates semctl(..., GETPID, ...) to return the process id of the last process to update a semphore inthe pid namespace of the calling process. Fixes: b488893a ("pid namespaces: changes to show virtual ids to user") Signed-off-by: N"Eric W. Biederman" <ebiederm@xmission.com>
-
- 23 3月, 2018 2 次提交
-
-
由 Eric W. Biederman 提交于
All of the users are now in ipc/sem.c so make the definitions local to that file to make code maintenance easier. AKA to prevent rebuilding the entire kernel when one of these files is changed. Signed-off-by: N"Eric W. Biederman" <ebiederm@xmission.com>
-
由 Eric W. Biederman 提交于
All of the implementations of security hooks that take sem_array only access sem_perm the struct kern_ipc_perm member. This means the dependencies of the sem security hooks can be simplified by passing the kern_ipc_perm member of sem_array. Making this change will allow struct sem and struct sem_array to become private to ipc/sem.c. Signed-off-by: N"Eric W. Biederman" <ebiederm@xmission.com>
-
- 07 2月, 2018 1 次提交
-
-
由 Philippe Mikoyan 提交于
As described in the title, this patch fixes <ipc>id_ds inconsistency when <ipc>ctl_stat executes concurrently with some ds-changing function, e.g. shmat, msgsnd or whatever. For instance, if shmctl(IPC_STAT) is running concurrently with shmat, following data structure can be returned: {... shm_lpid = 0, shm_nattch = 1, ...} Link: http://lkml.kernel.org/r/20171202153456.6514-1-philippe.mikoyan@skat.systemsSigned-off-by: NPhilippe Mikoyan <philippe.mikoyan@skat.systems> Reviewed-by: NDavidlohr Bueso <dbueso@suse.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Manfred Spraul <manfred@colorfullife.com> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 18 11月, 2017 1 次提交
-
-
由 Davidlohr Bueso 提交于
The comment in msgqueues when using ipc_addid() is quite useful imo. Duplicate it for shm and semaphores. Link: http://lkml.kernel.org/r/20170831172049.14576-3-dave@stgolabs.netSigned-off-by: NDavidlohr Bueso <dbueso@suse.de> Cc: Manfred Spraul <manfred@colorfullife.com> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 02 11月, 2017 1 次提交
-
-
由 Greg Kroah-Hartman 提交于
Many source files in the tree are missing licensing information, which makes it harder for compliance tools to determine the correct license. By default all files without license information are under the default license of the kernel, which is GPL version 2. Update the files which contain no license information with the 'GPL-2.0' SPDX license identifier. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. How this work was done: Patches were generated and checked against linux-4.14-rc6 for a subset of the use cases: - file had no licensing information it it. - file was a */uapi/* one with no licensing information in it, - file was a */uapi/* one with existing licensing information, Further patches will be generated in subsequent months to fix up cases where non-standard license headers were used, and references to license had to be inferred by heuristics based on keywords. The analysis to determine which SPDX License Identifier to be applied to a file was done in a spreadsheet of side by side results from of the output of two independent scanners (ScanCode & Windriver) producing SPDX tag:value files created by Philippe Ombredanne. Philippe prepared the base worksheet, and did an initial spot review of a few 1000 files. The 4.13 kernel was the starting point of the analysis with 60,537 files assessed. Kate Stewart did a file by file comparison of the scanner results in the spreadsheet to determine which SPDX license identifier(s) to be applied to the file. She confirmed any determination that was not immediately clear with lawyers working with the Linux Foundation. Criteria used to select files for SPDX license identifier tagging was: - Files considered eligible had to be source code files. - Make and config files were included as candidates if they contained >5 lines of source - File already had some variant of a license header in it (even if <5 lines). All documentation files were explicitly excluded. The following heuristics were used to determine which SPDX license identifiers to apply. - when both scanners couldn't find any license traces, file was considered to have no license information in it, and the top level COPYING file license applied. For non */uapi/* files that summary was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 11139 and resulted in the first patch in this series. If that file was a */uapi/* path one, it was "GPL-2.0 WITH Linux-syscall-note" otherwise it was "GPL-2.0". Results of that was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 WITH Linux-syscall-note 930 and resulted in the second patch in this series. - if a file had some form of licensing information in it, and was one of the */uapi/* ones, it was denoted with the Linux-syscall-note if any GPL family license was found in the file or had no licensing in it (per prior point). Results summary: SPDX license identifier # files ---------------------------------------------------|------ GPL-2.0 WITH Linux-syscall-note 270 GPL-2.0+ WITH Linux-syscall-note 169 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) 21 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) 17 LGPL-2.1+ WITH Linux-syscall-note 15 GPL-1.0+ WITH Linux-syscall-note 14 ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) 5 LGPL-2.0+ WITH Linux-syscall-note 4 LGPL-2.1 WITH Linux-syscall-note 3 ((GPL-2.0 WITH Linux-syscall-note) OR MIT) 3 ((GPL-2.0 WITH Linux-syscall-note) AND MIT) 1 and that resulted in the third patch in this series. - when the two scanners agreed on the detected license(s), that became the concluded license(s). - when there was disagreement between the two scanners (one detected a license but the other didn't, or they both detected different licenses) a manual inspection of the file occurred. - In most cases a manual inspection of the information in the file resulted in a clear resolution of the license that should apply (and which scanner probably needed to revisit its heuristics). - When it was not immediately clear, the license identifier was confirmed with lawyers working with the Linux Foundation. - If there was any question as to the appropriate license identifier, the file was flagged for further research and to be revisited later in time. In total, over 70 hours of logged manual review was done on the spreadsheet to determine the SPDX license identifiers to apply to the source files by Kate, Philippe, Thomas and, in some cases, confirmation by lawyers working with the Linux Foundation. Kate also obtained a third independent scan of the 4.13 code base from FOSSology, and compared selected files where the other two scanners disagreed against that SPDX file, to see if there was new insights. The Windriver scanner is based on an older version of FOSSology in part, so they are related. Thomas did random spot checks in about 500 files from the spreadsheets for the uapi headers and agreed with SPDX license identifier in the files he inspected. For the non-uapi files Thomas did random spot checks in about 15000 files. In initial set of patches against 4.14-rc6, 3 files were found to have copy/paste license identifier errors, and have been fixed to reflect the correct identifier. Additionally Philippe spent 10 hours this week doing a detailed manual inspection and review of the 12,461 patched files from the initial patch version early this week with: - a full scancode scan run, collecting the matched texts, detected license ids and scores - reviewing anything where there was a license detected (about 500+ files) to ensure that the applied SPDX license was correct - reviewing anything where there was no detection but the patch license was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied SPDX license was correct This produced a worksheet with 20 files needing minor correction. This worksheet was then exported into 3 different .csv files for the different types of files to be modified. These .csv files were then reviewed by Greg. Thomas wrote a script to parse the csv files and add the proper SPDX tag to the file, in the format that the file expected. This script was further refined by Greg based on the output to detect more types of files automatically and to distinguish between header and source .c files (which need different comment types.) Finally Greg ran the script using the .csv files to generate the patches. Reviewed-by: NKate Stewart <kstewart@linuxfoundation.org> Reviewed-by: NPhilippe Ombredanne <pombredanne@nexb.com> Reviewed-by: NThomas Gleixner <tglx@linutronix.de> Signed-off-by: NGreg Kroah-Hartman <gregkh@linuxfoundation.org>
-
- 12 10月, 2017 1 次提交
-
-
由 Linus Torvalds 提交于
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org> Signed-off-by: NAl Viro <viro@zeniv.linux.org.uk>
-
- 09 9月, 2017 4 次提交
-
-
由 Guillaume Knispel 提交于
ipc_findkey() used to scan all objects to look for the wanted key. This is slow when using a high number of keys. This change adds an rhashtable of kern_ipc_perm objects in ipc_ids, so that one lookup cease to be O(n). This change gives a 865% improvement of benchmark reaim.jobs_per_min on a 56 threads Intel(R) Xeon(R) CPU E5-2695 v3 @ 2.30GHz with 256G memory [1] Other (more micro) benchmark results, by the author: On an i5 laptop, the following loop executed right after a reboot took, without and with this change: for (int i = 0, k=0x424242; i < KEYS; ++i) semget(k++, 1, IPC_CREAT | 0600); total total max single max single KEYS without with call without call with 1 3.5 4.9 µs 3.5 4.9 10 7.6 8.6 µs 3.7 4.7 32 16.2 15.9 µs 4.3 5.3 100 72.9 41.8 µs 3.7 4.7 1000 5,630.0 502.0 µs * * 10000 1,340,000.0 7,240.0 µs * * 31900 17,600,000.0 22,200.0 µs * * *: unreliable measure: high variance The duration for a lookup-only usage was obtained by the same loop once the keys are present: total total max single max single KEYS without with call without call with 1 2.1 2.5 µs 2.1 2.5 10 4.5 4.8 µs 2.2 2.3 32 13.0 10.8 µs 2.3 2.8 100 82.9 25.1 µs * 2.3 1000 5,780.0 217.0 µs * * 10000 1,470,000.0 2,520.0 µs * * 31900 17,400,000.0 7,810.0 µs * * Finally, executing each semget() in a new process gave, when still summing only the durations of these syscalls: creation: total total KEYS without with 1 3.7 5.0 µs 10 32.9 36.7 µs 32 125.0 109.0 µs 100 523.0 353.0 µs 1000 20,300.0 3,280.0 µs 10000 2,470,000.0 46,700.0 µs 31900 27,800,000.0 219,000.0 µs lookup-only: total total KEYS without with 1 2.5 2.7 µs 10 25.4 24.4 µs 32 106.0 72.6 µs 100 591.0 352.0 µs 1000 22,400.0 2,250.0 µs 10000 2,510,000.0 25,700.0 µs 31900 28,200,000.0 115,000.0 µs [1] http://lkml.kernel.org/r/20170814060507.GE23258@yexl-desktop Link: http://lkml.kernel.org/r/20170815194954.ck32ta2z35yuzpwp@debixSigned-off-by: NGuillaume Knispel <guillaume.knispel@supersonicimagine.com> Reviewed-by: NMarc Pardo <marc.pardo@supersonicimagine.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Kees Cook <keescook@chromium.org> Cc: Manfred Spraul <manfred@colorfullife.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Serge Hallyn <serge@hallyn.com> Cc: Andrey Vagin <avagin@openvz.org> Cc: Guillaume Knispel <guillaume.knispel@supersonicimagine.com> Cc: Marc Pardo <marc.pardo@supersonicimagine.com> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Davidlohr Bueso 提交于
Replacing semop()'s kmalloc for kvmalloc was originally proposed by Manfred on the premise that it can be called for large (than order-1) sizes. For example, while Oracle recommends setting SEMOPM to a _minimum_ of 100, some distros[1] encourage the setting to be a factor of the amount of db tasks (PROCESSES), which can get fishy for large systems (easily going beyond 1000). [1] An Example of Semaphore Settings https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/5/html/Tuning_and_Optimizing_Red_Hat_Enterprise_Linux_for_Oracle_9i_and_10g_Databases/sect-Oracle_9i_and_10g_Tuning_Guide-Setting_Semaphores-An_Example_of_Semaphore_Settings.html So let's just convert this to kvmalloc, just like the rest of the allocations we do in ipc. While the fallback vmalloc obviously involves more overhead, this by far the uncommon path, and it's better for the user than just erroring out with kmalloc. Link: http://lkml.kernel.org/r/20170803184136.13855-2-dave@stgolabs.netSigned-off-by: NDavidlohr Bueso <dbueso@suse.de> Cc: Manfred Spraul <manfred@colorfullife.com> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Davidlohr Bueso 提交于
... 'tis not used. Link: http://lkml.kernel.org/r/20170803184136.13855-1-dave@stgolabs.netSigned-off-by: NDavidlohr Bueso <dbueso@suse.de> Cc: Manfred Spraul <manfred@colorfullife.com> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Elena Reshetova 提交于
refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Link: http://lkml.kernel.org/r/1499417992-3238-3-git-send-email-elena.reshetova@intel.comSigned-off-by: NElena Reshetova <elena.reshetova@intel.com> Signed-off-by: NHans Liljestrand <ishkamiel@gmail.com> Signed-off-by: NKees Cook <keescook@chromium.org> Signed-off-by: NDavid Windsor <dwindsor@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: Serge Hallyn <serge@hallyn.com> Cc: <arozansk@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Manfred Spraul <manfred@colorfullife.com> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 04 9月, 2017 2 次提交
-
-
由 Deepa Dinamani 提交于
time_t is not y2038 safe. Replace all uses of time_t by y2038 safe time64_t. Similarly, replace the calls to get_seconds() with y2038 safe ktime_get_real_seconds(). Note that this preserves fast access on 64 bit systems, but 32 bit systems need sequence counters. The syscall interface themselves are not changed as part of the patch. They will be part of a different series. Signed-off-by: NDeepa Dinamani <deepa.kernel@gmail.com> Reviewed-by: NArnd Bergmann <arnd@arndb.de> Signed-off-by: NAl Viro <viro@zeniv.linux.org.uk>
-
由 Deepa Dinamani 提交于
struct timespec is not y2038 safe on 32 bit machines. Replace timespec with y2038 safe struct timespec64. Note that the patch only changes the internals without modifying the syscall interface. This will be part of a separate series. Signed-off-by: NDeepa Dinamani <deepa.kernel@gmail.com> Reviewed-by: NArnd Bergmann <arnd@arndb.de> Signed-off-by: NAl Viro <viro@zeniv.linux.org.uk>
-
- 17 8月, 2017 1 次提交
-
-
由 Paul E. McKenney 提交于
There is no agreed-upon definition of spin_unlock_wait()'s semantics, and it appears that all callers could do just as well with a lock/unlock pair. This commit therefore replaces the spin_unlock_wait() call in exit_sem() with spin_lock() followed immediately by spin_unlock(). This should be safe from a performance perspective because exit_sem() is rarely invoked in production. Signed-off-by: NPaul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Andrea Parri <parri.andrea@gmail.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: NManfred Spraul <manfred@colorfullife.com>
-
- 03 8月, 2017 1 次提交
-
-
由 Kees Cook 提交于
When building with the randstruct gcc plugin, the layout of the IPC structs will be randomized, which requires any sub-structure accesses to use container_of(). The proc display handlers were missing the needed container_of()s since the iterator is passing in the top-level struct kern_ipc_perm. This would lead to crashes when running the "lsipc" program after the system had IPC registered (e.g. after starting up Gnome): general protection fault: 0000 [#1] PREEMPT SMP ... RIP: 0010:shm_add_rss_swap.isra.1+0x13/0xa0 ... Call Trace: sysvipc_shm_proc_show+0x5e/0x150 sysvipc_proc_show+0x1a/0x30 seq_read+0x2e9/0x3f0 ... Link: http://lkml.kernel.org/r/20170730205950.GA55841@beast Fixes: 3859a271 ("randstruct: Mark various structs for randomization") Signed-off-by: NKees Cook <keescook@chromium.org> Reported-by: NDominik Brodowski <linux@dominikbrodowski.net> Acked-by: NDavidlohr Bueso <dave@stgolabs.net> Acked-by: NManfred Spraul <manfred@colorfullife.com> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 16 7月, 2017 3 次提交
-
-
由 Al Viro 提交于
... and finally kill the sodding compat_convert_timespec() Signed-off-by: NAl Viro <viro@zeniv.linux.org.uk>
-
由 Al Viro 提交于
Signed-off-by: NAl Viro <viro@zeniv.linux.org.uk>
-
由 Al Viro 提交于
Signed-off-by: NAl Viro <viro@zeniv.linux.org.uk>
-
- 13 7月, 2017 8 次提交
-
-
由 Kees Cook 提交于
The remaining users of __sem_free() can simply call kvfree() instead for better readability. [manfred@colorfullife.com: Rediff to keep rcu protection for security_sem_alloc()] Link: http://lkml.kernel.org/r/20170525185107.12869-20-manfred@colorfullife.comSigned-off-by: NKees Cook <keescook@chromium.org> Signed-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Kees Cook 提交于
Only after ipc_addid() has succeeded will refcounting be used, so move initialization into ipc_addid() and remove from open-coded *_alloc() routines. Link: http://lkml.kernel.org/r/20170525185107.12869-17-manfred@colorfullife.comSigned-off-by: NKees Cook <keescook@chromium.org> Signed-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Manfred Spraul 提交于
Loosely based on a patch from Kees Cook <keescook@chromium.org>: - id and retval can be merged - if ipc_addid() fails, then use call_rcu() directly. The difference is that call_rcu is used for failed ipc_addid() calls, to continue to guaranteed an rcu delay for security_sem_free(). Link: http://lkml.kernel.org/r/20170525185107.12869-14-manfred@colorfullife.comSigned-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Kees Cook <keescook@chromium.org> Cc: Davidlohr Bueso <dave@stgolabs.net> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Kees Cook 提交于
Instead of using ipc_rcu_alloc() which only performs the refcount bump, open code it to perform better sem-specific checks. This also allows for sem_array structure layout to be randomized in the future. [manfred@colorfullife.com: Rediff, because the memset was temporarily inside ipc_rcu_alloc()] Link: http://lkml.kernel.org/r/20170525185107.12869-10-manfred@colorfullife.comSigned-off-by: NKees Cook <keescook@chromium.org> Signed-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Kees Cook 提交于
Avoid using ipc_rcu_free, since it just re-finds the original structure pointer. For the pre-list-init failure path, there is no RCU needed, since it was just allocated. It can be directly freed. Link: http://lkml.kernel.org/r/20170525185107.12869-6-manfred@colorfullife.comSigned-off-by: NKees Cook <keescook@chromium.org> Signed-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Kees Cook 提交于
The only users of ipc_alloc() were ipc_rcu_alloc() and the on-heap sem_io fall-back memory. Better to just open-code these to make things easier to read. [manfred@colorfullife.com: Rediff due to inclusion of memset() into ipc_rcu_alloc()] Link: http://lkml.kernel.org/r/20170525185107.12869-5-manfred@colorfullife.comSigned-off-by: NKees Cook <keescook@chromium.org> Signed-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Manfred Spraul 提交于
ipc has two management structures that exist for every id: - struct kern_ipc_perm, it contains e.g. the permissions. - struct ipc_rcu, it contains the rcu head for rcu handling and the refcount. The patch merges both structures. As a bonus, we may save one cacheline, because both structures are cacheline aligned. In addition, it reduces the number of casts, instead most codepaths can use container_of. To simplify code, the ipc_rcu_alloc initializes the allocation to 0. [manfred@colorfullife.com: really include the memset() into ipc_alloc_rcu()] Link: http://lkml.kernel.org/r/564f8612-0601-b267-514f-a9f650ec9b32@colorfullife.com Link: http://lkml.kernel.org/r/20170525185107.12869-3-manfred@colorfullife.comSigned-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Kees Cook <keescook@chromium.org> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Manfred Spraul 提交于
sma->sem_base is initialized with sma->sem_base = (struct sem *) &sma[1]; The current code has four problems: - There is an unnecessary pointer dereference - sem_base is not needed. - Alignment for struct sem only works by chance. - The current code causes false positive for static code analysis. - This is a cast between different non-void types, which the future randstruct GCC plugin warns on. And, as bonus, the code size gets smaller: Before: 0 .text 00003770 After: 0 .text 0000374e [manfred@colorfullife.com: s/[0]/[]/, per hch] Link: http://lkml.kernel.org/r/20170525185107.12869-2-manfred@colorfullife.com Link: http://lkml.kernel.org/r/20170515171912.6298-2-manfred@colorfullife.comSigned-off-by: NManfred Spraul <manfred@colorfullife.com> Acked-by: NKees Cook <keescook@chromium.org> Cc: Kees Cook <keescook@chromium.org> Cc: <1vier1@web.de> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Ingo Molnar <mingo@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Fabian Frederick <fabf@skynet.be> Cc: Christoph Hellwig <hch@infradead.org> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 02 3月, 2017 1 次提交
-
-
由 Ingo Molnar 提交于
We are going to split <linux/sched/wake_q.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder <linux/sched/wake_q.h> file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: NLinus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: NIngo Molnar <mingo@kernel.org>
-
- 28 2月, 2017 2 次提交
-
-
由 Manfred Spraul 提交于
sysv sem has two lock modes: One with per-semaphore locks, one lock mode with a single global lock for the whole array. When switching from the per-semaphore locks to the global lock, all per-semaphore locks must be scanned for ongoing operations. The patch adds a hysteresis for switching from the global lock to the per semaphore locks. This reduces how often the per-semaphore locks must be scanned. Compared to the initial patch, this is a simplified solution: Setting USE_GLOBAL_LOCK_HYSTERESIS to 1 restores the current behavior. In theory, a workload with exactly 10 simple sops and then one complex op now scales a bit worse, but this is pure theory: If there is concurrency, the it won't be exactly 10:1:10:1:10:1:... If there is no concurrency, then there is no need for scalability. Link: http://lkml.kernel.org/r/1476851896-3590-3-git-send-email-manfred@colorfullife.comSigned-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: H. Peter Anvin <hpa@zytor.com> Cc: <1vier1@web.de> Cc: kernel test robot <xiaolong.ye@intel.com> Cc: <felixh@informatik.uni-bremen.de> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Manfred Spraul 提交于
a) The ACQUIRE in spin_lock() applies to the read, not to the store, at least for powerpc. This forces to add a smp_mb() into the fast path. b) The memory barrier provided by spin_unlock_wait() is right now arch dependent. Therefore: Use spin_lock()/spin_unlock() instead of spin_unlock_wait(). Advantage: faster single op semop calls(), observed +8.9% on x86. (the other solution would be arch dependencies in ipc/sem). Disadvantage: slower complex op semop calls, if (and only if) there are no sleeping operations. The next patch adds hysteresis, this further reduces the probability that the slow path is used. Link: http://lkml.kernel.org/r/1476851896-3590-2-git-send-email-manfred@colorfullife.comSigned-off-by: NManfred Spraul <manfred@colorfullife.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: H. Peter Anvin <hpa@zytor.com> Cc: <1vier1@web.de> Cc: kernel test robot <xiaolong.ye@intel.com> Cc: <felixh@informatik.uni-bremen.de> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-